amatch 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +3 -0
- data/README +116 -11
- data/Rakefile +57 -29
- data/VERSION +1 -1
- data/amatch.gemspec +31 -0
- data/lib/amatch.so +0 -0
- data/lib/amatch/version.rb +1 -1
- metadata +19 -16
- data/doc-main.txt +0 -115
data/CHANGES
CHANGED
data/README
CHANGED
@@ -1,25 +1,130 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
== amatch - Approximate Matching Extension for Ruby
|
2
|
+
|
3
|
+
=== Description
|
4
|
+
|
5
|
+
This is a collection of classes that can be used for Approximate
|
6
|
+
matching, searching, and comparing of Strings. They implement algorithms
|
7
|
+
that compute the Levenshtein edit distance, Sellers edit distance, the
|
8
|
+
Hamming distance, the longest common subsequence length, the longest common
|
9
|
+
substring length, the pair distance metric, the Jaro-Winkler metric.
|
10
|
+
|
11
|
+
=== Download
|
12
|
+
|
13
|
+
The latest version of <b>amatch</b> can be found at
|
14
|
+
|
15
|
+
* http://rubyforge.org/frs/?group_id=390
|
16
|
+
|
17
|
+
Online Documentation should be located at
|
18
|
+
|
19
|
+
* http://amatch.rubyforge.org
|
20
|
+
|
21
|
+
=== Installation
|
3
22
|
|
4
23
|
Just type into the command line as root:
|
5
24
|
|
6
|
-
# ruby install.rb
|
25
|
+
# ruby install.rb
|
7
26
|
|
8
27
|
If you have installed rake (rake.rubyforge.org), you can also type:
|
9
28
|
|
10
|
-
# rake install
|
29
|
+
# rake install
|
11
30
|
|
12
31
|
To install this extension as a gem type
|
13
32
|
|
14
|
-
# gem install amatch
|
33
|
+
# gem install amatch
|
34
|
+
|
35
|
+
=== Examples
|
36
|
+
require 'amatch'
|
37
|
+
# => true
|
38
|
+
include Amatch
|
39
|
+
# => Object
|
40
|
+
|
41
|
+
m = Sellers.new("pattern")
|
42
|
+
# => #<Amatch::Sellers:0x40366324>
|
43
|
+
m.match("pattren")
|
44
|
+
# => 2.0
|
45
|
+
m.substitution = m.insertion = 3
|
46
|
+
# => 3
|
47
|
+
m.match("pattren")
|
48
|
+
# => 4.0
|
49
|
+
m.reset_weights
|
50
|
+
# => #<Amatch::Sellers:0x40366324>
|
51
|
+
m.match(["pattren","parent"])
|
52
|
+
# => [2.0, 4.0]
|
53
|
+
m.search("abcpattrendef")
|
54
|
+
# => 2.0
|
55
|
+
|
56
|
+
m = Levenshtein.new("pattern")
|
57
|
+
# => #<Amatch::Levenshtein:0x4035919c>
|
58
|
+
m.match("pattren")
|
59
|
+
# => 2
|
60
|
+
m.search("abcpattrendef")
|
61
|
+
# => 2
|
62
|
+
"pattern language".levenshtein_similar("language of patterns")
|
63
|
+
# => 0.2
|
64
|
+
|
65
|
+
m = Hamming.new("pattern")
|
66
|
+
# => #<Amatch::Hamming:0x40350858>
|
67
|
+
m.match("pattren")
|
68
|
+
# => 2
|
69
|
+
"pattern language".hamming_similar("language of patterns")
|
70
|
+
# => 0.1
|
71
|
+
|
72
|
+
m = PairDistance.new("pattern")
|
73
|
+
# => #<Amatch::PairDistance:0x40349be8>
|
74
|
+
m.match("pattr en")
|
75
|
+
# => 0.545454545454545
|
76
|
+
m.match("pattr en", nil)
|
77
|
+
# => 0.461538461538462
|
78
|
+
m.match("pattr en", /t+/)
|
79
|
+
# => 0.285714285714286
|
80
|
+
"pattern language".pair_distance_similar("language of patterns")
|
81
|
+
# => 0.928571428571429
|
82
|
+
|
83
|
+
m = LongestSubsequence.new("pattern")
|
84
|
+
# => #<Amatch::LongestSubsequence:0x4033e900>
|
85
|
+
m.match("pattren")
|
86
|
+
# => 6
|
87
|
+
"pattern language".longest_subsequence_similar("language of patterns")
|
88
|
+
# => 0.4
|
89
|
+
|
90
|
+
m = LongestSubstring.new("pattern")
|
91
|
+
# => #<Amatch::LongestSubstring:0x403378d0>
|
92
|
+
m.match("pattren")
|
93
|
+
# => 4
|
94
|
+
"pattern language".longest_substring_similar("language of patterns")
|
95
|
+
# => 0.4
|
96
|
+
|
97
|
+
m = Jaro.new("pattern")
|
98
|
+
# => #<Amatch::Jaro:0x363b70>
|
99
|
+
m.match("paTTren")
|
100
|
+
# => 0.952380952380952
|
101
|
+
m.ignore_case = false
|
102
|
+
m.match("paTTren")
|
103
|
+
# => 0.742857142857143
|
104
|
+
"pattern language".jaro_similar("language of patterns")
|
105
|
+
# => 0.672222222222222
|
106
|
+
|
107
|
+
m = JaroWinkler.new("pattern")
|
108
|
+
# #<Amatch::JaroWinkler:0x3530b8>
|
109
|
+
m.match("paTTren")
|
110
|
+
# => 0.971428571712403
|
111
|
+
m.ignore_case = false
|
112
|
+
m.match("paTTren")
|
113
|
+
# => 0.79428571505206
|
114
|
+
m.scaling_factor = 0.05
|
115
|
+
m.match("pattren")
|
116
|
+
# => 0.961904762046678
|
117
|
+
"pattern language".jarowinkler_similar("language of patterns")
|
118
|
+
# => 0.672222222222222
|
119
|
+
|
120
|
+
=== Author
|
15
121
|
|
16
|
-
|
17
|
-
======
|
122
|
+
Florian Frank mailto:flori@ping.de
|
18
123
|
|
19
|
-
|
124
|
+
=== License
|
20
125
|
|
21
|
-
|
22
|
-
|
126
|
+
This is free software; you can redistribute it and/or modify it under
|
127
|
+
the terms of the GNU General Public License Version 2 as published by
|
128
|
+
the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
|
23
129
|
|
24
|
-
GNU General Public License, Version 2 (GPLv2)
|
25
130
|
|
data/Rakefile
CHANGED
@@ -2,10 +2,13 @@
|
|
2
2
|
|
3
3
|
begin
|
4
4
|
require 'rake/gempackagetask'
|
5
|
+
require 'rake/extensiontask'
|
5
6
|
rescue LoadError
|
6
7
|
end
|
8
|
+
|
7
9
|
require 'rbconfig'
|
8
10
|
include Config
|
11
|
+
|
9
12
|
require 'rake/clean'
|
10
13
|
CLEAN.include 'coverage', 'doc'
|
11
14
|
require 'rake/testtask'
|
@@ -13,10 +16,8 @@ require 'rake/testtask'
|
|
13
16
|
MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }
|
14
17
|
PKG_NAME = 'amatch'
|
15
18
|
PKG_VERSION = File.read('VERSION').chomp
|
16
|
-
PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc)/)
|
17
|
-
PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'doc-main.txt'
|
18
|
-
|
19
|
-
task :default => :test
|
19
|
+
PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc|tmp)/)
|
20
|
+
PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'README'
|
20
21
|
|
21
22
|
desc "Run unit tests"
|
22
23
|
task :test => :compile_ext do
|
@@ -49,45 +50,66 @@ end
|
|
49
50
|
|
50
51
|
desc "Build the documentation"
|
51
52
|
task :doc do
|
52
|
-
sh "rdoc -m
|
53
|
+
sh "rdoc -m README -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
|
53
54
|
end
|
54
55
|
|
55
|
-
if defined?
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
if defined?(Gem) and defined?(Rake::GemPackageTask) and
|
57
|
+
defined?(Rake::ExtensionTask)
|
58
|
+
then
|
59
|
+
spec_src = <<-GEM
|
60
|
+
Gem::Specification.new do |s|
|
61
|
+
s.name = '#{PKG_NAME}'
|
62
|
+
s.version = '#{PKG_VERSION}'
|
63
|
+
s.summary = "Approximate String Matching library"
|
64
|
+
s.description = <<EOF
|
61
65
|
Amatch is a library for approximate string matching and searching in strings.
|
62
66
|
Several algorithms can be used to do this, and it's also possible to compute a
|
63
67
|
similarity metric number between 0.0 and 1.0 for two given strings.
|
64
68
|
EOF
|
65
69
|
|
66
|
-
|
70
|
+
s.files = #{PKG_FILES.sort.inspect}
|
71
|
+
|
72
|
+
s.extensions << "ext/extconf.rb"
|
67
73
|
|
68
|
-
|
74
|
+
s.require_paths << 'ext' << 'lib'
|
69
75
|
|
70
|
-
|
76
|
+
s.bindir = "bin"
|
77
|
+
s.executables = ["agrep.rb"]
|
78
|
+
s.default_executable = "agrep.rb"
|
71
79
|
|
72
|
-
|
73
|
-
|
74
|
-
|
80
|
+
s.has_rdoc = true
|
81
|
+
s.extra_rdoc_files.concat #{PKG_DOC_FILES.sort.inspect}
|
82
|
+
s.rdoc_options << '--main' << 'README' <<
|
83
|
+
'--title' << "#{PKG_NAME} - Approximate Matching"
|
84
|
+
s.test_files.concat Dir['tests/test_*.rb']
|
75
85
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
86
|
+
s.author = "Florian Frank"
|
87
|
+
s.email = "flori@ping.de"
|
88
|
+
s.homepage = "http://amatch.rubyforge.org"
|
89
|
+
s.rubyforge_project = '#{PKG_NAME}'
|
90
|
+
end
|
91
|
+
GEM
|
81
92
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
93
|
+
desc 'Create a gemspec file'
|
94
|
+
task :gemspec do
|
95
|
+
File.open("#{PKG_NAME}.gemspec", 'w') do |f|
|
96
|
+
f.puts spec_src
|
97
|
+
end
|
86
98
|
end
|
87
99
|
|
100
|
+
spec = eval(spec_src)
|
88
101
|
Rake::GemPackageTask.new(spec) do |pkg|
|
89
102
|
pkg.need_tar = true
|
90
|
-
pkg.package_files
|
103
|
+
pkg.package_files = PKG_FILES
|
104
|
+
end
|
105
|
+
|
106
|
+
Rake::ExtensionTask.new do |ext|
|
107
|
+
ext.name = PKG_NAME
|
108
|
+
ext.gem_spec = spec
|
109
|
+
ext.cross_compile = true
|
110
|
+
ext.cross_platform = 'i386-mswin32'
|
111
|
+
ext.ext_dir = 'ext'
|
112
|
+
ext.lib_dir = 'lib'
|
91
113
|
end
|
92
114
|
end
|
93
115
|
|
@@ -109,5 +131,11 @@ EOT
|
|
109
131
|
end
|
110
132
|
|
111
133
|
|
112
|
-
desc "
|
113
|
-
task :
|
134
|
+
desc "Default task"
|
135
|
+
task :default => [ :version, :gemspec, :test ]
|
136
|
+
|
137
|
+
desc "Build all gems and archives for a new release."
|
138
|
+
task :release => [ :clean, :version, :gemspec, :cross, :native, :gem ] do
|
139
|
+
system "#$0 clean native gem"
|
140
|
+
system "#$0 clean package"
|
141
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.4
|
1
|
+
0.2.5
|
data/amatch.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'amatch'
|
3
|
+
s.version = '0.2.5'
|
4
|
+
s.summary = "Approximate String Matching library"
|
5
|
+
s.description = <<EOF
|
6
|
+
Amatch is a library for approximate string matching and searching in strings.
|
7
|
+
Several algorithms can be used to do this, and it's also possible to compute a
|
8
|
+
similarity metric number between 0.0 and 1.0 for two given strings.
|
9
|
+
EOF
|
10
|
+
|
11
|
+
s.files = ["CHANGES", "COPYING", "README", "Rakefile", "VERSION", "amatch.gemspec", "bin", "bin/agrep.rb", "ext", "ext/amatch.c", "ext/common.h", "ext/extconf.rb", "ext/pair.c", "ext/pair.h", "install.rb", "lib", "lib/amatch", "lib/amatch/version.rb", "tests", "tests/test_hamming.rb", "tests/test_jaro.rb", "tests/test_jaro_winkler.rb", "tests/test_levenshtein.rb", "tests/test_longest_subsequence.rb", "tests/test_longest_substring.rb", "tests/test_pair_distance.rb", "tests/test_sellers.rb"]
|
12
|
+
|
13
|
+
s.extensions << "ext/extconf.rb"
|
14
|
+
|
15
|
+
s.require_paths << 'ext' << 'lib'
|
16
|
+
|
17
|
+
s.bindir = "bin"
|
18
|
+
s.executables = ["agrep.rb"]
|
19
|
+
s.default_executable = "agrep.rb"
|
20
|
+
|
21
|
+
s.has_rdoc = true
|
22
|
+
s.extra_rdoc_files.concat ["README", "ext/amatch.c", "lib/amatch/version.rb"]
|
23
|
+
s.rdoc_options << '--main' << 'README' <<
|
24
|
+
'--title' << "amatch - Approximate Matching"
|
25
|
+
s.test_files.concat Dir['tests/test_*.rb']
|
26
|
+
|
27
|
+
s.author = "Florian Frank"
|
28
|
+
s.email = "flori@ping.de"
|
29
|
+
s.homepage = "http://amatch.rubyforge.org"
|
30
|
+
s.rubyforge_project = 'amatch'
|
31
|
+
end
|
data/lib/amatch.so
ADDED
Binary file
|
data/lib/amatch/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Frank
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-25 00:00:00 +02:00
|
13
13
|
default_executable: agrep.rb
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -24,32 +24,33 @@ executables:
|
|
24
24
|
extensions:
|
25
25
|
- ext/extconf.rb
|
26
26
|
extra_rdoc_files:
|
27
|
+
- README
|
27
28
|
- ext/amatch.c
|
28
29
|
- lib/amatch/version.rb
|
29
|
-
- doc-main.txt
|
30
30
|
files:
|
31
31
|
- CHANGES
|
32
|
-
-
|
33
|
-
- VERSION
|
32
|
+
- COPYING
|
34
33
|
- README
|
34
|
+
- Rakefile
|
35
|
+
- VERSION
|
36
|
+
- amatch.gemspec
|
37
|
+
- bin/agrep.rb
|
38
|
+
- ext/amatch.c
|
35
39
|
- ext/common.h
|
36
40
|
- ext/extconf.rb
|
37
|
-
- ext/amatch.c
|
38
|
-
- ext/pair.h
|
39
41
|
- ext/pair.c
|
40
|
-
-
|
42
|
+
- ext/pair.h
|
43
|
+
- install.rb
|
44
|
+
- lib/amatch.so
|
41
45
|
- lib/amatch/version.rb
|
42
|
-
- tests/test_longest_substring.rb
|
43
46
|
- tests/test_hamming.rb
|
47
|
+
- tests/test_jaro.rb
|
48
|
+
- tests/test_jaro_winkler.rb
|
49
|
+
- tests/test_levenshtein.rb
|
44
50
|
- tests/test_longest_subsequence.rb
|
51
|
+
- tests/test_longest_substring.rb
|
45
52
|
- tests/test_pair_distance.rb
|
46
|
-
- tests/test_levenshtein.rb
|
47
|
-
- tests/test_jaro.rb
|
48
53
|
- tests/test_sellers.rb
|
49
|
-
- tests/test_jaro_winkler.rb
|
50
|
-
- COPYING
|
51
|
-
- install.rb
|
52
|
-
- doc-main.txt
|
53
54
|
has_rdoc: true
|
54
55
|
homepage: http://amatch.rubyforge.org
|
55
56
|
licenses: []
|
@@ -57,11 +58,13 @@ licenses: []
|
|
57
58
|
post_install_message:
|
58
59
|
rdoc_options:
|
59
60
|
- --main
|
60
|
-
- doc-main.txt
|
61
|
+
- README
|
61
62
|
- --title
|
62
63
|
- amatch - Approximate Matching
|
63
64
|
require_paths:
|
65
|
+
- lib
|
64
66
|
- ext
|
67
|
+
- lib
|
65
68
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
69
|
requirements:
|
67
70
|
- - ">="
|
data/doc-main.txt
DELETED
@@ -1,115 +0,0 @@
|
|
1
|
-
== amatch - Approximate Matching Extension for Ruby
|
2
|
-
|
3
|
-
=== Description
|
4
|
-
|
5
|
-
This is a collection of classes that can be used for Approximate
|
6
|
-
matching, searching, and comparing of Strings. They implement algorithms
|
7
|
-
that compute the Levenshtein edit distance, Sellers edit distance, the
|
8
|
-
Hamming distance, the longest common subsequence length, the longest common
|
9
|
-
substring length, the pair distance metric, the Jaro-Winkler metric.
|
10
|
-
|
11
|
-
=== Author
|
12
|
-
|
13
|
-
Florian Frank mailto:flori@ping.de
|
14
|
-
|
15
|
-
=== License
|
16
|
-
|
17
|
-
This is free software; you can redistribute it and/or modify it under
|
18
|
-
the terms of the GNU General Public License Version 2 as published by
|
19
|
-
the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
|
20
|
-
|
21
|
-
=== Download
|
22
|
-
|
23
|
-
The latest version of <b>amatch</b> can be found at
|
24
|
-
|
25
|
-
* http://rubyforge.org/frs/?group_id=390
|
26
|
-
|
27
|
-
Online Documentation should be located at
|
28
|
-
|
29
|
-
* http://amatch.rubyforge.org
|
30
|
-
|
31
|
-
=== Examples
|
32
|
-
require 'amatch'
|
33
|
-
# => true
|
34
|
-
include Amatch
|
35
|
-
# => Object
|
36
|
-
|
37
|
-
m = Sellers.new("pattern")
|
38
|
-
# => #<Amatch::Sellers:0x40366324>
|
39
|
-
m.match("pattren")
|
40
|
-
# => 2.0
|
41
|
-
m.substitution = m.insertion = 3
|
42
|
-
# => 3
|
43
|
-
m.match("pattren")
|
44
|
-
# => 4.0
|
45
|
-
m.reset_weights
|
46
|
-
# => #<Amatch::Sellers:0x40366324>
|
47
|
-
m.match(["pattren","parent"])
|
48
|
-
# => [2.0, 4.0]
|
49
|
-
m.search("abcpattrendef")
|
50
|
-
# => 2.0
|
51
|
-
|
52
|
-
m = Levenshtein.new("pattern")
|
53
|
-
# => #<Amatch::Levenshtein:0x4035919c>
|
54
|
-
m.match("pattren")
|
55
|
-
# => 2
|
56
|
-
m.search("abcpattrendef")
|
57
|
-
# => 2
|
58
|
-
"pattern language".levenshtein_similar("language of patterns")
|
59
|
-
# => 0.2
|
60
|
-
|
61
|
-
m = Hamming.new("pattern")
|
62
|
-
# => #<Amatch::Hamming:0x40350858>
|
63
|
-
m.match("pattren")
|
64
|
-
# => 2
|
65
|
-
"pattern language".hamming_similar("language of patterns")
|
66
|
-
# => 0.1
|
67
|
-
|
68
|
-
m = PairDistance.new("pattern")
|
69
|
-
# => #<Amatch::PairDistance:0x40349be8>
|
70
|
-
m.match("pattr en")
|
71
|
-
# => 0.545454545454545
|
72
|
-
m.match("pattr en", nil)
|
73
|
-
# => 0.461538461538462
|
74
|
-
m.match("pattr en", /t+/)
|
75
|
-
# => 0.285714285714286
|
76
|
-
"pattern language".pair_distance_similar("language of patterns")
|
77
|
-
# => 0.928571428571429
|
78
|
-
|
79
|
-
m = LongestSubsequence.new("pattern")
|
80
|
-
# => #<Amatch::LongestSubsequence:0x4033e900>
|
81
|
-
m.match("pattren")
|
82
|
-
# => 6
|
83
|
-
"pattern language".longest_subsequence_similar("language of patterns")
|
84
|
-
# => 0.4
|
85
|
-
|
86
|
-
m = LongestSubstring.new("pattern")
|
87
|
-
# => #<Amatch::LongestSubstring:0x403378d0>
|
88
|
-
m.match("pattren")
|
89
|
-
# => 4
|
90
|
-
"pattern language".longest_substring_similar("language of patterns")
|
91
|
-
# => 0.4
|
92
|
-
|
93
|
-
m = Jaro.new("pattern")
|
94
|
-
# => #<Amatch::Jaro:0x363b70>
|
95
|
-
m.match("paTTren")
|
96
|
-
# => 0.952380952380952
|
97
|
-
m.ignore_case = false
|
98
|
-
m.match("paTTren")
|
99
|
-
# => 0.742857142857143
|
100
|
-
"pattern language".jaro_similar("language of patterns")
|
101
|
-
# => 0.672222222222222
|
102
|
-
|
103
|
-
m = JaroWinkler.new("pattern")
|
104
|
-
# #<Amatch::JaroWinkler:0x3530b8>
|
105
|
-
m.match("paTTren")
|
106
|
-
# => 0.971428571712403
|
107
|
-
m.ignore_case = false
|
108
|
-
m.match("paTTren")
|
109
|
-
# => 0.79428571505206
|
110
|
-
m.scaling_factor = 0.05
|
111
|
-
m.match("pattren")
|
112
|
-
# => 0.961904762046678
|
113
|
-
"pattern language".jarowinkler_similar("language of patterns")
|
114
|
-
# => 0.672222222222222
|
115
|
-
|