amatch 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,3 +1,6 @@
1
+ 2009-09-23 (0.2.5)
2
+ * Added lib to gem's require_paths.
3
+ * Using rake-compiler now.
1
4
  2009-08-25 (0.2.4)
2
5
  * Included Jaro and Jaro-Winkler metrics implementation of Kevin Ballard
3
6
  <kevin@rapleaf.com>. Thanks a lot.
data/README CHANGED
@@ -1,25 +1,130 @@
1
- Installation
2
- ============
1
+ == amatch - Approximate Matching Extension for Ruby
2
+
3
+ === Description
4
+
5
+ This is a collection of classes that can be used for approximate
6
+ matching, searching, and comparing of Strings. They implement algorithms
7
+ that compute the Levenshtein edit distance, Sellers edit distance, the
8
+ Hamming distance, the longest common subsequence length, the longest common
9
+ substring length, the pair distance metric, and the Jaro and Jaro-Winkler metrics.
10
+
11
+ === Download
12
+
13
+ The latest version of <b>amatch</b> can be found at
14
+
15
+ * http://rubyforge.org/frs/?group_id=390
16
+
17
+ Online Documentation should be located at
18
+
19
+ * http://amatch.rubyforge.org
20
+
21
+ === Installation
3
22
 
4
23
  Just type into the command line as root:
5
24
 
6
- # ruby install.rb
25
+ # ruby install.rb
7
26
 
8
27
  If you have installed rake (rake.rubyforge.org), you can also type:
9
28
 
10
- # rake install
29
+ # rake install
11
30
 
12
31
  To install this extension as a gem, type:
13
32
 
14
- # gem install amatch
33
+ # gem install amatch
34
+
35
+ === Examples
36
+ require 'amatch'
37
+ # => true
38
+ include Amatch
39
+ # => Object
40
+
41
+ m = Sellers.new("pattern")
42
+ # => #<Amatch::Sellers:0x40366324>
43
+ m.match("pattren")
44
+ # => 2.0
45
+ m.substitution = m.insertion = 3
46
+ # => 3
47
+ m.match("pattren")
48
+ # => 4.0
49
+ m.reset_weights
50
+ # => #<Amatch::Sellers:0x40366324>
51
+ m.match(["pattren","parent"])
52
+ # => [2.0, 4.0]
53
+ m.search("abcpattrendef")
54
+ # => 2.0
55
+
56
+ m = Levenshtein.new("pattern")
57
+ # => #<Amatch::Levenshtein:0x4035919c>
58
+ m.match("pattren")
59
+ # => 2
60
+ m.search("abcpattrendef")
61
+ # => 2
62
+ "pattern language".levenshtein_similar("language of patterns")
63
+ # => 0.2
64
+
65
+ m = Hamming.new("pattern")
66
+ # => #<Amatch::Hamming:0x40350858>
67
+ m.match("pattren")
68
+ # => 2
69
+ "pattern language".hamming_similar("language of patterns")
70
+ # => 0.1
71
+
72
+ m = PairDistance.new("pattern")
73
+ # => #<Amatch::PairDistance:0x40349be8>
74
+ m.match("pattr en")
75
+ # => 0.545454545454545
76
+ m.match("pattr en", nil)
77
+ # => 0.461538461538462
78
+ m.match("pattr en", /t+/)
79
+ # => 0.285714285714286
80
+ "pattern language".pair_distance_similar("language of patterns")
81
+ # => 0.928571428571429
82
+
83
+ m = LongestSubsequence.new("pattern")
84
+ # => #<Amatch::LongestSubsequence:0x4033e900>
85
+ m.match("pattren")
86
+ # => 6
87
+ "pattern language".longest_subsequence_similar("language of patterns")
88
+ # => 0.4
89
+
90
+ m = LongestSubstring.new("pattern")
91
+ # => #<Amatch::LongestSubstring:0x403378d0>
92
+ m.match("pattren")
93
+ # => 4
94
+ "pattern language".longest_substring_similar("language of patterns")
95
+ # => 0.4
96
+
97
+ m = Jaro.new("pattern")
98
+ # => #<Amatch::Jaro:0x363b70>
99
+ m.match("paTTren")
100
+ # => 0.952380952380952
101
+ m.ignore_case = false
102
+ m.match("paTTren")
103
+ # => 0.742857142857143
104
+ "pattern language".jaro_similar("language of patterns")
105
+ # => 0.672222222222222
106
+
107
+ m = JaroWinkler.new("pattern")
108
+ # => #<Amatch::JaroWinkler:0x3530b8>
109
+ m.match("paTTren")
110
+ # => 0.971428571712403
111
+ m.ignore_case = false
112
+ m.match("paTTren")
113
+ # => 0.79428571505206
114
+ m.scaling_factor = 0.05
115
+ m.match("pattren")
116
+ # => 0.961904762046678
117
+ "pattern language".jarowinkler_similar("language of patterns")
118
+ # => 0.672222222222222
119
+
120
+ === Author
15
121
 
16
- Author
17
- ======
122
+ Florian Frank mailto:flori@ping.de
18
123
 
19
- Florian Frank <flori@ping.de>
124
+ === License
20
125
 
21
- License
22
- =======
126
+ This is free software; you can redistribute it and/or modify it under
127
+ the terms of the GNU General Public License Version 2 as published by
128
+ the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
23
129
 
24
- GNU General Public License, Version 2 (GPLv2)
25
130
 
data/Rakefile CHANGED
@@ -2,10 +2,13 @@
2
2
 
3
3
  begin
4
4
  require 'rake/gempackagetask'
5
+ require 'rake/extensiontask'
5
6
  rescue LoadError
6
7
  end
8
+
7
9
  require 'rbconfig'
8
10
  include Config
11
+
9
12
  require 'rake/clean'
10
13
  CLEAN.include 'coverage', 'doc'
11
14
  require 'rake/testtask'
@@ -13,10 +16,8 @@ require 'rake/testtask'
13
16
  MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }
14
17
  PKG_NAME = 'amatch'
15
18
  PKG_VERSION = File.read('VERSION').chomp
16
- PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc)/)
17
- PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'doc-main.txt'
18
-
19
- task :default => :test
19
+ PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc|tmp)/)
20
+ PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'README'
20
21
 
21
22
  desc "Run unit tests"
22
23
  task :test => :compile_ext do
@@ -49,45 +50,66 @@ end
49
50
 
50
51
  desc "Build the documentation"
51
52
  task :doc do
52
- sh "rdoc -m doc-main.txt -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
53
+ sh "rdoc -m README -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
53
54
  end
54
55
 
55
- if defined? Gem
56
- spec = Gem::Specification.new do |s|
57
- s.name = 'amatch'
58
- s.version = PKG_VERSION
59
- s.summary = "Approximate String Matching library"
60
- s.description = <<EOF
56
+ if defined?(Gem) and defined?(Rake::GemPackageTask) and
57
+ defined?(Rake::ExtensionTask)
58
+ then
59
+ spec_src = <<-GEM
60
+ Gem::Specification.new do |s|
61
+ s.name = '#{PKG_NAME}'
62
+ s.version = '#{PKG_VERSION}'
63
+ s.summary = "Approximate String Matching library"
64
+ s.description = <<EOF
61
65
  Amatch is a library for approximate string matching and searching in strings.
62
66
  Several algorithms can be used to do this, and it's also possible to compute a
63
67
  similarity metric number between 0.0 and 1.0 for two given strings.
64
68
  EOF
65
69
 
66
- s.files = PKG_FILES
70
+ s.files = #{PKG_FILES.sort.inspect}
71
+
72
+ s.extensions << "ext/extconf.rb"
67
73
 
68
- s.extensions << "ext/extconf.rb"
74
+ s.require_paths << 'ext' << 'lib'
69
75
 
70
- s.require_path = 'ext'
76
+ s.bindir = "bin"
77
+ s.executables = ["agrep.rb"]
78
+ s.default_executable = "agrep.rb"
71
79
 
72
- s.bindir = "bin"
73
- s.executables = ["agrep.rb"]
74
- s.default_executable = "agrep.rb"
80
+ s.has_rdoc = true
81
+ s.extra_rdoc_files.concat #{PKG_DOC_FILES.sort.inspect}
82
+ s.rdoc_options << '--main' << 'README' <<
83
+ '--title' << "#{PKG_NAME} - Approximate Matching"
84
+ s.test_files.concat Dir['tests/test_*.rb']
75
85
 
76
- s.has_rdoc = true
77
- s.extra_rdoc_files.concat PKG_DOC_FILES
78
- s.rdoc_options << '--main' << 'doc-main.txt' <<
79
- '--title' << "#{PKG_NAME} - Approximate Matching"
80
- s.test_files.concat Dir['tests/test_*.rb']
86
+ s.author = "Florian Frank"
87
+ s.email = "flori@ping.de"
88
+ s.homepage = "http://amatch.rubyforge.org"
89
+ s.rubyforge_project = '#{PKG_NAME}'
90
+ end
91
+ GEM
81
92
 
82
- s.author = "Florian Frank"
83
- s.email = "flori@ping.de"
84
- s.homepage = "http://amatch.rubyforge.org"
85
- s.rubyforge_project = "amatch"
93
+ desc 'Create a gemspec file'
94
+ task :gemspec do
95
+ File.open("#{PKG_NAME}.gemspec", 'w') do |f|
96
+ f.puts spec_src
97
+ end
86
98
  end
87
99
 
100
+ spec = eval(spec_src)
88
101
  Rake::GemPackageTask.new(spec) do |pkg|
89
102
  pkg.need_tar = true
90
- pkg.package_files += PKG_FILES
103
+ pkg.package_files = PKG_FILES
104
+ end
105
+
106
+ Rake::ExtensionTask.new do |ext|
107
+ ext.name = PKG_NAME
108
+ ext.gem_spec = spec
109
+ ext.cross_compile = true
110
+ ext.cross_platform = 'i386-mswin32'
111
+ ext.ext_dir = 'ext'
112
+ ext.lib_dir = 'lib'
91
113
  end
92
114
  end
93
115
 
@@ -109,5 +131,11 @@ EOT
109
131
  end
110
132
 
111
133
 
112
- desc "Prepare a new release"
113
- task :release => [ :clean, :version, :package ]
134
+ desc "Default task"
135
+ task :default => [ :version, :gemspec, :test ]
136
+
137
+ desc "Build all gems and archives for a new release."
138
+ task :release => [ :clean, :version, :gemspec, :cross, :native, :gem ] do
139
+ system "#$0 clean native gem"
140
+ system "#$0 clean package"
141
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.2.5
data/amatch.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'amatch'
3
+ s.version = '0.2.5'
4
+ s.summary = "Approximate String Matching library"
5
+ s.description = <<EOF
6
+ Amatch is a library for approximate string matching and searching in strings.
7
+ Several algorithms can be used to do this, and it's also possible to compute a
8
+ similarity metric number between 0.0 and 1.0 for two given strings.
9
+ EOF
10
+
11
+ s.files = ["CHANGES", "COPYING", "README", "Rakefile", "VERSION", "amatch.gemspec", "bin", "bin/agrep.rb", "ext", "ext/amatch.c", "ext/common.h", "ext/extconf.rb", "ext/pair.c", "ext/pair.h", "install.rb", "lib", "lib/amatch", "lib/amatch/version.rb", "tests", "tests/test_hamming.rb", "tests/test_jaro.rb", "tests/test_jaro_winkler.rb", "tests/test_levenshtein.rb", "tests/test_longest_subsequence.rb", "tests/test_longest_substring.rb", "tests/test_pair_distance.rb", "tests/test_sellers.rb"]
12
+
13
+ s.extensions << "ext/extconf.rb"
14
+
15
+ s.require_paths << 'ext' << 'lib'
16
+
17
+ s.bindir = "bin"
18
+ s.executables = ["agrep.rb"]
19
+ s.default_executable = "agrep.rb"
20
+
21
+ s.has_rdoc = true
22
+ s.extra_rdoc_files.concat ["README", "ext/amatch.c", "lib/amatch/version.rb"]
23
+ s.rdoc_options << '--main' << 'README' <<
24
+ '--title' << "amatch - Approximate Matching"
25
+ s.test_files.concat Dir['tests/test_*.rb']
26
+
27
+ s.author = "Florian Frank"
28
+ s.email = "flori@ping.de"
29
+ s.homepage = "http://amatch.rubyforge.org"
30
+ s.rubyforge_project = 'amatch'
31
+ end
data/lib/amatch.so ADDED
Binary file
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.2.4'
3
+ VERSION = '0.2.5'
4
4
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-26 00:00:00 +02:00
12
+ date: 2009-09-25 00:00:00 +02:00
13
13
  default_executable: agrep.rb
14
14
  dependencies: []
15
15
 
@@ -24,32 +24,33 @@ executables:
24
24
  extensions:
25
25
  - ext/extconf.rb
26
26
  extra_rdoc_files:
27
+ - README
27
28
  - ext/amatch.c
28
29
  - lib/amatch/version.rb
29
- - doc-main.txt
30
30
  files:
31
31
  - CHANGES
32
- - bin/agrep.rb
33
- - VERSION
32
+ - COPYING
34
33
  - README
34
+ - Rakefile
35
+ - VERSION
36
+ - amatch.gemspec
37
+ - bin/agrep.rb
38
+ - ext/amatch.c
35
39
  - ext/common.h
36
40
  - ext/extconf.rb
37
- - ext/amatch.c
38
- - ext/pair.h
39
41
  - ext/pair.c
40
- - Rakefile
42
+ - ext/pair.h
43
+ - install.rb
44
+ - lib/amatch.so
41
45
  - lib/amatch/version.rb
42
- - tests/test_longest_substring.rb
43
46
  - tests/test_hamming.rb
47
+ - tests/test_jaro.rb
48
+ - tests/test_jaro_winkler.rb
49
+ - tests/test_levenshtein.rb
44
50
  - tests/test_longest_subsequence.rb
51
+ - tests/test_longest_substring.rb
45
52
  - tests/test_pair_distance.rb
46
- - tests/test_levenshtein.rb
47
- - tests/test_jaro.rb
48
53
  - tests/test_sellers.rb
49
- - tests/test_jaro_winkler.rb
50
- - COPYING
51
- - install.rb
52
- - doc-main.txt
53
54
  has_rdoc: true
54
55
  homepage: http://amatch.rubyforge.org
55
56
  licenses: []
@@ -57,11 +58,13 @@ licenses: []
57
58
  post_install_message:
58
59
  rdoc_options:
59
60
  - --main
60
- - doc-main.txt
61
+ - README
61
62
  - --title
62
63
  - amatch - Approximate Matching
63
64
  require_paths:
65
+ - lib
64
66
  - ext
67
+ - lib
65
68
  required_ruby_version: !ruby/object:Gem::Requirement
66
69
  requirements:
67
70
  - - ">="
data/doc-main.txt DELETED
@@ -1,115 +0,0 @@
1
- == amatch - Approximate Matching Extension for Ruby
2
-
3
- === Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, the Jaro-Winkler metric.
10
-
11
- === Author
12
-
13
- Florian Frank mailto:flori@ping.de
14
-
15
- === License
16
-
17
- This is free software; you can redistribute it and/or modify it under
18
- the terms of the GNU General Public License Version 2 as published by
19
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
20
-
21
- === Download
22
-
23
- The latest version of <b>amatch</b> can be found at
24
-
25
- * http://rubyforge.org/frs/?group_id=390
26
-
27
- Online Documentation should be located at
28
-
29
- * http://amatch.rubyforge.org
30
-
31
- === Examples
32
- require 'amatch'
33
- # => true
34
- include Amatch
35
- # => Object
36
-
37
- m = Sellers.new("pattern")
38
- # => #<Amatch::Sellers:0x40366324>
39
- m.match("pattren")
40
- # => 2.0
41
- m.substitution = m.insertion = 3
42
- # => 3
43
- m.match("pattren")
44
- # => 4.0
45
- m.reset_weights
46
- # => #<Amatch::Sellers:0x40366324>
47
- m.match(["pattren","parent"])
48
- # => [2.0, 4.0]
49
- m.search("abcpattrendef")
50
- # => 2.0
51
-
52
- m = Levenshtein.new("pattern")
53
- # => #<Amatch::Levenshtein:0x4035919c>
54
- m.match("pattren")
55
- # => 2
56
- m.search("abcpattrendef")
57
- # => 2
58
- "pattern language".levenshtein_similar("language of patterns")
59
- # => 0.2
60
-
61
- m = Hamming.new("pattern")
62
- # => #<Amatch::Hamming:0x40350858>
63
- m.match("pattren")
64
- # => 2
65
- "pattern language".hamming_similar("language of patterns")
66
- # => 0.1
67
-
68
- m = PairDistance.new("pattern")
69
- # => #<Amatch::PairDistance:0x40349be8>
70
- m.match("pattr en")
71
- # => 0.545454545454545
72
- m.match("pattr en", nil)
73
- # => 0.461538461538462
74
- m.match("pattr en", /t+/)
75
- # => 0.285714285714286
76
- "pattern language".pair_distance_similar("language of patterns")
77
- # => 0.928571428571429
78
-
79
- m = LongestSubsequence.new("pattern")
80
- # => #<Amatch::LongestSubsequence:0x4033e900>
81
- m.match("pattren")
82
- # => 6
83
- "pattern language".longest_subsequence_similar("language of patterns")
84
- # => 0.4
85
-
86
- m = LongestSubstring.new("pattern")
87
- # => #<Amatch::LongestSubstring:0x403378d0>
88
- m.match("pattren")
89
- # => 4
90
- "pattern language".longest_substring_similar("language of patterns")
91
- # => 0.4
92
-
93
- m = Jaro.new("pattern")
94
- # => #<Amatch::Jaro:0x363b70>
95
- m.match("paTTren")
96
- # => 0.952380952380952
97
- m.ignore_case = false
98
- m.match("paTTren")
99
- # => 0.742857142857143
100
- "pattern language".jaro_similar("language of patterns")
101
- # => 0.672222222222222
102
-
103
- m = JaroWinkler.new("pattern")
104
- # #<Amatch::JaroWinkler:0x3530b8>
105
- m.match("paTTren")
106
- # => 0.971428571712403
107
- m.ignore_case = false
108
- m.match("paTTren")
109
- # => 0.79428571505206
110
- m.scaling_factor = 0.05
111
- m.match("pattren")
112
- # => 0.961904762046678
113
- "pattern language".jarowinkler_similar("language of patterns")
114
- # => 0.672222222222222
115
-