amatch 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES CHANGED
@@ -1,3 +1,6 @@
1
+ 2009-09-23 (0.2.5)
2
+ * Added lib to gem's require_paths.
3
+ * Using rake-compiler now.
1
4
  2009-08-25 (0.2.4)
2
5
  * Included Jaro and Jaro-Winkler metrics implementation of Kevin Ballard
3
6
  <kevin@rapleaf.com>. Thanks a lot.
data/README CHANGED
@@ -1,25 +1,130 @@
1
- Installation
2
- ============
1
+ == amatch - Approximate Matching Extension for Ruby
2
+
3
+ === Description
4
+
5
+ This is a collection of classes that can be used for approximate
6
+ matching, searching, and comparing of Strings. They implement algorithms
7
+ that compute the Levenshtein edit distance, Sellers edit distance, the
8
+ Hamming distance, the longest common subsequence length, the longest common
9
+ substring length, the pair distance metric, and the Jaro and Jaro-Winkler metrics.
10
+
11
+ === Download
12
+
13
+ The latest version of <b>amatch</b> can be found at
14
+
15
+ * http://rubyforge.org/frs/?group_id=390
16
+
17
+ Online documentation is available at
18
+
19
+ * http://amatch.rubyforge.org
20
+
21
+ === Installation
3
22
 
4
23
  Just type into the command line as root:
5
24
 
6
- # ruby install.rb
25
+ # ruby install.rb
7
26
 
8
27
  If you have installed rake (rake.rubyforge.org), you can also type:
9
28
 
10
- # rake install
29
+ # rake install
11
30
 
12
31
  To install this extension as a gem, type:
13
32
 
14
- # gem install amatch
33
+ # gem install amatch
34
+
35
+ === Examples
36
+ require 'amatch'
37
+ # => true
38
+ include Amatch
39
+ # => Object
40
+
41
+ m = Sellers.new("pattern")
42
+ # => #<Amatch::Sellers:0x40366324>
43
+ m.match("pattren")
44
+ # => 2.0
45
+ m.substitution = m.insertion = 3
46
+ # => 3
47
+ m.match("pattren")
48
+ # => 4.0
49
+ m.reset_weights
50
+ # => #<Amatch::Sellers:0x40366324>
51
+ m.match(["pattren","parent"])
52
+ # => [2.0, 4.0]
53
+ m.search("abcpattrendef")
54
+ # => 2.0
55
+
56
+ m = Levenshtein.new("pattern")
57
+ # => #<Amatch::Levenshtein:0x4035919c>
58
+ m.match("pattren")
59
+ # => 2
60
+ m.search("abcpattrendef")
61
+ # => 2
62
+ "pattern language".levenshtein_similar("language of patterns")
63
+ # => 0.2
64
+
65
+ m = Hamming.new("pattern")
66
+ # => #<Amatch::Hamming:0x40350858>
67
+ m.match("pattren")
68
+ # => 2
69
+ "pattern language".hamming_similar("language of patterns")
70
+ # => 0.1
71
+
72
+ m = PairDistance.new("pattern")
73
+ # => #<Amatch::PairDistance:0x40349be8>
74
+ m.match("pattr en")
75
+ # => 0.545454545454545
76
+ m.match("pattr en", nil)
77
+ # => 0.461538461538462
78
+ m.match("pattr en", /t+/)
79
+ # => 0.285714285714286
80
+ "pattern language".pair_distance_similar("language of patterns")
81
+ # => 0.928571428571429
82
+
83
+ m = LongestSubsequence.new("pattern")
84
+ # => #<Amatch::LongestSubsequence:0x4033e900>
85
+ m.match("pattren")
86
+ # => 6
87
+ "pattern language".longest_subsequence_similar("language of patterns")
88
+ # => 0.4
89
+
90
+ m = LongestSubstring.new("pattern")
91
+ # => #<Amatch::LongestSubstring:0x403378d0>
92
+ m.match("pattren")
93
+ # => 4
94
+ "pattern language".longest_substring_similar("language of patterns")
95
+ # => 0.4
96
+
97
+ m = Jaro.new("pattern")
98
+ # => #<Amatch::Jaro:0x363b70>
99
+ m.match("paTTren")
100
+ # => 0.952380952380952
101
+ m.ignore_case = false
102
+ m.match("paTTren")
103
+ # => 0.742857142857143
104
+ "pattern language".jaro_similar("language of patterns")
105
+ # => 0.672222222222222
106
+
107
+ m = JaroWinkler.new("pattern")
108
+ # => #<Amatch::JaroWinkler:0x3530b8>
109
+ m.match("paTTren")
110
+ # => 0.971428571712403
111
+ m.ignore_case = false
112
+ m.match("paTTren")
113
+ # => 0.79428571505206
114
+ m.scaling_factor = 0.05
115
+ m.match("pattren")
116
+ # => 0.961904762046678
117
+ "pattern language".jarowinkler_similar("language of patterns")
118
+ # => 0.672222222222222
119
+
120
+ === Author
15
121
 
16
- Author
17
- ======
122
+ Florian Frank mailto:flori@ping.de
18
123
 
19
- Florian Frank <flori@ping.de>
124
+ === License
20
125
 
21
- License
22
- =======
126
+ This is free software; you can redistribute it and/or modify it under
127
+ the terms of the GNU General Public License Version 2 as published by
128
+ the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
23
129
 
24
- GNU General Public License, Version 2 (GPLv2)
25
130
 
data/Rakefile CHANGED
@@ -2,10 +2,13 @@
2
2
 
3
3
  begin
4
4
  require 'rake/gempackagetask'
5
+ require 'rake/extensiontask'
5
6
  rescue LoadError
6
7
  end
8
+
7
9
  require 'rbconfig'
8
10
  include Config
11
+
9
12
  require 'rake/clean'
10
13
  CLEAN.include 'coverage', 'doc'
11
14
  require 'rake/testtask'
@@ -13,10 +16,8 @@ require 'rake/testtask'
13
16
  MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }
14
17
  PKG_NAME = 'amatch'
15
18
  PKG_VERSION = File.read('VERSION').chomp
16
- PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc)/)
17
- PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'doc-main.txt'
18
-
19
- task :default => :test
19
+ PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc|tmp)/)
20
+ PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'README'
20
21
 
21
22
  desc "Run unit tests"
22
23
  task :test => :compile_ext do
@@ -49,45 +50,66 @@ end
49
50
 
50
51
  desc "Build the documentation"
51
52
  task :doc do
52
- sh "rdoc -m doc-main.txt -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
53
+ sh "rdoc -m README -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
53
54
  end
54
55
 
55
- if defined? Gem
56
- spec = Gem::Specification.new do |s|
57
- s.name = 'amatch'
58
- s.version = PKG_VERSION
59
- s.summary = "Approximate String Matching library"
60
- s.description = <<EOF
56
+ if defined?(Gem) and defined?(Rake::GemPackageTask) and
57
+ defined?(Rake::ExtensionTask)
58
+ then
59
+ spec_src = <<-GEM
60
+ Gem::Specification.new do |s|
61
+ s.name = '#{PKG_NAME}'
62
+ s.version = '#{PKG_VERSION}'
63
+ s.summary = "Approximate String Matching library"
64
+ s.description = <<EOF
61
65
  Amatch is a library for approximate string matching and searching in strings.
62
66
  Several algorithms can be used to do this, and it's also possible to compute a
63
67
  similarity metric number between 0.0 and 1.0 for two given strings.
64
68
  EOF
65
69
 
66
- s.files = PKG_FILES
70
+ s.files = #{PKG_FILES.sort.inspect}
71
+
72
+ s.extensions << "ext/extconf.rb"
67
73
 
68
- s.extensions << "ext/extconf.rb"
74
+ s.require_paths << 'ext' << 'lib'
69
75
 
70
- s.require_path = 'ext'
76
+ s.bindir = "bin"
77
+ s.executables = ["agrep.rb"]
78
+ s.default_executable = "agrep.rb"
71
79
 
72
- s.bindir = "bin"
73
- s.executables = ["agrep.rb"]
74
- s.default_executable = "agrep.rb"
80
+ s.has_rdoc = true
81
+ s.extra_rdoc_files.concat #{PKG_DOC_FILES.sort.inspect}
82
+ s.rdoc_options << '--main' << 'README' <<
83
+ '--title' << "#{PKG_NAME} - Approximate Matching"
84
+ s.test_files.concat Dir['tests/test_*.rb']
75
85
 
76
- s.has_rdoc = true
77
- s.extra_rdoc_files.concat PKG_DOC_FILES
78
- s.rdoc_options << '--main' << 'doc-main.txt' <<
79
- '--title' << "#{PKG_NAME} - Approximate Matching"
80
- s.test_files.concat Dir['tests/test_*.rb']
86
+ s.author = "Florian Frank"
87
+ s.email = "flori@ping.de"
88
+ s.homepage = "http://amatch.rubyforge.org"
89
+ s.rubyforge_project = '#{PKG_NAME}'
90
+ end
91
+ GEM
81
92
 
82
- s.author = "Florian Frank"
83
- s.email = "flori@ping.de"
84
- s.homepage = "http://amatch.rubyforge.org"
85
- s.rubyforge_project = "amatch"
93
+ desc 'Create a gemspec file'
94
+ task :gemspec do
95
+ File.open("#{PKG_NAME}.gemspec", 'w') do |f|
96
+ f.puts spec_src
97
+ end
86
98
  end
87
99
 
100
+ spec = eval(spec_src)
88
101
  Rake::GemPackageTask.new(spec) do |pkg|
89
102
  pkg.need_tar = true
90
- pkg.package_files += PKG_FILES
103
+ pkg.package_files = PKG_FILES
104
+ end
105
+
106
+ Rake::ExtensionTask.new do |ext|
107
+ ext.name = PKG_NAME
108
+ ext.gem_spec = spec
109
+ ext.cross_compile = true
110
+ ext.cross_platform = 'i386-mswin32'
111
+ ext.ext_dir = 'ext'
112
+ ext.lib_dir = 'lib'
91
113
  end
92
114
  end
93
115
 
@@ -109,5 +131,11 @@ EOT
109
131
  end
110
132
 
111
133
 
112
- desc "Prepare a new release"
113
- task :release => [ :clean, :version, :package ]
134
+ desc "Default task"
135
+ task :default => [ :version, :gemspec, :test ]
136
+
137
+ desc "Build all gems and archives for a new release."
138
+ task :release => [ :clean, :version, :gemspec, :cross, :native, :gem ] do
139
+ system "#$0 clean native gem"
140
+ system "#$0 clean package"
141
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.2.5
data/amatch.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'amatch'
3
+ s.version = '0.2.5'
4
+ s.summary = "Approximate String Matching library"
5
+ s.description = <<EOF
6
+ Amatch is a library for approximate string matching and searching in strings.
7
+ Several algorithms can be used to do this, and it's also possible to compute a
8
+ similarity metric number between 0.0 and 1.0 for two given strings.
9
+ EOF
10
+
11
+ s.files = ["CHANGES", "COPYING", "README", "Rakefile", "VERSION", "amatch.gemspec", "bin", "bin/agrep.rb", "ext", "ext/amatch.c", "ext/common.h", "ext/extconf.rb", "ext/pair.c", "ext/pair.h", "install.rb", "lib", "lib/amatch", "lib/amatch/version.rb", "tests", "tests/test_hamming.rb", "tests/test_jaro.rb", "tests/test_jaro_winkler.rb", "tests/test_levenshtein.rb", "tests/test_longest_subsequence.rb", "tests/test_longest_substring.rb", "tests/test_pair_distance.rb", "tests/test_sellers.rb"]
12
+
13
+ s.extensions << "ext/extconf.rb"
14
+
15
+ s.require_paths << 'ext' << 'lib'
16
+
17
+ s.bindir = "bin"
18
+ s.executables = ["agrep.rb"]
19
+ s.default_executable = "agrep.rb"
20
+
21
+ s.has_rdoc = true
22
+ s.extra_rdoc_files.concat ["README", "ext/amatch.c", "lib/amatch/version.rb"]
23
+ s.rdoc_options << '--main' << 'README' <<
24
+ '--title' << "amatch - Approximate Matching"
25
+ s.test_files.concat Dir['tests/test_*.rb']
26
+
27
+ s.author = "Florian Frank"
28
+ s.email = "flori@ping.de"
29
+ s.homepage = "http://amatch.rubyforge.org"
30
+ s.rubyforge_project = 'amatch'
31
+ end
data/lib/amatch.so ADDED
Binary file
@@ -1,6 +1,6 @@
1
1
  module Amatch
2
2
  # Amatch version
3
- VERSION = '0.2.4'
3
+ VERSION = '0.2.5'
4
4
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-26 00:00:00 +02:00
12
+ date: 2009-09-25 00:00:00 +02:00
13
13
  default_executable: agrep.rb
14
14
  dependencies: []
15
15
 
@@ -24,32 +24,33 @@ executables:
24
24
  extensions:
25
25
  - ext/extconf.rb
26
26
  extra_rdoc_files:
27
+ - README
27
28
  - ext/amatch.c
28
29
  - lib/amatch/version.rb
29
- - doc-main.txt
30
30
  files:
31
31
  - CHANGES
32
- - bin/agrep.rb
33
- - VERSION
32
+ - COPYING
34
33
  - README
34
+ - Rakefile
35
+ - VERSION
36
+ - amatch.gemspec
37
+ - bin/agrep.rb
38
+ - ext/amatch.c
35
39
  - ext/common.h
36
40
  - ext/extconf.rb
37
- - ext/amatch.c
38
- - ext/pair.h
39
41
  - ext/pair.c
40
- - Rakefile
42
+ - ext/pair.h
43
+ - install.rb
44
+ - lib/amatch.so
41
45
  - lib/amatch/version.rb
42
- - tests/test_longest_substring.rb
43
46
  - tests/test_hamming.rb
47
+ - tests/test_jaro.rb
48
+ - tests/test_jaro_winkler.rb
49
+ - tests/test_levenshtein.rb
44
50
  - tests/test_longest_subsequence.rb
51
+ - tests/test_longest_substring.rb
45
52
  - tests/test_pair_distance.rb
46
- - tests/test_levenshtein.rb
47
- - tests/test_jaro.rb
48
53
  - tests/test_sellers.rb
49
- - tests/test_jaro_winkler.rb
50
- - COPYING
51
- - install.rb
52
- - doc-main.txt
53
54
  has_rdoc: true
54
55
  homepage: http://amatch.rubyforge.org
55
56
  licenses: []
@@ -57,11 +58,13 @@ licenses: []
57
58
  post_install_message:
58
59
  rdoc_options:
59
60
  - --main
60
- - doc-main.txt
61
+ - README
61
62
  - --title
62
63
  - amatch - Approximate Matching
63
64
  require_paths:
65
+ - lib
64
66
  - ext
67
+ - lib
65
68
  required_ruby_version: !ruby/object:Gem::Requirement
66
69
  requirements:
67
70
  - - ">="
data/doc-main.txt DELETED
@@ -1,115 +0,0 @@
1
- == amatch - Approximate Matching Extension for Ruby
2
-
3
- === Description
4
-
5
- This is a collection of classes that can be used for Approximate
6
- matching, searching, and comparing of Strings. They implement algorithms
7
- that compute the Levenshtein edit distance, Sellers edit distance, the
8
- Hamming distance, the longest common subsequence length, the longest common
9
- substring length, the pair distance metric, the Jaro-Winkler metric.
10
-
11
- === Author
12
-
13
- Florian Frank mailto:flori@ping.de
14
-
15
- === License
16
-
17
- This is free software; you can redistribute it and/or modify it under
18
- the terms of the GNU General Public License Version 2 as published by
19
- the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
20
-
21
- === Download
22
-
23
- The latest version of <b>amatch</b> can be found at
24
-
25
- * http://rubyforge.org/frs/?group_id=390
26
-
27
- Online Documentation should be located at
28
-
29
- * http://amatch.rubyforge.org
30
-
31
- === Examples
32
- require 'amatch'
33
- # => true
34
- include Amatch
35
- # => Object
36
-
37
- m = Sellers.new("pattern")
38
- # => #<Amatch::Sellers:0x40366324>
39
- m.match("pattren")
40
- # => 2.0
41
- m.substitution = m.insertion = 3
42
- # => 3
43
- m.match("pattren")
44
- # => 4.0
45
- m.reset_weights
46
- # => #<Amatch::Sellers:0x40366324>
47
- m.match(["pattren","parent"])
48
- # => [2.0, 4.0]
49
- m.search("abcpattrendef")
50
- # => 2.0
51
-
52
- m = Levenshtein.new("pattern")
53
- # => #<Amatch::Levenshtein:0x4035919c>
54
- m.match("pattren")
55
- # => 2
56
- m.search("abcpattrendef")
57
- # => 2
58
- "pattern language".levenshtein_similar("language of patterns")
59
- # => 0.2
60
-
61
- m = Hamming.new("pattern")
62
- # => #<Amatch::Hamming:0x40350858>
63
- m.match("pattren")
64
- # => 2
65
- "pattern language".hamming_similar("language of patterns")
66
- # => 0.1
67
-
68
- m = PairDistance.new("pattern")
69
- # => #<Amatch::PairDistance:0x40349be8>
70
- m.match("pattr en")
71
- # => 0.545454545454545
72
- m.match("pattr en", nil)
73
- # => 0.461538461538462
74
- m.match("pattr en", /t+/)
75
- # => 0.285714285714286
76
- "pattern language".pair_distance_similar("language of patterns")
77
- # => 0.928571428571429
78
-
79
- m = LongestSubsequence.new("pattern")
80
- # => #<Amatch::LongestSubsequence:0x4033e900>
81
- m.match("pattren")
82
- # => 6
83
- "pattern language".longest_subsequence_similar("language of patterns")
84
- # => 0.4
85
-
86
- m = LongestSubstring.new("pattern")
87
- # => #<Amatch::LongestSubstring:0x403378d0>
88
- m.match("pattren")
89
- # => 4
90
- "pattern language".longest_substring_similar("language of patterns")
91
- # => 0.4
92
-
93
- m = Jaro.new("pattern")
94
- # => #<Amatch::Jaro:0x363b70>
95
- m.match("paTTren")
96
- # => 0.952380952380952
97
- m.ignore_case = false
98
- m.match("paTTren")
99
- # => 0.742857142857143
100
- "pattern language".jaro_similar("language of patterns")
101
- # => 0.672222222222222
102
-
103
- m = JaroWinkler.new("pattern")
104
- # #<Amatch::JaroWinkler:0x3530b8>
105
- m.match("paTTren")
106
- # => 0.971428571712403
107
- m.ignore_case = false
108
- m.match("paTTren")
109
- # => 0.79428571505206
110
- m.scaling_factor = 0.05
111
- m.match("pattren")
112
- # => 0.961904762046678
113
- "pattern language".jarowinkler_similar("language of patterns")
114
- # => 0.672222222222222
115
-