fuzzy_match 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +8 -8
  2. data/.rspec +2 -0
  3. data/CHANGELOG +14 -0
  4. data/Gemfile +8 -0
  5. data/README.markdown +58 -38
  6. data/Rakefile +0 -9
  7. data/bin/fuzzy_match +106 -0
  8. data/fuzzy_match.gemspec +4 -4
  9. data/groupings-screenshot.png +0 -0
  10. data/highlevel.graffle +0 -0
  11. data/highlevel.png +0 -0
  12. data/lib/fuzzy_match/record.rb +58 -0
  13. data/lib/fuzzy_match/result.rb +11 -8
  14. data/lib/fuzzy_match/rule/grouping.rb +70 -12
  15. data/lib/fuzzy_match/rule/identity.rb +3 -3
  16. data/lib/fuzzy_match/rule.rb +1 -1
  17. data/lib/fuzzy_match/score/amatch.rb +0 -4
  18. data/lib/fuzzy_match/score/pure_ruby.rb +2 -8
  19. data/lib/fuzzy_match/score.rb +4 -0
  20. data/lib/fuzzy_match/similarity.rb +10 -32
  21. data/lib/fuzzy_match/version.rb +1 -1
  22. data/lib/fuzzy_match.rb +78 -94
  23. data/{test/test_amatch.rb → spec/amatch_spec.rb} +1 -2
  24. data/{test/test_cache.rb → spec/cache_spec.rb} +7 -7
  25. data/spec/foo.rb +9 -0
  26. data/spec/fuzzy_match_spec.rb +354 -0
  27. data/spec/grouping_spec.rb +60 -0
  28. data/spec/identity_spec.rb +29 -0
  29. data/{test/test_wrapper.rb → spec/record_spec.rb} +3 -7
  30. data/spec/spec_helper.rb +21 -0
  31. metadata +56 -50
  32. data/bin/fuzzy_match_checker +0 -71
  33. data/examples/bts_aircraft/5-2-A.htm +0 -10305
  34. data/examples/bts_aircraft/5-2-B.htm +0 -9576
  35. data/examples/bts_aircraft/5-2-D.htm +0 -7094
  36. data/examples/bts_aircraft/5-2-E.htm +0 -2349
  37. data/examples/bts_aircraft/5-2-G.htm +0 -2922
  38. data/examples/bts_aircraft/groupings.csv +0 -1
  39. data/examples/bts_aircraft/identities.csv +0 -1
  40. data/examples/bts_aircraft/negatives.csv +0 -1
  41. data/examples/bts_aircraft/normalizers.csv +0 -1
  42. data/examples/bts_aircraft/number_260.csv +0 -334
  43. data/examples/bts_aircraft/positives.csv +0 -1
  44. data/examples/bts_aircraft/test_bts_aircraft.rb +0 -116
  45. data/examples/first_name_matching.rb +0 -15
  46. data/examples/icao-bts.xls +0 -0
  47. data/lib/fuzzy_match/rule/normalizer.rb +0 -20
  48. data/lib/fuzzy_match/rule/stop_word.rb +0 -11
  49. data/lib/fuzzy_match/wrapper.rb +0 -73
  50. data/test/helper.rb +0 -12
  51. data/test/test_fuzzy_match.rb +0 -304
  52. data/test/test_fuzzy_match_convoluted.rb.disabled +0 -268
  53. data/test/test_grouping.rb +0 -28
  54. data/test/test_identity.rb +0 -34
  55. data/test/test_normalizer.rb +0 -10
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-03 00:00:00.000000000 Z
11
+ date: 2013-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_record_inline_schema
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.4.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: minitest
28
+ name: pry
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ! '>='
@@ -39,21 +39,21 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: activerecord
42
+ name: rspec-core
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ! '>='
46
46
  - !ruby/object:Gem::Version
47
- version: '3'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ! '>='
53
53
  - !ruby/object:Gem::Version
54
- version: '3'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mysql2
56
+ name: rspec-expectations
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ! '>='
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: cohort_analysis
70
+ name: rspec-mocks
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ! '>='
@@ -81,7 +81,21 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: weighted_average
84
+ name: activerecord
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '3'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mysql2
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ! '>='
@@ -95,7 +109,7 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
- name: yard
112
+ name: cohort_analysis
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ! '>='
@@ -109,7 +123,7 @@ dependencies:
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
111
125
  - !ruby/object:Gem::Dependency
112
- name: amatch
126
+ name: weighted_average
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - ! '>='
@@ -123,7 +137,21 @@ dependencies:
123
137
  - !ruby/object:Gem::Version
124
138
  version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
- name: minitest-reporters
140
+ name: yard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ! '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: amatch
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
157
  - - ! '>='
@@ -141,11 +169,12 @@ description: Find a needle in a haystack using string similarity and (optionally
141
169
  email:
142
170
  - seamus@abshere.net
143
171
  executables:
144
- - fuzzy_match_checker
172
+ - fuzzy_match
145
173
  extensions: []
146
174
  extra_rdoc_files: []
147
175
  files:
148
176
  - .gitignore
177
+ - .rspec
149
178
  - CHANGELOG
150
179
  - Gemfile
151
180
  - LICENSE
@@ -156,45 +185,31 @@ files:
156
185
  - benchmark/before-without-last-result.txt
157
186
  - benchmark/before.txt
158
187
  - benchmark/memory.rb
159
- - bin/fuzzy_match_checker
160
- - examples/bts_aircraft/5-2-A.htm
161
- - examples/bts_aircraft/5-2-B.htm
162
- - examples/bts_aircraft/5-2-D.htm
163
- - examples/bts_aircraft/5-2-E.htm
164
- - examples/bts_aircraft/5-2-G.htm
165
- - examples/bts_aircraft/groupings.csv
166
- - examples/bts_aircraft/identities.csv
167
- - examples/bts_aircraft/negatives.csv
168
- - examples/bts_aircraft/normalizers.csv
169
- - examples/bts_aircraft/number_260.csv
170
- - examples/bts_aircraft/positives.csv
171
- - examples/bts_aircraft/test_bts_aircraft.rb
172
- - examples/first_name_matching.rb
173
- - examples/icao-bts.xls
188
+ - bin/fuzzy_match
174
189
  - fuzzy_match.gemspec
190
+ - groupings-screenshot.png
191
+ - highlevel.graffle
192
+ - highlevel.png
175
193
  - lib/fuzzy_match.rb
176
194
  - lib/fuzzy_match/cached_result.rb
195
+ - lib/fuzzy_match/record.rb
177
196
  - lib/fuzzy_match/result.rb
178
197
  - lib/fuzzy_match/rule.rb
179
198
  - lib/fuzzy_match/rule/grouping.rb
180
199
  - lib/fuzzy_match/rule/identity.rb
181
- - lib/fuzzy_match/rule/normalizer.rb
182
- - lib/fuzzy_match/rule/stop_word.rb
183
200
  - lib/fuzzy_match/score.rb
184
201
  - lib/fuzzy_match/score/amatch.rb
185
202
  - lib/fuzzy_match/score/pure_ruby.rb
186
203
  - lib/fuzzy_match/similarity.rb
187
204
  - lib/fuzzy_match/version.rb
188
- - lib/fuzzy_match/wrapper.rb
189
- - test/helper.rb
190
- - test/test_amatch.rb
191
- - test/test_cache.rb
192
- - test/test_fuzzy_match.rb
193
- - test/test_fuzzy_match_convoluted.rb.disabled
194
- - test/test_grouping.rb
195
- - test/test_identity.rb
196
- - test/test_normalizer.rb
197
- - test/test_wrapper.rb
205
+ - spec/amatch_spec.rb
206
+ - spec/cache_spec.rb
207
+ - spec/foo.rb
208
+ - spec/fuzzy_match_spec.rb
209
+ - spec/grouping_spec.rb
210
+ - spec/identity_spec.rb
211
+ - spec/record_spec.rb
212
+ - spec/spec_helper.rb
198
213
  homepage: https://github.com/seamusabshere/fuzzy_match
199
214
  licenses: []
200
215
  metadata: {}
@@ -219,14 +234,5 @@ signing_key:
219
234
  specification_version: 4
220
235
  summary: Find a needle in a haystack using string similarity and (optionally) regexp
221
236
  rules. Replaces loose_tight_dictionary.
222
- test_files:
223
- - test/helper.rb
224
- - test/test_amatch.rb
225
- - test/test_cache.rb
226
- - test/test_fuzzy_match.rb
227
- - test/test_fuzzy_match_convoluted.rb.disabled
228
- - test/test_grouping.rb
229
- - test/test_identity.rb
230
- - test/test_normalizer.rb
231
- - test/test_wrapper.rb
237
+ test_files: []
232
238
  has_rdoc:
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- if File.exist?(File.join(Dir.pwd, 'lib', 'fuzzy_match.rb'))
4
- $LOAD_PATH.unshift File.join(Dir.pwd, 'lib')
5
- require File.join(Dir.pwd, 'lib', 'fuzzy_match')
6
- else
7
- require 'fuzzy_match'
8
- end
9
- require 'fuzzy_match/version'
10
-
11
- # note: not included in gemfile but neither is bundler used here
12
- require 'active_support/core_ext'
13
- require 'remote_table'
14
- require 'thor'
15
-
16
- class FuzzyMatch
17
- class Checker < ::Thor
18
- # for example: https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdHZURUI2S2xOa3ZFVzlZb205VVhpQnc&single=true&gid=0&output=csv
19
- desc :check, "Check a spreadsheet containing columns with these headers: haystack, needles, correct_matches, groupings, stop_words, identities, normalizers, find_options (listing an option like must_match_grouping makes it true)"
20
- method_option :show_success, :default => false, :type => :boolean, :desc => "Whether to print successful matches as you go"
21
- method_option :downcase, :default => false, :type => :boolean, :desc => "Whether to downcase everything (except regexes, where you have to do /foo/i)"
22
- def check(url)
23
- puts "Checking matches using fuzzy_match version #{FuzzyMatch::VERSION}..."
24
-
25
- t = RemoteTable.new(url, :headers => :first_row)
26
- if (violators = %w{needle grouping correct_match stop_word identity normalizer find_option} & t.rows.first.keys).any?
27
- raise ArgumentError, "Make sure you pluralize your right row headers (violators: #{violators.map(&:inspect).join(', ')}"
28
- end
29
- haystack = t.rows.map { |row| row['haystack'] }.select(&:present?)
30
- haystack.map!(&:downcase) if options.downcase
31
- find_options = t.rows.map { |row| row['find_options'] }
32
- fm = FuzzyMatch.new(
33
- haystack,
34
- :groupings => t.rows.map { |row| row['groupings'] }.select(&:present?),
35
- :identities => t.rows.map { |row| row['identities'] }.select(&:present?),
36
- :stop_words => t.rows.map { |row| row['stop_words'] }.select(&:present?),
37
- :normalizers => t.rows.map { |row| row['normalizers'] }.select(&:present?),
38
- :must_match_grouping => find_options.include?('must_match_grouping'),
39
- :must_match_at_least_one_word => find_options.include?('must_match_at_least_one_word'),
40
- :first_grouping_decides => find_options.include?('first_grouping_decides')
41
- )
42
-
43
- count = 0
44
- t.each do |row|
45
- needle = row['needles']
46
- correct_match = row['correct_matches']
47
- next unless needle.present?
48
- if options.downcase
49
- needle.to_s.downcase!
50
- correct_match.to_s.downcase!
51
- end
52
- correct_match = nil if correct_match.blank?
53
- match = fm.find needle
54
- if options.show_success? or match != correct_match
55
- puts " #{needle.inspect} => #{match.inspect}"
56
- end
57
- unless match == correct_match
58
- puts "MISMATCH: #{needle.inspect} should match #{correct_match.inspect}"
59
- puts fm.explain needle
60
- exit 1
61
- end
62
- count += 1
63
- end
64
-
65
- puts "Correctly matched #{count} needles."
66
- exit 0
67
- end
68
- end
69
- end
70
-
71
- FuzzyMatch::Checker.start