fuzzy_match 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +8 -8
  2. data/.rspec +2 -0
  3. data/CHANGELOG +14 -0
  4. data/Gemfile +8 -0
  5. data/README.markdown +58 -38
  6. data/Rakefile +0 -9
  7. data/bin/fuzzy_match +106 -0
  8. data/fuzzy_match.gemspec +4 -4
  9. data/groupings-screenshot.png +0 -0
  10. data/highlevel.graffle +0 -0
  11. data/highlevel.png +0 -0
  12. data/lib/fuzzy_match/record.rb +58 -0
  13. data/lib/fuzzy_match/result.rb +11 -8
  14. data/lib/fuzzy_match/rule/grouping.rb +70 -12
  15. data/lib/fuzzy_match/rule/identity.rb +3 -3
  16. data/lib/fuzzy_match/rule.rb +1 -1
  17. data/lib/fuzzy_match/score/amatch.rb +0 -4
  18. data/lib/fuzzy_match/score/pure_ruby.rb +2 -8
  19. data/lib/fuzzy_match/score.rb +4 -0
  20. data/lib/fuzzy_match/similarity.rb +10 -32
  21. data/lib/fuzzy_match/version.rb +1 -1
  22. data/lib/fuzzy_match.rb +78 -94
  23. data/{test/test_amatch.rb → spec/amatch_spec.rb} +1 -2
  24. data/{test/test_cache.rb → spec/cache_spec.rb} +7 -7
  25. data/spec/foo.rb +9 -0
  26. data/spec/fuzzy_match_spec.rb +354 -0
  27. data/spec/grouping_spec.rb +60 -0
  28. data/spec/identity_spec.rb +29 -0
  29. data/{test/test_wrapper.rb → spec/record_spec.rb} +3 -7
  30. data/spec/spec_helper.rb +21 -0
  31. metadata +56 -50
  32. data/bin/fuzzy_match_checker +0 -71
  33. data/examples/bts_aircraft/5-2-A.htm +0 -10305
  34. data/examples/bts_aircraft/5-2-B.htm +0 -9576
  35. data/examples/bts_aircraft/5-2-D.htm +0 -7094
  36. data/examples/bts_aircraft/5-2-E.htm +0 -2349
  37. data/examples/bts_aircraft/5-2-G.htm +0 -2922
  38. data/examples/bts_aircraft/groupings.csv +0 -1
  39. data/examples/bts_aircraft/identities.csv +0 -1
  40. data/examples/bts_aircraft/negatives.csv +0 -1
  41. data/examples/bts_aircraft/normalizers.csv +0 -1
  42. data/examples/bts_aircraft/number_260.csv +0 -334
  43. data/examples/bts_aircraft/positives.csv +0 -1
  44. data/examples/bts_aircraft/test_bts_aircraft.rb +0 -116
  45. data/examples/first_name_matching.rb +0 -15
  46. data/examples/icao-bts.xls +0 -0
  47. data/lib/fuzzy_match/rule/normalizer.rb +0 -20
  48. data/lib/fuzzy_match/rule/stop_word.rb +0 -11
  49. data/lib/fuzzy_match/wrapper.rb +0 -73
  50. data/test/helper.rb +0 -12
  51. data/test/test_fuzzy_match.rb +0 -304
  52. data/test/test_fuzzy_match_convoluted.rb.disabled +0 -268
  53. data/test/test_grouping.rb +0 -28
  54. data/test/test_identity.rb +0 -34
  55. data/test/test_normalizer.rb +0 -10
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-03 00:00:00.000000000 Z
11
+ date: 2013-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_record_inline_schema
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.4.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: minitest
28
+ name: pry
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ! '>='
@@ -39,21 +39,21 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: activerecord
42
+ name: rspec-core
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ! '>='
46
46
  - !ruby/object:Gem::Version
47
- version: '3'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ! '>='
53
53
  - !ruby/object:Gem::Version
54
- version: '3'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mysql2
56
+ name: rspec-expectations
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ! '>='
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: cohort_analysis
70
+ name: rspec-mocks
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ! '>='
@@ -81,7 +81,21 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: weighted_average
84
+ name: activerecord
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '3'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mysql2
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ! '>='
@@ -95,7 +109,7 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
- name: yard
112
+ name: cohort_analysis
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ! '>='
@@ -109,7 +123,7 @@ dependencies:
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
111
125
  - !ruby/object:Gem::Dependency
112
- name: amatch
126
+ name: weighted_average
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - ! '>='
@@ -123,7 +137,21 @@ dependencies:
123
137
  - !ruby/object:Gem::Version
124
138
  version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
- name: minitest-reporters
140
+ name: yard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ! '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: amatch
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
157
  - - ! '>='
@@ -141,11 +169,12 @@ description: Find a needle in a haystack using string similarity and (optionally
141
169
  email:
142
170
  - seamus@abshere.net
143
171
  executables:
144
- - fuzzy_match_checker
172
+ - fuzzy_match
145
173
  extensions: []
146
174
  extra_rdoc_files: []
147
175
  files:
148
176
  - .gitignore
177
+ - .rspec
149
178
  - CHANGELOG
150
179
  - Gemfile
151
180
  - LICENSE
@@ -156,45 +185,31 @@ files:
156
185
  - benchmark/before-without-last-result.txt
157
186
  - benchmark/before.txt
158
187
  - benchmark/memory.rb
159
- - bin/fuzzy_match_checker
160
- - examples/bts_aircraft/5-2-A.htm
161
- - examples/bts_aircraft/5-2-B.htm
162
- - examples/bts_aircraft/5-2-D.htm
163
- - examples/bts_aircraft/5-2-E.htm
164
- - examples/bts_aircraft/5-2-G.htm
165
- - examples/bts_aircraft/groupings.csv
166
- - examples/bts_aircraft/identities.csv
167
- - examples/bts_aircraft/negatives.csv
168
- - examples/bts_aircraft/normalizers.csv
169
- - examples/bts_aircraft/number_260.csv
170
- - examples/bts_aircraft/positives.csv
171
- - examples/bts_aircraft/test_bts_aircraft.rb
172
- - examples/first_name_matching.rb
173
- - examples/icao-bts.xls
188
+ - bin/fuzzy_match
174
189
  - fuzzy_match.gemspec
190
+ - groupings-screenshot.png
191
+ - highlevel.graffle
192
+ - highlevel.png
175
193
  - lib/fuzzy_match.rb
176
194
  - lib/fuzzy_match/cached_result.rb
195
+ - lib/fuzzy_match/record.rb
177
196
  - lib/fuzzy_match/result.rb
178
197
  - lib/fuzzy_match/rule.rb
179
198
  - lib/fuzzy_match/rule/grouping.rb
180
199
  - lib/fuzzy_match/rule/identity.rb
181
- - lib/fuzzy_match/rule/normalizer.rb
182
- - lib/fuzzy_match/rule/stop_word.rb
183
200
  - lib/fuzzy_match/score.rb
184
201
  - lib/fuzzy_match/score/amatch.rb
185
202
  - lib/fuzzy_match/score/pure_ruby.rb
186
203
  - lib/fuzzy_match/similarity.rb
187
204
  - lib/fuzzy_match/version.rb
188
- - lib/fuzzy_match/wrapper.rb
189
- - test/helper.rb
190
- - test/test_amatch.rb
191
- - test/test_cache.rb
192
- - test/test_fuzzy_match.rb
193
- - test/test_fuzzy_match_convoluted.rb.disabled
194
- - test/test_grouping.rb
195
- - test/test_identity.rb
196
- - test/test_normalizer.rb
197
- - test/test_wrapper.rb
205
+ - spec/amatch_spec.rb
206
+ - spec/cache_spec.rb
207
+ - spec/foo.rb
208
+ - spec/fuzzy_match_spec.rb
209
+ - spec/grouping_spec.rb
210
+ - spec/identity_spec.rb
211
+ - spec/record_spec.rb
212
+ - spec/spec_helper.rb
198
213
  homepage: https://github.com/seamusabshere/fuzzy_match
199
214
  licenses: []
200
215
  metadata: {}
@@ -219,14 +234,5 @@ signing_key:
219
234
  specification_version: 4
220
235
  summary: Find a needle in a haystack using string similarity and (optionally) regexp
221
236
  rules. Replaces loose_tight_dictionary.
222
- test_files:
223
- - test/helper.rb
224
- - test/test_amatch.rb
225
- - test/test_cache.rb
226
- - test/test_fuzzy_match.rb
227
- - test/test_fuzzy_match_convoluted.rb.disabled
228
- - test/test_grouping.rb
229
- - test/test_identity.rb
230
- - test/test_normalizer.rb
231
- - test/test_wrapper.rb
237
+ test_files: []
232
238
  has_rdoc:
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- if File.exist?(File.join(Dir.pwd, 'lib', 'fuzzy_match.rb'))
4
- $LOAD_PATH.unshift File.join(Dir.pwd, 'lib')
5
- require File.join(Dir.pwd, 'lib', 'fuzzy_match')
6
- else
7
- require 'fuzzy_match'
8
- end
9
- require 'fuzzy_match/version'
10
-
11
- # note: not included in gemfile but neither is bundler used here
12
- require 'active_support/core_ext'
13
- require 'remote_table'
14
- require 'thor'
15
-
16
- class FuzzyMatch
17
- class Checker < ::Thor
18
- # for example: https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdHZURUI2S2xOa3ZFVzlZb205VVhpQnc&single=true&gid=0&output=csv
19
- desc :check, "Check a spreadsheet containing columns with these headers: haystack, needles, correct_matches, groupings, stop_words, identities, normalizers, find_options (listing an option like must_match_grouping makes it true)"
20
- method_option :show_success, :default => false, :type => :boolean, :desc => "Whether to print successful matches as you go"
21
- method_option :downcase, :default => false, :type => :boolean, :desc => "Whether to downcase everything (except regexes, where you have to do /foo/i)"
22
- def check(url)
23
- puts "Checking matches using fuzzy_match version #{FuzzyMatch::VERSION}..."
24
-
25
- t = RemoteTable.new(url, :headers => :first_row)
26
- if (violators = %w{needle grouping correct_match stop_word identity normalizer find_option} & t.rows.first.keys).any?
27
- raise ArgumentError, "Make sure you pluralize your right row headers (violators: #{violators.map(&:inspect).join(', ')}"
28
- end
29
- haystack = t.rows.map { |row| row['haystack'] }.select(&:present?)
30
- haystack.map!(&:downcase) if options.downcase
31
- find_options = t.rows.map { |row| row['find_options'] }
32
- fm = FuzzyMatch.new(
33
- haystack,
34
- :groupings => t.rows.map { |row| row['groupings'] }.select(&:present?),
35
- :identities => t.rows.map { |row| row['identities'] }.select(&:present?),
36
- :stop_words => t.rows.map { |row| row['stop_words'] }.select(&:present?),
37
- :normalizers => t.rows.map { |row| row['normalizers'] }.select(&:present?),
38
- :must_match_grouping => find_options.include?('must_match_grouping'),
39
- :must_match_at_least_one_word => find_options.include?('must_match_at_least_one_word'),
40
- :first_grouping_decides => find_options.include?('first_grouping_decides')
41
- )
42
-
43
- count = 0
44
- t.each do |row|
45
- needle = row['needles']
46
- correct_match = row['correct_matches']
47
- next unless needle.present?
48
- if options.downcase
49
- needle.to_s.downcase!
50
- correct_match.to_s.downcase!
51
- end
52
- correct_match = nil if correct_match.blank?
53
- match = fm.find needle
54
- if options.show_success? or match != correct_match
55
- puts " #{needle.inspect} => #{match.inspect}"
56
- end
57
- unless match == correct_match
58
- puts "MISMATCH: #{needle.inspect} should match #{correct_match.inspect}"
59
- puts fm.explain needle
60
- exit 1
61
- end
62
- count += 1
63
- end
64
-
65
- puts "Correctly matched #{count} needles."
66
- exit 0
67
- end
68
- end
69
- end
70
-
71
- FuzzyMatch::Checker.start