fuzzy_match 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,7 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
42
42
  ]
43
43
  @tightenings = []
44
44
  @identities = []
45
- @blockings = []
45
+ @groupings = []
46
46
  @positives = []
47
47
  @negatives = []
48
48
  end
@@ -55,23 +55,23 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
55
55
  @_ltd ||= FuzzyMatch.new @haystack,
56
56
  :tightenings => @tightenings,
57
57
  :identities => @identities,
58
- :blockings => @blockings,
58
+ :groupings => @groupings,
59
59
  :positives => @positives,
60
60
  :negatives => @negatives,
61
- :blocking_only => @blocking_only,
61
+ :grouping_only => @grouping_only,
62
62
  :log => $log
63
63
  end
64
64
 
65
- should "optionally only pay attention to things that match blockings" do
65
+ should "optionally only pay attention to things that match groupings" do
66
66
  assert_equal @a_haystack, ltd.improver.match(@a_needle)
67
67
 
68
68
  clear_ltd
69
- @blocking_only = true
69
+ @grouping_only = true
70
70
  assert_equal nil, ltd.improver.match(@a_needle)
71
71
 
72
72
  clear_ltd
73
- @blocking_only = true
74
- @blockings.push ['/dash/i']
73
+ @grouping_only = true
74
+ @groupings.push ['/dash/i']
75
75
  assert_equal @a_haystack, ltd.improver.match(@a_needle)
76
76
  end
77
77
 
@@ -111,7 +111,7 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
111
111
  end
112
112
  end
113
113
 
114
- should "have a false match without blocking" do
114
+ should "have a false match without grouping" do
115
115
  # @d_needle will be our victim
116
116
  @haystack.push @d_lookalike
117
117
  @tightenings.push @t_1
@@ -119,19 +119,19 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
119
119
  assert_equal @d_lookalike, ltd.improver.match(@d_needle)
120
120
  end
121
121
 
122
- should "do blocking if the needle matches a block" do
122
+ should "do grouping if the needle matches a group" do
123
123
  # @d_needle will be our victim
124
124
  @haystack.push @d_lookalike
125
125
  @tightenings.push @t_1
126
- @blockings.push ['/(bombardier|de ?havilland)/i']
126
+ @groupings.push ['/(bombardier|de ?havilland)/i']
127
127
 
128
128
  assert_equal @d_haystack, ltd.improver.match(@d_needle)
129
129
  end
130
130
 
131
- should "treat blocks as exclusive" do
131
+ should "treat groups as exclusive" do
132
132
  @haystack = [ @d_needle ]
133
133
  @tightenings.push @t_1
134
- @blockings.push ['/(bombardier|de ?havilland)/i']
134
+ @groupings.push ['/(bombardier|de ?havilland)/i']
135
135
 
136
136
  assert_equal nil, ltd.improver.match(@d_lookalike)
137
137
  end
@@ -1,28 +1,28 @@
1
1
  require 'helper'
2
2
 
3
- class TestBlocking < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Grouping do
4
4
  it %{matches a single string argument} do
5
- b = FuzzyMatch::Blocking.new %r{apple}
5
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
6
6
  b.match?('2 apples').must_equal true
7
7
  end
8
8
 
9
9
  it %{embraces case insensitivity} do
10
- b = FuzzyMatch::Blocking.new %r{apple}i
10
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}i
11
11
  b.match?('2 Apples').must_equal true
12
12
  end
13
13
 
14
14
  it %{joins two string arguments} do
15
- b = FuzzyMatch::Blocking.new %r{apple}
15
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
16
16
  b.join?('apple', '2 apples').must_equal true
17
17
  end
18
18
 
19
19
  it %{fails to join two string arguments} do
20
- b = FuzzyMatch::Blocking.new %r{apple}
20
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
21
21
  b.join?('orange', '2 apples').must_equal false
22
22
  end
23
23
 
24
24
  it %{returns nil instead of false when it has no information} do
25
- b = FuzzyMatch::Blocking.new %r{apple}
25
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
26
26
  b.join?('orange', 'orange').must_be_nil
27
27
  end
28
28
  end
@@ -1,36 +1,36 @@
1
1
  require 'helper'
2
2
 
3
- class TestIdentity < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Identity do
4
4
  it %{determines whether two records COULD be identical} do
5
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
5
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
6
6
  i.identical?('A1', 'A 1foobar').must_equal true
7
7
  end
8
8
 
9
9
  it %{determines that two records MUST NOT be identical} do
10
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
10
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
11
11
  i.identical?('A1', 'A 2foobar').must_equal false
12
12
  end
13
13
 
14
14
  it %{returns nil indicating no information} do
15
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
15
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
16
16
  i.identical?('B1', 'A 2foobar').must_equal nil
17
17
  end
18
18
 
19
19
  it %{can be initialized with a regexp} do
20
- i = FuzzyMatch::Identity.new %r{\A\\?/(.*)etc/mysql\$$}
20
+ i = FuzzyMatch::Rule::Identity.new %r{\A\\?/(.*)etc/mysql\$$}
21
21
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
22
22
  end
23
23
 
24
24
  it %{can be initialized from a string (via to_regexp gem)} do
25
- i = FuzzyMatch::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
25
+ i = FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
26
26
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
27
27
 
28
- i = FuzzyMatch::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
28
+ i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
29
29
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
30
30
  end
31
31
 
32
32
  it %{embraces case insensitivity} do
33
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}i
33
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}i
34
34
  i.identical?('A1', 'a 1foobar').must_equal true
35
35
  end
36
36
  end
@@ -1,8 +1,8 @@
1
1
  require 'helper'
2
2
 
3
- class TestNormalizer < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Normalizer do
4
4
  it %{applies itself to a string argument} do
5
- t = FuzzyMatch::Normalizer.new %r{(Ford )[ ]*(F)[\- ]*(\d\d\d)}i
5
+ t = FuzzyMatch::Rule::Normalizer.new %r{(Ford )[ ]*(F)[\- ]*(\d\d\d)}i
6
6
  t.apply('Ford F-350').must_equal 'Ford F350'
7
7
  t.apply('Ford F150').must_equal 'Ford F150'
8
8
  t.apply('Ford F 350').must_equal 'Ford F350'
data/test/test_wrapper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class TestWrapper < MiniTest::Spec
3
+ describe FuzzyMatch::Wrapper do
4
4
  it %{does not treat "'s" as a word} do
5
5
  assert_split ["foo's", "bar"], "Foo's Bar"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,99 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-14 00:00:00.000000000 Z
12
+ date: 2012-02-24 00:00:00.000000000 Z
13
13
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: minitest
16
- requirement: &2165315840 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
23
- prerelease: false
24
- version_requirements: *2165315840
25
- - !ruby/object:Gem::Dependency
26
- name: activerecord
27
- requirement: &2165314740 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '3'
33
- type: :development
34
- prerelease: false
35
- version_requirements: *2165314740
36
- - !ruby/object:Gem::Dependency
37
- name: mysql2
38
- requirement: &2165314140 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
43
- version: '0'
44
- type: :development
45
- prerelease: false
46
- version_requirements: *2165314140
47
- - !ruby/object:Gem::Dependency
48
- name: cohort_scope
49
- requirement: &2165313620 !ruby/object:Gem::Requirement
50
- none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- type: :development
56
- prerelease: false
57
- version_requirements: *2165313620
58
- - !ruby/object:Gem::Dependency
59
- name: weighted_average
60
- requirement: &2165312980 !ruby/object:Gem::Requirement
61
- none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
65
- version: '0'
66
- type: :development
67
- prerelease: false
68
- version_requirements: *2165312980
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
- requirement: &2165312300 !ruby/object:Gem::Requirement
72
- none: false
73
- requirements:
74
- - - ! '>='
75
- - !ruby/object:Gem::Version
76
- version: '0'
77
- type: :development
78
- prerelease: false
79
- version_requirements: *2165312300
80
- - !ruby/object:Gem::Dependency
81
- name: yard
82
- requirement: &2165311320 !ruby/object:Gem::Requirement
83
- none: false
84
- requirements:
85
- - - ! '>='
86
- - !ruby/object:Gem::Version
87
- version: '0'
88
- type: :development
89
- prerelease: false
90
- version_requirements: *2165311320
91
- - !ruby/object:Gem::Dependency
92
- name: amatch
93
- requirement: &2165310720 !ruby/object:Gem::Requirement
94
- none: false
95
- requirements:
96
- - - ! '>='
97
- - !ruby/object:Gem::Version
98
- version: '0'
99
- type: :development
100
- prerelease: false
101
- version_requirements: *2165310720
102
14
  - !ruby/object:Gem::Dependency
103
15
  name: activesupport
104
- requirement: &2165310000 !ruby/object:Gem::Requirement
16
+ requirement: &2151876580 !ruby/object:Gem::Requirement
105
17
  none: false
106
18
  requirements:
107
19
  - - ! '>='
@@ -109,10 +21,10 @@ dependencies:
109
21
  version: '3'
110
22
  type: :runtime
111
23
  prerelease: false
112
- version_requirements: *2165310000
24
+ version_requirements: *2151876580
113
25
  - !ruby/object:Gem::Dependency
114
26
  name: to_regexp
115
- requirement: &2165309100 !ruby/object:Gem::Requirement
27
+ requirement: &2151871760 !ruby/object:Gem::Requirement
116
28
  none: false
117
29
  requirements:
118
30
  - - ! '>='
@@ -120,7 +32,7 @@ dependencies:
120
32
  version: 0.0.3
121
33
  type: :runtime
122
34
  prerelease: false
123
- version_requirements: *2165309100
35
+ version_requirements: *2151871760
124
36
  description: Find a needle in a haystack using string similarity and (optionally)
125
37
  regexp rules. Replaces loose_tight_dictionary.
126
38
  email:
@@ -132,6 +44,7 @@ files:
132
44
  - .document
133
45
  - .gitignore
134
46
  - Gemfile
47
+ - History.txt
135
48
  - LICENSE
136
49
  - README.markdown
137
50
  - Rakefile
@@ -145,7 +58,7 @@ files:
145
58
  - examples/bts_aircraft/5-2-D.htm
146
59
  - examples/bts_aircraft/5-2-E.htm
147
60
  - examples/bts_aircraft/5-2-G.htm
148
- - examples/bts_aircraft/blockings.csv
61
+ - examples/bts_aircraft/groupings.csv
149
62
  - examples/bts_aircraft/identities.csv
150
63
  - examples/bts_aircraft/negatives.csv
151
64
  - examples/bts_aircraft/normalizers.csv
@@ -156,24 +69,25 @@ files:
156
69
  - examples/icao-bts.xls
157
70
  - fuzzy_match.gemspec
158
71
  - lib/fuzzy_match.rb
159
- - lib/fuzzy_match/blocking.rb
160
72
  - lib/fuzzy_match/cached_result.rb
161
- - lib/fuzzy_match/identity.rb
162
- - lib/fuzzy_match/normalizer.rb
163
73
  - lib/fuzzy_match/result.rb
74
+ - lib/fuzzy_match/rule.rb
75
+ - lib/fuzzy_match/rule/grouping.rb
76
+ - lib/fuzzy_match/rule/identity.rb
77
+ - lib/fuzzy_match/rule/normalizer.rb
78
+ - lib/fuzzy_match/rule/stop_word.rb
164
79
  - lib/fuzzy_match/score.rb
165
80
  - lib/fuzzy_match/score/amatch.rb
166
81
  - lib/fuzzy_match/score/pure_ruby.rb
167
82
  - lib/fuzzy_match/similarity.rb
168
- - lib/fuzzy_match/stop_word.rb
169
83
  - lib/fuzzy_match/version.rb
170
84
  - lib/fuzzy_match/wrapper.rb
171
85
  - test/helper.rb
172
86
  - test/test_amatch.rb
173
- - test/test_blocking.rb
174
87
  - test/test_cache.rb
175
88
  - test/test_fuzzy_match.rb
176
89
  - test/test_fuzzy_match_convoluted.rb.disabled
90
+ - test/test_grouping.rb
177
91
  - test/test_identity.rb
178
92
  - test/test_normalizer.rb
179
93
  - test/test_wrapper.rb
@@ -205,10 +119,10 @@ summary: Find a needle in a haystack using string similarity and (optionally) re
205
119
  test_files:
206
120
  - test/helper.rb
207
121
  - test/test_amatch.rb
208
- - test/test_blocking.rb
209
122
  - test/test_cache.rb
210
123
  - test/test_fuzzy_match.rb
211
124
  - test/test_fuzzy_match_convoluted.rb.disabled
125
+ - test/test_grouping.rb
212
126
  - test/test_identity.rb
213
127
  - test/test_normalizer.rb
214
128
  - test/test_wrapper.rb
@@ -1,36 +0,0 @@
1
- class FuzzyMatch
2
- # "Record linkage typically involves two main steps: blocking and scoring..."
3
- # http://en.wikipedia.org/wiki/Record_linkage
4
- #
5
- # Blockings effectively divide up the haystack into groups that match a pattern
6
- #
7
- # A blocking (as in a grouping) comes into effect when a str matches.
8
- # Then the needle must also match the blocking's regexp.
9
- class Blocking
10
- attr_reader :regexp
11
-
12
- def initialize(regexp_or_str)
13
- @regexp = regexp_or_str.to_regexp
14
- end
15
-
16
- def match?(str)
17
- !!(regexp.match(str))
18
- end
19
-
20
- # If a blocking "joins" two strings, that means they both fit into it.
21
- #
22
- # Returns false if they certainly don't fit this blocking.
23
- # Returns nil if the blocking doesn't apply, i.e. str2 doesn't fit the blocking.
24
- def join?(str1, str2)
25
- if str2_match_data = regexp.match(str2)
26
- if str1_match_data = regexp.match(str1)
27
- str2_match_data.captures.join.downcase == str1_match_data.captures.join.downcase
28
- else
29
- false
30
- end
31
- else
32
- nil
33
- end
34
- end
35
- end
36
- end
@@ -1,23 +0,0 @@
1
- class FuzzyMatch
2
- # Identities take effect when needle and haystack both match a regexp
3
- # Then the captured part of the regexp has to match exactly
4
- class Identity
5
- attr_reader :regexp
6
-
7
- def initialize(regexp_or_str)
8
- @regexp = regexp_or_str.to_regexp
9
- end
10
-
11
- # Two strings are "identical" if they both match this identity and the captures are equal.
12
- #
13
- # Only returns true/false if both strings match the regexp.
14
- # Otherwise returns nil.
15
- def identical?(str1, str2)
16
- if str1_match_data = regexp.match(str1) and match_data = regexp.match(str2)
17
- str1_match_data.captures.join.downcase == match_data.captures.join.downcase
18
- else
19
- nil
20
- end
21
- end
22
- end
23
- end
@@ -1,28 +0,0 @@
1
- class FuzzyMatch
2
- # A normalizer just strips a string down to its core
3
- class Normalizer
4
- attr_reader :regexp
5
-
6
- def initialize(regexp_or_str)
7
- @regexp = regexp_or_str.to_regexp
8
- end
9
-
10
- # A normalizer applies when its regexp matches and captures a new (shorter) string
11
- def apply?(str)
12
- !!(regexp.match(str))
13
- end
14
-
15
- # The result of applying a normalizer is just all the captures put together.
16
- def apply(str)
17
- if match_data = regexp.match(str)
18
- match_data.captures.join
19
- else
20
- str
21
- end
22
- end
23
-
24
- def inspect
25
- "#<FuzzyMatch::Normalizer regexp=#{regexp.inspect}>"
26
- end
27
- end
28
- end