fuzzy_match 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -42,7 +42,7 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
42
42
  ]
43
43
  @tightenings = []
44
44
  @identities = []
45
- @blockings = []
45
+ @groupings = []
46
46
  @positives = []
47
47
  @negatives = []
48
48
  end
@@ -55,23 +55,23 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
55
55
  @_ltd ||= FuzzyMatch.new @haystack,
56
56
  :tightenings => @tightenings,
57
57
  :identities => @identities,
58
- :blockings => @blockings,
58
+ :groupings => @groupings,
59
59
  :positives => @positives,
60
60
  :negatives => @negatives,
61
- :blocking_only => @blocking_only,
61
+ :grouping_only => @grouping_only,
62
62
  :log => $log
63
63
  end
64
64
 
65
- should "optionally only pay attention to things that match blockings" do
65
+ should "optionally only pay attention to things that match groupings" do
66
66
  assert_equal @a_haystack, ltd.improver.match(@a_needle)
67
67
 
68
68
  clear_ltd
69
- @blocking_only = true
69
+ @grouping_only = true
70
70
  assert_equal nil, ltd.improver.match(@a_needle)
71
71
 
72
72
  clear_ltd
73
- @blocking_only = true
74
- @blockings.push ['/dash/i']
73
+ @grouping_only = true
74
+ @groupings.push ['/dash/i']
75
75
  assert_equal @a_haystack, ltd.improver.match(@a_needle)
76
76
  end
77
77
 
@@ -111,7 +111,7 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
111
111
  end
112
112
  end
113
113
 
114
- should "have a false match without blocking" do
114
+ should "have a false match without grouping" do
115
115
  # @d_needle will be our victim
116
116
  @haystack.push @d_lookalike
117
117
  @tightenings.push @t_1
@@ -119,19 +119,19 @@ class TestFuzzyMatchConvoluted < MiniTest::Spec
119
119
  assert_equal @d_lookalike, ltd.improver.match(@d_needle)
120
120
  end
121
121
 
122
- should "do blocking if the needle matches a block" do
122
+ should "do grouping if the needle matches a group" do
123
123
  # @d_needle will be our victim
124
124
  @haystack.push @d_lookalike
125
125
  @tightenings.push @t_1
126
- @blockings.push ['/(bombardier|de ?havilland)/i']
126
+ @groupings.push ['/(bombardier|de ?havilland)/i']
127
127
 
128
128
  assert_equal @d_haystack, ltd.improver.match(@d_needle)
129
129
  end
130
130
 
131
- should "treat blocks as exclusive" do
131
+ should "treat groups as exclusive" do
132
132
  @haystack = [ @d_needle ]
133
133
  @tightenings.push @t_1
134
- @blockings.push ['/(bombardier|de ?havilland)/i']
134
+ @groupings.push ['/(bombardier|de ?havilland)/i']
135
135
 
136
136
  assert_equal nil, ltd.improver.match(@d_lookalike)
137
137
  end
@@ -1,28 +1,28 @@
1
1
  require 'helper'
2
2
 
3
- class TestBlocking < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Grouping do
4
4
  it %{matches a single string argument} do
5
- b = FuzzyMatch::Blocking.new %r{apple}
5
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
6
6
  b.match?('2 apples').must_equal true
7
7
  end
8
8
 
9
9
  it %{embraces case insensitivity} do
10
- b = FuzzyMatch::Blocking.new %r{apple}i
10
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}i
11
11
  b.match?('2 Apples').must_equal true
12
12
  end
13
13
 
14
14
  it %{joins two string arguments} do
15
- b = FuzzyMatch::Blocking.new %r{apple}
15
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
16
16
  b.join?('apple', '2 apples').must_equal true
17
17
  end
18
18
 
19
19
  it %{fails to join two string arguments} do
20
- b = FuzzyMatch::Blocking.new %r{apple}
20
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
21
21
  b.join?('orange', '2 apples').must_equal false
22
22
  end
23
23
 
24
24
  it %{returns nil instead of false when it has no information} do
25
- b = FuzzyMatch::Blocking.new %r{apple}
25
+ b = FuzzyMatch::Rule::Grouping.new %r{apple}
26
26
  b.join?('orange', 'orange').must_be_nil
27
27
  end
28
28
  end
@@ -1,36 +1,36 @@
1
1
  require 'helper'
2
2
 
3
- class TestIdentity < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Identity do
4
4
  it %{determines whether two records COULD be identical} do
5
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
5
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
6
6
  i.identical?('A1', 'A 1foobar').must_equal true
7
7
  end
8
8
 
9
9
  it %{determines that two records MUST NOT be identical} do
10
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
10
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
11
11
  i.identical?('A1', 'A 2foobar').must_equal false
12
12
  end
13
13
 
14
14
  it %{returns nil indicating no information} do
15
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}
15
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
16
16
  i.identical?('B1', 'A 2foobar').must_equal nil
17
17
  end
18
18
 
19
19
  it %{can be initialized with a regexp} do
20
- i = FuzzyMatch::Identity.new %r{\A\\?/(.*)etc/mysql\$$}
20
+ i = FuzzyMatch::Rule::Identity.new %r{\A\\?/(.*)etc/mysql\$$}
21
21
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
22
22
  end
23
23
 
24
24
  it %{can be initialized from a string (via to_regexp gem)} do
25
- i = FuzzyMatch::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
25
+ i = FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
26
26
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
27
27
 
28
- i = FuzzyMatch::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
28
+ i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
29
29
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
30
30
  end
31
31
 
32
32
  it %{embraces case insensitivity} do
33
- i = FuzzyMatch::Identity.new %r{(A)[ ]*(\d)}i
33
+ i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}i
34
34
  i.identical?('A1', 'a 1foobar').must_equal true
35
35
  end
36
36
  end
@@ -1,8 +1,8 @@
1
1
  require 'helper'
2
2
 
3
- class TestNormalizer < MiniTest::Spec
3
+ describe FuzzyMatch::Rule::Normalizer do
4
4
  it %{applies itself to a string argument} do
5
- t = FuzzyMatch::Normalizer.new %r{(Ford )[ ]*(F)[\- ]*(\d\d\d)}i
5
+ t = FuzzyMatch::Rule::Normalizer.new %r{(Ford )[ ]*(F)[\- ]*(\d\d\d)}i
6
6
  t.apply('Ford F-350').must_equal 'Ford F350'
7
7
  t.apply('Ford F150').must_equal 'Ford F150'
8
8
  t.apply('Ford F 350').must_equal 'Ford F350'
data/test/test_wrapper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class TestWrapper < MiniTest::Spec
3
+ describe FuzzyMatch::Wrapper do
4
4
  it %{does not treat "'s" as a word} do
5
5
  assert_split ["foo's", "bar"], "Foo's Bar"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,99 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-14 00:00:00.000000000 Z
12
+ date: 2012-02-24 00:00:00.000000000 Z
13
13
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: minitest
16
- requirement: &2165315840 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
23
- prerelease: false
24
- version_requirements: *2165315840
25
- - !ruby/object:Gem::Dependency
26
- name: activerecord
27
- requirement: &2165314740 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '3'
33
- type: :development
34
- prerelease: false
35
- version_requirements: *2165314740
36
- - !ruby/object:Gem::Dependency
37
- name: mysql2
38
- requirement: &2165314140 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
43
- version: '0'
44
- type: :development
45
- prerelease: false
46
- version_requirements: *2165314140
47
- - !ruby/object:Gem::Dependency
48
- name: cohort_scope
49
- requirement: &2165313620 !ruby/object:Gem::Requirement
50
- none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- type: :development
56
- prerelease: false
57
- version_requirements: *2165313620
58
- - !ruby/object:Gem::Dependency
59
- name: weighted_average
60
- requirement: &2165312980 !ruby/object:Gem::Requirement
61
- none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
65
- version: '0'
66
- type: :development
67
- prerelease: false
68
- version_requirements: *2165312980
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
- requirement: &2165312300 !ruby/object:Gem::Requirement
72
- none: false
73
- requirements:
74
- - - ! '>='
75
- - !ruby/object:Gem::Version
76
- version: '0'
77
- type: :development
78
- prerelease: false
79
- version_requirements: *2165312300
80
- - !ruby/object:Gem::Dependency
81
- name: yard
82
- requirement: &2165311320 !ruby/object:Gem::Requirement
83
- none: false
84
- requirements:
85
- - - ! '>='
86
- - !ruby/object:Gem::Version
87
- version: '0'
88
- type: :development
89
- prerelease: false
90
- version_requirements: *2165311320
91
- - !ruby/object:Gem::Dependency
92
- name: amatch
93
- requirement: &2165310720 !ruby/object:Gem::Requirement
94
- none: false
95
- requirements:
96
- - - ! '>='
97
- - !ruby/object:Gem::Version
98
- version: '0'
99
- type: :development
100
- prerelease: false
101
- version_requirements: *2165310720
102
14
  - !ruby/object:Gem::Dependency
103
15
  name: activesupport
104
- requirement: &2165310000 !ruby/object:Gem::Requirement
16
+ requirement: &2151876580 !ruby/object:Gem::Requirement
105
17
  none: false
106
18
  requirements:
107
19
  - - ! '>='
@@ -109,10 +21,10 @@ dependencies:
109
21
  version: '3'
110
22
  type: :runtime
111
23
  prerelease: false
112
- version_requirements: *2165310000
24
+ version_requirements: *2151876580
113
25
  - !ruby/object:Gem::Dependency
114
26
  name: to_regexp
115
- requirement: &2165309100 !ruby/object:Gem::Requirement
27
+ requirement: &2151871760 !ruby/object:Gem::Requirement
116
28
  none: false
117
29
  requirements:
118
30
  - - ! '>='
@@ -120,7 +32,7 @@ dependencies:
120
32
  version: 0.0.3
121
33
  type: :runtime
122
34
  prerelease: false
123
- version_requirements: *2165309100
35
+ version_requirements: *2151871760
124
36
  description: Find a needle in a haystack using string similarity and (optionally)
125
37
  regexp rules. Replaces loose_tight_dictionary.
126
38
  email:
@@ -132,6 +44,7 @@ files:
132
44
  - .document
133
45
  - .gitignore
134
46
  - Gemfile
47
+ - History.txt
135
48
  - LICENSE
136
49
  - README.markdown
137
50
  - Rakefile
@@ -145,7 +58,7 @@ files:
145
58
  - examples/bts_aircraft/5-2-D.htm
146
59
  - examples/bts_aircraft/5-2-E.htm
147
60
  - examples/bts_aircraft/5-2-G.htm
148
- - examples/bts_aircraft/blockings.csv
61
+ - examples/bts_aircraft/groupings.csv
149
62
  - examples/bts_aircraft/identities.csv
150
63
  - examples/bts_aircraft/negatives.csv
151
64
  - examples/bts_aircraft/normalizers.csv
@@ -156,24 +69,25 @@ files:
156
69
  - examples/icao-bts.xls
157
70
  - fuzzy_match.gemspec
158
71
  - lib/fuzzy_match.rb
159
- - lib/fuzzy_match/blocking.rb
160
72
  - lib/fuzzy_match/cached_result.rb
161
- - lib/fuzzy_match/identity.rb
162
- - lib/fuzzy_match/normalizer.rb
163
73
  - lib/fuzzy_match/result.rb
74
+ - lib/fuzzy_match/rule.rb
75
+ - lib/fuzzy_match/rule/grouping.rb
76
+ - lib/fuzzy_match/rule/identity.rb
77
+ - lib/fuzzy_match/rule/normalizer.rb
78
+ - lib/fuzzy_match/rule/stop_word.rb
164
79
  - lib/fuzzy_match/score.rb
165
80
  - lib/fuzzy_match/score/amatch.rb
166
81
  - lib/fuzzy_match/score/pure_ruby.rb
167
82
  - lib/fuzzy_match/similarity.rb
168
- - lib/fuzzy_match/stop_word.rb
169
83
  - lib/fuzzy_match/version.rb
170
84
  - lib/fuzzy_match/wrapper.rb
171
85
  - test/helper.rb
172
86
  - test/test_amatch.rb
173
- - test/test_blocking.rb
174
87
  - test/test_cache.rb
175
88
  - test/test_fuzzy_match.rb
176
89
  - test/test_fuzzy_match_convoluted.rb.disabled
90
+ - test/test_grouping.rb
177
91
  - test/test_identity.rb
178
92
  - test/test_normalizer.rb
179
93
  - test/test_wrapper.rb
@@ -205,10 +119,10 @@ summary: Find a needle in a haystack using string similarity and (optionally) re
205
119
  test_files:
206
120
  - test/helper.rb
207
121
  - test/test_amatch.rb
208
- - test/test_blocking.rb
209
122
  - test/test_cache.rb
210
123
  - test/test_fuzzy_match.rb
211
124
  - test/test_fuzzy_match_convoluted.rb.disabled
125
+ - test/test_grouping.rb
212
126
  - test/test_identity.rb
213
127
  - test/test_normalizer.rb
214
128
  - test/test_wrapper.rb
@@ -1,36 +0,0 @@
1
- class FuzzyMatch
2
- # "Record linkage typically involves two main steps: blocking and scoring..."
3
- # http://en.wikipedia.org/wiki/Record_linkage
4
- #
5
- # Blockings effectively divide up the haystack into groups that match a pattern
6
- #
7
- # A blocking (as in a grouping) comes into effect when a str matches.
8
- # Then the needle must also match the blocking's regexp.
9
- class Blocking
10
- attr_reader :regexp
11
-
12
- def initialize(regexp_or_str)
13
- @regexp = regexp_or_str.to_regexp
14
- end
15
-
16
- def match?(str)
17
- !!(regexp.match(str))
18
- end
19
-
20
- # If a blocking "joins" two strings, that means they both fit into it.
21
- #
22
- # Returns false if they certainly don't fit this blocking.
23
- # Returns nil if the blocking doesn't apply, i.e. str2 doesn't fit the blocking.
24
- def join?(str1, str2)
25
- if str2_match_data = regexp.match(str2)
26
- if str1_match_data = regexp.match(str1)
27
- str2_match_data.captures.join.downcase == str1_match_data.captures.join.downcase
28
- else
29
- false
30
- end
31
- else
32
- nil
33
- end
34
- end
35
- end
36
- end
@@ -1,23 +0,0 @@
1
- class FuzzyMatch
2
- # Identities take effect when needle and haystack both match a regexp
3
- # Then the captured part of the regexp has to match exactly
4
- class Identity
5
- attr_reader :regexp
6
-
7
- def initialize(regexp_or_str)
8
- @regexp = regexp_or_str.to_regexp
9
- end
10
-
11
- # Two strings are "identical" if they both match this identity and the captures are equal.
12
- #
13
- # Only returns true/false if both strings match the regexp.
14
- # Otherwise returns nil.
15
- def identical?(str1, str2)
16
- if str1_match_data = regexp.match(str1) and match_data = regexp.match(str2)
17
- str1_match_data.captures.join.downcase == match_data.captures.join.downcase
18
- else
19
- nil
20
- end
21
- end
22
- end
23
- end
@@ -1,28 +0,0 @@
1
- class FuzzyMatch
2
- # A normalizer just strips a string down to its core
3
- class Normalizer
4
- attr_reader :regexp
5
-
6
- def initialize(regexp_or_str)
7
- @regexp = regexp_or_str.to_regexp
8
- end
9
-
10
- # A normalizer applies when its regexp matches and captures a new (shorter) string
11
- def apply?(str)
12
- !!(regexp.match(str))
13
- end
14
-
15
- # The result of applying a normalizer is just all the captures put together.
16
- def apply(str)
17
- if match_data = regexp.match(str)
18
- match_data.captures.join
19
- else
20
- str
21
- end
22
- end
23
-
24
- def inspect
25
- "#<FuzzyMatch::Normalizer regexp=#{regexp.inspect}>"
26
- end
27
- end
28
- end