loose_tight_dictionary 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,9 @@
1
1
  = loose_tight_dictionary
2
2
 
3
+ DEPRECATED: use {fuzzy_match}[https://github.com/seamusabshere/fuzzy_match] instead. All further development will happen there.
4
+
5
+ FuzzyMatch 1.0.5 is identical to LooseTightDictionary 1.0.5 (except for the name).
6
+
3
7
  Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
4
8
 
5
9
  == Quickstart
@@ -22,9 +22,9 @@ class LooseTightDictionary
22
22
  attr_reader :identities
23
23
  attr_reader :tighteners
24
24
  attr_reader :stop_words
25
- attr_reader :first_blocking_decides
26
- attr_reader :must_match_blocking
27
- attr_reader :must_match_at_least_one_word
25
+ attr_reader :default_first_blocking_decides
26
+ attr_reader :default_must_match_blocking
27
+ attr_reader :default_must_match_at_least_one_word
28
28
 
29
29
  # haystack - a bunch of records
30
30
  # options
@@ -35,9 +35,9 @@ class LooseTightDictionary
35
35
  # * read: how to interpret each entry in the 'haystack', either a Proc or a symbol
36
36
  def initialize(records, options = {})
37
37
  options = options.symbolize_keys
38
- @first_blocking_decides = options.fetch :first_blocking_decides, false
39
- @must_match_blocking = options.fetch :must_match_blocking, false
40
- @must_match_at_least_one_word = options.fetch :must_match_at_least_one_word, false
38
+ @default_first_blocking_decides = options[:first_blocking_decides]
39
+ @default_must_match_blocking = options[:must_match_blocking]
40
+ @default_must_match_at_least_one_word = options[:must_match_at_least_one_word]
41
41
  @blockings = options.fetch(:blockings, []).map { |regexp_or_str| Blocking.new regexp_or_str }
42
42
  @identities = options.fetch(:identities, []).map { |regexp_or_str| Identity.new regexp_or_str }
43
43
  @tighteners = options.fetch(:tighteners, []).map { |regexp_or_str| Tightener.new regexp_or_str }
@@ -61,6 +61,9 @@ class LooseTightDictionary
61
61
  options = options.symbolize_keys
62
62
  gather_last_result = options.fetch(:gather_last_result, false)
63
63
  is_find_all = options.fetch(:find_all, false)
64
+ first_blocking_decides = options.fetch(:first_blocking_decides, default_first_blocking_decides)
65
+ must_match_blocking = options.fetch(:must_match_blocking, default_must_match_blocking)
66
+ must_match_at_least_one_word = options.fetch(:must_match_at_least_one_word, default_must_match_at_least_one_word)
64
67
 
65
68
  if gather_last_result
66
69
  free_last_result
@@ -112,10 +115,18 @@ class LooseTightDictionary
112
115
  [ candidates.dup, [] ]
113
116
  end
114
117
 
115
- # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
116
118
  if joint.none?
117
- joint = disjoint
118
- disjoint = []
119
+ if must_match_blocking
120
+ if is_find_all
121
+ return []
122
+ else
123
+ return nil
124
+ end
125
+ else
126
+ # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
127
+ joint = disjoint
128
+ disjoint = []
129
+ end
119
130
  end
120
131
 
121
132
  if gather_last_result
@@ -163,8 +174,8 @@ class LooseTightDictionary
163
174
  #
164
175
  # d = LooseTightDictionary.new ['737', '747', '757' ]
165
176
  # d.explain 'boeing 737-100'
166
- def explain(needle)
167
- record = find needle, :gather_last_result => true
177
+ def explain(needle, options = {})
178
+ record = find needle, options.merge(:gather_last_result => true)
168
179
  log "#" * 150
169
180
  log "# Match #{needle.inspect} => #{record.inspect}"
170
181
  log "#" * 150
@@ -27,9 +27,7 @@ class LooseTightDictionary
27
27
  end
28
28
 
29
29
  def utf8?
30
- return @utf8_query[0] if @utf8_query.is_a?(::Array)
31
- @utf8_query = [ (defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u') ]
32
- @utf8_query[0]
30
+ (defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u')
33
31
  end
34
32
 
35
33
  if defined?(::Amatch)
@@ -118,5 +116,10 @@ class LooseTightDictionary
118
116
  end
119
117
 
120
118
  end
119
+
120
+ extend ::ActiveSupport::Memoizable
121
+ memoize :dices_coefficient
122
+ memoize :levenshtein
123
+ memoize :utf8?
121
124
  end
122
125
  end
@@ -1,3 +1,3 @@
1
1
  class LooseTightDictionary
2
- VERSION = '1.0.4'
2
+ VERSION = '1.0.5'
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |s|
9
9
  s.authors = ["Seamus Abshere"]
10
10
  s.email = ["seamus@abshere.net"]
11
11
  s.homepage = "https://github.com/seamusabshere/loose_tight_dictionary"
12
- s.summary = %Q{Allows iterative development of dictionaries for big data sets.}
13
- s.description = %Q{Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.}
12
+ s.summary = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
13
+ s.description = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
14
14
 
15
15
  s.rubyforge_project = "loose_tight_dictionary"
16
16
 
@@ -62,6 +62,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
62
62
  d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ], :must_match_blocking => true
63
63
  assert_equal 'X', d.find('X')
64
64
  assert_equal nil, d.find('A')
65
+
66
+ d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ]
67
+ assert_equal 'X', d.find('X', :must_match_blocking => true)
68
+ assert_equal nil, d.find('A', :must_match_blocking => true)
65
69
  end
66
70
 
67
71
  def test_011_free
@@ -153,6 +157,9 @@ class TestLooseTightDictionary < Test::Unit::TestCase
153
157
  end
154
158
 
155
159
  def test_020_stop_words
160
+ d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ]
161
+ assert_equal 'B HTL', d.find('A HTL', :must_match_at_least_one_word => true)
162
+
156
163
  d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true
157
164
  assert_equal 'B HTL', d.find('A HTL')
158
165
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-06 00:00:00.000000000Z
12
+ date: 2012-01-13 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &2177777000 !ruby/object:Gem::Requirement
16
+ requirement: &2155673120 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2177777000
24
+ version_requirements: *2155673120
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: remote_table
27
- requirement: &2177776540 !ruby/object:Gem::Requirement
27
+ requirement: &2155672640 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2177776540
35
+ version_requirements: *2155672640
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activerecord
38
- requirement: &2177776000 !ruby/object:Gem::Requirement
38
+ requirement: &2155671900 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2177776000
46
+ version_requirements: *2155671900
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: mysql
49
- requirement: &2177775560 !ruby/object:Gem::Requirement
49
+ requirement: &2155671300 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2177775560
57
+ version_requirements: *2155671300
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cohort_scope
60
- requirement: &2177775060 !ruby/object:Gem::Requirement
60
+ requirement: &2155670520 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2177775060
68
+ version_requirements: *2155670520
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: weighted_average
71
- requirement: &2177774620 !ruby/object:Gem::Requirement
71
+ requirement: &2155670100 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2177774620
79
+ version_requirements: *2155670100
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rake
82
- requirement: &2177774160 !ruby/object:Gem::Requirement
82
+ requirement: &2155669660 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2177774160
90
+ version_requirements: *2155669660
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: activesupport
93
- requirement: &2177773620 !ruby/object:Gem::Requirement
93
+ requirement: &2155668940 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *2177773620
101
+ version_requirements: *2155668940
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: to_regexp
104
- requirement: &2177773100 !ruby/object:Gem::Requirement
104
+ requirement: &2155668060 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,9 +109,9 @@ dependencies:
109
109
  version: 0.0.3
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *2177773100
113
- description: Create dictionaries that link rows between two tables using loose matching
114
- (string similarity) by default and tight matching (regexp) by request.
112
+ version_requirements: *2155668060
113
+ description: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
114
+ string similarity and (optionally) regexp rules.'
115
115
  email:
116
116
  - seamus@abshere.net
117
117
  executables: []
@@ -185,7 +185,8 @@ rubyforge_project: loose_tight_dictionary
185
185
  rubygems_version: 1.8.10
186
186
  signing_key:
187
187
  specification_version: 3
188
- summary: Allows iterative development of dictionaries for big data sets.
188
+ summary: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
189
+ string similarity and (optionally) regexp rules.'
189
190
  test_files:
190
191
  - test/helper.rb
191
192
  - test/test_blocking.rb