loose_tight_dictionary 1.0.4 → 1.0.5

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  = loose_tight_dictionary
2
2
 
3
+ DEPRECATED: use [fuzzy_match](https://github.com/seamusabshere/fuzzy_match) instead. All further development will happen there.
4
+
5
+ FuzzyMatch 1.0.5 is identical to LooseTightDictionary 1.0.5 (except for the name).
6
+
3
7
  Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
4
8
 
5
9
  == Quickstart
@@ -22,9 +22,9 @@ class LooseTightDictionary
22
22
  attr_reader :identities
23
23
  attr_reader :tighteners
24
24
  attr_reader :stop_words
25
- attr_reader :first_blocking_decides
26
- attr_reader :must_match_blocking
27
- attr_reader :must_match_at_least_one_word
25
+ attr_reader :default_first_blocking_decides
26
+ attr_reader :default_must_match_blocking
27
+ attr_reader :default_must_match_at_least_one_word
28
28
 
29
29
  # haystack - a bunch of records
30
30
  # options
@@ -35,9 +35,9 @@ class LooseTightDictionary
35
35
  # * read: how to interpret each entry in the 'haystack', either a Proc or a symbol
36
36
  def initialize(records, options = {})
37
37
  options = options.symbolize_keys
38
- @first_blocking_decides = options.fetch :first_blocking_decides, false
39
- @must_match_blocking = options.fetch :must_match_blocking, false
40
- @must_match_at_least_one_word = options.fetch :must_match_at_least_one_word, false
38
+ @default_first_blocking_decides = options[:first_blocking_decides]
39
+ @default_must_match_blocking = options[:must_match_blocking]
40
+ @default_must_match_at_least_one_word = options[:must_match_at_least_one_word]
41
41
  @blockings = options.fetch(:blockings, []).map { |regexp_or_str| Blocking.new regexp_or_str }
42
42
  @identities = options.fetch(:identities, []).map { |regexp_or_str| Identity.new regexp_or_str }
43
43
  @tighteners = options.fetch(:tighteners, []).map { |regexp_or_str| Tightener.new regexp_or_str }
@@ -61,6 +61,9 @@ class LooseTightDictionary
61
61
  options = options.symbolize_keys
62
62
  gather_last_result = options.fetch(:gather_last_result, false)
63
63
  is_find_all = options.fetch(:find_all, false)
64
+ first_blocking_decides = options.fetch(:first_blocking_decides, default_first_blocking_decides)
65
+ must_match_blocking = options.fetch(:must_match_blocking, default_must_match_blocking)
66
+ must_match_at_least_one_word = options.fetch(:must_match_at_least_one_word, default_must_match_at_least_one_word)
64
67
 
65
68
  if gather_last_result
66
69
  free_last_result
@@ -112,10 +115,18 @@ class LooseTightDictionary
112
115
  [ candidates.dup, [] ]
113
116
  end
114
117
 
115
- # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
116
118
  if joint.none?
117
- joint = disjoint
118
- disjoint = []
119
+ if must_match_blocking
120
+ if is_find_all
121
+ return []
122
+ else
123
+ return nil
124
+ end
125
+ else
126
+ # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
127
+ joint = disjoint
128
+ disjoint = []
129
+ end
119
130
  end
120
131
 
121
132
  if gather_last_result
@@ -163,8 +174,8 @@ class LooseTightDictionary
163
174
  #
164
175
  # d = LooseTightDictionary.new ['737', '747', '757' ]
165
176
  # d.explain 'boeing 737-100'
166
- def explain(needle)
167
- record = find needle, :gather_last_result => true
177
+ def explain(needle, options = {})
178
+ record = find needle, options.merge(:gather_last_result => true)
168
179
  log "#" * 150
169
180
  log "# Match #{needle.inspect} => #{record.inspect}"
170
181
  log "#" * 150
@@ -27,9 +27,7 @@ class LooseTightDictionary
27
27
  end
28
28
 
29
29
  def utf8?
30
- return @utf8_query[0] if @utf8_query.is_a?(::Array)
31
- @utf8_query = [ (defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u') ]
32
- @utf8_query[0]
30
+ (defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u')
33
31
  end
34
32
 
35
33
  if defined?(::Amatch)
@@ -118,5 +116,10 @@ class LooseTightDictionary
118
116
  end
119
117
 
120
118
  end
119
+
120
+ extend ::ActiveSupport::Memoizable
121
+ memoize :dices_coefficient
122
+ memoize :levenshtein
123
+ memoize :utf8?
121
124
  end
122
125
  end
@@ -1,3 +1,3 @@
1
1
  class LooseTightDictionary
2
- VERSION = '1.0.4'
2
+ VERSION = '1.0.5'
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |s|
9
9
  s.authors = ["Seamus Abshere"]
10
10
  s.email = ["seamus@abshere.net"]
11
11
  s.homepage = "https://github.com/seamusabshere/loose_tight_dictionary"
12
- s.summary = %Q{Allows iterative development of dictionaries for big data sets.}
13
- s.description = %Q{Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.}
12
+ s.summary = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
13
+ s.description = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
14
14
 
15
15
  s.rubyforge_project = "loose_tight_dictionary"
16
16
 
@@ -62,6 +62,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
62
62
  d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ], :must_match_blocking => true
63
63
  assert_equal 'X', d.find('X')
64
64
  assert_equal nil, d.find('A')
65
+
66
+ d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ]
67
+ assert_equal 'X', d.find('X', :must_match_blocking => true)
68
+ assert_equal nil, d.find('A', :must_match_blocking => true)
65
69
  end
66
70
 
67
71
  def test_011_free
@@ -153,6 +157,9 @@ class TestLooseTightDictionary < Test::Unit::TestCase
153
157
  end
154
158
 
155
159
  def test_020_stop_words
160
+ d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ]
161
+ assert_equal 'B HTL', d.find('A HTL', :must_match_at_least_one_word => true)
162
+
156
163
  d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true
157
164
  assert_equal 'B HTL', d.find('A HTL')
158
165
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-06 00:00:00.000000000Z
12
+ date: 2012-01-13 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &2177777000 !ruby/object:Gem::Requirement
16
+ requirement: &2155673120 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2177777000
24
+ version_requirements: *2155673120
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: remote_table
27
- requirement: &2177776540 !ruby/object:Gem::Requirement
27
+ requirement: &2155672640 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2177776540
35
+ version_requirements: *2155672640
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activerecord
38
- requirement: &2177776000 !ruby/object:Gem::Requirement
38
+ requirement: &2155671900 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2177776000
46
+ version_requirements: *2155671900
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: mysql
49
- requirement: &2177775560 !ruby/object:Gem::Requirement
49
+ requirement: &2155671300 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2177775560
57
+ version_requirements: *2155671300
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cohort_scope
60
- requirement: &2177775060 !ruby/object:Gem::Requirement
60
+ requirement: &2155670520 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2177775060
68
+ version_requirements: *2155670520
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: weighted_average
71
- requirement: &2177774620 !ruby/object:Gem::Requirement
71
+ requirement: &2155670100 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2177774620
79
+ version_requirements: *2155670100
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rake
82
- requirement: &2177774160 !ruby/object:Gem::Requirement
82
+ requirement: &2155669660 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2177774160
90
+ version_requirements: *2155669660
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: activesupport
93
- requirement: &2177773620 !ruby/object:Gem::Requirement
93
+ requirement: &2155668940 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *2177773620
101
+ version_requirements: *2155668940
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: to_regexp
104
- requirement: &2177773100 !ruby/object:Gem::Requirement
104
+ requirement: &2155668060 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,9 +109,9 @@ dependencies:
109
109
  version: 0.0.3
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *2177773100
113
- description: Create dictionaries that link rows between two tables using loose matching
114
- (string similarity) by default and tight matching (regexp) by request.
112
+ version_requirements: *2155668060
113
+ description: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
114
+ string similarity and (optionally) regexp rules.'
115
115
  email:
116
116
  - seamus@abshere.net
117
117
  executables: []
@@ -185,7 +185,8 @@ rubyforge_project: loose_tight_dictionary
185
185
  rubygems_version: 1.8.10
186
186
  signing_key:
187
187
  specification_version: 3
188
- summary: Allows iterative development of dictionaries for big data sets.
188
+ summary: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
189
+ string similarity and (optionally) regexp rules.'
189
190
  test_files:
190
191
  - test/helper.rb
191
192
  - test/test_blocking.rb