loose_tight_dictionary 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -71,6 +71,7 @@ class LooseTightDictionary
|
|
|
71
71
|
last_result.tighteners = tighteners
|
|
72
72
|
last_result.identities = identities
|
|
73
73
|
last_result.blockings = blockings
|
|
74
|
+
last_result.stop_words = stop_words
|
|
74
75
|
end
|
|
75
76
|
|
|
76
77
|
needle = Wrapper.new self, needle
|
|
@@ -89,7 +90,7 @@ class LooseTightDictionary
|
|
|
89
90
|
|
|
90
91
|
candidates = if must_match_at_least_one_word
|
|
91
92
|
haystack.select do |straw|
|
|
92
|
-
needle.words
|
|
93
|
+
(needle.words & straw.words).any?
|
|
93
94
|
end
|
|
94
95
|
else
|
|
95
96
|
haystack
|
|
@@ -172,9 +173,12 @@ class LooseTightDictionary
|
|
|
172
173
|
log "-" * 150
|
|
173
174
|
log last_result.needle.render
|
|
174
175
|
log
|
|
175
|
-
log "
|
|
176
|
+
log "Stop words"
|
|
177
|
+
log last_result.stop_words.blank? ? '(none)' : last_result.stop_words.map { |stop_word| stop_word.inspect }.join("\n")
|
|
178
|
+
log
|
|
179
|
+
log "Candidates"
|
|
176
180
|
log "-" * 150
|
|
177
|
-
log last_result.
|
|
181
|
+
log last_result.candidates.map { |record| record.render }.join("\n")
|
|
178
182
|
log
|
|
179
183
|
log "Tighteners"
|
|
180
184
|
log "-" * 150
|
|
@@ -223,16 +227,8 @@ class LooseTightDictionary
|
|
|
223
227
|
|
|
224
228
|
def free
|
|
225
229
|
free_last_result
|
|
226
|
-
@options.try :clear
|
|
227
|
-
@options = nil
|
|
228
230
|
@haystack.try :clear
|
|
229
231
|
@haystack = nil
|
|
230
|
-
@tighteners.try :clear
|
|
231
|
-
@tighteners = nil
|
|
232
|
-
@identities.try :clear
|
|
233
|
-
@identities = nil
|
|
234
|
-
@blockings.try :clear
|
|
235
|
-
@blockings = nil
|
|
236
232
|
ensure
|
|
237
233
|
@freed = true
|
|
238
234
|
end
|
|
@@ -240,7 +236,6 @@ class LooseTightDictionary
|
|
|
240
236
|
private
|
|
241
237
|
|
|
242
238
|
def free_last_result
|
|
243
|
-
@last_result
|
|
244
|
-
@last_result = nil
|
|
239
|
+
@last_result = nil
|
|
245
240
|
end
|
|
246
241
|
end
|
|
@@ -4,6 +4,7 @@ class LooseTightDictionary
|
|
|
4
4
|
attr_accessor :tighteners
|
|
5
5
|
attr_accessor :blockings
|
|
6
6
|
attr_accessor :identities
|
|
7
|
+
attr_accessor :stop_words
|
|
7
8
|
attr_accessor :candidates
|
|
8
9
|
attr_accessor :joint
|
|
9
10
|
attr_accessor :disjoint
|
|
@@ -12,13 +13,5 @@ class LooseTightDictionary
|
|
|
12
13
|
attr_accessor :similarities
|
|
13
14
|
attr_accessor :record
|
|
14
15
|
attr_accessor :score
|
|
15
|
-
|
|
16
|
-
def haystack
|
|
17
|
-
joint + disjoint
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def free
|
|
21
|
-
# nothing to see here
|
|
22
|
-
end
|
|
23
16
|
end
|
|
24
17
|
end
|
|
@@ -2,15 +2,6 @@
|
|
|
2
2
|
require 'helper'
|
|
3
3
|
|
|
4
4
|
class TestLooseTightDictionary < Test::Unit::TestCase
|
|
5
|
-
# in case i start doing something with the log
|
|
6
|
-
# def setup
|
|
7
|
-
# @log = StringIO.new
|
|
8
|
-
# end
|
|
9
|
-
#
|
|
10
|
-
# def teardown
|
|
11
|
-
# @log.close
|
|
12
|
-
# end
|
|
13
|
-
|
|
14
5
|
def test_001_find
|
|
15
6
|
d = LooseTightDictionary.new %w{ RATZ CATZ }
|
|
16
7
|
assert_equal 'RATZ', d.find('RITZ')
|
|
@@ -168,4 +159,25 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
168
159
|
d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true, :stop_words => [ %r{HO?TE?L} ]
|
|
169
160
|
assert_equal 'A HOTEL', d.find('A HTL')
|
|
170
161
|
end
|
|
162
|
+
|
|
163
|
+
def test_021_explain
|
|
164
|
+
require 'stringio'
|
|
165
|
+
capture = StringIO.new
|
|
166
|
+
begin
|
|
167
|
+
old_stderr = $stderr
|
|
168
|
+
$stderr = capture
|
|
169
|
+
d = LooseTightDictionary.new %w{ RATZ CATZ }
|
|
170
|
+
d.explain('RITZ')
|
|
171
|
+
ensure
|
|
172
|
+
$stderr = old_stderr
|
|
173
|
+
end
|
|
174
|
+
capture.rewind
|
|
175
|
+
assert capture.read.include?('CATZ')
|
|
176
|
+
capture.close
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def test_022_compare_words_with_words
|
|
180
|
+
d = LooseTightDictionary.new [ 'PENINSULA HOTELS' ], :must_match_at_least_one_word => true
|
|
181
|
+
assert_equal nil, d.find('DOLCE LA HULPE BXL FI')
|
|
182
|
+
end
|
|
171
183
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: loose_tight_dictionary
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.3
|
|
4
|
+
version: 1.0.4
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -13,7 +13,7 @@ date: 2011-12-06 00:00:00.000000000Z
|
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: shoulda
|
|
16
|
-
requirement: &
|
|
16
|
+
requirement: &2177777000 !ruby/object:Gem::Requirement
|
|
17
17
|
none: false
|
|
18
18
|
requirements:
|
|
19
19
|
- - ! '>='
|
|
@@ -21,10 +21,10 @@ dependencies:
|
|
|
21
21
|
version: '0'
|
|
22
22
|
type: :development
|
|
23
23
|
prerelease: false
|
|
24
|
-
version_requirements: *
|
|
24
|
+
version_requirements: *2177777000
|
|
25
25
|
- !ruby/object:Gem::Dependency
|
|
26
26
|
name: remote_table
|
|
27
|
-
requirement: &
|
|
27
|
+
requirement: &2177776540 !ruby/object:Gem::Requirement
|
|
28
28
|
none: false
|
|
29
29
|
requirements:
|
|
30
30
|
- - ! '>='
|
|
@@ -32,10 +32,10 @@ dependencies:
|
|
|
32
32
|
version: '0'
|
|
33
33
|
type: :development
|
|
34
34
|
prerelease: false
|
|
35
|
-
version_requirements: *
|
|
35
|
+
version_requirements: *2177776540
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
37
37
|
name: activerecord
|
|
38
|
-
requirement: &
|
|
38
|
+
requirement: &2177776000 !ruby/object:Gem::Requirement
|
|
39
39
|
none: false
|
|
40
40
|
requirements:
|
|
41
41
|
- - ! '>='
|
|
@@ -43,10 +43,10 @@ dependencies:
|
|
|
43
43
|
version: '3'
|
|
44
44
|
type: :development
|
|
45
45
|
prerelease: false
|
|
46
|
-
version_requirements: *
|
|
46
|
+
version_requirements: *2177776000
|
|
47
47
|
- !ruby/object:Gem::Dependency
|
|
48
48
|
name: mysql
|
|
49
|
-
requirement: &
|
|
49
|
+
requirement: &2177775560 !ruby/object:Gem::Requirement
|
|
50
50
|
none: false
|
|
51
51
|
requirements:
|
|
52
52
|
- - ! '>='
|
|
@@ -54,10 +54,10 @@ dependencies:
|
|
|
54
54
|
version: '0'
|
|
55
55
|
type: :development
|
|
56
56
|
prerelease: false
|
|
57
|
-
version_requirements: *
|
|
57
|
+
version_requirements: *2177775560
|
|
58
58
|
- !ruby/object:Gem::Dependency
|
|
59
59
|
name: cohort_scope
|
|
60
|
-
requirement: &
|
|
60
|
+
requirement: &2177775060 !ruby/object:Gem::Requirement
|
|
61
61
|
none: false
|
|
62
62
|
requirements:
|
|
63
63
|
- - ! '>='
|
|
@@ -65,10 +65,10 @@ dependencies:
|
|
|
65
65
|
version: '0'
|
|
66
66
|
type: :development
|
|
67
67
|
prerelease: false
|
|
68
|
-
version_requirements: *
|
|
68
|
+
version_requirements: *2177775060
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: weighted_average
|
|
71
|
-
requirement: &
|
|
71
|
+
requirement: &2177774620 !ruby/object:Gem::Requirement
|
|
72
72
|
none: false
|
|
73
73
|
requirements:
|
|
74
74
|
- - ! '>='
|
|
@@ -76,10 +76,10 @@ dependencies:
|
|
|
76
76
|
version: '0'
|
|
77
77
|
type: :development
|
|
78
78
|
prerelease: false
|
|
79
|
-
version_requirements: *
|
|
79
|
+
version_requirements: *2177774620
|
|
80
80
|
- !ruby/object:Gem::Dependency
|
|
81
81
|
name: rake
|
|
82
|
-
requirement: &
|
|
82
|
+
requirement: &2177774160 !ruby/object:Gem::Requirement
|
|
83
83
|
none: false
|
|
84
84
|
requirements:
|
|
85
85
|
- - ! '>='
|
|
@@ -87,10 +87,10 @@ dependencies:
|
|
|
87
87
|
version: '0'
|
|
88
88
|
type: :development
|
|
89
89
|
prerelease: false
|
|
90
|
-
version_requirements: *
|
|
90
|
+
version_requirements: *2177774160
|
|
91
91
|
- !ruby/object:Gem::Dependency
|
|
92
92
|
name: activesupport
|
|
93
|
-
requirement: &
|
|
93
|
+
requirement: &2177773620 !ruby/object:Gem::Requirement
|
|
94
94
|
none: false
|
|
95
95
|
requirements:
|
|
96
96
|
- - ! '>='
|
|
@@ -98,10 +98,10 @@ dependencies:
|
|
|
98
98
|
version: '3'
|
|
99
99
|
type: :runtime
|
|
100
100
|
prerelease: false
|
|
101
|
-
version_requirements: *
|
|
101
|
+
version_requirements: *2177773620
|
|
102
102
|
- !ruby/object:Gem::Dependency
|
|
103
103
|
name: to_regexp
|
|
104
|
-
requirement: &
|
|
104
|
+
requirement: &2177773100 !ruby/object:Gem::Requirement
|
|
105
105
|
none: false
|
|
106
106
|
requirements:
|
|
107
107
|
- - ! '>='
|
|
@@ -109,7 +109,7 @@ dependencies:
|
|
|
109
109
|
version: 0.0.3
|
|
110
110
|
type: :runtime
|
|
111
111
|
prerelease: false
|
|
112
|
-
version_requirements: *
|
|
112
|
+
version_requirements: *2177773100
|
|
113
113
|
description: Create dictionaries that link rows between two tables using loose matching
|
|
114
114
|
(string similarity) by default and tight matching (regexp) by request.
|
|
115
115
|
email:
|