loose_tight_dictionary 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
= loose_tight_dictionary
|
|
2
2
|
|
|
3
|
+
DEPRECATED: use [fuzzy_match](https://github.com/seamusabshere/fuzzy_match) instead. All further development will happen there.
|
|
4
|
+
|
|
5
|
+
FuzzyMatch 1.0.5 is identical to LooseTightDictionary 1.0.5 (except for the name).
|
|
6
|
+
|
|
3
7
|
Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
|
|
4
8
|
|
|
5
9
|
== Quickstart
|
|
@@ -22,9 +22,9 @@ class LooseTightDictionary
|
|
|
22
22
|
attr_reader :identities
|
|
23
23
|
attr_reader :tighteners
|
|
24
24
|
attr_reader :stop_words
|
|
25
|
-
attr_reader :
|
|
26
|
-
attr_reader :
|
|
27
|
-
attr_reader :
|
|
25
|
+
attr_reader :default_first_blocking_decides
|
|
26
|
+
attr_reader :default_must_match_blocking
|
|
27
|
+
attr_reader :default_must_match_at_least_one_word
|
|
28
28
|
|
|
29
29
|
# haystack - a bunch of records
|
|
30
30
|
# options
|
|
@@ -35,9 +35,9 @@ class LooseTightDictionary
|
|
|
35
35
|
# * read: how to interpret each entry in the 'haystack', either a Proc or a symbol
|
|
36
36
|
def initialize(records, options = {})
|
|
37
37
|
options = options.symbolize_keys
|
|
38
|
-
@
|
|
39
|
-
@
|
|
40
|
-
@
|
|
38
|
+
@default_first_blocking_decides = options[:first_blocking_decides]
|
|
39
|
+
@default_must_match_blocking = options[:must_match_blocking]
|
|
40
|
+
@default_must_match_at_least_one_word = options[:must_match_at_least_one_word]
|
|
41
41
|
@blockings = options.fetch(:blockings, []).map { |regexp_or_str| Blocking.new regexp_or_str }
|
|
42
42
|
@identities = options.fetch(:identities, []).map { |regexp_or_str| Identity.new regexp_or_str }
|
|
43
43
|
@tighteners = options.fetch(:tighteners, []).map { |regexp_or_str| Tightener.new regexp_or_str }
|
|
@@ -61,6 +61,9 @@ class LooseTightDictionary
|
|
|
61
61
|
options = options.symbolize_keys
|
|
62
62
|
gather_last_result = options.fetch(:gather_last_result, false)
|
|
63
63
|
is_find_all = options.fetch(:find_all, false)
|
|
64
|
+
first_blocking_decides = options.fetch(:first_blocking_decides, default_first_blocking_decides)
|
|
65
|
+
must_match_blocking = options.fetch(:must_match_blocking, default_must_match_blocking)
|
|
66
|
+
must_match_at_least_one_word = options.fetch(:must_match_at_least_one_word, default_must_match_at_least_one_word)
|
|
64
67
|
|
|
65
68
|
if gather_last_result
|
|
66
69
|
free_last_result
|
|
@@ -112,10 +115,18 @@ class LooseTightDictionary
|
|
|
112
115
|
[ candidates.dup, [] ]
|
|
113
116
|
end
|
|
114
117
|
|
|
115
|
-
# special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
|
|
116
118
|
if joint.none?
|
|
117
|
-
|
|
118
|
-
|
|
119
|
+
if must_match_blocking
|
|
120
|
+
if is_find_all
|
|
121
|
+
return []
|
|
122
|
+
else
|
|
123
|
+
return nil
|
|
124
|
+
end
|
|
125
|
+
else
|
|
126
|
+
# special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
|
|
127
|
+
joint = disjoint
|
|
128
|
+
disjoint = []
|
|
129
|
+
end
|
|
119
130
|
end
|
|
120
131
|
|
|
121
132
|
if gather_last_result
|
|
@@ -163,8 +174,8 @@ class LooseTightDictionary
|
|
|
163
174
|
#
|
|
164
175
|
# d = LooseTightDictionary.new ['737', '747', '757' ]
|
|
165
176
|
# d.explain 'boeing 737-100'
|
|
166
|
-
def explain(needle)
|
|
167
|
-
record = find needle, :gather_last_result => true
|
|
177
|
+
def explain(needle, options = {})
|
|
178
|
+
record = find needle, options.merge(:gather_last_result => true)
|
|
168
179
|
log "#" * 150
|
|
169
180
|
log "# Match #{needle.inspect} => #{record.inspect}"
|
|
170
181
|
log "#" * 150
|
|
@@ -27,9 +27,7 @@ class LooseTightDictionary
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
def utf8?
|
|
30
|
-
|
|
31
|
-
@utf8_query = [ (defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u') ]
|
|
32
|
-
@utf8_query[0]
|
|
30
|
+
(defined?(::Encoding) ? str1.encoding.to_s : $KCODE).downcase.start_with?('u')
|
|
33
31
|
end
|
|
34
32
|
|
|
35
33
|
if defined?(::Amatch)
|
|
@@ -118,5 +116,10 @@ class LooseTightDictionary
|
|
|
118
116
|
end
|
|
119
117
|
|
|
120
118
|
end
|
|
119
|
+
|
|
120
|
+
extend ::ActiveSupport::Memoizable
|
|
121
|
+
memoize :dices_coefficient
|
|
122
|
+
memoize :levenshtein
|
|
123
|
+
memoize :utf8?
|
|
121
124
|
end
|
|
122
125
|
end
|
|
@@ -9,8 +9,8 @@ Gem::Specification.new do |s|
|
|
|
9
9
|
s.authors = ["Seamus Abshere"]
|
|
10
10
|
s.email = ["seamus@abshere.net"]
|
|
11
11
|
s.homepage = "https://github.com/seamusabshere/loose_tight_dictionary"
|
|
12
|
-
s.summary = %Q{
|
|
13
|
-
s.description = %Q{
|
|
12
|
+
s.summary = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
|
|
13
|
+
s.description = %Q{DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using string similarity and (optionally) regexp rules.}
|
|
14
14
|
|
|
15
15
|
s.rubyforge_project = "loose_tight_dictionary"
|
|
16
16
|
|
|
@@ -62,6 +62,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
62
62
|
d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ], :must_match_blocking => true
|
|
63
63
|
assert_equal 'X', d.find('X')
|
|
64
64
|
assert_equal nil, d.find('A')
|
|
65
|
+
|
|
66
|
+
d = LooseTightDictionary.new [ 'X' ], :blockings => [ /X/, /Y/ ]
|
|
67
|
+
assert_equal 'X', d.find('X', :must_match_blocking => true)
|
|
68
|
+
assert_equal nil, d.find('A', :must_match_blocking => true)
|
|
65
69
|
end
|
|
66
70
|
|
|
67
71
|
def test_011_free
|
|
@@ -153,6 +157,9 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
153
157
|
end
|
|
154
158
|
|
|
155
159
|
def test_020_stop_words
|
|
160
|
+
d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ]
|
|
161
|
+
assert_equal 'B HTL', d.find('A HTL', :must_match_at_least_one_word => true)
|
|
162
|
+
|
|
156
163
|
d = LooseTightDictionary.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true
|
|
157
164
|
assert_equal 'B HTL', d.find('A HTL')
|
|
158
165
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: loose_tight_dictionary
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.
|
|
4
|
+
version: 1.0.5
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,11 +9,11 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2012-01-13 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: shoulda
|
|
16
|
-
requirement: &
|
|
16
|
+
requirement: &2155673120 !ruby/object:Gem::Requirement
|
|
17
17
|
none: false
|
|
18
18
|
requirements:
|
|
19
19
|
- - ! '>='
|
|
@@ -21,10 +21,10 @@ dependencies:
|
|
|
21
21
|
version: '0'
|
|
22
22
|
type: :development
|
|
23
23
|
prerelease: false
|
|
24
|
-
version_requirements: *
|
|
24
|
+
version_requirements: *2155673120
|
|
25
25
|
- !ruby/object:Gem::Dependency
|
|
26
26
|
name: remote_table
|
|
27
|
-
requirement: &
|
|
27
|
+
requirement: &2155672640 !ruby/object:Gem::Requirement
|
|
28
28
|
none: false
|
|
29
29
|
requirements:
|
|
30
30
|
- - ! '>='
|
|
@@ -32,10 +32,10 @@ dependencies:
|
|
|
32
32
|
version: '0'
|
|
33
33
|
type: :development
|
|
34
34
|
prerelease: false
|
|
35
|
-
version_requirements: *
|
|
35
|
+
version_requirements: *2155672640
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
37
37
|
name: activerecord
|
|
38
|
-
requirement: &
|
|
38
|
+
requirement: &2155671900 !ruby/object:Gem::Requirement
|
|
39
39
|
none: false
|
|
40
40
|
requirements:
|
|
41
41
|
- - ! '>='
|
|
@@ -43,10 +43,10 @@ dependencies:
|
|
|
43
43
|
version: '3'
|
|
44
44
|
type: :development
|
|
45
45
|
prerelease: false
|
|
46
|
-
version_requirements: *
|
|
46
|
+
version_requirements: *2155671900
|
|
47
47
|
- !ruby/object:Gem::Dependency
|
|
48
48
|
name: mysql
|
|
49
|
-
requirement: &
|
|
49
|
+
requirement: &2155671300 !ruby/object:Gem::Requirement
|
|
50
50
|
none: false
|
|
51
51
|
requirements:
|
|
52
52
|
- - ! '>='
|
|
@@ -54,10 +54,10 @@ dependencies:
|
|
|
54
54
|
version: '0'
|
|
55
55
|
type: :development
|
|
56
56
|
prerelease: false
|
|
57
|
-
version_requirements: *
|
|
57
|
+
version_requirements: *2155671300
|
|
58
58
|
- !ruby/object:Gem::Dependency
|
|
59
59
|
name: cohort_scope
|
|
60
|
-
requirement: &
|
|
60
|
+
requirement: &2155670520 !ruby/object:Gem::Requirement
|
|
61
61
|
none: false
|
|
62
62
|
requirements:
|
|
63
63
|
- - ! '>='
|
|
@@ -65,10 +65,10 @@ dependencies:
|
|
|
65
65
|
version: '0'
|
|
66
66
|
type: :development
|
|
67
67
|
prerelease: false
|
|
68
|
-
version_requirements: *
|
|
68
|
+
version_requirements: *2155670520
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: weighted_average
|
|
71
|
-
requirement: &
|
|
71
|
+
requirement: &2155670100 !ruby/object:Gem::Requirement
|
|
72
72
|
none: false
|
|
73
73
|
requirements:
|
|
74
74
|
- - ! '>='
|
|
@@ -76,10 +76,10 @@ dependencies:
|
|
|
76
76
|
version: '0'
|
|
77
77
|
type: :development
|
|
78
78
|
prerelease: false
|
|
79
|
-
version_requirements: *
|
|
79
|
+
version_requirements: *2155670100
|
|
80
80
|
- !ruby/object:Gem::Dependency
|
|
81
81
|
name: rake
|
|
82
|
-
requirement: &
|
|
82
|
+
requirement: &2155669660 !ruby/object:Gem::Requirement
|
|
83
83
|
none: false
|
|
84
84
|
requirements:
|
|
85
85
|
- - ! '>='
|
|
@@ -87,10 +87,10 @@ dependencies:
|
|
|
87
87
|
version: '0'
|
|
88
88
|
type: :development
|
|
89
89
|
prerelease: false
|
|
90
|
-
version_requirements: *
|
|
90
|
+
version_requirements: *2155669660
|
|
91
91
|
- !ruby/object:Gem::Dependency
|
|
92
92
|
name: activesupport
|
|
93
|
-
requirement: &
|
|
93
|
+
requirement: &2155668940 !ruby/object:Gem::Requirement
|
|
94
94
|
none: false
|
|
95
95
|
requirements:
|
|
96
96
|
- - ! '>='
|
|
@@ -98,10 +98,10 @@ dependencies:
|
|
|
98
98
|
version: '3'
|
|
99
99
|
type: :runtime
|
|
100
100
|
prerelease: false
|
|
101
|
-
version_requirements: *
|
|
101
|
+
version_requirements: *2155668940
|
|
102
102
|
- !ruby/object:Gem::Dependency
|
|
103
103
|
name: to_regexp
|
|
104
|
-
requirement: &
|
|
104
|
+
requirement: &2155668060 !ruby/object:Gem::Requirement
|
|
105
105
|
none: false
|
|
106
106
|
requirements:
|
|
107
107
|
- - ! '>='
|
|
@@ -109,9 +109,9 @@ dependencies:
|
|
|
109
109
|
version: 0.0.3
|
|
110
110
|
type: :runtime
|
|
111
111
|
prerelease: false
|
|
112
|
-
version_requirements: *
|
|
113
|
-
description:
|
|
114
|
-
|
|
112
|
+
version_requirements: *2155668060
|
|
113
|
+
description: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
|
|
114
|
+
string similarity and (optionally) regexp rules.'
|
|
115
115
|
email:
|
|
116
116
|
- seamus@abshere.net
|
|
117
117
|
executables: []
|
|
@@ -185,7 +185,8 @@ rubyforge_project: loose_tight_dictionary
|
|
|
185
185
|
rubygems_version: 1.8.10
|
|
186
186
|
signing_key:
|
|
187
187
|
specification_version: 3
|
|
188
|
-
summary:
|
|
188
|
+
summary: ! 'DEPRECATED: use fuzzy_match instead. Find a needle in a haystack using
|
|
189
|
+
string similarity and (optionally) regexp rules.'
|
|
189
190
|
test_files:
|
|
190
191
|
- test/helper.rb
|
|
191
192
|
- test/test_blocking.rb
|