loose_tight_dictionary 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/examples/icao-bts.rb +3 -3
- data/lib/loose_tight_dictionary.rb +51 -29
- data/test/test_loose_tight_dictionary.rb +16 -5
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.2
|
data/examples/icao-bts.rb
CHANGED
@@ -28,7 +28,7 @@ $tee = STDOUT
|
|
28
28
|
|
29
29
|
@tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
|
30
30
|
|
31
|
-
@
|
31
|
+
@identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
|
32
32
|
|
33
33
|
@blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
|
34
34
|
|
@@ -36,7 +36,7 @@ $tee = STDOUT
|
|
36
36
|
|
37
37
|
@negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
|
38
38
|
|
39
|
-
%w{ tightenings
|
39
|
+
%w{ tightenings identities blockings }.each do |name|
|
40
40
|
$logger.info name
|
41
41
|
$logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
|
42
42
|
$logger.info "\n"
|
@@ -49,7 +49,7 @@ end
|
|
49
49
|
:row_xpath => '//table/tr[2]/td/table/tr',
|
50
50
|
:column_xpath => 'td'
|
51
51
|
|
52
|
-
d = LooseTightDictionary.new @right, :tightenings => @tightenings, :
|
52
|
+
d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
|
53
53
|
d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
|
54
54
|
d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
55
55
|
d.positives = @positives
|
@@ -40,7 +40,7 @@ class LooseTightDictionary
|
|
40
40
|
|
41
41
|
attr_reader :right_records
|
42
42
|
attr_reader :tightenings
|
43
|
-
attr_reader :
|
43
|
+
attr_reader :identities
|
44
44
|
attr_reader :blockings
|
45
45
|
attr_reader :logger
|
46
46
|
attr_reader :tee
|
@@ -54,7 +54,7 @@ class LooseTightDictionary
|
|
54
54
|
def initialize(right_records, options = {})
|
55
55
|
@right_records = right_records
|
56
56
|
@tightenings = options[:tightenings] || Array.new
|
57
|
-
@
|
57
|
+
@identities = options[:identities] || Array.new
|
58
58
|
@blockings = options[:blockings] || Array.new
|
59
59
|
@left_reader = options[:left_reader]
|
60
60
|
@right_reader = options[:right_reader]
|
@@ -100,29 +100,33 @@ class LooseTightDictionary
|
|
100
100
|
return
|
101
101
|
end
|
102
102
|
left_records.each do |left_record|
|
103
|
-
|
104
|
-
|
105
|
-
|
103
|
+
begin
|
104
|
+
right_record = left_to_right left_record
|
105
|
+
ensure
|
106
|
+
tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
|
107
|
+
end
|
106
108
|
end
|
107
109
|
end
|
108
110
|
|
109
111
|
def left_to_right(left_record)
|
110
112
|
left = read_left left_record
|
111
|
-
|
113
|
+
i_options_left = i_options left
|
112
114
|
blocking_left = blocking left
|
113
115
|
t_options_left = t_options left
|
114
116
|
history = Hash.new
|
115
117
|
right_record = right_records.select { |record| blocking_left.nil? or blocking_left.match(read_right(record)) }.max do |a_record, b_record|
|
116
118
|
a = read_right a_record
|
117
119
|
b = read_right b_record
|
118
|
-
|
119
|
-
|
120
|
-
|
120
|
+
i_options_a = i_options a
|
121
|
+
i_options_b = i_options b
|
122
|
+
collision_a = collision? i_options_left, i_options_a
|
123
|
+
collision_b = collision? i_options_left, i_options_b
|
124
|
+
if collision_a and collision_b
|
121
125
|
# neither would ever work, so randomly rank one over the other
|
122
126
|
rand(2) == 1 ? -1 : 1
|
123
|
-
elsif
|
127
|
+
elsif collision_a
|
124
128
|
-1
|
125
|
-
elsif
|
129
|
+
elsif collision_b
|
126
130
|
1
|
127
131
|
else
|
128
132
|
t_left_a, t_right_a = optimize t_options_left, t_options(a)
|
@@ -157,11 +161,11 @@ class LooseTightDictionary
|
|
157
161
|
end
|
158
162
|
$ltd_1 = history[right_record]
|
159
163
|
right = read_right right_record
|
160
|
-
|
164
|
+
i_options_right = i_options right
|
161
165
|
z = 1
|
162
166
|
debugger if $ltd_left.andand.match(left) or $ltd_right.andand.match(right)
|
163
167
|
z = 1
|
164
|
-
return if
|
168
|
+
return if collision? i_options_left, i_options_right
|
165
169
|
inline_check left_record, right_record
|
166
170
|
right_record
|
167
171
|
end
|
@@ -205,14 +209,45 @@ class LooseTightDictionary
|
|
205
209
|
@_t_options ||= Hash.new
|
206
210
|
ary = Array.new
|
207
211
|
ary << T.new(str, str)
|
208
|
-
tightenings.each do |
|
209
|
-
if literal_regexp(
|
210
|
-
ary
|
212
|
+
tightenings.each do |i|
|
213
|
+
if match_data = literal_regexp(i[0]).match(str)
|
214
|
+
ary.push T.new(str, match_data.captures.compact.join)
|
211
215
|
end
|
212
216
|
end
|
213
217
|
@_t_options[str] = ary
|
214
218
|
end
|
215
219
|
|
220
|
+
class I
|
221
|
+
attr_reader :regexp, :str, :case_sensitive, :identity
|
222
|
+
def initialize(regexp, str, case_sensitive)
|
223
|
+
@regexp = regexp
|
224
|
+
@str = str
|
225
|
+
@identity = regexp.match(str).captures.compact.join
|
226
|
+
@identity = @identity.downcase if case_sensitive
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
def collision?(i_options_left, i_options_right)
|
231
|
+
i_options_left.any? do |r_left|
|
232
|
+
i_options_right.any? do |r_right|
|
233
|
+
r_left.regexp == r_right.regexp and r_left.identity != r_right.identity
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
def i_options(str)
|
239
|
+
return @_i_options[str] if @_i_options.andand.has_key?(str)
|
240
|
+
@_i_options ||= Hash.new
|
241
|
+
ary = Array.new
|
242
|
+
identities.each do |i|
|
243
|
+
regexp = literal_regexp i[0]
|
244
|
+
if regexp.match str
|
245
|
+
ary.push I.new(regexp, str, case_sensitive)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
@_i_options[str] = ary
|
249
|
+
end
|
250
|
+
|
216
251
|
def blocking(str)
|
217
252
|
return @_blocking[str] if @_blocking.andand.has_key?(str)
|
218
253
|
@_blocking ||= Hash.new
|
@@ -225,19 +260,6 @@ class LooseTightDictionary
|
|
225
260
|
@_blocking[str] = nil
|
226
261
|
end
|
227
262
|
|
228
|
-
def restrict(str)
|
229
|
-
return @_restrict[str] if @_restrict.andand.has_key?(str)
|
230
|
-
@_restrict ||= Hash.new
|
231
|
-
restrictions.each do |restriction|
|
232
|
-
if literal_regexp(restriction[0]).match str
|
233
|
-
retval = $~.captures.compact.join
|
234
|
-
retval = retval.downcase unless case_sensitive
|
235
|
-
return @_restrict[str] = retval
|
236
|
-
end
|
237
|
-
end
|
238
|
-
@_restrict[str] = nil
|
239
|
-
end
|
240
|
-
|
241
263
|
def literal_regexp(str)
|
242
264
|
return @_literal_regexp[str] if @_literal_regexp.andand.has_key? str
|
243
265
|
@_literal_regexp ||= Hash.new
|
@@ -24,7 +24,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
24
24
|
|
25
25
|
@t_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good tightening for de havilland' ]
|
26
26
|
|
27
|
-
@
|
27
|
+
@r_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good identity for de havilland' ]
|
28
28
|
|
29
29
|
@left = [
|
30
30
|
@a_left,
|
@@ -43,7 +43,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
43
43
|
['DEHAVILLAND DEHAVILLAND TWIN OTTER DHC-6']
|
44
44
|
]
|
45
45
|
@tightenings = []
|
46
|
-
@
|
46
|
+
@identities = []
|
47
47
|
@blockings = []
|
48
48
|
@positives = []
|
49
49
|
@negatives = []
|
@@ -56,7 +56,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
56
56
|
def ltd
|
57
57
|
@_ltd ||= LooseTightDictionary.new @right,
|
58
58
|
:tightenings => @tightenings,
|
59
|
-
:
|
59
|
+
:identities => @identities,
|
60
60
|
:blockings => @blockings,
|
61
61
|
:positives => @positives,
|
62
62
|
:negatives => @negatives,
|
@@ -68,6 +68,17 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
68
68
|
end
|
69
69
|
|
70
70
|
if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
|
71
|
+
should "only use identities if they stem from the same regexp" do
|
72
|
+
@identities.push @r_1
|
73
|
+
@identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
|
74
|
+
@identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
|
75
|
+
x_left = [ 'CESSNA D-333 CITATION V']
|
76
|
+
x_right = [ 'CESSNA D-333' ]
|
77
|
+
@right.push x_right
|
78
|
+
|
79
|
+
assert_equal x_right, ltd.left_to_right(x_left)
|
80
|
+
end
|
81
|
+
|
71
82
|
should "have a false match without blocking" do
|
72
83
|
# @d_left will be our victim
|
73
84
|
@right.push @d_lookalike
|
@@ -198,10 +209,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
198
209
|
end
|
199
210
|
end
|
200
211
|
|
201
|
-
should "succeed if proper
|
212
|
+
should "succeed if proper identity is applied" do
|
202
213
|
@negatives.push [ @b_left[0], @c_right[0] ]
|
203
214
|
@positives.push [ @d_left[0], @d_right[0] ]
|
204
|
-
@
|
215
|
+
@identities.push @r_1
|
205
216
|
|
206
217
|
assert_nothing_raised do
|
207
218
|
ltd.check @left
|