loose_tight_dictionary 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/examples/icao-bts.rb +3 -3
- data/lib/loose_tight_dictionary.rb +51 -29
- data/test/test_loose_tight_dictionary.rb +16 -5
- metadata +2 -2
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.
|
|
1
|
+
0.0.2
|
data/examples/icao-bts.rb
CHANGED
|
@@ -28,7 +28,7 @@ $tee = STDOUT
|
|
|
28
28
|
|
|
29
29
|
@tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
|
|
30
30
|
|
|
31
|
-
@
|
|
31
|
+
@identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
|
|
32
32
|
|
|
33
33
|
@blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
|
|
34
34
|
|
|
@@ -36,7 +36,7 @@ $tee = STDOUT
|
|
|
36
36
|
|
|
37
37
|
@negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
|
|
38
38
|
|
|
39
|
-
%w{ tightenings
|
|
39
|
+
%w{ tightenings identities blockings }.each do |name|
|
|
40
40
|
$logger.info name
|
|
41
41
|
$logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
|
|
42
42
|
$logger.info "\n"
|
|
@@ -49,7 +49,7 @@ end
|
|
|
49
49
|
:row_xpath => '//table/tr[2]/td/table/tr',
|
|
50
50
|
:column_xpath => 'td'
|
|
51
51
|
|
|
52
|
-
d = LooseTightDictionary.new @right, :tightenings => @tightenings, :
|
|
52
|
+
d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
|
|
53
53
|
d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
|
|
54
54
|
d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
|
55
55
|
d.positives = @positives
|
|
@@ -40,7 +40,7 @@ class LooseTightDictionary
|
|
|
40
40
|
|
|
41
41
|
attr_reader :right_records
|
|
42
42
|
attr_reader :tightenings
|
|
43
|
-
attr_reader :
|
|
43
|
+
attr_reader :identities
|
|
44
44
|
attr_reader :blockings
|
|
45
45
|
attr_reader :logger
|
|
46
46
|
attr_reader :tee
|
|
@@ -54,7 +54,7 @@ class LooseTightDictionary
|
|
|
54
54
|
def initialize(right_records, options = {})
|
|
55
55
|
@right_records = right_records
|
|
56
56
|
@tightenings = options[:tightenings] || Array.new
|
|
57
|
-
@
|
|
57
|
+
@identities = options[:identities] || Array.new
|
|
58
58
|
@blockings = options[:blockings] || Array.new
|
|
59
59
|
@left_reader = options[:left_reader]
|
|
60
60
|
@right_reader = options[:right_reader]
|
|
@@ -100,29 +100,33 @@ class LooseTightDictionary
|
|
|
100
100
|
return
|
|
101
101
|
end
|
|
102
102
|
left_records.each do |left_record|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
begin
|
|
104
|
+
right_record = left_to_right left_record
|
|
105
|
+
ensure
|
|
106
|
+
tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
|
|
107
|
+
end
|
|
106
108
|
end
|
|
107
109
|
end
|
|
108
110
|
|
|
109
111
|
def left_to_right(left_record)
|
|
110
112
|
left = read_left left_record
|
|
111
|
-
|
|
113
|
+
i_options_left = i_options left
|
|
112
114
|
blocking_left = blocking left
|
|
113
115
|
t_options_left = t_options left
|
|
114
116
|
history = Hash.new
|
|
115
117
|
right_record = right_records.select { |record| blocking_left.nil? or blocking_left.match(read_right(record)) }.max do |a_record, b_record|
|
|
116
118
|
a = read_right a_record
|
|
117
119
|
b = read_right b_record
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
i_options_a = i_options a
|
|
121
|
+
i_options_b = i_options b
|
|
122
|
+
collision_a = collision? i_options_left, i_options_a
|
|
123
|
+
collision_b = collision? i_options_left, i_options_b
|
|
124
|
+
if collision_a and collision_b
|
|
121
125
|
# neither would ever work, so randomly rank one over the other
|
|
122
126
|
rand(2) == 1 ? -1 : 1
|
|
123
|
-
elsif
|
|
127
|
+
elsif collision_a
|
|
124
128
|
-1
|
|
125
|
-
elsif
|
|
129
|
+
elsif collision_b
|
|
126
130
|
1
|
|
127
131
|
else
|
|
128
132
|
t_left_a, t_right_a = optimize t_options_left, t_options(a)
|
|
@@ -157,11 +161,11 @@ class LooseTightDictionary
|
|
|
157
161
|
end
|
|
158
162
|
$ltd_1 = history[right_record]
|
|
159
163
|
right = read_right right_record
|
|
160
|
-
|
|
164
|
+
i_options_right = i_options right
|
|
161
165
|
z = 1
|
|
162
166
|
debugger if $ltd_left.andand.match(left) or $ltd_right.andand.match(right)
|
|
163
167
|
z = 1
|
|
164
|
-
return if
|
|
168
|
+
return if collision? i_options_left, i_options_right
|
|
165
169
|
inline_check left_record, right_record
|
|
166
170
|
right_record
|
|
167
171
|
end
|
|
@@ -205,14 +209,45 @@ class LooseTightDictionary
|
|
|
205
209
|
@_t_options ||= Hash.new
|
|
206
210
|
ary = Array.new
|
|
207
211
|
ary << T.new(str, str)
|
|
208
|
-
tightenings.each do |
|
|
209
|
-
if literal_regexp(
|
|
210
|
-
ary
|
|
212
|
+
tightenings.each do |i|
|
|
213
|
+
if match_data = literal_regexp(i[0]).match(str)
|
|
214
|
+
ary.push T.new(str, match_data.captures.compact.join)
|
|
211
215
|
end
|
|
212
216
|
end
|
|
213
217
|
@_t_options[str] = ary
|
|
214
218
|
end
|
|
215
219
|
|
|
220
|
+
# Pairs an identity regexp with a string it was applied to and exposes the
# resulting identity: the regexp's non-nil capture groups joined into one string.
class I
  attr_reader :regexp, :str, :case_sensitive, :identity

  # regexp         - Regexp whose capture groups define the identity.
  # str            - String the identity is extracted from. It must match
  #                  regexp; otherwise the match is nil and calling #captures
  #                  on it raises NoMethodError.
  # case_sensitive - when falsy, the identity is downcased so that identities
  #                  compare case-insensitively; when truthy, case is kept.
  def initialize(regexp, str, case_sensitive)
    @regexp = regexp
    @str = str
    # Stored so the attr_reader declared above returns the value passed in.
    @case_sensitive = case_sensitive
    joined = regexp.match(str).captures.compact.join
    # Only fold case when the comparison is meant to be case-insensitive.
    @identity = case_sensitive ? joined : joined.downcase
  end
end
|
|
229
|
+
|
|
230
|
+
# Returns true when at least one left option and one right option share an
# equal regexp but carry different identities; returns false otherwise
# (including when either collection is empty).
def collision?(i_options_left, i_options_right)
  i_options_left.each do |lhs|
    i_options_right.each do |rhs|
      # Identities are only comparable when they stem from the same regexp.
      next unless lhs.regexp == rhs.regexp
      return true if lhs.identity != rhs.identity
    end
  end
  false
end
|
|
237
|
+
|
|
238
|
+
# Builds (and memoizes per string) the list of I objects for every entry in
# `identities` whose regexp matches str. Returns an empty array when none match.
def i_options(str)
  @_i_options ||= Hash.new
  return @_i_options[str] if @_i_options.has_key?(str)
  matches = []
  identities.each do |identity_row|
    # The first column of each identity row holds the regexp source text.
    pattern = literal_regexp(identity_row[0])
    next unless pattern.match(str)
    matches << I.new(pattern, str, case_sensitive)
  end
  @_i_options[str] = matches
end
|
|
250
|
+
|
|
216
251
|
def blocking(str)
|
|
217
252
|
return @_blocking[str] if @_blocking.andand.has_key?(str)
|
|
218
253
|
@_blocking ||= Hash.new
|
|
@@ -225,19 +260,6 @@ class LooseTightDictionary
|
|
|
225
260
|
@_blocking[str] = nil
|
|
226
261
|
end
|
|
227
262
|
|
|
228
|
-
def restrict(str)
|
|
229
|
-
return @_restrict[str] if @_restrict.andand.has_key?(str)
|
|
230
|
-
@_restrict ||= Hash.new
|
|
231
|
-
restrictions.each do |restriction|
|
|
232
|
-
if literal_regexp(restriction[0]).match str
|
|
233
|
-
retval = $~.captures.compact.join
|
|
234
|
-
retval = retval.downcase unless case_sensitive
|
|
235
|
-
return @_restrict[str] = retval
|
|
236
|
-
end
|
|
237
|
-
end
|
|
238
|
-
@_restrict[str] = nil
|
|
239
|
-
end
|
|
240
|
-
|
|
241
263
|
def literal_regexp(str)
|
|
242
264
|
return @_literal_regexp[str] if @_literal_regexp.andand.has_key? str
|
|
243
265
|
@_literal_regexp ||= Hash.new
|
|
@@ -24,7 +24,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
24
24
|
|
|
25
25
|
@t_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good tightening for de havilland' ]
|
|
26
26
|
|
|
27
|
-
@
|
|
27
|
+
@r_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good identity for de havilland' ]
|
|
28
28
|
|
|
29
29
|
@left = [
|
|
30
30
|
@a_left,
|
|
@@ -43,7 +43,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
43
43
|
['DEHAVILLAND DEHAVILLAND TWIN OTTER DHC-6']
|
|
44
44
|
]
|
|
45
45
|
@tightenings = []
|
|
46
|
-
@
|
|
46
|
+
@identities = []
|
|
47
47
|
@blockings = []
|
|
48
48
|
@positives = []
|
|
49
49
|
@negatives = []
|
|
@@ -56,7 +56,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
56
56
|
def ltd
|
|
57
57
|
@_ltd ||= LooseTightDictionary.new @right,
|
|
58
58
|
:tightenings => @tightenings,
|
|
59
|
-
:
|
|
59
|
+
:identities => @identities,
|
|
60
60
|
:blockings => @blockings,
|
|
61
61
|
:positives => @positives,
|
|
62
62
|
:negatives => @negatives,
|
|
@@ -68,6 +68,17 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
|
|
71
|
+
should "only use identities if they stem from the same regexp" do
|
|
72
|
+
@identities.push @r_1
|
|
73
|
+
@identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
|
|
74
|
+
@identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
|
|
75
|
+
x_left = [ 'CESSNA D-333 CITATION V']
|
|
76
|
+
x_right = [ 'CESSNA D-333' ]
|
|
77
|
+
@right.push x_right
|
|
78
|
+
|
|
79
|
+
assert_equal x_right, ltd.left_to_right(x_left)
|
|
80
|
+
end
|
|
81
|
+
|
|
71
82
|
should "have a false match without blocking" do
|
|
72
83
|
# @d_left will be our victim
|
|
73
84
|
@right.push @d_lookalike
|
|
@@ -198,10 +209,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
198
209
|
end
|
|
199
210
|
end
|
|
200
211
|
|
|
201
|
-
should "succeed if proper
|
|
212
|
+
should "succeed if proper identity is applied" do
|
|
202
213
|
@negatives.push [ @b_left[0], @c_right[0] ]
|
|
203
214
|
@positives.push [ @d_left[0], @d_right[0] ]
|
|
204
|
-
@
|
|
215
|
+
@identities.push @r_1
|
|
205
216
|
|
|
206
217
|
assert_nothing_raised do
|
|
207
218
|
ltd.check @left
|