loose_tight_dictionary 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/examples/icao-bts.rb CHANGED
@@ -28,7 +28,7 @@ $tee = STDOUT
28
28
 
29
29
  @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
30
30
 
31
- @restrictions = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
31
+ @identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
32
32
 
33
33
  @blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
34
34
 
@@ -36,7 +36,7 @@ $tee = STDOUT
36
36
 
37
37
  @negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
38
38
 
39
- %w{ tightenings restrictions blockings }.each do |name|
39
+ %w{ tightenings identities blockings }.each do |name|
40
40
  $logger.info name
41
41
  $logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
42
42
  $logger.info "\n"
@@ -49,7 +49,7 @@ end
49
49
  :row_xpath => '//table/tr[2]/td/table/tr',
50
50
  :column_xpath => 'td'
51
51
 
52
- d = LooseTightDictionary.new @right, :tightenings => @tightenings, :restrictions => @restrictions, :blockings => @blockings, :logger => $logger, :tee => $tee
52
+ d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
53
53
  d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
54
54
  d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
55
55
  d.positives = @positives
@@ -40,7 +40,7 @@ class LooseTightDictionary
40
40
 
41
41
  attr_reader :right_records
42
42
  attr_reader :tightenings
43
- attr_reader :restrictions
43
+ attr_reader :identities
44
44
  attr_reader :blockings
45
45
  attr_reader :logger
46
46
  attr_reader :tee
@@ -54,7 +54,7 @@ class LooseTightDictionary
54
54
  def initialize(right_records, options = {})
55
55
  @right_records = right_records
56
56
  @tightenings = options[:tightenings] || Array.new
57
- @restrictions = options[:restrictions] || Array.new
57
+ @identities = options[:identities] || Array.new
58
58
  @blockings = options[:blockings] || Array.new
59
59
  @left_reader = options[:left_reader]
60
60
  @right_reader = options[:right_reader]
@@ -100,29 +100,33 @@ class LooseTightDictionary
100
100
  return
101
101
  end
102
102
  left_records.each do |left_record|
103
- right_record = left_to_right left_record
104
- inline_check left_record, right_record
105
- tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
103
+ begin
104
+ right_record = left_to_right left_record
105
+ ensure
106
+ tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
107
+ end
106
108
  end
107
109
  end
108
110
 
109
111
  def left_to_right(left_record)
110
112
  left = read_left left_record
111
- restricted_left = restrict left
113
+ i_options_left = i_options left
112
114
  blocking_left = blocking left
113
115
  t_options_left = t_options left
114
116
  history = Hash.new
115
117
  right_record = right_records.select { |record| blocking_left.nil? or blocking_left.match(read_right(record)) }.max do |a_record, b_record|
116
118
  a = read_right a_record
117
119
  b = read_right b_record
118
- restricted_a = restrict a
119
- restricted_b = restrict b
120
- if restricted_left and restricted_a and restricted_b and restricted_left != restricted_a and restricted_left != restricted_b
120
+ i_options_a = i_options a
121
+ i_options_b = i_options b
122
+ collision_a = collision? i_options_left, i_options_a
123
+ collision_b = collision? i_options_left, i_options_b
124
+ if collision_a and collision_b
121
125
  # neither would ever work, so randomly rank one over the other
122
126
  rand(2) == 1 ? -1 : 1
123
- elsif restricted_left and restricted_a and restricted_left != restricted_a
127
+ elsif collision_a
124
128
  -1
125
- elsif restricted_left and restricted_b and restricted_left != restricted_b
129
+ elsif collision_b
126
130
  1
127
131
  else
128
132
  t_left_a, t_right_a = optimize t_options_left, t_options(a)
@@ -157,11 +161,11 @@ class LooseTightDictionary
157
161
  end
158
162
  $ltd_1 = history[right_record]
159
163
  right = read_right right_record
160
- restricted_right = restrict right
164
+ i_options_right = i_options right
161
165
  z = 1
162
166
  debugger if $ltd_left.andand.match(left) or $ltd_right.andand.match(right)
163
167
  z = 1
164
- return if restricted_left and restricted_right and restricted_left != restricted_right
168
+ return if collision? i_options_left, i_options_right
165
169
  inline_check left_record, right_record
166
170
  right_record
167
171
  end
@@ -205,14 +209,45 @@ class LooseTightDictionary
205
209
  @_t_options ||= Hash.new
206
210
  ary = Array.new
207
211
  ary << T.new(str, str)
208
- tightenings.each do |tightening|
209
- if literal_regexp(tightening[0]).match str
210
- ary << T.new(str, $~.captures.compact.join)
212
+ tightenings.each do |i|
213
+ if match_data = literal_regexp(i[0]).match(str)
214
+ ary.push T.new(str, match_data.captures.compact.join)
211
215
  end
212
216
  end
213
217
  @_t_options[str] = ary
214
218
  end
215
219
 
220
# An identity extracted from a string by one identity regexp.
#
# The identity is the concatenation of the regexp's non-nil capture groups
# as matched against the string. Two strings whose identities differ for the
# SAME regexp are treated as a collision by the dictionary.
class I
  attr_reader :regexp, :str, :case_sensitive, :identity

  # regexp         - Regexp whose capture groups define the identity.
  # str            - String to extract the identity from. It must match
  #                  +regexp+; otherwise +match+ returns nil and calling
  #                  +captures+ on it raises NoMethodError.
  # case_sensitive - when falsy, the identity is downcased so identities
  #                  compare case-insensitively.
  def initialize(regexp, str, case_sensitive)
    @regexp = regexp
    @str = str
    # Store the flag so the declared reader returns what was passed in
    # (it was previously never assigned and always read back as nil).
    @case_sensitive = case_sensitive
    @identity = regexp.match(str).captures.compact.join
    # Fold case only when case does NOT matter. The previous condition was
    # inverted and downcased exactly when case sensitivity was requested.
    @identity = @identity.downcase unless case_sensitive
  end
end
229
+
230
# Reports whether two sets of identity options contradict each other.
#
# A collision exists when some left option and some right option were
# produced by an equal regexp but carry different identities. Options that
# come from different regexps are never compared against each other.
#
# i_options_left  - collection of objects responding to +regexp+ and +identity+.
# i_options_right - collection of objects responding to +regexp+ and +identity+.
#
# Returns true on the first contradicting pair, false otherwise.
def collision?(i_options_left, i_options_right)
  i_options_left.each do |lhs|
    i_options_right.each do |rhs|
      # Only identities stemming from the same regexp are comparable.
      next unless lhs.regexp == rhs.regexp
      return true if lhs.identity != rhs.identity
    end
  end
  false
end
237
+
238
# Builds, and memoizes per input string, the list of identity options for
# +str+: one I instance for every configured identity whose regexp matches.
#
# str - the String to examine.
#
# Returns an Array of I objects (empty when no identity regexp matches).
# The result is cached in @_i_options keyed by +str+.
def i_options(str)
  @_i_options ||= Hash.new
  return @_i_options[str] if @_i_options.has_key?(str)

  matches = Array.new
  identities.each do |row|
    # The first column of each identity row holds the regexp source.
    pattern = literal_regexp row[0]
    next unless pattern.match str
    matches.push I.new(pattern, str, case_sensitive)
  end
  @_i_options[str] = matches
end
250
+
216
251
  def blocking(str)
217
252
  return @_blocking[str] if @_blocking.andand.has_key?(str)
218
253
  @_blocking ||= Hash.new
@@ -225,19 +260,6 @@ class LooseTightDictionary
225
260
  @_blocking[str] = nil
226
261
  end
227
262
 
228
- def restrict(str)
229
- return @_restrict[str] if @_restrict.andand.has_key?(str)
230
- @_restrict ||= Hash.new
231
- restrictions.each do |restriction|
232
- if literal_regexp(restriction[0]).match str
233
- retval = $~.captures.compact.join
234
- retval = retval.downcase unless case_sensitive
235
- return @_restrict[str] = retval
236
- end
237
- end
238
- @_restrict[str] = nil
239
- end
240
-
241
263
  def literal_regexp(str)
242
264
  return @_literal_regexp[str] if @_literal_regexp.andand.has_key? str
243
265
  @_literal_regexp ||= Hash.new
@@ -24,7 +24,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
24
24
 
25
25
  @t_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good tightening for de havilland' ]
26
26
 
27
- @d_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good restriction for de havilland' ]
27
+ @r_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good identity for de havilland' ]
28
28
 
29
29
  @left = [
30
30
  @a_left,
@@ -43,7 +43,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
43
43
  ['DEHAVILLAND DEHAVILLAND TWIN OTTER DHC-6']
44
44
  ]
45
45
  @tightenings = []
46
- @restrictions = []
46
+ @identities = []
47
47
  @blockings = []
48
48
  @positives = []
49
49
  @negatives = []
@@ -56,7 +56,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
56
56
  def ltd
57
57
  @_ltd ||= LooseTightDictionary.new @right,
58
58
  :tightenings => @tightenings,
59
- :restrictions => @restrictions,
59
+ :identities => @identities,
60
60
  :blockings => @blockings,
61
61
  :positives => @positives,
62
62
  :negatives => @negatives,
@@ -68,6 +68,17 @@ class TestLooseTightDictionary < Test::Unit::TestCase
68
68
  end
69
69
 
70
70
  if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
71
+ should "only use identities if they stem from the same regexp" do
72
+ @identities.push @r_1
73
+ @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
74
+ @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
75
+ x_left = [ 'CESSNA D-333 CITATION V']
76
+ x_right = [ 'CESSNA D-333' ]
77
+ @right.push x_right
78
+
79
+ assert_equal x_right, ltd.left_to_right(x_left)
80
+ end
81
+
71
82
  should "have a false match without blocking" do
72
83
  # @d_left will be our victim
73
84
  @right.push @d_lookalike
@@ -198,10 +209,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
198
209
  end
199
210
  end
200
211
 
201
- should "succeed if proper restriction is applied" do
212
+ should "succeed if proper identity is applied" do
202
213
  @negatives.push [ @b_left[0], @c_right[0] ]
203
214
  @positives.push [ @d_left[0], @d_right[0] ]
204
- @restrictions.push @d_1
215
+ @identities.push @r_1
205
216
 
206
217
  assert_nothing_raised do
207
218
  ltd.check @left
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 1
9
- version: 0.0.1
8
+ - 2
9
+ version: 0.0.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere