loose_tight_dictionary 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/examples/icao-bts.rb CHANGED
@@ -28,7 +28,7 @@ $tee = STDOUT
28
28
 
29
29
  @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
30
30
 
31
- @restrictions = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
31
+ @identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
32
32
 
33
33
  @blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
34
34
 
@@ -36,7 +36,7 @@ $tee = STDOUT
36
36
 
37
37
  @negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
38
38
 
39
- %w{ tightenings restrictions blockings }.each do |name|
39
+ %w{ tightenings identities blockings }.each do |name|
40
40
  $logger.info name
41
41
  $logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
42
42
  $logger.info "\n"
@@ -49,7 +49,7 @@ end
49
49
  :row_xpath => '//table/tr[2]/td/table/tr',
50
50
  :column_xpath => 'td'
51
51
 
52
- d = LooseTightDictionary.new @right, :tightenings => @tightenings, :restrictions => @restrictions, :blockings => @blockings, :logger => $logger, :tee => $tee
52
+ d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
53
53
  d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
54
54
  d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
55
55
  d.positives = @positives
@@ -40,7 +40,7 @@ class LooseTightDictionary
40
40
 
41
41
  attr_reader :right_records
42
42
  attr_reader :tightenings
43
- attr_reader :restrictions
43
+ attr_reader :identities
44
44
  attr_reader :blockings
45
45
  attr_reader :logger
46
46
  attr_reader :tee
@@ -54,7 +54,7 @@ class LooseTightDictionary
54
54
  def initialize(right_records, options = {})
55
55
  @right_records = right_records
56
56
  @tightenings = options[:tightenings] || Array.new
57
- @restrictions = options[:restrictions] || Array.new
57
+ @identities = options[:identities] || Array.new
58
58
  @blockings = options[:blockings] || Array.new
59
59
  @left_reader = options[:left_reader]
60
60
  @right_reader = options[:right_reader]
@@ -100,29 +100,33 @@ class LooseTightDictionary
100
100
  return
101
101
  end
102
102
  left_records.each do |left_record|
103
- right_record = left_to_right left_record
104
- inline_check left_record, right_record
105
- tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
103
+ begin
104
+ right_record = left_to_right left_record
105
+ ensure
106
+ tee.andand.puts [ read_left(left_record), read_right(right_record), $ltd_1 ].flatten.to_csv
107
+ end
106
108
  end
107
109
  end
108
110
 
109
111
  def left_to_right(left_record)
110
112
  left = read_left left_record
111
- restricted_left = restrict left
113
+ i_options_left = i_options left
112
114
  blocking_left = blocking left
113
115
  t_options_left = t_options left
114
116
  history = Hash.new
115
117
  right_record = right_records.select { |record| blocking_left.nil? or blocking_left.match(read_right(record)) }.max do |a_record, b_record|
116
118
  a = read_right a_record
117
119
  b = read_right b_record
118
- restricted_a = restrict a
119
- restricted_b = restrict b
120
- if restricted_left and restricted_a and restricted_b and restricted_left != restricted_a and restricted_left != restricted_b
120
+ i_options_a = i_options a
121
+ i_options_b = i_options b
122
+ collision_a = collision? i_options_left, i_options_a
123
+ collision_b = collision? i_options_left, i_options_b
124
+ if collision_a and collision_b
121
125
  # neither would ever work, so randomly rank one over the other
122
126
  rand(2) == 1 ? -1 : 1
123
- elsif restricted_left and restricted_a and restricted_left != restricted_a
127
+ elsif collision_a
124
128
  -1
125
- elsif restricted_left and restricted_b and restricted_left != restricted_b
129
+ elsif collision_b
126
130
  1
127
131
  else
128
132
  t_left_a, t_right_a = optimize t_options_left, t_options(a)
@@ -157,11 +161,11 @@ class LooseTightDictionary
157
161
  end
158
162
  $ltd_1 = history[right_record]
159
163
  right = read_right right_record
160
- restricted_right = restrict right
164
+ i_options_right = i_options right
161
165
  z = 1
162
166
  debugger if $ltd_left.andand.match(left) or $ltd_right.andand.match(right)
163
167
  z = 1
164
- return if restricted_left and restricted_right and restricted_left != restricted_right
168
+ return if collision? i_options_left, i_options_right
165
169
  inline_check left_record, right_record
166
170
  right_record
167
171
  end
@@ -205,14 +209,45 @@ class LooseTightDictionary
205
209
  @_t_options ||= Hash.new
206
210
  ary = Array.new
207
211
  ary << T.new(str, str)
208
- tightenings.each do |tightening|
209
- if literal_regexp(tightening[0]).match str
210
- ary << T.new(str, $~.captures.compact.join)
212
+ tightenings.each do |i|
213
+ if match_data = literal_regexp(i[0]).match(str)
214
+ ary.push T.new(str, match_data.captures.compact.join)
211
215
  end
212
216
  end
213
217
  @_t_options[str] = ary
214
218
  end
215
219
 
220
+ class I
221
+ attr_reader :regexp, :str, :case_sensitive, :identity
222
+ def initialize(regexp, str, case_sensitive)
223
+ @regexp = regexp
224
+ @str = str
225
+ @identity = regexp.match(str).captures.compact.join
226
+ @identity = @identity.downcase if case_sensitive
227
+ end
228
+ end
229
+
230
+ def collision?(i_options_left, i_options_right)
231
+ i_options_left.any? do |r_left|
232
+ i_options_right.any? do |r_right|
233
+ r_left.regexp == r_right.regexp and r_left.identity != r_right.identity
234
+ end
235
+ end
236
+ end
237
+
238
+ def i_options(str)
239
+ return @_i_options[str] if @_i_options.andand.has_key?(str)
240
+ @_i_options ||= Hash.new
241
+ ary = Array.new
242
+ identities.each do |i|
243
+ regexp = literal_regexp i[0]
244
+ if regexp.match str
245
+ ary.push I.new(regexp, str, case_sensitive)
246
+ end
247
+ end
248
+ @_i_options[str] = ary
249
+ end
250
+
216
251
  def blocking(str)
217
252
  return @_blocking[str] if @_blocking.andand.has_key?(str)
218
253
  @_blocking ||= Hash.new
@@ -225,19 +260,6 @@ class LooseTightDictionary
225
260
  @_blocking[str] = nil
226
261
  end
227
262
 
228
- def restrict(str)
229
- return @_restrict[str] if @_restrict.andand.has_key?(str)
230
- @_restrict ||= Hash.new
231
- restrictions.each do |restriction|
232
- if literal_regexp(restriction[0]).match str
233
- retval = $~.captures.compact.join
234
- retval = retval.downcase unless case_sensitive
235
- return @_restrict[str] = retval
236
- end
237
- end
238
- @_restrict[str] = nil
239
- end
240
-
241
263
  def literal_regexp(str)
242
264
  return @_literal_regexp[str] if @_literal_regexp.andand.has_key? str
243
265
  @_literal_regexp ||= Hash.new
@@ -24,7 +24,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
24
24
 
25
25
  @t_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good tightening for de havilland' ]
26
26
 
27
- @d_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good restriction for de havilland' ]
27
+ @r_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good identity for de havilland' ]
28
28
 
29
29
  @left = [
30
30
  @a_left,
@@ -43,7 +43,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
43
43
  ['DEHAVILLAND DEHAVILLAND TWIN OTTER DHC-6']
44
44
  ]
45
45
  @tightenings = []
46
- @restrictions = []
46
+ @identities = []
47
47
  @blockings = []
48
48
  @positives = []
49
49
  @negatives = []
@@ -56,7 +56,7 @@ class TestLooseTightDictionary < Test::Unit::TestCase
56
56
  def ltd
57
57
  @_ltd ||= LooseTightDictionary.new @right,
58
58
  :tightenings => @tightenings,
59
- :restrictions => @restrictions,
59
+ :identities => @identities,
60
60
  :blockings => @blockings,
61
61
  :positives => @positives,
62
62
  :negatives => @negatives,
@@ -68,6 +68,17 @@ class TestLooseTightDictionary < Test::Unit::TestCase
68
68
  end
69
69
 
70
70
  if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
71
+ should "only use identities if they stem from the same regexp" do
72
+ @identities.push @r_1
73
+ @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
74
+ @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
75
+ x_left = [ 'CESSNA D-333 CITATION V']
76
+ x_right = [ 'CESSNA D-333' ]
77
+ @right.push x_right
78
+
79
+ assert_equal x_right, ltd.left_to_right(x_left)
80
+ end
81
+
71
82
  should "have a false match without blocking" do
72
83
  # @d_left will be our victim
73
84
  @right.push @d_lookalike
@@ -198,10 +209,10 @@ class TestLooseTightDictionary < Test::Unit::TestCase
198
209
  end
199
210
  end
200
211
 
201
- should "succeed if proper restriction is applied" do
212
+ should "succeed if proper identity is applied" do
202
213
  @negatives.push [ @b_left[0], @c_right[0] ]
203
214
  @positives.push [ @d_left[0], @d_right[0] ]
204
- @restrictions.push @d_1
215
+ @identities.push @r_1
205
216
 
206
217
  assert_nothing_raised do
207
218
  ltd.check @left
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 1
9
- version: 0.0.1
8
+ - 2
9
+ version: 0.0.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere