loose_tight_dictionary 0.0.10 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/README.rdoc +76 -23
- data/Rakefile +2 -38
- data/benchmark/before-with-free.txt +283 -0
- data/benchmark/before-without-last-result.txt +257 -0
- data/benchmark/before.txt +304 -0
- data/benchmark/memory.rb +54 -0
- data/examples/bts_aircraft/5-2-A.htm +10305 -0
- data/examples/bts_aircraft/5-2-B.htm +9576 -0
- data/examples/bts_aircraft/5-2-D.htm +7094 -0
- data/examples/bts_aircraft/5-2-E.htm +2349 -0
- data/examples/bts_aircraft/5-2-G.htm +2922 -0
- data/examples/bts_aircraft/blockings.csv +1 -0
- data/examples/bts_aircraft/identities.csv +1 -0
- data/examples/bts_aircraft/negatives.csv +1 -0
- data/examples/bts_aircraft/number_260.csv +334 -0
- data/examples/bts_aircraft/positives.csv +1 -0
- data/examples/bts_aircraft/test_bts_aircraft.rb +123 -0
- data/examples/bts_aircraft/tighteners.csv +1 -0
- data/examples/first_name_matching.rb +14 -22
- data/lib/loose_tight_dictionary/blocking.rb +36 -0
- data/lib/loose_tight_dictionary/extract_regexp.rb +30 -0
- data/lib/loose_tight_dictionary/identity.rb +25 -0
- data/lib/loose_tight_dictionary/result.rb +23 -0
- data/lib/loose_tight_dictionary/score.rb +28 -0
- data/lib/loose_tight_dictionary/similarity.rb +62 -0
- data/lib/loose_tight_dictionary/tightener.rb +30 -0
- data/lib/loose_tight_dictionary/version.rb +3 -0
- data/lib/loose_tight_dictionary/wrapper.rb +37 -0
- data/lib/loose_tight_dictionary.rb +178 -305
- data/loose_tight_dictionary.gemspec +19 -64
- data/test/helper.rb +6 -6
- data/test/test_blocking.rb +23 -0
- data/test/test_extract_regexp.rb +18 -0
- data/test/test_identity.rb +18 -0
- data/test/test_loose_tight_dictionary.rb +52 -245
- data/test/test_loose_tight_dictionary_convoluted.rb.disabled +268 -0
- data/test/test_tightening.rb +10 -0
- metadata +52 -65
- data/VERSION +0 -1
- data/examples/icao-bts.rb +0 -58
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.0.10
|
data/examples/icao-bts.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'remote_table'
|
5
|
-
require 'ruby-debug'
|
6
|
-
require 'logger'
|
7
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'loose_tight_dictionary.rb'))
|
8
|
-
|
9
|
-
$logger = Logger.new STDERR
|
10
|
-
$logger.level = Logger::DEBUG
|
11
|
-
$logger.datetime_format = "%H:%M:%S"
|
12
|
-
# $tee = File.open('tee.csv', 'w')
|
13
|
-
$tee = STDOUT
|
14
|
-
|
15
|
-
# $ltd_left = /(super|bonanza)/i
|
16
|
-
# $ltd_right = /bonanza d-35/i
|
17
|
-
# $ltd_dd_left = /bonanza/i
|
18
|
-
# $ltd_dd_right = /musk/i
|
19
|
-
# $ltd_dd_left_not = /allison/i
|
20
|
-
# $ltd_dd_print = true
|
21
|
-
# $ltd_ddd_left = /bonanza/i
|
22
|
-
# $ltd_ddd_right = /musk/i
|
23
|
-
# $ltd_ddd_left_not = /allison/i
|
24
|
-
# $ltd_ddd_print = true
|
25
|
-
|
26
|
-
@right = RemoteTable.new :url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv',
|
27
|
-
:select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }
|
28
|
-
|
29
|
-
@tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
|
30
|
-
|
31
|
-
@identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
|
32
|
-
|
33
|
-
@blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
|
34
|
-
|
35
|
-
@positives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=1&output=csv', :headers => false
|
36
|
-
|
37
|
-
@negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
|
38
|
-
|
39
|
-
%w{ tightenings identities blockings }.each do |name|
|
40
|
-
$logger.info name
|
41
|
-
$logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
|
42
|
-
$logger.info "\n"
|
43
|
-
end
|
44
|
-
|
45
|
-
('A'..'Z').each do |letter|
|
46
|
-
# %w{ E }.each do |letter|
|
47
|
-
@left = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
48
|
-
:encoding => 'US-ASCII',
|
49
|
-
:row_xpath => '//table/tr[2]/td/table/tr',
|
50
|
-
:column_xpath => 'td'
|
51
|
-
|
52
|
-
d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
|
53
|
-
d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
|
54
|
-
d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
55
|
-
d.positives = @positives
|
56
|
-
d.negatives = @negatives
|
57
|
-
d.check @left
|
58
|
-
end
|