loose_tight_dictionary 0.0.10 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. data/.gitignore +1 -0
  2. data/Gemfile +4 -0
  3. data/README.rdoc +76 -23
  4. data/Rakefile +2 -38
  5. data/benchmark/before-with-free.txt +283 -0
  6. data/benchmark/before-without-last-result.txt +257 -0
  7. data/benchmark/before.txt +304 -0
  8. data/benchmark/memory.rb +54 -0
  9. data/examples/bts_aircraft/5-2-A.htm +10305 -0
  10. data/examples/bts_aircraft/5-2-B.htm +9576 -0
  11. data/examples/bts_aircraft/5-2-D.htm +7094 -0
  12. data/examples/bts_aircraft/5-2-E.htm +2349 -0
  13. data/examples/bts_aircraft/5-2-G.htm +2922 -0
  14. data/examples/bts_aircraft/blockings.csv +1 -0
  15. data/examples/bts_aircraft/identities.csv +1 -0
  16. data/examples/bts_aircraft/negatives.csv +1 -0
  17. data/examples/bts_aircraft/number_260.csv +334 -0
  18. data/examples/bts_aircraft/positives.csv +1 -0
  19. data/examples/bts_aircraft/test_bts_aircraft.rb +123 -0
  20. data/examples/bts_aircraft/tighteners.csv +1 -0
  21. data/examples/first_name_matching.rb +14 -22
  22. data/lib/loose_tight_dictionary/blocking.rb +36 -0
  23. data/lib/loose_tight_dictionary/extract_regexp.rb +30 -0
  24. data/lib/loose_tight_dictionary/identity.rb +25 -0
  25. data/lib/loose_tight_dictionary/result.rb +23 -0
  26. data/lib/loose_tight_dictionary/score.rb +28 -0
  27. data/lib/loose_tight_dictionary/similarity.rb +62 -0
  28. data/lib/loose_tight_dictionary/tightener.rb +30 -0
  29. data/lib/loose_tight_dictionary/version.rb +3 -0
  30. data/lib/loose_tight_dictionary/wrapper.rb +37 -0
  31. data/lib/loose_tight_dictionary.rb +178 -305
  32. data/loose_tight_dictionary.gemspec +19 -64
  33. data/test/helper.rb +6 -6
  34. data/test/test_blocking.rb +23 -0
  35. data/test/test_extract_regexp.rb +18 -0
  36. data/test/test_identity.rb +18 -0
  37. data/test/test_loose_tight_dictionary.rb +52 -245
  38. data/test/test_loose_tight_dictionary_convoluted.rb.disabled +268 -0
  39. data/test/test_tightening.rb +10 -0
  40. metadata +52 -65
  41. data/VERSION +0 -1
  42. data/examples/icao-bts.rb +0 -58
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.10
data/examples/icao-bts.rb DELETED
@@ -1,58 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'rubygems'
4
- require 'remote_table'
5
- require 'ruby-debug'
6
- require 'logger'
7
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'loose_tight_dictionary.rb'))
8
-
9
- $logger = Logger.new STDERR
10
- $logger.level = Logger::DEBUG
11
- $logger.datetime_format = "%H:%M:%S"
12
- # $tee = File.open('tee.csv', 'w')
13
- $tee = STDOUT
14
-
15
- # $ltd_left = /(super|bonanza)/i
16
- # $ltd_right = /bonanza d-35/i
17
- # $ltd_dd_left = /bonanza/i
18
- # $ltd_dd_right = /musk/i
19
- # $ltd_dd_left_not = /allison/i
20
- # $ltd_dd_print = true
21
- # $ltd_ddd_left = /bonanza/i
22
- # $ltd_ddd_right = /musk/i
23
- # $ltd_ddd_left_not = /allison/i
24
- # $ltd_ddd_print = true
25
-
26
- @right = RemoteTable.new :url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv',
27
- :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }
28
-
29
- @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
30
-
31
- @identities = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false
32
-
33
- @blockings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false
34
-
35
- @positives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=1&output=csv', :headers => false
36
-
37
- @negatives = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=2&output=csv', :headers => false
38
-
39
- %w{ tightenings identities blockings }.each do |name|
40
- $logger.info name
41
- $logger.info "\n" + instance_variable_get("@#{name}").to_a.map { |record| record[0] }.join("\n")
42
- $logger.info "\n"
43
- end
44
-
45
- ('A'..'Z').each do |letter|
46
- # %w{ E }.each do |letter|
47
- @left = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
48
- :encoding => 'US-ASCII',
49
- :row_xpath => '//table/tr[2]/td/table/tr',
50
- :column_xpath => 'td'
51
-
52
- d = LooseTightDictionary.new @right, :tightenings => @tightenings, :identities => @identities, :blockings => @blockings, :logger => $logger, :tee => $tee
53
- d.left_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Model'] }
54
- d.right_reader = lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
55
- d.positives = @positives
56
- d.negatives = @negatives
57
- d.check @left
58
- end