fuzzy_match 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/.document +5 -0
  2. data/.gitignore +22 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +20 -0
  5. data/README.rdoc +94 -0
  6. data/Rakefile +21 -0
  7. data/THANKS-WILLIAM-JAMES.rb +37 -0
  8. data/benchmark/before-with-free.txt +283 -0
  9. data/benchmark/before-without-last-result.txt +257 -0
  10. data/benchmark/before.txt +304 -0
  11. data/benchmark/memory.rb +54 -0
  12. data/examples/bts_aircraft/5-2-A.htm +10305 -0
  13. data/examples/bts_aircraft/5-2-B.htm +9576 -0
  14. data/examples/bts_aircraft/5-2-D.htm +7094 -0
  15. data/examples/bts_aircraft/5-2-E.htm +2349 -0
  16. data/examples/bts_aircraft/5-2-G.htm +2922 -0
  17. data/examples/bts_aircraft/blockings.csv +1 -0
  18. data/examples/bts_aircraft/identities.csv +1 -0
  19. data/examples/bts_aircraft/negatives.csv +1 -0
  20. data/examples/bts_aircraft/number_260.csv +334 -0
  21. data/examples/bts_aircraft/positives.csv +1 -0
  22. data/examples/bts_aircraft/test_bts_aircraft.rb +118 -0
  23. data/examples/bts_aircraft/tighteners.csv +1 -0
  24. data/examples/first_name_matching.rb +15 -0
  25. data/examples/icao-bts.xls +0 -0
  26. data/fuzzy_match.gemspec +32 -0
  27. data/lib/fuzzy_match/blocking.rb +36 -0
  28. data/lib/fuzzy_match/cached_result.rb +74 -0
  29. data/lib/fuzzy_match/identity.rb +23 -0
  30. data/lib/fuzzy_match/result.rb +17 -0
  31. data/lib/fuzzy_match/score.rb +125 -0
  32. data/lib/fuzzy_match/similarity.rb +53 -0
  33. data/lib/fuzzy_match/stop_word.rb +19 -0
  34. data/lib/fuzzy_match/tightener.rb +28 -0
  35. data/lib/fuzzy_match/version.rb +3 -0
  36. data/lib/fuzzy_match/wrapper.rb +67 -0
  37. data/lib/fuzzy_match.rb +252 -0
  38. data/test/helper.rb +12 -0
  39. data/test/test_blocking.rb +23 -0
  40. data/test/test_cache.rb +130 -0
  41. data/test/test_fuzzy_match.rb +190 -0
  42. data/test/test_fuzzy_match_convoluted.rb.disabled +268 -0
  43. data/test/test_identity.rb +33 -0
  44. data/test/test_tightening.rb +10 -0
  45. metadata +197 -0
@@ -0,0 +1 @@
1
+ regexp,notes,more notes,
@@ -0,0 +1 @@
1
+ regexp,notes
@@ -0,0 +1 @@
1
+ needle,haystack,
@@ -0,0 +1,334 @@
1
+ Aircraft Type,Aircraft Name,Manufacturer,Long Name,Short Name,Begin Date,End Date
2
+ 7,AERO COMMANDER 200,ROCKWELL,AERO COMMANDER 200,COMMANDR,1/1/1990,
3
+ 8,AERO MACCHI AL-60,AERO MACCHI,AERO MACCHI AL-60,AL-60,1/1/1990,
4
+ 9,AERONCA 7-AC,AERONCA,AERONCA 7-AC,7-AC,1/1/1990,
5
+ 10,BEECH D-35,BEECHCRAFT,BEECH BONANZA D-35A/C/D/E/G/H/J/K/S/V,BONANZA,1/1/1990,
6
+ 20,BELLANCA CH-300,BELLANCA,BELLANCA CH-300,CH-300,1/1/1990,
7
+ 24,BEECH B-23,BEECHCRAFT,BEECH B-23 MUSKETEER,MUSKTEER,1/1/1990,
8
+ 29,CESSNA 150/152,CESSNA,CESSNA 150/152,CSS150/2,1/1/1990,
9
+ 30,CESSNA 180,CESSNA,CESSNA 180,CSSNA180,1/1/1990,
10
+ 31,CESSNA 180A/B,CESSNA,CESSNA 180A/B,CSNA180A,1/1/1990,
11
+ 32,CESSNA 180C/D/E/F,CESSNA,CESSNA 180C/D/E/F,CSNA180C,1/1/1990,
12
+ 33,CESSNA 185A/B/C,CESSNA,CESSNA 185A/B/C SKYWAGON,SKYWAGON,1/1/1990,
13
+ 34,HELIO H-250,HELIO,HELIO H-250,H-250,1/1/1990,
14
+ 35,CESSNA 206/207/209,CESSNA,CESSNA C206/207/209/210 STATIONAIR,STATIONR,1/1/1990,
15
+ 36,CESSNA 172 SKYHAWK,CESSNA,CESSNA 172 SKYHAWK,SKYHAWK,1/1/1990,
16
+ 37,CESSNA 195,CESSNA,CESSNA 195,CSSNA195,1/1/1990,
17
+ 38,CESSNA CARDINAL,CESSNA,CESSNA 177 CARDINAL,CARDINAL,1/1/1990
18
+ 39,CESSNA 182 SKYLANE,CESSNA,CESSNA 182 SKYLANE,SKYLANE,1/1/1990
19
+ 40,DEHAVILLAND DHC2,DEHAVILLAND OF CANADA,DEHAVILLAND DHC2 BEAVER,BEAVER,1/1/1990
20
+ 41,CESSNA 205,CESSNA,CESSNA 205,CSSNA205,1/1/1990
21
+ 42,DEHAVILLAND DHC3,DEHAVILLAND OF CANADA,DEHAVILLAND DHC3 OTTER,OTTER,1/1/1990
22
+ 44,LAKE LA-4,LAKE,LAKE LA-4,LA-4,1/1/1990
23
+ 50,HOWARD DGA-15P,HOWARD,HOWARD DGA-15P,DGA-15P,1/1/1990
24
+ 51,MOONEY M-21,MOONEY,MOONEY M-21,M-21,1/1/1990
25
+ 52,MOONEY M-20C/E/G,MOONEY,MOONEY M-20C/E/G,M-20C/E,1/1/1990
26
+ 65,NOORDUYN UC-64AS,NOORDUYN,NOORDUYN UC-64AS,UC-64AS,1/1/1990
27
+ 70,PILATUS PORTER PC6,PILATUS,PILATUS PORTER PC6,PC6,1/1/1990
28
+ 71,PILATUS PC6/350,PILATUS,PILATUS PORTER PC6/350,PC6/350,1/1/1990
29
+ 79,PIPER PA-32,PIPER,PIPER PA-32 (CHEROKEE 6),CHEROKEE,1/1/1990
30
+ 80,PIPER PA-18,PIPER,PIPER PA-18 (SUPER-CUB),SUPERCUB,1/1/1990
31
+ 81,PIPER PA-14,PIPER,PIPER PA-14 (FAMILY-CRUISER),FAMCRUIS,1/1/1990
32
+ 82,PIPER PA-22,PIPER,PIPER PA-22 (TRI-PACER),TRIPACER,1/1/1990
33
+ 83,PIPER PA-24,PIPER,PIPER PA-24 (COMANCHE),COMANCHE,1/1/1990
34
+ 84,PIPER PA-28,PIPER,PIPER PA-28 (CHEROKEE),CHEROKEE,1/1/1990
35
+ 85,STINSON SR-9,STINSON,STINSON SR-9,SR-9,1/1/1990
36
+ 86,PIPER PA-12,PIPER,PIPER PA-12 (SUPERCRUISER),,1/1/2002
37
+ 87,STINSON V-77,STINSON,STINSON V-77,V-77,1/1/1990
38
+ 88,STINSON SR-10E,STINSON,STINSON SR-10E BUSHMAN,BUSHMAN,1/1/1990
39
+ 91,FLT/AMPHIB TURBINE,,FLOAT/AMPHIB TURBINE,FLT/AMPH,1/1/1990
40
+ 92,PISTON-LT 450 HP,,FLOAT/AMPHIB PISTON-LT 450 HP,FLT/AMPH,1/1/1990
41
+ 93,PISTON-450+ HP,,FLOAT/AMPHIB PISTON-450+ HP,FLT/AMPH,1/1/1990
42
+ 94,LAND-TURBINE,,LAND-TURBINE,LAND,1/1/1990
43
+ 95,PISTON-LT 450 HP,,LAND-PISTON-LT 450 HP,LAND,1/1/1990
44
+ 96,PISTON-450+ HP,,LAND-PISTON-450+ HP,LAND,1/1/1990
45
+ 97,STIN.FW 300-450 HP,STINSON,STINSON F.W. 300-450 HP,STNSN FW,1/1/1950
46
+ 98,STIN.FW LT 300 HP,STINSON,STINSON 0-299 HP,STNSN FW,1/1/1950
47
+ 103,AERO COMANDR 500/6,ROCKWELL,AERO COMMANDER (500/600 SERIES EXCPT 680FL),COMMANDR,1/1/1990
48
+ 104,AERO COMANDR 680FL,ROCKWELL,GRAND COMMANDER 680FL,COMMANDR,1/1/1990
49
+ 105,BEECH C-50,BEECHCRAFT,BEECH C-50 (TWIN BONANZA),TWBONANZ,1/1/1990
50
+ 110,BEECH 18,BEECHCRAFT,BEECH 18 C-185,BEECH 18,1/1/1990
51
+ 111,BEECH KINGAIR C-90,BEECHCRAFT,BEECH KING AIR C-90,KING AIR,1/1/1990
52
+ 113,BEECH B-95,BEECHCRAFT,BEECH B-95 (TRAVELAIR),TRAVLAIR,1/1/1990
53
+ 115,BEECH AT-11,BEECHCRAFT,BEECH AT-11,AT-11,1/1/1990
54
+ 117,BEECH BARON B-55,BEECHCRAFT,BEECH BARON (55 SERIES),BARON,1/1/1990
55
+ 120,CESSNA T-50,CESSNA,CESSNA T-50 (BOBCAT),BOBCAT,1/1/1990
56
+ 121,CESSNA C-421,CESSNA,CESSNA C-421,C-421,1/1/1990
57
+ 122,CESSNA C-310,CESSNA,CESSNA C-310 SERIES,C-310,1/1/1990
58
+ 125,CESSNA C-402/402A,CESSNA,CESSNA C-402/402A,C-402/A,1/1/1990
59
+ 128,CESSNA 404,CESSNA,CESSNA 404,C-404,1/1/1990
60
+ 130,C-28 5ACF PBY,CONSOLIDATED VULTEE,C-28 5ACF PBY,PBY,1/1/1990
61
+ 131,BRITT-NORMAN BN2/A,PILATUS BRITTEN-NORMAN,PILATUS BRITTEN-NORMAN BN2/A ISLANDER,ISLANDER,1/1/1990
62
+ 132,C-28 5ACF-PBY EMQ,CONSOLIDATED VULTEE,C-28 5ACF-PBY-EMQ,PBY-EMQ,1/1/1990
63
+ 133,BEECH QUEEN AIR,BEECHCRAFT,BEECH 65/65A-80/65B-80 (QUEEN AIR),QUEENAIR,1/1/1990
64
+ 140,CONVAIR CV-240,CONVAIR,CONVAIR CV-240,CV-240,1/1/1990
65
+ 143,CONVAIR CV-340/440,CONVAIR,CONVAIR CV-340/440,C340/440,1/1/1990
66
+ 148,CESSNA C-337,CESSNA,CESSNA C-337 (SUPER SKY MASTER),SKYMASTR,1/1/1990
67
+ 149,CESSNA C-401,CESSNA,CESSNA C-401,C-401,1/1/1990
68
+ 150,CURTISS C46 SERIES,CURTISS-WRIGHT,CURTISS C46/20T/A/D/F/R COMMANDO,COMMANDO,1/1/1990
69
+ 152,CESSNA C-411,CESSNA,CESSNA C-411,C-411,1/1/1990
70
+ 153,DHC4 CARIBOU,DEHAVILLAND OF CANADA,DEHAVILLAND DHC4 CARIBOU,CARIBOU,1/1/1990
71
+ 158,DOUGLAS DC-2,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-2,DC-2,1/1/1990
72
+ 159,HAMILTON B-18S,HAMILTON,HAMILTON B-18S LITTLE LINER (CONVERTED),B-18S,1/1/1990
73
+ 160,DOUGLAS DC-3,MCDONNELL DOUGLAS,"MCDONNELL DOUGLAS DC-3/A/C,C-47/B",DC-3,1/1/1990
74
+ 161,DORNIER DO-28,DORNIER,DORNIER DO-28 SKYSERVANT,DO-28,1/1/1990
75
+ 166,CESSNA C-336,CESSNA,CESSNA C-336,C-336,1/1/1990
76
+ 167,FAIRCHILD C-82A,FAIRCHILD,FAIRCHILD C-82A,C-82A,1/1/1990
77
+ 170,GRUMMAN G-21A,GRUMMAN,GRUMMAN G-21A (GOOSE),GOOSE,1/1/1990
78
+ 171,GRUMMAN SA-16A-GR,GRUMMAN,GRUMMAN SA-16A-GR (ALBATROSS),ALBATRSS,1/1/1990
79
+ 172,GRUMMAN G-44/44A,GRUMMAN,GRUMMAN G-44/44A (WIDGEON),WIDGEON,1/1/1990
80
+ 173,GRUMMAN G-73,GRUMMAN,GRUMMAN G-73 (MALLARD),MALLARD,1/1/1990
81
+ 174,DOVE DH-104,DEHAVILLAND OF CANADA,DEHAVILLAND DOVE DH-104,DOVE,1/1/1990,
82
+ 175,LOCKHEED L-12A,LOCKHEED,LOCKHEED L-12A/L-10/10A,L-12A,1/1/1990,
83
+ 180,MARTIN 202/202A,MARTIN,MARTIN 202/202A,M-202,1/1/1990,
84
+ 185,MARTIN 404,MARTIN,MARTIN 404,M-404,1/1/1990,
85
+ 190,PIPER PA-23,PIPER,PIPER PA-23-250 (AZTEC/APACHE),AZTEC,1/1/1990,
86
+ 193,PIPER T-1020,PIPER,PIPER T-1020,T-1020,1/1/1990,10/31/1996
87
+ 194,PIPER PA-31,PIPER,PIPER PA-31 (NAVAJO),NAVAJO,1/1/1990,10/31/1996
88
+ 194,PIPER PA-31/T-1020,PIPER,PIPER PA-31 (NAVAJO)/T-1020,NAVAJO,11/1/1996,
89
+ 195,PIPER PA-34/39,PIPER,PIPER PA-34/39 (TWIN COMMANCHE),TWNCOMAN,1/1/1990,
90
+ 200,BOEING 377,BOEING,BOEING 377 STRATOCRUISER,B-377,1/1/1990,
91
+ 201,BN2A TRISLANDER,PILATUS BRITTEN-NORMAN,PILATUS BRITTEN-NORMAN BN2A TRISLANDER,TRISLNDR,1/1/1990,
92
+ 205,DH-114 HERON,DEHAVILLAND OF CANADA,DEHAVILLAND DH-114 HERON,HERON,1/1/1990,
93
+ 210,DOUGLAS DC-4,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-4(C54/C54A/C54B/C54E),DC-4,1/1/1990,
94
+ 216,DOUGLAS DC-6,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6,DC-6,1/1/1990,
95
+ 218,DOUGLAS DC-6A,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6A,DC-6A,1/1/1990,
96
+ 220,DOUGLAS DC-6B,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6B,DC-6B,1/1/1990,
97
+ 225,DOUGLAS DC-7A/B,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-7A/B,DC-7A/B,1/1/1990
98
+ 228,DOUGLAS DC-7C,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-7C,DC-7C,1/1/1990
99
+ 240,LOCKHEED L-049,LOCKHEED,LOCKHEED L-049,L-049,1/1/1990
100
+ 242,LOCKHEED L-649,LOCKHEED,LOCKHEED L-649,L-649,1/1/1990
101
+ 244,LOCKHEED L-749/A,LOCKHEED,LOCKHEED L-749/749A,L-749/A,1/1/1990
102
+ 247,LOCKHEED L-1049,LOCKHEED,LOCKHEED L-1049,L-1049,1/1/1990
103
+ 248,LOCKHEED 1049C/D/E,LOCKHEED,LOCKHEED L-1049C/D/E,L-1049C,1/1/1990
104
+ 252,LOCKHEED L-1049G/H,LOCKHEED,LOCKHEED L-1049G/H,L-1049G,1/1/1990
105
+ 258,LOCKHEED L-1649A,LOCKHEED,LOCKHEED L-1649A,L-1649A,1/1/1990
106
+ 280,SIKORSKY VS-44A,SIKORSKY,SIKORSKY VS-44A (AMPHIBIAN),VS-44A,1/1/1990
107
+ 303,SUD ALOUETTE,SUD AVIATION,SUD ALOUETTE,ALOUETTE,1/1/1990
108
+ 311,BELL B-47D,BELL,BELL B-47D,B-47D,1/1/1990
109
+ 312,BELL B-47G,BELL,BELL B-47G,B-47G,1/1/1990
110
+ 313,BELL B-47G2,BELL,BELL B-47G2,B-47G2,1/1/1990
111
+ 314,BELL B-47J2,BELL,BELL B-47J2,B-47J2,1/1/1990
112
+ 315,BELL B-206A,BELL,BELL B-206A,B-206A,1/1/1990
113
+ 320,BOEING BV-107,BOEING VERTOL,BOEING VERTOL BV-107,BV-107,1/1/1990
114
+ 321,BOEING BV-44,BOEING VERTOL,BOEING VERTOL BV-44,BV-44,1/1/1990
115
+ 322,BRANTLEY B-2,BRANTLEY,BRANTLEY B-2,BRNTLY 2,1/1/1990
116
+ 323,FAIRCHILD FH-1100,FAIRCHILD-HILLER,FAIRCHILD-HILLER FH-1100,FH-1100,1/1/1990
117
+ 330,BOELKOW BO-105C,BOELKOW,BOELKOW BO-105C,BO-105C,1/1/1990
118
+ 350,HUGHES 300,HUGHES,HUGHES 300,HUGHS300,1/1/1990
119
+ 360,ROBINSON R44,ROBINSON,ROBINSON R44,R44,9/1/2002
120
+ 380,SIKORSKY S-51,SIKORSKY,SIKORSKY S-51,S-51,1/1/1990
121
+ 381,SIKORSKY S-55,SIKORSKY,SIKORSKY S-55,S-55,1/1/1990
122
+ 385,SIKORSKY S-58A/B/C,SIKORSKY,SIKORSKY S-58A/B/C,S-58,1/1/1990
123
+ 386,SIKORSKY S-61N,SIKORSKY,SIKORSKY S-61N,S-61N,1/1/1990
124
+ 387,SIKORSKY S-61,SIKORSKY,SIKORSKY S-61,S-61,1/1/1990
125
+ 388,SIKORSKY S-61L,SIKORSKY,SIKORSKY S-61L,S-61L,1/1/1990
126
+ 389,SIKORSKY S-62/A,SIKORSKY,SIKORSKY S-62/A,S-62/A,1/1/1990
127
+ 390,SIKORSKY S-76,SIKORSKY,SIKORSKY S-76,S-76,10/1/2002
128
+ 396,WESTLAND SR-N5,WESTLAND,WESTLAND SR-N5 (ACV),SR-N5,1/1/1990
129
+ 401,BEECH 1300,BEECHCRAFT,BEECH 1300,BE-1300,1/1/1990
130
+ 402,BEECH 18,BEECHCRAFT,BEECH MODEL 18 TURBO-PROP CONVERSIONS,BEECH 18,1/1/1990
131
+ 403,BEECH 99,BEECHCRAFT,BEECH 99 AIRLINER,BEECH 99,1/1/1990
132
+ 404,BEECH C99,BEECHCRAFT,BEECH C99,BEECHC99,1/1/1990
133
+ 405,BEECH 1900 A/B/C,BEECHCRAFT,BEECH 1900 A/B/C,BE-1900,1/1/1990
134
+ 406,BEECH 200 KINGAIR,BEECH,BEECH 200 SUPER KINGAIR,KINGAIR,1/1/1995
135
+ 407,BAE-748,BRITISH AEROSPACE,BRITISH AEROSPACE (HAWKER-SIDDELEY) BAE-748,BAE-748,1/1/1990
136
+ 408,BAE-ATP,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-ATP,BAE-ATP,1/1/1990
137
+ 409,BEECH B-100,BEECHCRAFT,BEECHCRAFT KING AIR B100 ,BEECH,10/1/2002
138
+ 410,ROCKWELL 680-W/690,ROCKWELL,ROCKWELL TURBO-COMMANDER 680-W/690,COMMANDR,1/1/1990
139
+ 411,BEECHCRAFT 65-A90,BEECHCRAFT,BEECH KING AIR C-90,BEECH 65,10/1/2002
140
+ 412,CASA 212,"CONSTRUCCIONES AERONAUTICAS,SA",CASA/NURTANIO C212 AVIOCAR,CASA 212,1/1/1990
141
+ 413,CASA 235,"CONSTRUCCIONES AERONAUTICAS,SA",CASA 235,CASA 235,1/1/1990
142
+ 416,CESSNA 208,CESSNA,CESSNA 208 CARAVAN,CARAVAN,1/1/1990
143
+ 417,CESSNA 406,CESSNA,CESSNA 406 CARAVAN II,CARAVNII,1/1/2002
144
+ 418,CESSNA C-441,CESSNA,CESSNA C-441,C-441,1/1/1990
145
+ 420,CONVAIR CV-540,CONVAIR,CONVAIR CV-540,CV-540,1/1/1990,
146
+ 430,CONVAIR CV-580,CONVAIR,CONVAIR CV-580,CV-580,1/1/1990,
147
+ 435,CONVAIR CV-600,CONVAIR,CONVAIR CV-600,CV-600,1/1/1990,
148
+ 440,CONVAIR CV-640,CONVAIR,CONVAIR CV-640,CV-640,1/1/1990,
149
+ 441,ATR-42,AEROSPATIALE/AERITALIA,AEROSPATIALE/AERITALIA ATR-42,ATR-42,1/1/1990,
150
+ 442,ATR-72,AEROSPATIALE/AERITALIA,AEROSPATIALE/AERITALIA ATR-72,ATR-72,1/1/1990,
151
+ 443,ANTONOV 12,ANTONOV,ANTONOV 12,AN-12,6/1/1996,12/1/1999
152
+ 444,ANTONOV 24/26/32,ANTONOV DESIGN BUREAU,ANTONOV 24/26/32,AN-24/6,1/1/1999,
153
+ 445,CONVAIR CV-660,CONVAIR,CONVAIR CV-660,CV-660,1/1/1990,
154
+ 448,DORNIER 228,DORNIER,DORNIER 228,DO-228,1/1/1990,
155
+ 449,DORNIER 328,DORNIER,DORNIER 328,DO-328,6/1/1993,
156
+ 450,F-27,FOKKER/FAIRCHILD,FOKKER FRIENDSHIP F-27/FAIRCHILD F-27/A/B/F/J,F-27,1/1/1990,
157
+ 452,FOKKER 50,FOKKER,FOKKER 50,FOKKER50,1/1/1990,
158
+ 454,FH-227,FAIRCHILD-HILLER,FAIRCHILD-HILLER FH-227,FH-227,1/1/1990,
159
+ 455,FAIRCHILD METRO 23,FAIRCHILD ,FAIRCHILD METRO 23,METRO 23,10/1/2002,
160
+ 456,SAAB-FAIRCHD 340/B,SAAB-FAIRCHILD,SAAB-FAIRCHILD 340/B,SF-340/B,1/1/1990,
161
+ 457,BEECH KING AIR,BEECHCRAFT,BEECH KING AIR C-90,KING AIR,1/1/2001
162
+ 458,BEECH B-350,BEECHCRAFT,BEECHCRAFT SUPER KING AIR,BEECH,10/1/2002
163
+ 460,GRUMMAN G-21G,GRUMMAN,GRUMMAN G-21G (TURBO-GOOSE),GOOSE,1/1/1993
164
+ 461,EMB-120 BRASILIA,EMBRAER,EMBRAER EMB-120 BRASILIA,EMB-120,1/1/1990
165
+ 462,SWEARINGEN MERLIN,SWEARINGEN,SWEARINGEN METRO MERLIN,MERLIN,1/1/1990
166
+ 463,MITSUBISHI MU-2/B,MITSUBISHI,MITSUBISHI MU-2/B,MU-2/B,1/1/1990
167
+ 464,EMBRAER BANDEIRNTE,EMBRAER,EMBRAER EMB-110 BANDEIRANTE,EMB-110,1/1/1990
168
+ 465,NIHON YS-11,NIHON (NAMCO),NIHON YS-11,YS-11,1/1/1990
169
+ 466,SWEARINGEN METRO 2,FAIRCHILD SWEARINGEN,SWEARINGEN METRO II,METRO II,1/1/1990
170
+ 467,SWEARINGEN METRO 3,FAIRCHILD SWEARINGEN,SWEARINGEN METRO III,METROIII,1/1/1990
171
+ 468,H-P JETSTREAM,HANDLEY PAGE,HANDLEY PAGE JETSTREAM,JETSTRM,1/1/1990
172
+ 469,BAE JETSTREAM 31,BRITISH AEROSPACE,BRITISH AEROSPACE JETSTREAM 31,JETST 31,1/1/1990
173
+ 470,GULFSTREAM I,GULFSTREAM AEROSPACE (GRUMMAN),GULFSTREAM I,G-159,1/1/1990
174
+ 471,JETSTREAM 41,BRITISH AEROSPACE,BRITISH AEROSPACE JETSTREAM 41,JETST 41,3/1/1993
175
+ 473,GULFSTREAM I COMM,GULFSTREAM AEROSPACE (GRUMMAN),GULFSTREAM I-COMMANDER,G-159COM,1/1/1990
176
+ 475,NORD 262,NORD AVIATION,NORD 262,N-262,1/1/1990
177
+ 476,PIPER PA-31T,PIPER,PIPER PA-31T CHEYENNE II XL,PA-31T,10/1/2002
178
+ 477,MOHAWK 298,NORD AVIATION,MOHAWK 298,MO-298,1/1/1990
179
+ 478,PIPER T-1040,PIPER,PIPER T-1040,T-1040,1/1/1990
180
+ 479,PILATUS PC-12,PILATUS,PILATUS PC-12,PC-12,1/1/1999
181
+ 480,PILATUS PORTR PC6A,PILATUS,PILATUS TURBO PORTER PC-6A,PC-6A,1/1/1990
182
+ 481,PILATUS PORTR PC6B,PILATUS,PILATUS TURBO PORTER PC-6B,PC-6B,1/1/1990
183
+ 482,DEHAVILLAND DHC8-4,DEHAVILLAND,DEHAVILLAND DHC8-400 DASH-8,DHC8-400,1/1/1999
184
+ 483,DEHAVILLAND DHC8-1,DEHAVILLAND,DEHAVILLAND DHC8-100 DASH-8,DHC8-100,1/1/1990
185
+ 485,DEHAVILLAND DHC-6,DEHAVILLAND,DEHAVILLAND TWIN OTTER DHC-6,DHC-6,1/1/1990
186
+ 486,SHORT HARLAND SC-7,SHORT BROS. & HARLAND,SHORTS HARLAND SC-7 SKYVAN,SKYVAN,1/1/1990
187
+ 487,SHORTS 330,SHORT BROS.,SHORTS 330,SHORT330,1/1/1990
188
+ 488,CARSTEDT CJ-600A,CARSTEDT,CARSTEDT CJ-600A,CJ-600A,1/1/1990
189
+ 489,SHORTS 360,SHORT BROS.,SHORTS 360,SHORT360,1/1/1990
190
+ 490,VOLPAR TURBO 18,VOLPAR,VOLPAR TURBO 18,VOLPAR18,1/1/1990
191
+ 491,DEHAVILLAND DHC8-2,BOMBARDIER,DEHAVILLAND DHC8-200Q DASH-8,DHC8-200,11/1/2002
192
+ 507,ANTONOV 12,ANTONOV DESIGN BUREAU,ANTONOV 12,AN-12,1/1/2000
193
+ 508,ANTONOV AN-22-F,ANTONOV,ANTONOV AN-22 FREIGHTER,AN-22,1/1/1990
194
+ 510,AW-650,ARMSTRONG WHITWORTH,AW-650,AW-650,1/1/1990
195
+ 520,CANADAIR CL-44D,CANADAIR,CANADAIR CL-44D,CL-44D,1/1/1990
196
+ 541,ILYUSHIN 18,ILYUSHIN,ILYUSHIN IL-18,IL-18,1/1/1990
197
+ 550,LOCKHEED L-188A/C,LOCKHEED,LOCKHEED L-188A/C ELECTRA,ELECTRA,1/1/1990
198
+ 552,LOCKHEED L-382B,LOCKHEED,LOCKHEED L-382B,L-382B,1/1/1990
199
+ 553,LOCKHEED L100-10,LOCKHEED,LOCKHEED L100-10 HERCULES,HERCULES,1/1/1990
200
+ 555,LOCKHEED L100-20,LOCKHEED,LOCKHEED L100-20 HERCULES,HERCULES,1/1/1990
201
+ 556,LOCKHEED L100-30,LOCKHEED,LOCKHEED L100-30/L-382E,HERCULES,1/1/1990
202
+ 560,SHORTS-BELFAST-SH5,SHORT BROS.,SHORTS BELFAST FREIGHTER-SH5,SHORTS-5,1/1/1990
203
+ 570,DEHAVILLAND DASH-7,DEHAVILLAND,DEHAVILLAND DHC7 DASH-7,DHC-7,1/1/1990
204
+ 580,VV-700,VICKERS,VICKERS VISCOUNT 700/744/745/745D,VV-700,1/1/1990
205
+ 584,VV-800,VICKERS,VICKERS VISCOUNT V800/810/812,VV-800,1/1/1990
206
+ 484,CANADAIR RJ-200ER,CANADAIR,CANADAIR RJ-200ER,RJ-200ER,1/1/1997
207
+ 601,FOKKER F28-1000,FOKKER,FOKKER F28-1000 FELLOWSHIP,F28-1000,1/1/1990
208
+ 602,F28-4000/6000,FOKKER,FOKKER F28-4000/6000 FELLOWSHIP,F28-4000,1/1/1990
209
+ 603,FOKKER 100,FOKKER,FOKKER 100,FOKKR100,1/1/1990
210
+ 604,FOKKER 70,FOKKER,FOKKER 70,FOKKER70,1/1/1994
211
+ 605,BAC-111-200,BRITISH AEROSPACE,BRITISH AEROSPACE BAC-111-200,BAC-111,1/1/1990
212
+ 608,BOEING 717-200,BOEING,BOEING 717-200,B717-200,1/1/1999
213
+ 610,BAC-111-400,BRITISH AEROSPACE,BRITISH AEROSPACE BAC-111-400,BAC-111,1/1/1990
214
+ 611,AERO COMMNDER 1121,ROCKWELL,AERO COMMANDER 1121,COMMANDR,1/1/1990
215
+ 612,BOEING 737-700/LR,BOEING,BOEING 737-700/700LR,B737-700,1/1/1998
216
+ 613,AEROSPATL CORVETTE,AEROSPATIALE,AEROSPATIALE CORVETTE,CORVETTE,1/1/1990
217
+ 614,BOEING 737-800,BOEING,BOEING 737-800,B737-8,1/1/1998
218
+ 615,BOEING 737-5/600LR,BOEING,BOEING 737-5/600LR,B737-5LR,1/1/1998
219
+ 616,BOEING 737-500,BOEING,BOEING 737-500,B737-5,1/1/1990
220
+ 617,BOEING 737-400,BOEING,BOEING 737-400,B737-400,1/1/1990
221
+ 618,BOEING 737-300LR,BOEING,BOEING 737-300LR,B737-3LR,1/1/1990
222
+ 619,BOEING 737-300,BOEING,BOEING 737-300,B737-300,1/1/1990
223
+ 620,BOEING 737-100/200,BOEING,BOEING 737-100/200,B737-1/2,1/1/1990
224
+ 621,BOEING 737-200C,BOEING,BOEING 737-200C,B737-2C,1/1/1990
225
+ 622,BOEING 757-200,BOEING,BOEING 757-200,B757-200,1/1/1990
226
+ 623,BOEING 757-300,BOEING,BOEING 757-300,B757-300,1/1/1999
227
+ 624,BOEING 767-400,BOEING,BOEING 767-400,B767-400,1/1/1998
228
+ 625,BOEING 767-200/ER,BOEING,BOEING 767-200/200ER,B767-2/R,1/1/1990
229
+ 626,BOEING 767-300/ER,BOEING,BOEING 767-300/300ER,B767-3/R,1/1/1990
230
+ 627,BOEING 777,BOEING,BOEING 777,B777,1/1/1994
231
+ 628,CANADAIR RJ-100/ER,CANADAIR,CANADAIR RJ-100/RJ-100ER,RJ100/ER,3/1/1993
232
+ 629,CANADAIR RJ-200ER,CANADAIR,CANADAIR RJ-200ER,RJ-200ER,1/1/1997
233
+ 630,DOUGLAS DC-9-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-10,DC-9-10,1/1/1990
234
+ 631,CANADAIR RJ-700,CANADAIR,CANADAIR RJ-700,RJ-700,1/1/1990
235
+ 632,DORNIER 328,DORNIER,DORNIER 328,DO-328,1/1/2001
236
+ 633,BOEING 737-600,BOEING,BOEING 737-600,B737-6,4/1/2002
237
+ 634,BOEING 737-900,BOEING,BOEING737-900,B737-9,4/1/2002
238
+ 635,DOUGLAS DC-9-15F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-15F,DC-9-15F,1/1/1990
239
+ 636,CESSNA CITATION II,CESSNA,CESSNA CITATION II,CESSNA,10/1/2002
240
+ 637,CESSNA CITATION V,CESSNA,CESSNA CITATION V,CESSNA,10/1/2002
241
+ 638,CANADAIR RJ900,BOMBARDIER,CANADAIR RJ900,RJ 900,3/1/2003,
242
+ 640,DOUGLAS DC-9-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-30,DC-9-30,1/1/1990,
243
+ 645,DOUGLAS DC-9-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-40,DC-9-40,1/1/1990,
244
+ 650,DOUGLAS DC-9-50,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-50,DC-9-50,1/1/1990,
245
+ 654,MD-87,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 87,MD-87,10/1/1996,
246
+ 655,"MD-80,1,2,3,8",MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 80/MD81/2/3/8,MD-80,10/1/1996,
247
+ 655,"MD-80,1,2,3,7,8",MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 80/MD81/2/3/7/8,MD-80,1/1/1990,9/30/1996
248
+ 656,MD-90,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS MD-90,MD-90,1/1/1995,
249
+ 660,LEAR-23,GATES LEARJET,GATES LEARJET LEAR-23,LEAR-23,1/1/1990,
250
+ 661,LEAR-24,GATES LEARJET,GATES LEARJET LEAR-24,LEAR-24,1/1/1990,
251
+ 662,LEAR-25,GATES LEARJET,GATES LEARJET LEAR-25,LEAR-25,1/1/1990,
252
+ 663,LEAR-35,GATES LEARJET,GATES LEARJET LEAR-35,LEAR-35,1/1/1990,
253
+ 664,HFB 320 HANSA,,HFB 320 HANSA,HFB 320,1/1/1990,
254
+ 665,BAE HS-125,BRITISH AEROSP/HAWKER SIDDELEY,HAWKER SIDDELEY 125,HS-125,1/1/1995,
255
+ 666,LEAR 55,GATES LEARJET,LEAR 55,LEAR 55,10/1/2002,
256
+ 670,ROCKWELL SABRELINR,ROCKWELL,ROCKWELL SABRELINER,SABRELNR,1/1/1990,
257
+ 674,EMBRAER-135,EMBRAER,EMBRAER-135,EMB-135,1/1/1998,
258
+ 675,EMBRAER-145,EMBRAER,EMBRAER-145,EMB-145,1/1/1996,
259
+ 676,EMBRAER-140,EMBRAER,EMBRAER-140,EMB-140,1/1/2001,12/31/2000
260
+ 676,EMBRAER-140,EMBRAER,EMBRAER-140,EMB-140,1/1/2001,
261
+ 680,CARAVELLE SE-210,AEROSPATIALE,AEROSPATIALE CARAVELLE SE-210,CARAVLLE,1/1/1990,
262
+ 681,DASSAULT FALCON,DASSAULT-BREGUET,DASSAULT-BREGUET MYSTERE-FALCON,FALCON,1/1/1990,
263
+ 689,A300-600ST(BELUGA),AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A330-600ST (BELUGA),BELUGA,7/1/1998,
264
+ 690,A300B/C/F-100/200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300B/C/F-100/200,A300B/C,1/1/1990,
265
+ 691,A300-600/R/CF/RCF,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300-600/R/CF/RCF,A300-600,1/1/1990,
266
+ 692,A310-200C/F,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A310-200C/F,A310-2CF,1/1/1990,
267
+ 693,A310-300,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A310-300,A310-300,1/1/1990,
268
+ 694,A320-100/200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A320-100/200,A320-1/2,1/1/1990,
269
+ 695,A300-B2,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300-B2,A300-B2,1/1/1990,
270
+ 696,A330,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A330,A330,1/1/1992,
271
+ 697,A340,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340,A340,1/1/1992,8/31/1996
272
+ 698,A319,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A319,A319,1/1/1997,
273
+ 699,A321,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A321,A321,1/1/1990
274
+ 710,BOEING 727-100,BOEING,BOEING 727-100,B727-100,1/1/1990
275
+ 711,BOEING 727-100C/QC,BOEING,BOEING 727-100C/QC,B727-1C,1/1/1990
276
+ 715,BOEING 727-200,BOEING,BOEING 727-200/231A,B727-200,1/1/1990
277
+ 730,DOUGLAS DC-10-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-10,DC-10-10,1/1/1990
278
+ 731,DOUGLAS DC-10-20,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-20,DC-10-20,1/1/1990
279
+ 732,DOUGLAS DC-10-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-30,DC-10-30,1/1/1990
280
+ 733,DOUGLAS DC-10-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-40,DC-10-40,1/1/1990
281
+ 735,DOUGLAS DC-10-30CF,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-30CF,DC10-30F,1/1/1990
282
+ 740,MD-11,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS MD-11,MD-11,1/1/1990
283
+ 760,L-1011-1/100/200,LOCKHEED,LOCKHEED L-1011-1/100/200,L-1011,1/1/1990
284
+ 765,L-1011-500 TRISTAR,LOCKHEED,LOCKHEED L-1011-500 TRISTAR,L-1011,1/1/1990
285
+ 780,TUPOLEV 154,TUPOLEV,TUPOLEV TU-154,TU-154,1/1/1990
286
+ 792,YAKOLEV 42,YAKOLEV,YAKOLEV YAK-42,YAK-42,1/1/1990
287
+ 871,A340-300,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-300,A340-300,9/1/1996
288
+ 872,A340-500,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-500,A340-500,9/1/1996
289
+ 873,A340-200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-200,A340-200,9/1/1996
290
+ 874,A340-600,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-600,A340-600,9/1/1996
291
+ 879,ILYUSHIN 96,ILYUSHIN,ILYUSHIN 96,IL-96,3/1/1993
292
+ 800,BOEING 707-100,BOEING,BOEING 707-100,B707-100,1/1/1990
293
+ 802,BOEING 707-100B,BOEING,BOEING 707-100B,B707-1B,1/1/1990
294
+ 804,BOEING 707-200,BOEING,BOEING 707-200,B707-200,1/1/1990
295
+ 806,BOEING 707-300,BOEING,BOEING 707-300,B707-300,1/1/1990
296
+ 808,BOEING 707-300B,BOEING,BOEING 707-300B,B707-3B,1/1/1990
297
+ 809,BOEING 707-300C,BOEING,BOEING 707-300C,B707-3C,1/1/1990
298
+ 810,BOEING 707-400,BOEING,BOEING 707-400,B707-400,1/1/1990
299
+ 812,BOEING 720,BOEING,BOEING 720-000,B720,1/1/1990
300
+ 814,BOEING 720-B,BOEING,BOEING 720-000B,B720-B,1/1/1990
301
+ 816,BOEING 747-100,BOEING,BOEING 747-100,B747-100,1/1/1990
302
+ 817,BOEING 747-200/300,BOEING,BOEING 747-200/300,B747-2/3,1/1/1990
303
+ 818,BOEING 747C,BOEING,BOEING 747C,B747C,1/1/1990
304
+ 819,BOEING 747-400,BOEING,BOEING 747-400,B747-400,1/1/1990
305
+ 820,BOEING 747F,BOEING,BOEING 747F,B747F,1/1/1990
306
+ 822,BOEING 747SP,BOEING,BOEING 747SP,B747SP,1/1/1990
307
+ 825,CONVAIR 880,CONVAIR,CONVAIR 880 (CV-22/22M),CV-880,1/1/1990
308
+ 830,CONVAIR 990,CONVAIR,CONVAIR 990 CORONADO (CV-30),CORONADO,1/1/1990
309
+ 835,AVROLINER RJ85,AVRO INTERNATIONAL AEROSPACE,AVROLINER RJ85,AV RJ85,1/1/1997
310
+ 840,DOUGLAS DC-8-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-10,DC-8-10,1/1/1990
311
+ 842,DOUGLAS DC-8-20,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-20,DC-8-20,1/1/1990
312
+ 844,DOUGLAS DC-8-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-30,DC-8-30,1/1/1990
313
+ 846,DOUGLAS DC-8-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-40,DC-8-40,1/1/1990
314
+ 848,DOUGLAS DC-8-50,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-50,DC-8-50,1/1/1990
315
+ 850,DOUGLAS DC-8-50F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-50F,DC-8-50F,1/1/1990
316
+ 851,DOUGLAS DC-8-61,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-61,DC-8-61,1/1/1990
317
+ 852,DOUGLAS DC-8-63F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-63F,DC-8-63F,1/1/1990
318
+ 854,DOUGLAS DC-8-62,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-62,DC-8-62,1/1/1990
319
+ 856,DOUGLAS DC-8-63,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-63,DC-8-63,1/1/1990
320
+ 860,DOUGLAS DC-8-71,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-71,DC-8-71,1/1/1990
321
+ 862,DOUGLAS DC-8-72,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-72,DC-8-72,1/1/1990
322
+ 864,DOUGLAS DC-8-73,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-73,DC-8-73,1/1/1990
323
+ 865,DOUGLAS DC-8-73F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-73F,DC-8-73F,1/1/1990
324
+ 866,BAE-146-100/RJ70,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-100/RJ70,BAE146-1,1/1/1990
325
+ 867,BAE-146-200,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-200,BAE146-2,1/1/1990
326
+ 868,BAE-146-300,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-300,BAE146-3,1/1/1990
327
+ 870,LOCKHEED JETSTAR,LOCKHEED,LOCKHEED JETSTAR,JETSTAR,1/1/1990
328
+ 875,CONCORDE,AEROSPATIALE/BRITISH AEROSPACE,AEROSPATIALE/BRITISH AEROSPACE CONCORDE,CONCORDE,1/1/1990
329
+ 876,ILYUSHIN 62,ILYUSHIN,ILYUSHIN 62,IL-62,1/1/1990
330
+ 877,ILYUSHIN 76/TD,ILYUSHIN,ILYUSHIN 76/TD,IL-76/TD,1/1/1990
331
+ 878,ILYUSHIN 86,ILYUSHIN,ILYUSHIN 86,IL-86,1/1/1990
332
+ 880,ANTONOV 124,ANTONOV,ANTONOV 124,AN-124,1/1/1990
333
+ 890,ANTONOV 225,ANTONOV,ANTONOV 225 (6 ENGINE),AN-224,1/1/1990
334
+ "SOURCE: Bureau of Transportation Statistics, Office of Airline Information",,,,,
@@ -0,0 +1 @@
1
+ needle,haystack,
@@ -0,0 +1,118 @@
1
+ require File.expand_path('../../../test/helper.rb', __FILE__)
2
+
3
+ require 'shoulda'
4
+
5
+ # How to iteratively develop a dictionary.
6
+
7
+ # ruby ./examples/bts_aircraft/test_bts_aircraft.rb
8
+
9
+ ####################################################
10
+ # Section 1 - constants that will get passed as arguments
11
+
12
+ # The records that your dictionary will return.
13
+ # (Example) A table of aircraft as defined by the U.S. Bureau of Transportation Statistics
14
+ HAYSTACK = RemoteTable.new :url => "file://#{File.expand_path('../number_260.csv', __FILE__)}", :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }
15
+
16
+ # A reader used to convert every record (which could be an object of any type) into a string that will be used for similarity.
17
+ # (Example) Combine the make and model into something like "boeing 747"
18
+ # Note the downcase!
19
+ HAYSTACK_READER = lambda { |record| "#{record['Manufacturer']} #{record['Long Name']}".downcase }
20
+
21
+ # Whether to even bother trying to find a match for a needle that doesn't match any explicit blocking
22
+ # (Example) False, which is the default, which means we have more work to do
23
+ MUST_MATCH_BLOCKING = false
24
+
25
+ # Blockings
26
+ # (Example) We made these by trial and error
27
+ BLOCKINGS = RemoteTable.new(:url => "file://#{File.expand_path("../blockings.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
28
+
29
+ # Tighteners
30
+ # (Example) We made these by trial and error
31
+ TIGHTENERS = RemoteTable.new(:url => "file://#{File.expand_path("../tighteners.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
32
+
33
+ # Identities
34
+ # (Example) We made these by trial and error
35
+ IDENTITIES = RemoteTable.new(:url => "file://#{File.expand_path("../identities.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
36
+
37
+ ####################################################
38
+ # Section 2 - constants that are just for tests
39
+
40
+ # The class of each record.
41
+ # (Example) ActiveSupport::OrderedHash because we're using RemoteTable
42
+ HAYSTACK_RECORD_CLASS = HAYSTACK[0].class
43
+
44
+ # Some test needles to be found in the haystack.
45
+ # (Example) Aircraft starting with A, B, D, G from the FAA (really a list of ICAO aircraft)
46
+ NEEDLES = %w{ A B D G }.inject([]) do |memo, letter|
47
+ one_letter = RemoteTable.new :url => "file://#{File.expand_path("../5-2-#{letter}.htm", __FILE__)}",
48
+ :encoding => 'US-ASCII',
49
+ :row_xpath => '//table/tr[2]/td/table/tr',
50
+ :column_xpath => 'td'
51
+ memo + one_letter.to_a
52
+ end
53
+
54
+ # Positive matches that we know about.
55
+ # (Example) We just built this file in Excel and exported it to a CSV.
56
+ POSITIVES = RemoteTable.new :url => "file://#{File.expand_path("../positives.csv", __FILE__)}", :headers => :first_row
57
+
58
+ # Negative (false positive) matches that we know about.
59
+ # (Example) We just built this file in Excel and exported it to a CSV.
60
+ NEGATIVES = RemoteTable.new :url => "file://#{File.expand_path("../negatives.csv", __FILE__)}", :headers => :first_row
61
+
62
+ ####################################################
63
+ # Section 3
64
+
65
+ FINAL_OPTIONS = {
66
+ :read => HAYSTACK_READER,
67
+ :must_match_blocking => MUST_MATCH_BLOCKING,
68
+ :tighteners => TIGHTENERS,
69
+ :identities => IDENTITIES,
70
+ :blockings => BLOCKINGS
71
+ }
72
+
73
+ class TestBtsAircraft < Test::Unit::TestCase
74
+ should "understand records by using the haystack reader" do
75
+ d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
76
+ assert d.haystack.map { |record| record.to_str }.include?('boeing boeing 707-100')
77
+ end
78
+
79
+ should "find an easy match" do
80
+ d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
81
+ record = d.find('boeing 707(100)')
82
+ assert_equal HAYSTACK_RECORD_CLASS, record.class
83
+ assert_equal HAYSTACK_READER.call(record), 'boeing boeing 707-100'
84
+ end
85
+
86
+ POSITIVES.each do |row|
87
+ needle = row['needle']
88
+ correct_record = row['haystack']
89
+ should %{find #{correct_record.blank? ? 'nothing' : correct_record} when looking for #{needle}} do
90
+ d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
91
+ record = d.find(needle.downcase)
92
+ assert_equal correct_record.downcase, HAYSTACK_READER.call(record)
93
+ end
94
+ end
95
+
96
+ NEGATIVES.each do |row|
97
+ needle = row['needle']
98
+ incorrect_record = row['haystack']
99
+ should %{not find #{incorrect_record} when looking for #{needle}} do
100
+ d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
101
+ record = d.find(needle.downcase)
102
+ assert(incorrect_record.downcase != HAYSTACK_READER.call(record))
103
+ end
104
+ end
105
+ end
106
+
107
+ # Whenever I saw a failure like this...
108
+ # 1) Failure:
109
+ # test: BtsAircraft should find AIRBUS INDUSTRIE AIRBUS INDUSTRIE A340-300 when looking for AIRBUS A340300. (TestBtsAircraft)
110
+ # [examples/bts_aircraft/test_bts_aircraft.rb:96:in `__bind_1302579566_46630'
111
+ # /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/shoulda-2.11.3/lib/shoulda/context.rb:382:in `call'
112
+ # /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/shoulda-2.11.3/lib/shoulda/context.rb:382:in `test: BtsAircraft should find AIRBUS INDUSTRIE AIRBUS INDUSTRIE A340-300 when looking for AIRBUS A340300. ']:
113
+ # <"airbus industrie airbus industrie a340-300"> expected but was
114
+ # <"airbus industrie airbus industrie a340">.
115
+
116
+ # ...I would look at it like this
117
+ d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
118
+ puts d.explain('AIRBUS A340300.'.downcase)
@@ -0,0 +1 @@
1
+ regexp,notes
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# Example script: builds a matcher over a few first names and asks it to
# explain how it handles each needle.

# Ruby older than 1.9 does not load rubygems automatically.
require 'rubygems' if RUBY_VERSION < '1.9'

# Use the library from this checkout rather than an installed gem.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'fuzzy_match'

haystack = %w[seamus andy ben]
needles = ['Mr. Seamus', 'Sr. Andy', 'Master BenT', 'Shamus Heaney']

matcher = FuzzyMatch.new(haystack)
needles.each do |needle|
  matcher.explain(needle)
  puts # blank line between needles
end
Binary file
@@ -0,0 +1,32 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for fuzzy_match.

# Make lib/ loadable so the version constant can be read without installing the gem.
$:.push File.expand_path("../lib", __FILE__)
require "fuzzy_match/version"

Gem::Specification.new do |s|
  s.name        = "fuzzy_match"
  s.version     = FuzzyMatch::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Seamus Abshere"]
  s.email       = ["seamus@abshere.net"]
  s.homepage    = "https://github.com/seamusabshere/fuzzy_match"
  s.summary     = %Q{Find a needle in a haystack using string similarity and (optionally) regexp rules. Replaces loose_tight_dictionary.}
  s.description = %Q{Find a needle in a haystack using string similarity and (optionally) regexp rules. Replaces loose_tight_dictionary.}

  s.rubyforge_project = "fuzzy_match"

  # The file lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  # The last directory in this glob was "ffuzzy_matchures", apparently a
  # search-and-replace casualty of "features"; restored here.
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "shoulda"
  s.add_development_dependency "remote_table"
  s.add_development_dependency 'activerecord', '>=3'
  s.add_development_dependency 'mysql'
  s.add_development_dependency 'cohort_scope'
  s.add_development_dependency 'weighted_average'
  s.add_development_dependency 'rake'
  # s.add_development_dependency 'amatch'
  s.add_runtime_dependency 'activesupport', '>=3'
  s.add_runtime_dependency 'to_regexp', '>=0.0.3'
end
@@ -0,0 +1,36 @@
1
class FuzzyMatch
  # "Record linkage typically involves two main steps: blocking and scoring..."
  # http://en.wikipedia.org/wiki/Record_linkage
  #
  # A blocking partitions the haystack into groups of strings that match a
  # pattern. Once a string fits a blocking, anything compared against it must
  # fit the same blocking too.
  class Blocking
    attr_reader :regexp

    # regexp_or_str - anything responding to #to_regexp.
    def initialize(regexp_or_str)
      @regexp = regexp_or_str.to_regexp
    end

    # Returns true if str matches this blocking's regexp, false otherwise.
    def match?(str)
      regexp.match(str) ? true : false
    end

    # Does this blocking "join" the two strings, i.e. do both fit into it?
    #
    # Returns nil when the blocking doesn't apply (str2 doesn't match the regexp).
    # Returns false when str2 matches but str1 doesn't.
    # Otherwise returns whether both strings produced identical captures.
    def join?(str1, str2)
      second = regexp.match(str2)
      return nil unless second

      first = regexp.match(str1)
      return false unless first

      second.captures == first.captures
    end
  end
end
@@ -0,0 +1,74 @@
1
class FuzzyMatch
  # ActiveRecord model that stores pairs of fuzzy-matched keys in the
  # fuzzy_match_cached_results table.
  #
  # Each row records one matched pair between two model classes: columns
  # +a_class+/+a+ hold the class name and key value of the side whose class
  # name sorts first, and +b_class+/+b+ hold the other side.
  class CachedResult < ::ActiveRecord::Base
    # The table_name= writer is the supported spelling; set_table_name was
    # deprecated and later removed from ActiveRecord.
    self.table_name = 'fuzzy_match_cached_results'

    # Creates the cache table and its three lookup indexes.
    def self.create_table
      connection.create_table :fuzzy_match_cached_results do |t|
        t.string :a_class
        t.string :a
        t.string :b_class
        t.string :b
      end
      connection.add_index :fuzzy_match_cached_results, [:a_class, :b_class, :a], :name => 'aba'
      connection.add_index :fuzzy_match_cached_results, [:a_class, :b_class, :b], :name => 'abb'
      connection.add_index :fuzzy_match_cached_results, [:a_class, :b_class, :a, :b], :name => 'abab'
      reset_column_information
    end

    # Ensures the cache table exists.
    #
    # from_scratch - when true, an existing table is dropped first.
    def self.setup(from_scratch = false)
      connection.drop_table :fuzzy_match_cached_results if from_scratch && table_exists?
      create_table unless table_exists?
    end

    module ActiveRecordBaseExtension
      # Defines three instance methods on the calling class, named after the
      # other class (shown here for an other class called Aircraft):
      #
      #   aircraft               - the other class's records matched to this one
      #   aircraft_foreign_keys  - the cached key values of those records
      #   cache_aircraft!        - runs the fuzzy match and stores any new pairs
      #
      # required options:
      # :primary_key - what to call on this class
      # :foreign_key - what to call on the other class
      def cache_fuzzy_match_matches_with(other_active_record_class, options)
        other = other_active_record_class.to_s.singularize.camelcase
        # The class whose name sorts first always lives in column a, so a pair
        # is stored the same way round whichever class declares the relation.
        if name < other
          own_column = :a
          other_column = :b
        else
          own_column = :b
          other_column = :a
        end
        plural = other.underscore.pluralize
        foreign_keys_method = "#{plural}_foreign_keys"

        # def aircraft
        define_method plural do
          other.constantize.where options[:foreign_key] => send(foreign_keys_method)
        end

        # def aircraft_foreign_keys
        define_method foreign_keys_method do
          fz = ::FuzzyMatch::CachedResult.arel_table
          own_class_matches = fz["#{own_column}_class".to_sym].eq(self.class.name)
          other_class_matches = fz["#{other_column}_class".to_sym].eq(other)
          own_key_matches = fz[own_column].eq(send(options[:primary_key]))
          sql = fz.project(fz[other_column]).where(own_class_matches.and(other_class_matches).and(own_key_matches)).to_sql
          # Go through the class: the instance-level #connection shortcut is
          # not available in every ActiveRecord version.
          self.class.connection.select_values sql
        end

        # def cache_aircraft!
        define_method "cache_#{plural}!" do
          other_class = other.constantize
          primary_key_value = send options[:primary_key]
          other_class.fuzzy_match.find_all(primary_key_value).each do |other_instance|
            attrs = {}
            attrs[own_column] = primary_key_value
            attrs["#{own_column}_class"] = self.class.name
            attrs[other_column] = other_instance.send options[:foreign_key]
            attrs["#{other_column}_class"] = other
            # Only insert pairs that are not cached yet.
            unless ::FuzzyMatch::CachedResult.exists? attrs
              ::FuzzyMatch::CachedResult.create! attrs
            end
          end
        end
      end
    end
  end
end

# Make cache_fuzzy_match_matches_with available on every ActiveRecord model class.
::ActiveRecord::Base.extend ::FuzzyMatch::CachedResult::ActiveRecordBaseExtension