fuzzy_match 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +8 -8
  2. data/.rspec +2 -0
  3. data/CHANGELOG +14 -0
  4. data/Gemfile +8 -0
  5. data/README.markdown +58 -38
  6. data/Rakefile +0 -9
  7. data/bin/fuzzy_match +106 -0
  8. data/fuzzy_match.gemspec +4 -4
  9. data/groupings-screenshot.png +0 -0
  10. data/highlevel.graffle +0 -0
  11. data/highlevel.png +0 -0
  12. data/lib/fuzzy_match/record.rb +58 -0
  13. data/lib/fuzzy_match/result.rb +11 -8
  14. data/lib/fuzzy_match/rule/grouping.rb +70 -12
  15. data/lib/fuzzy_match/rule/identity.rb +3 -3
  16. data/lib/fuzzy_match/rule.rb +1 -1
  17. data/lib/fuzzy_match/score/amatch.rb +0 -4
  18. data/lib/fuzzy_match/score/pure_ruby.rb +2 -8
  19. data/lib/fuzzy_match/score.rb +4 -0
  20. data/lib/fuzzy_match/similarity.rb +10 -32
  21. data/lib/fuzzy_match/version.rb +1 -1
  22. data/lib/fuzzy_match.rb +78 -94
  23. data/{test/test_amatch.rb → spec/amatch_spec.rb} +1 -2
  24. data/{test/test_cache.rb → spec/cache_spec.rb} +7 -7
  25. data/spec/foo.rb +9 -0
  26. data/spec/fuzzy_match_spec.rb +354 -0
  27. data/spec/grouping_spec.rb +60 -0
  28. data/spec/identity_spec.rb +29 -0
  29. data/{test/test_wrapper.rb → spec/record_spec.rb} +3 -7
  30. data/spec/spec_helper.rb +21 -0
  31. metadata +56 -50
  32. data/bin/fuzzy_match_checker +0 -71
  33. data/examples/bts_aircraft/5-2-A.htm +0 -10305
  34. data/examples/bts_aircraft/5-2-B.htm +0 -9576
  35. data/examples/bts_aircraft/5-2-D.htm +0 -7094
  36. data/examples/bts_aircraft/5-2-E.htm +0 -2349
  37. data/examples/bts_aircraft/5-2-G.htm +0 -2922
  38. data/examples/bts_aircraft/groupings.csv +0 -1
  39. data/examples/bts_aircraft/identities.csv +0 -1
  40. data/examples/bts_aircraft/negatives.csv +0 -1
  41. data/examples/bts_aircraft/normalizers.csv +0 -1
  42. data/examples/bts_aircraft/number_260.csv +0 -334
  43. data/examples/bts_aircraft/positives.csv +0 -1
  44. data/examples/bts_aircraft/test_bts_aircraft.rb +0 -116
  45. data/examples/first_name_matching.rb +0 -15
  46. data/examples/icao-bts.xls +0 -0
  47. data/lib/fuzzy_match/rule/normalizer.rb +0 -20
  48. data/lib/fuzzy_match/rule/stop_word.rb +0 -11
  49. data/lib/fuzzy_match/wrapper.rb +0 -73
  50. data/test/helper.rb +0 -12
  51. data/test/test_fuzzy_match.rb +0 -304
  52. data/test/test_fuzzy_match_convoluted.rb.disabled +0 -268
  53. data/test/test_grouping.rb +0 -28
  54. data/test/test_identity.rb +0 -34
  55. data/test/test_normalizer.rb +0 -10
@@ -1 +0,0 @@
1
- regexp,notes,more notes,
@@ -1 +0,0 @@
1
- regexp,notes
@@ -1 +0,0 @@
1
- needle,haystack,
@@ -1 +0,0 @@
1
- regexp,notes
@@ -1,334 +0,0 @@
1
- Aircraft Type,Aircraft Name,Manufacturer,Long Name,Short Name,Begin Date,End Date
2
- 7,AERO COMMANDER 200,ROCKWELL,AERO COMMANDER 200,COMMANDR,1/1/1990,
3
- 8,AERO MACCHI AL-60,AERO MACCHI,AERO MACCHI AL-60,AL-60,1/1/1990,
4
- 9,AERONCA 7-AC,AERONCA,AERONCA 7-AC,7-AC,1/1/1990,
5
- 10,BEECH D-35,BEECHCRAFT,BEECH BONANZA D-35A/C/D/E/G/H/J/K/S/V,BONANZA,1/1/1990,
6
- 20,BELLANCA CH-300,BELLANCA,BELLANCA CH-300,CH-300,1/1/1990,
7
- 24,BEECH B-23,BEECHCRAFT,BEECH B-23 MUSKETEER,MUSKTEER,1/1/1990,
8
- 29,CESSNA 150/152,CESSNA,CESSNA 150/152,CSS150/2,1/1/1990,
9
- 30,CESSNA 180,CESSNA,CESSNA 180,CSSNA180,1/1/1990,
10
- 31,CESSNA 180A/B,CESSNA,CESSNA 180A/B,CSNA180A,1/1/1990,
11
- 32,CESSNA 180C/D/E/F,CESSNA,CESSNA 180C/D/E/F,CSNA180C,1/1/1990,
12
- 33,CESSNA 185A/B/C,CESSNA,CESSNA 185A/B/C SKYWAGON,SKYWAGON,1/1/1990,
13
- 34,HELIO H-250,HELIO,HELIO H-250,H-250,1/1/1990,
14
- 35,CESSNA 206/207/209,CESSNA,CESSNA C206/207/209/210 STATIONAIR,STATIONR,1/1/1990,
15
- 36,CESSNA 172 SKYHAWK,CESSNA,CESSNA 172 SKYHAWK,SKYHAWK,1/1/1990,
16
- 37,CESSNA 195,CESSNA,CESSNA 195,CSSNA195,1/1/1990,
17
- 38,CESSNA CARDINAL,CESSNA,CESSNA 177 CARDINAL,CARDINAL,1/1/1990
18
- 39,CESSNA 182 SKYLANE,CESSNA,CESSNA 182 SKYLANE,SKYLANE,1/1/1990
19
- 40,DEHAVILLAND DHC2,DEHAVILLAND OF CANADA,DEHAVILLAND DHC2 BEAVER,BEAVER,1/1/1990
20
- 41,CESSNA 205,CESSNA,CESSNA 205,CSSNA205,1/1/1990
21
- 42,DEHAVILLAND DHC3,DEHAVILLAND OF CANADA,DEHAVILLAND DHC3 OTTER,OTTER,1/1/1990
22
- 44,LAKE LA-4,LAKE,LAKE LA-4,LA-4,1/1/1990
23
- 50,HOWARD DGA-15P,HOWARD,HOWARD DGA-15P,DGA-15P,1/1/1990
24
- 51,MOONEY M-21,MOONEY,MOONEY M-21,M-21,1/1/1990
25
- 52,MOONEY M-20C/E/G,MOONEY,MOONEY M-20C/E/G,M-20C/E,1/1/1990
26
- 65,NOORDUYN UC-64AS,NOORDUYN,NOORDUYN UC-64AS,UC-64AS,1/1/1990
27
- 70,PILATUS PORTER PC6,PILATUS,PILATUS PORTER PC6,PC6,1/1/1990
28
- 71,PILATUS PC6/350,PILATUS,PILATUS PORTER PC6/350,PC6/350,1/1/1990
29
- 79,PIPER PA-32,PIPER,PIPER PA-32 (CHEROKEE 6),CHEROKEE,1/1/1990
30
- 80,PIPER PA-18,PIPER,PIPER PA-18 (SUPER-CUB),SUPERCUB,1/1/1990
31
- 81,PIPER PA-14,PIPER,PIPER PA-14 (FAMILY-CRUISER),FAMCRUIS,1/1/1990
32
- 82,PIPER PA-22,PIPER,PIPER PA-22 (TRI-PACER),TRIPACER,1/1/1990
33
- 83,PIPER PA-24,PIPER,PIPER PA-24 (COMANCHE),COMANCHE,1/1/1990
34
- 84,PIPER PA-28,PIPER,PIPER PA-28 (CHEROKEE),CHEROKEE,1/1/1990
35
- 85,STINSON SR-9,STINSON,STINSON SR-9,SR-9,1/1/1990
36
- 86,PIPER PA-12,PIPER,PIPER PA-12 (SUPERCRUISER),,1/1/2002
37
- 87,STINSON V-77,STINSON,STINSON V-77,V-77,1/1/1990
38
- 88,STINSON SR-10E,STINSON,STINSON SR-10E BUSHMAN,BUSHMAN,1/1/1990
39
- 91,FLT/AMPHIB TURBINE,,FLOAT/AMPHIB TURBINE,FLT/AMPH,1/1/1990
40
- 92,PISTON-LT 450 HP,,FLOAT/AMPHIB PISTON-LT 450 HP,FLT/AMPH,1/1/1990
41
- 93,PISTON-450+ HP,,FLOAT/AMPHIB PISTON-450+ HP,FLT/AMPH,1/1/1990
42
- 94,LAND-TURBINE,,LAND-TURBINE,LAND,1/1/1990
43
- 95,PISTON-LT 450 HP,,LAND-PISTON-LT 450 HP,LAND,1/1/1990
44
- 96,PISTON-450+ HP,,LAND-PISTON-450+ HP,LAND,1/1/1990
45
- 97,STIN.FW 300-450 HP,STINSON,STINSON F.W. 300-450 HP,STNSN FW,1/1/1950
46
- 98,STIN.FW LT 300 HP,STINSON,STINSON 0-299 HP,STNSN FW,1/1/1950
47
- 103,AERO COMANDR 500/6,ROCKWELL,AERO COMMANDER (500/600 SERIES EXCPT 680FL),COMMANDR,1/1/1990
48
- 104,AERO COMANDR 680FL,ROCKWELL,GRAND COMMANDER 680FL,COMMANDR,1/1/1990
49
- 105,BEECH C-50,BEECHCRAFT,BEECH C-50 (TWIN BONANZA),TWBONANZ,1/1/1990
50
- 110,BEECH 18,BEECHCRAFT,BEECH 18 C-185,BEECH 18,1/1/1990
51
- 111,BEECH KINGAIR C-90,BEECHCRAFT,BEECH KING AIR C-90,KING AIR,1/1/1990
52
- 113,BEECH B-95,BEECHCRAFT,BEECH B-95 (TRAVELAIR),TRAVLAIR,1/1/1990
53
- 115,BEECH AT-11,BEECHCRAFT,BEECH AT-11,AT-11,1/1/1990
54
- 117,BEECH BARON B-55,BEECHCRAFT,BEECH BARON (55 SERIES),BARON,1/1/1990
55
- 120,CESSNA T-50,CESSNA,CESSNA T-50 (BOBCAT),BOBCAT,1/1/1990
56
- 121,CESSNA C-421,CESSNA,CESSNA C-421,C-421,1/1/1990
57
- 122,CESSNA C-310,CESSNA,CESSNA C-310 SERIES,C-310,1/1/1990
58
- 125,CESSNA C-402/402A,CESSNA,CESSNA C-402/402A,C-402/A,1/1/1990
59
- 128,CESSNA 404,CESSNA,CESSNA 404,C-404,1/1/1990
60
- 130,C-28 5ACF PBY,CONSOLIDATED VULTEE,C-28 5ACF PBY,PBY,1/1/1990
61
- 131,BRITT-NORMAN BN2/A,PILATUS BRITTEN-NORMAN,PILATUS BRITTEN-NORMAN BN2/A ISLANDER,ISLANDER,1/1/1990
62
- 132,C-28 5ACF-PBY EMQ,CONSOLIDATED VULTEE,C-28 5ACF-PBY-EMQ,PBY-EMQ,1/1/1990
63
- 133,BEECH QUEEN AIR,BEECHCRAFT,BEECH 65/65A-80/65B-80 (QUEEN AIR),QUEENAIR,1/1/1990
64
- 140,CONVAIR CV-240,CONVAIR,CONVAIR CV-240,CV-240,1/1/1990
65
- 143,CONVAIR CV-340/440,CONVAIR,CONVAIR CV-340/440,C340/440,1/1/1990
66
- 148,CESSNA C-337,CESSNA,CESSNA C-337 (SUPER SKY MASTER),SKYMASTR,1/1/1990
67
- 149,CESSNA C-401,CESSNA,CESSNA C-401,C-401,1/1/1990
68
- 150,CURTISS C46 SERIES,CURTISS-WRIGHT,CURTISS C46/20T/A/D/F/R COMMANDO,COMMANDO,1/1/1990
69
- 152,CESSNA C-411,CESSNA,CESSNA C-411,C-411,1/1/1990
70
- 153,DHC4 CARIBOU,DEHAVILLAND OF CANADA,DEHAVILLAND DHC4 CARIBOU,CARIBOU,1/1/1990
71
- 158,DOUGLAS DC-2,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-2,DC-2,1/1/1990
72
- 159,HAMILTON B-18S,HAMILTON,HAMILTON B-18S LITTLE LINER (CONVERTED),B-18S,1/1/1990
73
- 160,DOUGLAS DC-3,MCDONNELL DOUGLAS,"MCDONNELL DOUGLAS DC-3/A/C,C-47/B",DC-3,1/1/1990
74
- 161,DORNIER DO-28,DORNIER,DORNIER DO-28 SKYSERVANT,DO-28,1/1/1990
75
- 166,CESSNA C-336,CESSNA,CESSNA C-336,C-336,1/1/1990
76
- 167,FAIRCHILD C-82A,FAIRCHILD,FAIRCHILD C-82A,C-82A,1/1/1990
77
- 170,GRUMMAN G-21A,GRUMMAN,GRUMMAN G-21A (GOOSE),GOOSE,1/1/1990
78
- 171,GRUMMAN SA-16A-GR,GRUMMAN,GRUMMAN SA-16A-GR (ALBATROSS),ALBATRSS,1/1/1990
79
- 172,GRUMMAN G-44/44A,GRUMMAN,GRUMMAN G-44/44A (WIDGEON),WIDGEON,1/1/1990
80
- 173,GRUMMAN G-73,GRUMMAN,GRUMMAN G-73 (MALLARD),MALLARD,1/1/1990
81
- 174,DOVE DH-104,DEHAVILLAND OF CANADA,DEHAVILLAND DOVE DH-104,DOVE,1/1/1990,
82
- 175,LOCKHEED L-12A,LOCKHEED,LOCKHEED L-12A/L-10/10A,L-12A,1/1/1990,
83
- 180,MARTIN 202/202A,MARTIN,MARTIN 202/202A,M-202,1/1/1990,
84
- 185,MARTIN 404,MARTIN,MARTIN 404,M-404,1/1/1990,
85
- 190,PIPER PA-23,PIPER,PIPER PA-23-250 (AZTEC/APACHE),AZTEC,1/1/1990,
86
- 193,PIPER T-1020,PIPER,PIPER T-1020,T-1020,1/1/1990,10/31/1996
87
- 194,PIPER PA-31,PIPER,PIPER PA-31 (NAVAJO),NAVAJO,1/1/1990,10/31/1996
88
- 194,PIPER PA-31/T-1020,PIPER,PIPER PA-31 (NAVAJO)/T-1020,NAVAJO,11/1/1996,
89
- 195,PIPER PA-34/39,PIPER,PIPER PA-34/39 (TWIN COMMANCHE),TWNCOMAN,1/1/1990,
90
- 200,BOEING 377,BOEING,BOEING 377 STRATOCRUISER,B-377,1/1/1990,
91
- 201,BN2A TRISLANDER,PILATUS BRITTEN-NORMAN,PILATUS BRITTEN-NORMAN BN2A TRISLANDER,TRISLNDR,1/1/1990,
92
- 205,DH-114 HERON,DEHAVILLAND OF CANADA,DEHAVILLAND DH-114 HERON,HERON,1/1/1990,
93
- 210,DOUGLAS DC-4,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-4(C54/C54A/C54B/C54E),DC-4,1/1/1990,
94
- 216,DOUGLAS DC-6,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6,DC-6,1/1/1990,
95
- 218,DOUGLAS DC-6A,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6A,DC-6A,1/1/1990,
96
- 220,DOUGLAS DC-6B,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-6B,DC-6B,1/1/1990,
97
- 225,DOUGLAS DC-7A/B,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-7A/B,DC-7A/B,1/1/1990
98
- 228,DOUGLAS DC-7C,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-7C,DC-7C,1/1/1990
99
- 240,LOCKHEED L-049,LOCKHEED,LOCKHEED L-049,L-049,1/1/1990
100
- 242,LOCKHEED L-649,LOCKHEED,LOCKHEED L-649,L-649,1/1/1990
101
- 244,LOCKHEED L-749/A,LOCKHEED,LOCKHEED L-749/749A,L-749/A,1/1/1990
102
- 247,LOCKHEED L-1049,LOCKHEED,LOCKHEED L-1049,L-1049,1/1/1990
103
- 248,LOCKHEED 1049C/D/E,LOCKHEED,LOCKHEED L-1049C/D/E,L-1049C,1/1/1990
104
- 252,LOCKHEED L-1049G/H,LOCKHEED,LOCKHEED L-1049G/H,L-1049G,1/1/1990
105
- 258,LOCKHEED L-1649A,LOCKHEED,LOCKHEED L-1649A,L-1649A,1/1/1990
106
- 280,SIKORSKY VS-44A,SIKORSKY,SIKORSKY VS-44A (AMPHIBIAN),VS-44A,1/1/1990
107
- 303,SUD ALOUETTE,SUD AVIATION,SUD ALOUETTE,ALOUETTE,1/1/1990
108
- 311,BELL B-47D,BELL,BELL B-47D,B-47D,1/1/1990
109
- 312,BELL B-47G,BELL,BELL B-47G,B-47G,1/1/1990
110
- 313,BELL B-47G2,BELL,BELL B-47G2,B-47G2,1/1/1990
111
- 314,BELL B-47J2,BELL,BELL B-47J2,B-47J2,1/1/1990
112
- 315,BELL B-206A,BELL,BELL B-206A,B-206A,1/1/1990
113
- 320,BOEING BV-107,BOEING VERTOL,BOEING VERTOL BV-107,BV-107,1/1/1990
114
- 321,BOEING BV-44,BOEING VERTOL,BOEING VERTOL BV-44,BV-44,1/1/1990
115
- 322,BRANTLEY B-2,BRANTLEY,BRANTLEY B-2,BRNTLY 2,1/1/1990
116
- 323,FAIRCHILD FH-1100,FAIRCHILD-HILLER,FAIRCHILD-HILLER FH-1100,FH-1100,1/1/1990
117
- 330,BOELKOW BO-105C,BOELKOW,BOELKOW BO-105C,BO-105C,1/1/1990
118
- 350,HUGHES 300,HUGHES,HUGHES 300,HUGHS300,1/1/1990
119
- 360,ROBINSON R44,ROBINSON,ROBINSON R44,R44,9/1/2002
120
- 380,SIKORSKY S-51,SIKORSKY,SIKORSKY S-51,S-51,1/1/1990
121
- 381,SIKORSKY S-55,SIKORSKY,SIKORSKY S-55,S-55,1/1/1990
122
- 385,SIKORSKY S-58A/B/C,SIKORSKY,SIKORSKY S-58A/B/C,S-58,1/1/1990
123
- 386,SIKORSKY S-61N,SIKORSKY,SIKORSKY S-61N,S-61N,1/1/1990
124
- 387,SIKORSKY S-61,SIKORSKY,SIKORSKY S-61,S-61,1/1/1990
125
- 388,SIKORSKY S-61L,SIKORSKY,SIKORSKY S-61L,S-61L,1/1/1990
126
- 389,SIKORSKY S-62/A,SIKORSKY,SIKORSKY S-62/A,S-62/A,1/1/1990
127
- 390,SIKORSKY S-76,SIKORSKY,SIKORSKY S-76,S-76,10/1/2002
128
- 396,WESTLAND SR-N5,WESTLAND,WESTLAND SR-N5 (ACV),SR-N5,1/1/1990
129
- 401,BEECH 1300,BEECHCRAFT,BEECH 1300,BE-1300,1/1/1990
130
- 402,BEECH 18,BEECHCRAFT,BEECH MODEL 18 TURBO-PROP CONVERSIONS,BEECH 18,1/1/1990
131
- 403,BEECH 99,BEECHCRAFT,BEECH 99 AIRLINER,BEECH 99,1/1/1990
132
- 404,BEECH C99,BEECHCRAFT,BEECH C99,BEECHC99,1/1/1990
133
- 405,BEECH 1900 A/B/C,BEECHCRAFT,BEECH 1900 A/B/C,BE-1900,1/1/1990
134
- 406,BEECH 200 KINGAIR,BEECH,BEECH 200 SUPER KINGAIR,KINGAIR,1/1/1995
135
- 407,BAE-748,BRITISH AEROSPACE,BRITISH AEROSPACE (HAWKER-SIDDELEY) BAE-748,BAE-748,1/1/1990
136
- 408,BAE-ATP,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-ATP,BAE-ATP,1/1/1990
137
- 409,BEECH B-100,BEECHCRAFT,BEECHCRAFT KING AIR B100 ,BEECH,10/1/2002
138
- 410,ROCKWELL 680-W/690,ROCKWELL,ROCKWELL TURBO-COMMANDER 680-W/690,COMMANDR,1/1/1990
139
- 411,BEECHCRAFT 65-A90,BEECHCRAFT,BEECH KING AIR C-90,BEECH 65,10/1/2002
140
- 412,CASA 212,"CONSTRUCCIONES AERONAUTICAS,SA",CASA/NURTANIO C212 AVIOCAR,CASA 212,1/1/1990
141
- 413,CASA 235,"CONSTRUCCIONES AERONAUTICAS,SA",CASA 235,CASA 235,1/1/1990
142
- 416,CESSNA 208,CESSNA,CESSNA 208 CARAVAN,CARAVAN,1/1/1990
143
- 417,CESSNA 406,CESSNA,CESSNA 406 CARAVAN II,CARAVNII,1/1/2002
144
- 418,CESSNA C-441,CESSNA,CESSNA C-441,C-441,1/1/1990
145
- 420,CONVAIR CV-540,CONVAIR,CONVAIR CV-540,CV-540,1/1/1990,
146
- 430,CONVAIR CV-580,CONVAIR,CONVAIR CV-580,CV-580,1/1/1990,
147
- 435,CONVAIR CV-600,CONVAIR,CONVAIR CV-600,CV-600,1/1/1990,
148
- 440,CONVAIR CV-640,CONVAIR,CONVAIR CV-640,CV-640,1/1/1990,
149
- 441,ATR-42,AEROSPATIALE/AERITALIA,AEROSPATIALE/AERITALIA ATR-42,ATR-42,1/1/1990,
150
- 442,ATR-72,AEROSPATIALE/AERITALIA,AEROSPATIALE/AERITALIA ATR-72,ATR-72,1/1/1990,
151
- 443,ANTONOV 12,ANTONOV,ANTONOV 12,AN-12,6/1/1996,12/1/1999
152
- 444,ANTONOV 24/26/32,ANTONOV DESIGN BUREAU,ANTONOV 24/26/32,AN-24/6,1/1/1999,
153
- 445,CONVAIR CV-660,CONVAIR,CONVAIR CV-660,CV-660,1/1/1990,
154
- 448,DORNIER 228,DORNIER,DORNIER 228,DO-228,1/1/1990,
155
- 449,DORNIER 328,DORNIER,DORNIER 328,DO-328,6/1/1993,
156
- 450,F-27,FOKKER/FAIRCHILD,FOKKER FRIENDSHIP F-27/FAIRCHILD F-27/A/B/F/J,F-27,1/1/1990,
157
- 452,FOKKER 50,FOKKER,FOKKER 50,FOKKER50,1/1/1990,
158
- 454,FH-227,FAIRCHILD-HILLER,FAIRCHILD-HILLER FH-227,FH-227,1/1/1990,
159
- 455,FAIRCHILD METRO 23,FAIRCHILD ,FAIRCHILD METRO 23,METRO 23,10/1/2002,
160
- 456,SAAB-FAIRCHD 340/B,SAAB-FAIRCHILD,SAAB-FAIRCHILD 340/B,SF-340/B,1/1/1990,
161
- 457,BEECH KING AIR,BEECHCRAFT,BEECH KING AIR C-90,KING AIR,1/1/2001
162
- 458,BEECH B-350,BEECHCRAFT,BEECHCRAFT SUPER KING AIR,BEECH,10/1/2002
163
- 460,GRUMMAN G-21G,GRUMMAN,GRUMMAN G-21G (TURBO-GOOSE),GOOSE,1/1/1993
164
- 461,EMB-120 BRASILIA,EMBRAER,EMBRAER EMB-120 BRASILIA,EMB-120,1/1/1990
165
- 462,SWEARINGEN MERLIN,SWEARINGEN,SWEARINGEN METRO MERLIN,MERLIN,1/1/1990
166
- 463,MITSUBISHI MU-2/B,MITSUBISHI,MITSUBISHI MU-2/B,MU-2/B,1/1/1990
167
- 464,EMBRAER BANDEIRNTE,EMBRAER,EMBRAER EMB-110 BANDEIRANTE,EMB-110,1/1/1990
168
- 465,NIHON YS-11,NIHON (NAMCO),NIHON YS-11,YS-11,1/1/1990
169
- 466,SWEARINGEN METRO 2,FAIRCHILD SWEARINGEN,SWEARINGEN METRO II,METRO II,1/1/1990
170
- 467,SWEARINGEN METRO 3,FAIRCHILD SWEARINGEN,SWEARINGEN METRO III,METROIII,1/1/1990
171
- 468,H-P JETSTREAM,HANDLEY PAGE,HANDLEY PAGE JETSTREAM,JETSTRM,1/1/1990
172
- 469,BAE JETSTREAM 31,BRITISH AEROSPACE,BRITISH AEROSPACE JETSTREAM 31,JETST 31,1/1/1990
173
- 470,GULFSTREAM I,GULFSTREAM AEROSPACE (GRUMMAN),GULFSTREAM I,G-159,1/1/1990
174
- 471,JETSTREAM 41,BRITISH AEROSPACE,BRITISH AEROSPACE JETSTREAM 41,JETST 41,3/1/1993
175
- 473,GULFSTREAM I COMM,GULFSTREAM AEROSPACE (GRUMMAN),GULFSTREAM I-COMMANDER,G-159COM,1/1/1990
176
- 475,NORD 262,NORD AVIATION,NORD 262,N-262,1/1/1990
177
- 476,PIPER PA-31T,PIPER,PIPER PA-31T CHEYENNE II XL,PA-31T,10/1/2002
178
- 477,MOHAWK 298,NORD AVIATION,MOHAWK 298,MO-298,1/1/1990
179
- 478,PIPER T-1040,PIPER,PIPER T-1040,T-1040,1/1/1990
180
- 479,PILATUS PC-12,PILATUS,PILATUS PC-12,PC-12,1/1/1999
181
- 480,PILATUS PORTR PC6A,PILATUS,PILATUS TURBO PORTER PC-6A,PC-6A,1/1/1990
182
- 481,PILATUS PORTR PC6B,PILATUS,PILATUS TURBO PORTER PC-6B,PC-6B,1/1/1990
183
- 482,DEHAVILLAND DHC8-4,DEHAVILLAND,DEHAVILLAND DHC8-400 DASH-8,DHC8-400,1/1/1999
184
- 483,DEHAVILLAND DHC8-1,DEHAVILLAND,DEHAVILLAND DHC8-100 DASH-8,DHC8-100,1/1/1990
185
- 485,DEHAVILLAND DHC-6,DEHAVILLAND,DEHAVILLAND TWIN OTTER DHC-6,DHC-6,1/1/1990
186
- 486,SHORT HARLAND SC-7,SHORT BROS. & HARLAND,SHORTS HARLAND SC-7 SKYVAN,SKYVAN,1/1/1990
187
- 487,SHORTS 330,SHORT BROS.,SHORTS 330,SHORT330,1/1/1990
188
- 488,CARSTEDT CJ-600A,CARSTEDT,CARSTEDT CJ-600A,CJ-600A,1/1/1990
189
- 489,SHORTS 360,SHORT BROS.,SHORTS 360,SHORT360,1/1/1990
190
- 490,VOLPAR TURBO 18,VOLPAR,VOLPAR TURBO 18,VOLPAR18,1/1/1990
191
- 491,DEHAVILLAND DHC8-2,BOMBARDIER,DEHAVILLAND DHC8-200Q DASH-8,DHC8-200,11/1/2002
192
- 507,ANTONOV 12,ANTONOV DESIGN BUREAU,ANTONOV 12,AN-12,1/1/2000
193
- 508,ANTONOV AN-22-F,ANTONOV,ANTONOV AN-22 FREIGHTER,AN-22,1/1/1990
194
- 510,AW-650,ARMSTRONG WHITWORTH,AW-650,AW-650,1/1/1990
195
- 520,CANADAIR CL-44D,CANADAIR,CANADAIR CL-44D,CL-44D,1/1/1990
196
- 541,ILYUSHIN 18,ILYUSHIN,ILYUSHIN IL-18,IL-18,1/1/1990
197
- 550,LOCKHEED L-188A/C,LOCKHEED,LOCKHEED L-188A/C ELECTRA,ELECTRA,1/1/1990
198
- 552,LOCKHEED L-382B,LOCKHEED,LOCKHEED L-382B,L-382B,1/1/1990
199
- 553,LOCKHEED L100-10,LOCKHEED,LOCKHEED L100-10 HERCULES,HERCULES,1/1/1990
200
- 555,LOCKHEED L100-20,LOCKHEED,LOCKHEED L100-20 HERCULES,HERCULES,1/1/1990
201
- 556,LOCKHEED L100-30,LOCKHEED,LOCKHEED L100-30/L-382E,HERCULES,1/1/1990
202
- 560,SHORTS-BELFAST-SH5,SHORT BROS.,SHORTS BELFAST FREIGHTER-SH5,SHORTS-5,1/1/1990
203
- 570,DEHAVILLAND DASH-7,DEHAVILLAND,DEHAVILLAND DHC7 DASH-7,DHC-7,1/1/1990
204
- 580,VV-700,VICKERS,VICKERS VISCOUNT 700/744/745/745D,VV-700,1/1/1990
205
- 584,VV-800,VICKERS,VICKERS VISCOUNT V800/810/812,VV-800,1/1/1990
206
- 484,CANADAIR RJ-200ER,CANADAIR,CANADAIR RJ-200ER,RJ-200ER,1/1/1997
207
- 601,FOKKER F28-1000,FOKKER,FOKKER F28-1000 FELLOWSHIP,F28-1000,1/1/1990
208
- 602,F28-4000/6000,FOKKER,FOKKER F28-4000/6000 FELLOWSHIP,F28-4000,1/1/1990
209
- 603,FOKKER 100,FOKKER,FOKKER 100,FOKKR100,1/1/1990
210
- 604,FOKKER 70,FOKKER,FOKKER 70,FOKKER70,1/1/1994
211
- 605,BAC-111-200,BRITISH AEROSPACE,BRITISH AEROSPACE BAC-111-200,BAC-111,1/1/1990
212
- 608,BOEING 717-200,BOEING,BOEING 717-200,B717-200,1/1/1999
213
- 610,BAC-111-400,BRITISH AEROSPACE,BRITISH AEROSPACE BAC-111-400,BAC-111,1/1/1990
214
- 611,AERO COMMNDER 1121,ROCKWELL,AERO COMMANDER 1121,COMMANDR,1/1/1990
215
- 612,BOEING 737-700/LR,BOEING,BOEING 737-700/700LR,B737-700,1/1/1998
216
- 613,AEROSPATL CORVETTE,AEROSPATIALE,AEROSPATIALE CORVETTE,CORVETTE,1/1/1990
217
- 614,BOEING 737-800,BOEING,BOEING 737-800,B737-8,1/1/1998
218
- 615,BOEING 737-5/600LR,BOEING,BOEING 737-5/600LR,B737-5LR,1/1/1998
219
- 616,BOEING 737-500,BOEING,BOEING 737-500,B737-5,1/1/1990
220
- 617,BOEING 737-400,BOEING,BOEING 737-400,B737-400,1/1/1990
221
- 618,BOEING 737-300LR,BOEING,BOEING 737-300LR,B737-3LR,1/1/1990
222
- 619,BOEING 737-300,BOEING,BOEING 737-300,B737-300,1/1/1990
223
- 620,BOEING 737-100/200,BOEING,BOEING 737-100/200,B737-1/2,1/1/1990
224
- 621,BOEING 737-200C,BOEING,BOEING 737-200C,B737-2C,1/1/1990
225
- 622,BOEING 757-200,BOEING,BOEING 757-200,B757-200,1/1/1990
226
- 623,BOEING 757-300,BOEING,BOEING 757-300,B757-300,1/1/1999
227
- 624,BOEING 767-400,BOEING,BOEING 767-400,B767-400,1/1/1998
228
- 625,BOEING 767-200/ER,BOEING,BOEING 767-200/200ER,B767-2/R,1/1/1990
229
- 626,BOEING 767-300/ER,BOEING,BOEING 767-300/300ER,B767-3/R,1/1/1990
230
- 627,BOEING 777,BOEING,BOEING 777,B777,1/1/1994
231
- 628,CANADAIR RJ-100/ER,CANADAIR,CANADAIR RJ-100/RJ-100ER,RJ100/ER,3/1/1993
232
- 629,CANADAIR RJ-200ER,CANADAIR,CANADAIR RJ-200ER,RJ-200ER,1/1/1997
233
- 630,DOUGLAS DC-9-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-10,DC-9-10,1/1/1990
234
- 631,CANADAIR RJ-700,CANADAIR,CANADAIR RJ-700,RJ-700,1/1/1990
235
- 632,DORNIER 328,DORNIER,DORNIER 328,DO-328,1/1/2001
236
- 633,BOEING 737-600,BOEING,BOEING 737-600,B737-6,4/1/2002
237
- 634,BOEING 737-900,BOEING,BOEING737-900,B737-9,4/1/2002
238
- 635,DOUGLAS DC-9-15F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-15F,DC-9-15F,1/1/1990
239
- 636,CESSNA CITATION II,CESSNA,CESSNA CITATION II,CESSNA,10/1/2002
240
- 637,CESSNA CITATION V,CESSNA,CESSNA CITATION V,CESSNA,10/1/2002
241
- 638,CANADAIR RJ900,BOMBARDIER,CANADAIR RJ900,RJ 900,3/1/2003,
242
- 640,DOUGLAS DC-9-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-30,DC-9-30,1/1/1990,
243
- 645,DOUGLAS DC-9-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-40,DC-9-40,1/1/1990,
244
- 650,DOUGLAS DC-9-50,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-9-50,DC-9-50,1/1/1990,
245
- 654,MD-87,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 87,MD-87,10/1/1996,
246
- 655,"MD-80,1,2,3,8",MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 80/MD81/2/3/8,MD-80,10/1/1996,
247
- 655,"MD-80,1,2,3,7,8",MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC9 SUPER 80/MD81/2/3/7/8,MD-80,1/1/1990,9/30/1996
248
- 656,MD-90,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS MD-90,MD-90,1/1/1995,
249
- 660,LEAR-23,GATES LEARJET,GATES LEARJET LEAR-23,LEAR-23,1/1/1990,
250
- 661,LEAR-24,GATES LEARJET,GATES LEARJET LEAR-24,LEAR-24,1/1/1990,
251
- 662,LEAR-25,GATES LEARJET,GATES LEARJET LEAR-25,LEAR-25,1/1/1990,
252
- 663,LEAR-35,GATES LEARJET,GATES LEARJET LEAR-35,LEAR-35,1/1/1990,
253
- 664,HFB 320 HANSA,,HFB 320 HANSA,HFB 320,1/1/1990,
254
- 665,BAE HS-125,BRITISH AEROSP/HAWKER SIDDELEY,HAWKER SIDDELEY 125,HS-125,1/1/1995,
255
- 666,LEAR 55,GATES LEARJET,LEAR 55,LEAR 55,10/1/2002,
256
- 670,ROCKWELL SABRELINR,ROCKWELL,ROCKWELL SABRELINER,SABRELNR,1/1/1990,
257
- 674,EMBRAER-135,EMBRAER,EMBRAER-135,EMB-135,1/1/1998,
258
- 675,EMBRAER-145,EMBRAER,EMBRAER-145,EMB-145,1/1/1996,
259
- 676,EMBRAER-140,EMBRAER,EMBRAER-140,EMB-140,1/1/2001,12/31/2000
260
- 676,EMBRAER-140,EMBRAER,EMBRAER-140,EMB-140,1/1/2001,
261
- 680,CARAVELLE SE-210,AEROSPATIALE,AEROSPATIALE CARAVELLE SE-210,CARAVLLE,1/1/1990,
262
- 681,DASSAULT FALCON,DASSAULT-BREGUET,DASSAULT-BREGUET MYSTERE-FALCON,FALCON,1/1/1990,
263
- 689,A300-600ST(BELUGA),AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A330-600ST (BELUGA),BELUGA,7/1/1998,
264
- 690,A300B/C/F-100/200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300B/C/F-100/200,A300B/C,1/1/1990,
265
- 691,A300-600/R/CF/RCF,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300-600/R/CF/RCF,A300-600,1/1/1990,
266
- 692,A310-200C/F,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A310-200C/F,A310-2CF,1/1/1990,
267
- 693,A310-300,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A310-300,A310-300,1/1/1990,
268
- 694,A320-100/200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A320-100/200,A320-1/2,1/1/1990,
269
- 695,A300-B2,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A300-B2,A300-B2,1/1/1990,
270
- 696,A330,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A330,A330,1/1/1992,
271
- 697,A340,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340,A340,1/1/1992,8/31/1996
272
- 698,A319,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A319,A319,1/1/1997,
273
- 699,A321,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A321,A321,1/1/1990
274
- 710,BOEING 727-100,BOEING,BOEING 727-100,B727-100,1/1/1990
275
- 711,BOEING 727-100C/QC,BOEING,BOEING 727-100C/QC,B727-1C,1/1/1990
276
- 715,BOEING 727-200,BOEING,BOEING 727-200/231A,B727-200,1/1/1990
277
- 730,DOUGLAS DC-10-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-10,DC-10-10,1/1/1990
278
- 731,DOUGLAS DC-10-20,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-20,DC-10-20,1/1/1990
279
- 732,DOUGLAS DC-10-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-30,DC-10-30,1/1/1990
280
- 733,DOUGLAS DC-10-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-40,DC-10-40,1/1/1990
281
- 735,DOUGLAS DC-10-30CF,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-10-30CF,DC10-30F,1/1/1990
282
- 740,MD-11,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS MD-11,MD-11,1/1/1990
283
- 760,L-1011-1/100/200,LOCKHEED,LOCKHEED L-1011-1/100/200,L-1011,1/1/1990
284
- 765,L-1011-500 TRISTAR,LOCKHEED,LOCKHEED L-1011-500 TRISTAR,L-1011,1/1/1990
285
- 780,TUPOLEV 154,TUPOLEV,TUPOLEV TU-154,TU-154,1/1/1990
286
- 792,YAKOLEV 42,YAKOLEV,YAKOLEV YAK-42,YAK-42,1/1/1990
287
- 871,A340-300,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-300,A340-300,9/1/1996
288
- 872,A340-500,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-500,A340-500,9/1/1996
289
- 873,A340-200,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-200,A340-200,9/1/1996
290
- 874,A340-600,AIRBUS INDUSTRIE,AIRBUS INDUSTRIE A340-600,A340-600,9/1/1996
291
- 879,ILYUSHIN 96,ILYUSHIN,ILYUSHIN 96,IL-96,3/1/1993
292
- 800,BOEING 707-100,BOEING,BOEING 707-100,B707-100,1/1/1990
293
- 802,BOEING 707-100B,BOEING,BOEING 707-100B,B707-1B,1/1/1990
294
- 804,BOEING 707-200,BOEING,BOEING 707-200,B707-200,1/1/1990
295
- 806,BOEING 707-300,BOEING,BOEING 707-300,B707-300,1/1/1990
296
- 808,BOEING 707-300B,BOEING,BOEING 707-300B,B707-3B,1/1/1990
297
- 809,BOEING 707-300C,BOEING,BOEING 707-300C,B707-3C,1/1/1990
298
- 810,BOEING 707-400,BOEING,BOEING 707-400,B707-400,1/1/1990
299
- 812,BOEING 720,BOEING,BOEING 720-000,B720,1/1/1990
300
- 814,BOEING 720-B,BOEING,BOEING 720-000B,B720-B,1/1/1990
301
- 816,BOEING 747-100,BOEING,BOEING 747-100,B747-100,1/1/1990
302
- 817,BOEING 747-200/300,BOEING,BOEING 747-200/300,B747-2/3,1/1/1990
303
- 818,BOEING 747C,BOEING,BOEING 747C,B747C,1/1/1990
304
- 819,BOEING 747-400,BOEING,BOEING 747-400,B747-400,1/1/1990
305
- 820,BOEING 747F,BOEING,BOEING 747F,B747F,1/1/1990
306
- 822,BOEING 747SP,BOEING,BOEING 747SP,B747SP,1/1/1990
307
- 825,CONVAIR 880,CONVAIR,CONVAIR 880 (CV-22/22M),CV-880,1/1/1990
308
- 830,CONVAIR 990,CONVAIR,CONVAIR 990 CORONADO (CV-30),CORONADO,1/1/1990
309
- 835,AVROLINER RJ85,AVRO INTERNATIONAL AEROSPACE,AVROLINER RJ85,AV RJ85,1/1/1997
310
- 840,DOUGLAS DC-8-10,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-10,DC-8-10,1/1/1990
311
- 842,DOUGLAS DC-8-20,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-20,DC-8-20,1/1/1990
312
- 844,DOUGLAS DC-8-30,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-30,DC-8-30,1/1/1990
313
- 846,DOUGLAS DC-8-40,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-40,DC-8-40,1/1/1990
314
- 848,DOUGLAS DC-8-50,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-50,DC-8-50,1/1/1990
315
- 850,DOUGLAS DC-8-50F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-50F,DC-8-50F,1/1/1990
316
- 851,DOUGLAS DC-8-61,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-61,DC-8-61,1/1/1990
317
- 852,DOUGLAS DC-8-63F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-63F,DC-8-63F,1/1/1990
318
- 854,DOUGLAS DC-8-62,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-62,DC-8-62,1/1/1990
319
- 856,DOUGLAS DC-8-63,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-63,DC-8-63,1/1/1990
320
- 860,DOUGLAS DC-8-71,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-71,DC-8-71,1/1/1990
321
- 862,DOUGLAS DC-8-72,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-72,DC-8-72,1/1/1990
322
- 864,DOUGLAS DC-8-73,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-73,DC-8-73,1/1/1990
323
- 865,DOUGLAS DC-8-73F,MCDONNELL DOUGLAS,MCDONNELL DOUGLAS DC-8-73F,DC-8-73F,1/1/1990
324
- 866,BAE-146-100/RJ70,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-100/RJ70,BAE146-1,1/1/1990
325
- 867,BAE-146-200,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-200,BAE146-2,1/1/1990
326
- 868,BAE-146-300,BRITISH AEROSPACE,BRITISH AEROSPACE BAE-146-300,BAE146-3,1/1/1990
327
- 870,LOCKHEED JETSTAR,LOCKHEED,LOCKHEED JETSTAR,JETSTAR,1/1/1990
328
- 875,CONCORDE,AEROSPATIALE/BRITISH AEROSPACE,AEROSPATIALE/BRITISH AEROSPACE CONCORDE,CONCORDE,1/1/1990
329
- 876,ILYUSHIN 62,ILYUSHIN,ILYUSHIN 62,IL-62,1/1/1990
330
- 877,ILYUSHIN 76/TD,ILYUSHIN,ILYUSHIN 76/TD,IL-76/TD,1/1/1990
331
- 878,ILYUSHIN 86,ILYUSHIN,ILYUSHIN 86,IL-86,1/1/1990
332
- 880,ANTONOV 124,ANTONOV,ANTONOV 124,AN-124,1/1/1990
333
- 890,ANTONOV 225,ANTONOV,ANTONOV 225 (6 ENGINE),AN-224,1/1/1990
334
- "SOURCE: Bureau of Transportation Statistics, Office of Airline Information",,,,,
@@ -1 +0,0 @@
1
- needle,haystack,
@@ -1,116 +0,0 @@
1
- require File.expand_path('../../../test/helper.rb', __FILE__)
2
-
3
- # How to iteratively develop a dictionary.
4
-
5
- # ruby ./examples/bts_aircraft/test_bts_aircraft.rb
6
-
7
- ####################################################
8
- # Section 1 - constants that will get passed as arguments
9
-
10
- # The records that your dictionary will return.
11
- # (Example) A table of aircraft as defined by the U.S. Bureau of Transportation Statistics
12
- HAYSTACK = RemoteTable.new :url => "file://#{File.expand_path('../number_260.csv', __FILE__)}", :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }
13
-
14
- # A reader used to convert every record (which could be an object of any type) into a string that will be used for similarity.
15
- # (Example) Combine the make and model into something like "boeing 747"
16
- # Note the downcase!
17
- HAYSTACK_READER = lambda { |record| "#{record['Manufacturer']} #{record['Long Name']}".downcase }
18
-
19
- # Whether to even bother trying to find a match for something without an explicit group
20
- # (Example) False, which is the default, which means we have more work to do
21
- MUST_MATCH_GROUPING = false
22
-
23
- # Groupings
24
- # (Example) We made these by trial and error
25
- GROUPINGS = RemoteTable.new(:url => "file://#{File.expand_path("../groupings.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
26
-
27
- # Normalizers
28
- # (Example) We made these by trial and error
29
- NORMALIZERS = RemoteTable.new(:url => "file://#{File.expand_path("../normalizers.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
30
-
31
- # Identities
32
- # (Example) We made these by trial and error
33
- IDENTITIES = RemoteTable.new(:url => "file://#{File.expand_path("../identities.csv", __FILE__)}", :headers => :first_row).map { |row| row['regexp'] }
34
-
35
- ####################################################
36
- # Section 2 - constants that are just for tests
37
-
38
- # The class of each record.
39
- # (Example) ActiveSupport::OrderedHash because we're using RemoteTable
40
- HAYSTACK_RECORD_CLASS = HAYSTACK[0].class
41
-
42
- # Some test needles to be found in the haystack.
43
- # (Example) Aircraft starting with A, B, D, G from the FAA (really a list of ICAO aircraft)
44
- NEEDLES = %w{ A B D G }.inject([]) do |memo, letter|
45
- one_letter = RemoteTable.new :url => "file://#{File.expand_path("../5-2-#{letter}.htm", __FILE__)}",
46
- :encoding => 'US-ASCII',
47
- :row_xpath => '//table/tr[2]/td/table/tr',
48
- :column_xpath => 'td'
49
- memo + one_letter.to_a
50
- end
51
-
52
- # Positive matches that we know about.
53
- # (Example) We just built this file in Excel and exported it to a CSV.
54
- POSITIVES = RemoteTable.new :url => "file://#{File.expand_path("../positives.csv", __FILE__)}", :headers => :first_row
55
-
56
- # Negative (false positive) matches that we know about.
57
- # (Example) We just built this file in Excel and exported it to a CSV.
58
- NEGATIVES = RemoteTable.new :url => "file://#{File.expand_path("../negatives.csv", __FILE__)}", :headers => :first_row
59
-
60
- ####################################################
61
- # Section 3
62
-
63
- FINAL_OPTIONS = {
64
- :read => HAYSTACK_READER,
65
- :must_match_grouping => MUST_MATCH_GROUPING,
66
- :normalizers => NORMALIZERS,
67
- :identities => IDENTITIES,
68
- :groupings => GROUPINGS
69
- }
70
-
71
- class TestBtsAircraft < MiniTest::Spec
72
- it "understand records by using the haystack reader" do
73
- d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
74
- d.haystack.map { |record| record.to_str }.must_include 'boeing boeing 707-100'
75
- end
76
-
77
- it "find an easy match" do
78
- d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
79
- record = d.find('boeing 707(100)')
80
- record.class.must_equal HAYSTACK_RECORD_CLASS
81
- HAYSTACK_READER.call(record).must_equal 'boeing boeing 707-100'
82
- end
83
-
84
- POSITIVES.each do |row|
85
- needle = row['needle']
86
- correct_record = row['haystack']
87
- it %{find #{correct_record.blank? ? 'nothing' : correct_record} when looking for #{needle}} do
88
- d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
89
- record = d.find(needle.downcase)
90
- HAYSTACK_READER.call(record).must_equal correct_record.downcase
91
- end
92
- end
93
-
94
- NEGATIVES.each do |row|
95
- needle = row['needle']
96
- incorrect_record = row['haystack']
97
- it %{not find #{incorrect_record} when looking for #{needle}} do
98
- d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
99
- record = d.find(needle.downcase)
100
- HAYSTACK_READER.call(record)).wont_equal incorrect_record.downcase
101
- end
102
- end
103
- end
104
-
105
- # Whenever I saw a failure like this...
106
- # 1) Failure:
107
- # test: BtsAircraft should find AIRBUS INDUSTRIE AIRBUS INDUSTRIE A340-300 when looking for AIRBUS A340300. (TestBtsAircraft)
108
- # [examples/bts_aircraft/test_bts_aircraft.rb:96:in `__bind_1302579566_46630'
109
- # /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/shoulda-2.11.3/lib/shoulda/context.rb:382:in `call'
110
- # /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/shoulda-2.11.3/lib/shoulda/context.rb:382:in `test: BtsAircraft should find AIRBUS INDUSTRIE AIRBUS INDUSTRIE A340-300 when looking for AIRBUS A340300. ']:
111
- # <"airbus industrie airbus industrie a340-300"> expected but was
112
- # <"airbus industrie airbus industrie a340">.
113
-
114
- # ...I would look at it like this
115
- d = FuzzyMatch.new HAYSTACK, FINAL_OPTIONS
116
- puts d.explain('AIRBUS A340300.'.downcase)
@@ -1,15 +0,0 @@
1
- #!/usr/bin/env ruby
2
- unless RUBY_VERSION >= '1.9'
3
- require 'rubygems'
4
- end
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
- require 'fuzzy_match'
7
-
8
- haystack = [ 'seamus', 'andy', 'ben' ]
9
- needles = [ 'Mr. Seamus', 'Sr. Andy', 'Master BenT', 'Shamus Heaney' ]
10
-
11
- d = FuzzyMatch.new haystack
12
- needles.each do |needle|
13
- d.explain needle
14
- puts
15
- end
Binary file
@@ -1,20 +0,0 @@
1
- class FuzzyMatch
2
- class Rule
3
- # A normalizer just strips a string down to its core
4
- class Normalizer < Rule
5
- # A normalizer applies when its regexp matches and captures a new (shorter) string
6
- def apply?(str)
7
- !!(regexp.match(str))
8
- end
9
-
10
- # The result of applying a normalizer is just all the captures put together.
11
- def apply(str)
12
- if match_data = regexp.match(str)
13
- match_data.captures.join
14
- else
15
- str
16
- end
17
- end
18
- end
19
- end
20
- end
@@ -1,11 +0,0 @@
1
- class FuzzyMatch
2
- class Rule
3
- # A stop word is ignored
4
- class StopWord < Rule
5
- # Destructively remove stop words from the string
6
- def apply!(str)
7
- str.gsub! regexp, ''
8
- end
9
- end
10
- end
11
- end
@@ -1,73 +0,0 @@
1
- class FuzzyMatch
2
- # Wrappers are the tokens that are passed around when doing scoring and optimizing.
3
- class Wrapper #:nodoc: all
4
- # "Foo's" is one word
5
- # "North-west" is just one word
6
- # "Bolivia," is just Bolivia
7
- WORD_BOUNDARY = %r{\W*(?:\s+|$)}
8
-
9
- attr_reader :fuzzy_match
10
- attr_reader :record
11
- attr_reader :literal
12
- attr_reader :rendered
13
-
14
- def initialize(fuzzy_match, record, literal = false)
15
- @fuzzy_match = fuzzy_match
16
- @record = record
17
- @literal = literal
18
- end
19
-
20
- def inspect
21
- "#<FuzzyMatch::Wrapper render=#{render.inspect} variants=#{variants.length}>"
22
- end
23
-
24
- def read
25
- fuzzy_match.read unless literal
26
- end
27
-
28
- def render
29
- @render ||= begin
30
- memo = case read
31
- when ::Proc
32
- read.call record
33
- when ::Symbol
34
- if record.respond_to?(read)
35
- record.send read
36
- else
37
- record[read]
38
- end
39
- when ::NilClass
40
- record
41
- else
42
- record[read]
43
- end.to_s.dup
44
- fuzzy_match.stop_words.each do |stop_word|
45
- stop_word.apply! memo
46
- end
47
- memo.strip!
48
- @render = memo.freeze
49
- end
50
- end
51
-
52
- alias :to_str :render
53
-
54
- def words
55
- @words ||= render.downcase.split(WORD_BOUNDARY)
56
- end
57
-
58
- def similarity(other)
59
- Similarity.new self, other
60
- end
61
-
62
- def variants
63
- @variants ||= begin
64
- fuzzy_match.normalizers.inject([ render ]) do |memo, normalizer|
65
- if normalizer.apply? render
66
- memo << normalizer.apply(render)
67
- end
68
- memo
69
- end.uniq
70
- end
71
- end
72
- end
73
- end
data/test/helper.rb DELETED
@@ -1,12 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler/setup'
3
-
4
- require 'minitest/spec'
5
- require 'minitest/autorun'
6
-
7
- if RUBY_VERSION >= '1.9'
8
- require 'minitest/reporters'
9
- MiniTest::Reporters.use! MiniTest::Reporters::SpecReporter.new
10
- end
11
-
12
- require 'fuzzy_match'