StreetAddress 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ == 1.0.0 / 2007-06-28
2
+
3
+ * Initial Version
4
+
@@ -0,0 +1,6 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/street_address.rb
6
+ test/test_street_address.rb
@@ -0,0 +1,45 @@
1
+ StreetAddress
2
+ by Riderway (Derrek Long, Nicholas Schlueter)
3
+ http://streetaddress.rubyforge.org
4
+
5
+ == DESCRIPTION:
6
+
7
+ Parses one line street addresses and returns a normalized address object.
8
+
9
+ This is a near direct port of the Perl module
10
+ Geo::StreetAddress::US originally written by Schuyler D. Erle.
11
+ For more information see
12
+ http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
13
+
14
+ == SYNOPSIS:
15
+
16
+ Currently parses United States Addresses.
17
+
18
+ === Basic Usage:
19
+
20
+ StreetAddress::US.parse("1600 Pennsylvania Ave, Washington, DC, 20006")
21
+
22
+ == LICENSE:
23
+
24
+ (The MIT License)
25
+
26
+ Copyright (c) 2007 Riderway
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ 'Software'), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,20 @@
1
# -*- ruby -*-
#
# Build script: declares the StreetAddress gem through Hoe, which generates
# the packaging, test and release tasks from the attributes set below.

require 'rubygems'
require 'hoe'
require './lib/street_address.rb'

Hoe.new('StreetAddress', StreetAddress::VERSION) do |p|
  p.rubyforge_name = 'streetaddress'
  # Without an explicit author Hoe falls back to its own default, which is
  # why the published gem metadata credits "Ryan Davis". The names below are
  # the ones given in README.txt.
  p.author = ['Derrek Long', 'Nicholas Schlueter']
  p.summary = 'Ruby port of the perl module Geo::StreetAddress::US to parse one line street addresses'
  p.description = "Parses one line addresses and returns a normalized address object.

This is a near direct port of the perl module
Geo::StreetAddress::US originally written by Schuyler D. Erle.
For more information see
http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/"
  p.url = "http://streetaddress.rubyforge.org"
  # The first two paragraphs of History.txt describe the latest release.
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end

# vim: syntax=Ruby
@@ -0,0 +1,813 @@
1
+ =begin rdoc
2
+
3
+ === Usage:
4
+ StreetAddress::US.parse("1600 Pennsylvania Ave, washington, dc")
5
+
6
+ === Valid Address Formats
7
+
8
+ 1600 Pennsylvania Ave Washington DC 20006
9
+ 1600 Pennsylvania Ave #400, Washington, DC, 20006
10
+ 1600 Pennsylvania Ave Washington, DC
11
+ 1600 Pennsylvania Ave #400 Washington DC
12
+ 1600 Pennsylvania Ave, 20006
13
+ 1600 Pennsylvania Ave #400, 20006
14
+ 1600 Pennsylvania Ave 20006
15
+ 1600 Pennsylvania Ave #400 20006
16
+
17
+ === Valid Intersection Formats
18
+
19
+ Hollywood & Vine, Los Angeles, CA
20
+ Hollywood Blvd and Vine St, Los Angeles, CA
21
+ Mission Street at Valencia Street, San Francisco, CA
22
+ Hollywood & Vine, Los Angeles, CA, 90028
23
+ Hollywood Blvd and Vine St, Los Angeles, CA, 90028
24
+ Mission Street at Valencia Street, San Francisco, CA, 90028
25
+
26
+ ==== License
27
+
28
+ Copyright (c) 2007 Riderway (Derrek Long, Nicholas Schlueter)
29
+
30
+ Permission is hereby granted, free of charge, to any person obtaining
31
+ a copy of this software and associated documentation files (the
32
+ "Software"), to deal in the Software without restriction, including
33
+ without limitation the rights to use, copy, modify, merge, publish,
34
+ distribute, sublicense, and/or sell copies of the Software, and to
35
+ permit persons to whom the Software is furnished to do so, subject to
36
+ the following conditions:
37
+
38
+ The above copyright notice and this permission notice shall be
39
+ included in all copies or substantial portions of the Software.
40
+
41
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
44
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
45
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
46
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
47
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
48
+
49
+ ==== Notes
50
+ If parts of the address are omitted from the original string
51
+ the accessor will be nil in StreetAddress::US::Address.
52
+
53
+ Example:
54
+ address = StreetAddress::US.parse("1600 Pennsylvania Ave, washington, dc")
55
+ assert address.postal_code.nil?
56
+
57
+ ==== Acknowledgements
58
+
59
+ This gem is a near direct port of the perl module Geo::StreetAddress::US
60
+ originally written by Schuyler D. Erle. For more information see
61
+ http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
62
+
63
+ =end
64
+
65
+ module StreetAddress
66
+ VERSION = '1.0.0'
67
+ class US
68
+ @@directional = {
69
+ "north" => "N",
70
+ "northeast" => "NE",
71
+ "east" => "E",
72
+ "southeast" => "SE",
73
+ "south" => "S",
74
+ "southwest" => "SW",
75
+ "west" => "W",
76
+ "northwest" => "NW"
77
+ }
78
+ @@direction_code = @@directional.invert
79
+
80
+ @@street_type = {
81
+ "allee" => "aly",
82
+ "alley" => "aly",
83
+ "ally" => "aly",
84
+ "anex" => "anx",
85
+ "annex" => "anx",
86
+ "annx" => "anx",
87
+ "arcade" => "arc",
88
+ "av" => "ave",
89
+ "aven" => "ave",
90
+ "avenu" => "ave",
91
+ "avenue" => "ave",
92
+ "avn" => "ave",
93
+ "avnue" => "ave",
94
+ "bayoo" => "byu",
95
+ "bayou" => "byu",
96
+ "beach" => "bch",
97
+ "bend" => "bnd",
98
+ "bluf" => "blf",
99
+ "bluff" => "blf",
100
+ "bluffs" => "blfs",
101
+ "bot" => "btm",
102
+ "bottm" => "btm",
103
+ "bottom" => "btm",
104
+ "boul" => "blvd",
105
+ "boulevard" => "blvd",
106
+ "boulv" => "blvd",
107
+ "branch" => "br",
108
+ "brdge" => "brg",
109
+ "bridge" => "brg",
110
+ "brnch" => "br",
111
+ "brook" => "brk",
112
+ "brooks" => "brks",
113
+ "burg" => "bg",
114
+ "burgs" => "bgs",
115
+ "bypa" => "byp",
116
+ "bypas" => "byp",
117
+ "bypass" => "byp",
118
+ "byps" => "byp",
119
+ "camp" => "cp",
120
+ "canyn" => "cyn",
121
+ "canyon" => "cyn",
122
+ "cape" => "cpe",
123
+ "causeway" => "cswy",
124
+ "causway" => "cswy",
125
+ "cen" => "ctr",
126
+ "cent" => "ctr",
127
+ "center" => "ctr",
128
+ "centers" => "ctrs",
129
+ "centr" => "ctr",
130
+ "centre" => "ctr",
131
+ "circ" => "cir",
132
+ "circl" => "cir",
133
+ "circle" => "cir",
134
+ "circles" => "cirs",
135
+ "ck" => "crk",
136
+ "cliff" => "clf",
137
+ "cliffs" => "clfs",
138
+ "club" => "clb",
139
+ "cmp" => "cp",
140
+ "cnter" => "ctr",
141
+ "cntr" => "ctr",
142
+ "cnyn" => "cyn",
143
+ "common" => "cmn",
144
+ "corner" => "cor",
145
+ "corners" => "cors",
146
+ "course" => "crse",
147
+ "court" => "ct",
148
+ "courts" => "cts",
149
+ "cove" => "cv",
150
+ "coves" => "cvs",
151
+ "cr" => "crk",
152
+ "crcl" => "cir",
153
+ "crcle" => "cir",
154
+ "crecent" => "cres",
155
+ "creek" => "crk",
156
+ "crescent" => "cres",
157
+ "cresent" => "cres",
158
+ "crest" => "crst",
159
+ "crossing" => "xing",
160
+ "crossroad" => "xrd",
161
+ "crscnt" => "cres",
162
+ "crsent" => "cres",
163
+ "crsnt" => "cres",
164
+ "crssing" => "xing",
165
+ "crssng" => "xing",
166
+ "crt" => "ct",
167
+ "curve" => "curv",
168
+ "dale" => "dl",
169
+ "dam" => "dm",
170
+ "div" => "dv",
171
+ "divide" => "dv",
172
+ "driv" => "dr",
173
+ "drive" => "dr",
174
+ "drives" => "drs",
175
+ "drv" => "dr",
176
+ "dvd" => "dv",
177
+ "estate" => "est",
178
+ "estates" => "ests",
179
+ "exp" => "expy",
180
+ "expr" => "expy",
181
+ "express" => "expy",
182
+ "expressway" => "expy",
183
+ "expw" => "expy",
184
+ "extension" => "ext",
185
+ "extensions" => "exts",
186
+ "extn" => "ext",
187
+ "extnsn" => "ext",
188
+ "falls" => "fls",
189
+ "ferry" => "fry",
190
+ "field" => "fld",
191
+ "fields" => "flds",
192
+ "flat" => "flt",
193
+ "flats" => "flts",
194
+ "ford" => "frd",
195
+ "fords" => "frds",
196
+ "forest" => "frst",
197
+ "forests" => "frst",
198
+ "forg" => "frg",
199
+ "forge" => "frg",
200
+ "forges" => "frgs",
201
+ "fork" => "frk",
202
+ "forks" => "frks",
203
+ "fort" => "ft",
204
+ "freeway" => "fwy",
205
+ "freewy" => "fwy",
206
+ "frry" => "fry",
207
+ "frt" => "ft",
208
+ "frway" => "fwy",
209
+ "frwy" => "fwy",
210
+ "garden" => "gdn",
211
+ "gardens" => "gdns",
212
+ "gardn" => "gdn",
213
+ "gateway" => "gtwy",
214
+ "gatewy" => "gtwy",
215
+ "gatway" => "gtwy",
216
+ "glen" => "gln",
217
+ "glens" => "glns",
218
+ "grden" => "gdn",
219
+ "grdn" => "gdn",
220
+ "grdns" => "gdns",
221
+ "green" => "grn",
222
+ "greens" => "grns",
223
+ "grov" => "grv",
224
+ "grove" => "grv",
225
+ "groves" => "grvs",
226
+ "gtway" => "gtwy",
227
+ "harb" => "hbr",
228
+ "harbor" => "hbr",
229
+ "harbors" => "hbrs",
230
+ "harbr" => "hbr",
231
+ "haven" => "hvn",
232
+ "havn" => "hvn",
233
+ "height" => "hts",
234
+ "heights" => "hts",
235
+ "hgts" => "hts",
236
+ "highway" => "hwy",
237
+ "highwy" => "hwy",
238
+ "hill" => "hl",
239
+ "hills" => "hls",
240
+ "hiway" => "hwy",
241
+ "hiwy" => "hwy",
242
+ "hllw" => "holw",
243
+ "hollow" => "holw",
244
+ "hollows" => "holw",
245
+ "holws" => "holw",
246
+ "hrbor" => "hbr",
247
+ "ht" => "hts",
248
+ "hway" => "hwy",
249
+ "inlet" => "inlt",
250
+ "island" => "is",
251
+ "islands" => "iss",
252
+ "isles" => "isle",
253
+ "islnd" => "is",
254
+ "islnds" => "iss",
255
+ "jction" => "jct",
256
+ "jctn" => "jct",
257
+ "jctns" => "jcts",
258
+ "junction" => "jct",
259
+ "junctions" => "jcts",
260
+ "junctn" => "jct",
261
+ "juncton" => "jct",
262
+ "key" => "ky",
263
+ "keys" => "kys",
264
+ "knol" => "knl",
265
+ "knoll" => "knl",
266
+ "knolls" => "knls",
267
+ "la" => "ln",
268
+ "lake" => "lk",
269
+ "lakes" => "lks",
270
+ "landing" => "lndg",
271
+ "lane" => "ln",
272
+ "lanes" => "ln",
273
+ "ldge" => "ldg",
274
+ "light" => "lgt",
275
+ "lights" => "lgts",
276
+ "lndng" => "lndg",
277
+ "loaf" => "lf",
278
+ "lock" => "lck",
279
+ "locks" => "lcks",
280
+ "lodg" => "ldg",
281
+ "lodge" => "ldg",
282
+ "loops" => "loop",
283
+ "manor" => "mnr",
284
+ "manors" => "mnrs",
285
+ "meadow" => "mdw",
286
+ "meadows" => "mdws",
287
+ "medows" => "mdws",
288
+ "mill" => "ml",
289
+ "mills" => "mls",
290
+ "mission" => "msn",
291
+ "missn" => "msn",
292
+ "mnt" => "mt",
293
+ "mntain" => "mtn",
294
+ "mntn" => "mtn",
295
+ "mntns" => "mtns",
296
+ "motorway" => "mtwy",
297
+ "mount" => "mt",
298
+ "mountain" => "mtn",
299
+ "mountains" => "mtns",
300
+ "mountin" => "mtn",
301
+ "mssn" => "msn",
302
+ "mtin" => "mtn",
303
+ "neck" => "nck",
304
+ "orchard" => "orch",
305
+ "orchrd" => "orch",
306
+ "overpass" => "opas",
307
+ "ovl" => "oval",
308
+ "parks" => "park",
309
+ "parkway" => "pkwy",
310
+ "parkways" => "pkwy",
311
+ "parkwy" => "pkwy",
312
+ "passage" => "psge",
313
+ "paths" => "path",
314
+ "pikes" => "pike",
315
+ "pine" => "pne",
316
+ "pines" => "pnes",
317
+ "pk" => "park",
318
+ "pkway" => "pkwy",
319
+ "pkwys" => "pkwy",
320
+ "pky" => "pkwy",
321
+ "place" => "pl",
322
+ "plain" => "pln",
323
+ "plaines" => "plns",
324
+ "plains" => "plns",
325
+ "plaza" => "plz",
326
+ "plza" => "plz",
327
+ "point" => "pt",
328
+ "points" => "pts",
329
+ "port" => "prt",
330
+ "ports" => "prts",
331
+ "prairie" => "pr",
332
+ "prarie" => "pr",
333
+ "prk" => "park",
334
+ "prr" => "pr",
335
+ "rad" => "radl",
336
+ "radial" => "radl",
337
+ "radiel" => "radl",
338
+ "ranch" => "rnch",
339
+ "ranches" => "rnch",
340
+ "rapid" => "rpd",
341
+ "rapids" => "rpds",
342
+ "rdge" => "rdg",
343
+ "rest" => "rst",
344
+ "ridge" => "rdg",
345
+ "ridges" => "rdgs",
346
+ "river" => "riv",
347
+ "rivr" => "riv",
348
+ "rnchs" => "rnch",
349
+ "road" => "rd",
350
+ "roads" => "rds",
351
+ "route" => "rte",
352
+ "rvr" => "riv",
353
+ "shoal" => "shl",
354
+ "shoals" => "shls",
355
+ "shoar" => "shr",
356
+ "shoars" => "shrs",
357
+ "shore" => "shr",
358
+ "shores" => "shrs",
359
+ "skyway" => "skwy",
360
+ "spng" => "spg",
361
+ "spngs" => "spgs",
362
+ "spring" => "spg",
363
+ "springs" => "spgs",
364
+ "sprng" => "spg",
365
+ "sprngs" => "spgs",
366
+ "spurs" => "spur",
367
+ "sqr" => "sq",
368
+ "sqre" => "sq",
369
+ "sqrs" => "sqs",
370
+ "squ" => "sq",
371
+ "square" => "sq",
372
+ "squares" => "sqs",
373
+ "station" => "sta",
374
+ "statn" => "sta",
375
+ "stn" => "sta",
376
+ "str" => "st",
377
+ "strav" => "stra",
378
+ "strave" => "stra",
379
+ "straven" => "stra",
380
+ "stravenue" => "stra",
381
+ "stravn" => "stra",
382
+ "stream" => "strm",
383
+ "street" => "st",
384
+ "streets" => "sts",
385
+ "streme" => "strm",
386
+ "strt" => "st",
387
+ "strvn" => "stra",
388
+ "strvnue" => "stra",
389
+ "sumit" => "smt",
390
+ "sumitt" => "smt",
391
+ "summit" => "smt",
392
+ "terr" => "ter",
393
+ "terrace" => "ter",
394
+ "throughway" => "trwy",
395
+ "tpk" => "tpke",
396
+ "tr" => "trl",
397
+ "trace" => "trce",
398
+ "traces" => "trce",
399
+ "track" => "trak",
400
+ "tracks" => "trak",
401
+ "trafficway" => "trfy",
402
+ "trail" => "trl",
403
+ "trails" => "trl",
404
+ "trk" => "trak",
405
+ "trks" => "trak",
406
+ "trls" => "trl",
407
+ "trnpk" => "tpke",
408
+ "trpk" => "tpke",
409
+ "tunel" => "tunl",
410
+ "tunls" => "tunl",
411
+ "tunnel" => "tunl",
412
+ "tunnels" => "tunl",
413
+ "tunnl" => "tunl",
414
+ "turnpike" => "tpke",
415
+ "turnpk" => "tpke",
416
+ "underpass" => "upas",
417
+ "union" => "un",
418
+ "unions" => "uns",
419
+ "valley" => "vly",
420
+ "valleys" => "vlys",
421
+ "vally" => "vly",
422
+ "vdct" => "via",
423
+ "viadct" => "via",
424
+ "viaduct" => "via",
425
+ "view" => "vw",
426
+ "views" => "vws",
427
+ "vill" => "vlg",
428
+ "villag" => "vlg",
429
+ "village" => "vlg",
430
+ "villages" => "vlgs",
431
+ "ville" => "vl",
432
+ "villg" => "vlg",
433
+ "villiage" => "vlg",
434
+ "vist" => "vis",
435
+ "vista" => "vis",
436
+ "vlly" => "vly",
437
+ "vst" => "vis",
438
+ "vsta" => "vis",
439
+ "walks" => "walk",
440
+ "well" => "wl",
441
+ "wells" => "wls",
442
+ "wy" => "way"
443
+ }
444
+
445
+ @@street_type_list = {}
446
+ @@street_type.to_a.each{|item| @@street_type_list[item[0]] = true; @@street_type_list[item[1]] = true}
447
+
448
+ @@state_code = {
449
+ "alabama" => "AL",
450
+ "alaska" => "AK",
451
+ "american samoa" => "AS",
452
+ "arizona" => "AZ",
453
+ "arkansas" => "AR",
454
+ "california" => "CA",
455
+ "colorado" => "CO",
456
+ "connecticut" => "CT",
457
+ "delaware" => "DE",
458
+ "district of columbia" => "DC",
459
+ "federated states of micronesia" => "FM",
460
+ "florida" => "FL",
461
+ "georgia" => "GA",
462
+ "guam" => "GU",
463
+ "hawaii" => "HI",
464
+ "idaho" => "ID",
465
+ "illinois" => "IL",
466
+ "indiana" => "IN",
467
+ "iowa" => "IA",
468
+ "kansas" => "KS",
469
+ "kentucky" => "KY",
470
+ "louisiana" => "LA",
471
+ "maine" => "ME",
472
+ "marshall islands" => "MH",
473
+ "maryland" => "MD",
474
+ "massachusetts" => "MA",
475
+ "michigan" => "MI",
476
+ "minnesota" => "MN",
477
+ "mississippi" => "MS",
478
+ "missouri" => "MO",
479
+ "montana" => "MT",
480
+ "nebraska" => "NE",
481
+ "nevada" => "NV",
482
+ "new hampshire" => "NH",
483
+ "new jersey" => "NJ",
484
+ "new mexico" => "NM",
485
+ "new york" => "NY",
486
+ "north carolina" => "NC",
487
+ "north dakota" => "ND",
488
+ "northern mariana islands" => "MP",
489
+ "ohio" => "OH",
490
+ "oklahoma" => "OK",
491
+ "oregon" => "OR",
492
+ "palau" => "PW",
493
+ "pennsylvania" => "PA",
494
+ "puerto rico" => "PR",
495
+ "rhode island" => "RI",
496
+ "south carolina" => "SC",
497
+ "south dakota" => "SD",
498
+ "tennessee" => "TN",
499
+ "texas" => "TX",
500
+ "utah" => "UT",
501
+ "vermont" => "VT",
502
+ "virgin islands" => "VI",
503
+ "virginia" => "VA",
504
+ "washington" => "WA",
505
+ "west virginia" => "WV",
506
+ "wisconsin" => "WI",
507
+ "wyoming" => "WY"
508
+ }
509
+
510
+ @@state_name = @@state_code.invert
511
+
512
+ @@state_fips = {
513
+ "01" => "AL",
514
+ "02" => "AK",
515
+ "04" => "AZ",
516
+ "05" => "AR",
517
+ "06" => "CA",
518
+ "08" => "CO",
519
+ "09" => "CT",
520
+ "10" => "DE",
521
+ "11" => "DC",
522
+ "12" => "FL",
523
+ "13" => "GA",
524
+ "15" => "HI",
525
+ "16" => "ID",
526
+ "17" => "IL",
527
+ "18" => "IN",
528
+ "19" => "IA",
529
+ "20" => "KS",
530
+ "21" => "KY",
531
+ "22" => "LA",
532
+ "23" => "ME",
533
+ "24" => "MD",
534
+ "25" => "MA",
535
+ "26" => "MI",
536
+ "27" => "MN",
537
+ "28" => "MS",
538
+ "29" => "MO",
539
+ "30" => "MT",
540
+ "31" => "NE",
541
+ "32" => "NV",
542
+ "33" => "NH",
543
+ "34" => "NJ",
544
+ "35" => "NM",
545
+ "36" => "NY",
546
+ "37" => "NC",
547
+ "38" => "ND",
548
+ "39" => "OH",
549
+ "40" => "OK",
550
+ "41" => "OR",
551
+ "42" => "PA",
552
+ "44" => "RI",
553
+ "45" => "SC",
554
+ "46" => "SD",
555
+ "47" => "TN",
556
+ "48" => "TX",
557
+ "49" => "UT",
558
+ "50" => "VT",
559
+ "51" => "VA",
560
+ "53" => "WA",
561
+ "54" => "WV",
562
+ "55" => "WI",
563
+ "56" => "WY",
564
+ "72" => "PR",
565
+ "78" => "VI"
566
+ }
567
+
568
+ @@fips_state = @@state_fips.invert
569
+
570
# Pattern fragments (plain strings) that are spliced into the street, place
# and address patterns further down. They are compiled later with
# Regexp::IGNORECASE and, for the address patterns, Regexp::EXTENDED.

# Every known street-type spelling and abbreviation.
@@street_type_regexp = @@street_type_list.keys.join("|")
# House number, optionally hyphenated ("123" or "123-45").
@@number_regexp = '\d+-?\d*'
# Fraction following a house number ("1/2").
@@fraction_regexp = '\d+\/\d+'
# State names and two-letter codes. Each alternative is escaped because the
# address patterns are compiled in extended mode, where unescaped whitespace
# is ignored: a bare "new york" would only ever match the text "newyork".
@@state_regexp = @@state_code.to_a.flatten.map { |state| Regexp.quote(state) }.join("|")
# Spelled-out directionals, then abbreviations (longest first) in both dotted
# ("N.E.") and plain ("NE") form.
@@direct_regexp =
  @@directional.keys.join("|") + "|" +
  @@directional.values.sort { |a, b| b.length <=> a.length }.map { |x|
    f = x.gsub(/(\w)/, '\1.')
    [Regexp::quote(f), Regexp::quote(x)]
  }.join("|")
# Five-digit ZIP (group 1) with an optional +4 extension (group 2).
@@zip_regexp = '(\d{5})(?:-(\d{4}))?'
# Words and symbols that join the two streets of an intersection.
@@corner_regexp = '(?:\band\b|\bat\b|&|\@)'
# Secondary unit: an optional designator (group 1) or "#", then the unit
# identifier (group 2).
@@unit_regexp = '(?:(su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|apartment|ro*m|fl|unit|box)\W+|\#\W*)([\w-]+)'
578
+ @@street_regexp =
579
+ '(?:
580
+ (?:(' + @@direct_regexp + ')\W+
581
+ (' + @@street_type_regexp + ')\b)
582
+ |
583
+ (?:(' + @@direct_regexp + ')\W+)?
584
+ (?:
585
+ ([^,]+)
586
+ (?:[^\w,]+(' + @@street_type_regexp + ')\b)
587
+ (?:[^\w,]+(' + @@direct_regexp + ')\b)?
588
+ |
589
+ ([^,]*\d)
590
+ (' + @@direct_regexp + ')\b
591
+ |
592
+ ([^,]+?)
593
+ (?:[^\w,]+(' + @@street_type_regexp + ')\b)?
594
+ (?:[^\w,]+(' + @@direct_regexp + ')\b)?
595
+ )
596
+ )'
597
# City / state / ZIP tail of an address. Capture groups, in order:
# city, state (name or code), five-digit ZIP, optional +4 extension.
# Both the "city state" part and the ZIP part are optional.
#
# The state group must open directly with the alternation: a "$" in front of
# it is an end-of-line anchor in a Ruby pattern and would bind to the first
# alternative only, making that state name impossible to match.
@@place_regexp =
  '(?:
    ([^\d,]+?)\W+
    (' + @@state_regexp + ')\W*
  )?
  (?:' + @@zip_regexp + ')?'
603
+
604
+ @@address_regexp =
605
+ '\A\W*
606
+ (' + @@number_regexp + ')\W*
607
+ (?:' + @@fraction_regexp + '\W*)?' +
608
+ @@street_regexp + '\W+
609
+ (?:' + @@unit_regexp + '\W+)?' +
610
+ @@place_regexp +
611
+ '\W*\Z'
612
+
613
+ class << self
614
+ =begin rdoc
615
+
616
+ parses either an address or intersection and returns an instance of
617
+ StreetAddress::US::Address or nil if the location cannot be parsed
618
+
619
+ ====example
620
+ StreetAddress::US.parse('1600 Pennsylvania Ave Washington, DC 20006')
621
+ or:
622
+ StreetAddress::US.parse('Hollywood & Vine, Los Angeles, CA')
623
+
624
+ =end
625
def parse(location)
  # Any corner word or symbol ("and", "at", "&", "@") in the input routes it
  # to the intersection parser; everything else is parsed as one address.
  corner = Regexp.new(@@corner_regexp, Regexp::IGNORECASE)
  if corner.match(location)
    parse_intersection(location)
  else
    parse_address(location)
  end
end
634
+
635
+ =begin rdoc
636
+
637
+ parses only an intersection and returns an instance of
638
+ StreetAddress::US::Address or nil if the intersection cannot be parsed
639
+
640
+ ====example
641
+ address = StreetAddress::US.parse('Hollywood & Vine, Los Angeles, CA')
642
+ assert address.intersection?
643
+
644
+ =end
645
def parse_intersection(inter)
  regex = Regexp.new(
    '\A\W*' + @@street_regexp + '\W*?
    \s+' + @@corner_regexp + '\s+' +
    @@street_regexp + '\W+' +
    @@place_regexp + '\W*\Z', Regexp::IGNORECASE | Regexp::EXTENDED)
  match = regex.match(inter)
  return if match.nil?

  # Each street pattern contributes eleven capture groups:
  #    1  directional used as the street name ("South Street" special case)
  #    2  street type of that special case
  #    3  directional prefix
  #    4  street name          5  street type     6  directional suffix
  #    7  street name ending in a digit           8  directional glued to it
  #    9  street name (lazy)  10  street type    11  directional suffix
  # The second street repeats this layout as groups 12-22, and the place
  # pattern follows as 23 (city), 24 (state), 25 (ZIP) and 26 (ZIP+4).
  # Only one alternative of a street pattern can match, so the fallbacks
  # below simply pick whichever group was filled.
  normalize_address(
    StreetAddress::US::Address.new(
      :street => match[4] || match[7] || match[9] || match[1],
      :street_type => match[5] || match[10] || match[2],
      :suffix => match[6] || match[8] || match[11],
      :prefix => match[3],
      :street2 => match[15] || match[18] || match[20] || match[12],
      :street_type2 => match[16] || match[21] || match[13],
      :suffix2 => match[17] || match[19] || match[22],
      :prefix2 => match[14],
      :city => match[23],
      :state => match[24],
      :postal_code => match[25],
      :postal_code_ext => match[26]
    )
  )
end
670
+
671
+ =begin rdoc
672
+
673
+ parses only an address and returns an instance of
674
+ StreetAddress::US::Address or nil if the address cannot be parsed
675
+
676
+ ====example
677
+ address = StreetAddress::US.parse('1600 Pennsylvania Ave Washington, DC 20006')
678
+ assert !address.intersection?
679
+
680
+ =end
681
def parse_address(addr)
  regex = Regexp.new(@@address_regexp, Regexp::IGNORECASE | Regexp::EXTENDED)
  match = regex.match(addr)
  return if match.nil?

  # Capture groups of the address pattern:
  #    1  house number
  #    2  directional used as the street name ("100 South Street")
  #    3  street type of that special case
  #    4  directional prefix
  #    5  street name          6  street type     7  directional suffix
  #    8  street name ending in a digit           9  directional glued to it
  #   10  street name (lazy)  11  street type    12  directional suffix
  #   13  unit designator     14  unit identifier
  #   15  city   16  state   17  ZIP   18  ZIP+4
  # Only one street alternative can match, so the fallbacks below pick
  # whichever group was filled.
  normalize_address(
    StreetAddress::US::Address.new(
      :number => match[1],
      :street => match[5] || match[8] || match[10] || match[2],
      :street_type => match[6] || match[11] || match[3],
      :unit => match[14],
      :unit_prefix => match[13],
      :suffix => match[7] || match[9] || match[12],
      :prefix => match[4],
      :city => match[15],
      :state => match[16],
      :postal_code => match[17],
      :postal_code_ext => match[18]
    )
  )
end
702
+
703
+ def state_name #:nodoc:
704
+ @@state_name
705
+ end
706
+
707
+ def fips_state #:nodoc:
708
+ @@fips_state
709
+ end
710
+
711
+ private
712
# Brings every populated field of a freshly parsed address into canonical
# form: state and directionals become upper-case codes, street types become
# capitalized abbreviations, and street/city words and the unit designator
# are capitalized in place. Fields that are nil are left untouched.
#
# addr - the address object to normalize (modified in place).
#
# Returns the same addr object.
def normalize_address(addr)
  # Upper-cases the first letter of every lower-case word, in place.
  capitalize_words = lambda do |text|
    text.gsub!(/\b([a-z])/) { |letter| letter.capitalize }
  end

  addr.state = normalize_state(addr.state) unless addr.state.nil?

  [:street_type, :street_type2].each do |field|
    value = addr.send(field)
    addr.send("#{field}=", normalize_street_type(value)) unless value.nil?
  end

  [:prefix, :suffix, :prefix2, :suffix2].each do |field|
    value = addr.send(field)
    addr.send("#{field}=", normalize_directional(value)) unless value.nil?
  end

  [:street, :street2, :city].each do |field|
    value = addr.send(field)
    capitalize_words.call(value) unless value.nil?
  end

  addr.unit_prefix.capitalize! unless addr.unit_prefix.nil?
  addr
end
726
+
727
# Converts a matched state into its two-letter code.
#
# state - either an abbreviation (fewer than three characters) or a full
#         state name, in any letter case.
#
# Returns the upper-cased abbreviation, or the @@state_code entry for the
# lower-cased name (nil when the name is not a key of that table).
def normalize_state(state)
  return state.upcase if state.length < 3

  @@state_code[state.downcase]
end
734
+
735
# Converts a matched street type into its capitalized abbreviation.
#
# s_type - the street type as captured from the input ("Avenue", "ST", ...).
#          The argument itself is not modified.
#
# Returns the @@street_type abbreviation for the lower-cased word, or the
# word itself when the table has no entry for it, capitalized ("Ave", "St").
def normalize_street_type(s_type)
  type = s_type.downcase
  (@@street_type[type] || type).capitalize
end
740
+
741
# Converts a matched directional into its upper-case abbreviation.
#
# dir - the directional as captured by the address pattern: a full word
#       ("north"), an abbreviation ("NE") or a dotted abbreviation ("N.E.").
#
# Returns the abbreviation ("N", "NE", ...), or nil when a longer word is
# not a key of @@directional.
def normalize_directional(dir)
  # The directional pattern also accepts dotted forms such as "N." and
  # "N.E."; drop the punctuation so they normalize like plain abbreviations
  # instead of failing the table lookup or keeping their dots.
  code = dir.gsub(/\W/, '')
  if code.length < 3
    code.upcase
  else
    @@directional[code.downcase]
  end
end
748
+ end
749
+
750
+ =begin rdoc
751
+
752
+ This is the class returned by StreetAddress::US::parse, StreetAddress::US::parse_address
753
+ and StreetAddress::US::parse_intersection. If an instance represents an intersection
754
+ the attribute street2 will be populated.
755
+
756
+ =end
757
# Plain data holder for one parsed location. Every attribute is a String or
# nil; an instance describes an intersection when street2 is set.
class Address
  attr_accessor :number, :street, :street_type, :unit, :unit_prefix, :suffix, :prefix, :city, :state, :postal_code, :postal_code_ext, :street2, :street_type2, :suffix2, :prefix2

  # args - Hash of attribute name => value; each pair is assigned through
  #        the matching writer, so unknown names raise NoMethodError.
  def initialize(args)
    args.each { |attrib, value| send("#{attrib}=", value) }
  end

  # Returns the StreetAddress::US.fips_state entry for this address's state
  # code, or nil when the table has none.
  def fip
    StreetAddress::US.fips_state[state]
  end

  # Returns the StreetAddress::US.state_name entry for this address's state
  # code, capitalized, or nil when the table has none.
  # NOTE: String#capitalize upper-cases only the first character, so a
  # multi-word name comes back as e.g. "New york".
  def state_name
    s_name = StreetAddress::US.state_name[state]
    s_name.capitalize unless s_name.nil?
  end

  # True when a second street is present.
  def intersection?
    !street2.nil?
  end

  # Renders the location on one line, e.g.
  #   "2730 S Veitch St Apt 207, Arlington, VA 22206"
  #   "Hollywood Blvd and Vine St, Los Angeles, CA"
  # Parts that are nil are skipped. A unit without a designator is written
  # with a leading "#".
  def to_s
    s = ""
    if intersection?
      s += prefix + " " unless prefix.nil?
      s += street
      s += " " + street_type unless street_type.nil?
      s += " " + suffix unless suffix.nil?
      s += " and"
      s += " " + prefix2 unless prefix2.nil?
      s += " " + street2
      s += " " + street_type2 unless street_type2.nil?
      s += " " + suffix2 unless suffix2.nil?
      s += ", " + city unless city.nil?
      s += ", " + state unless state.nil?
      s += " " + postal_code unless postal_code.nil?
    else
      s += number
      s += " " + prefix unless prefix.nil?
      s += " " + street unless street.nil?
      s += " " + street_type unless street_type.nil?
      if !unit_prefix.nil? && !unit.nil?
        s += " " + unit_prefix
        s += " " + unit
      elsif unit_prefix.nil? && !unit.nil?
        s += " #" + unit
      end
      s += " " + suffix unless suffix.nil?
      s += ", " + city unless city.nil?
      s += ", " + state unless state.nil?
      s += " " + postal_code unless postal_code.nil?
      s += "-" + postal_code_ext unless postal_code_ext.nil?
    end
    s
  end
end
812
+ end
813
+ end
@@ -0,0 +1,136 @@
1
+ require 'test/unit/testcase'
2
+ require File.dirname(__FILE__) + '/../lib/street_address'
3
+
4
+
5
# Exercises StreetAddress::US.parse on plain addresses, addresses with
# units, an unparseable PO box and the three intersection spellings.
# Assertions follow the Test::Unit convention assert_equal(expected, actual)
# so that failure messages label the two values correctly.
class StreetAddressUs < Test::Unit::TestCase
  def setup
    @addr1 = "2730 S Veitch St Apt 207, Arlington, VA 22206"
    @addr2 = "44 Canal Center Plaza Suite 500, Alexandria, VA 22314"
    @addr3 = "1600 Pennsylvania Ave Washington DC"
    @addr4 = "1005 Gravenstein Hwy N, Sebastopol CA 95472"
    @addr5 = "PO BOX 450, Chicago IL 60657"
    @addr6 = "2730 S Veitch St #207, Arlington, VA 22206"

    @int1 = "Hollywood & Vine, Los Angeles, CA"
    @int2 = "Hollywood Blvd and Vine St, Los Angeles, CA"
    @int3 = "Mission Street at Valencia Street, San Francisco, CA"
  end

  def test_parse
    # A corner word with no streets around it is not an intersection.
    assert_nil StreetAddress::US.parse("&")
    assert_nil StreetAddress::US.parse(" and ")

    addr = StreetAddress::US.parse(@addr1)
    assert_equal "2730", addr.number
    assert_equal "22206", addr.postal_code
    assert_equal "S", addr.prefix
    assert_equal "VA", addr.state
    assert_equal "Veitch", addr.street
    assert_equal "St", addr.street_type
    assert_equal "207", addr.unit
    assert_equal "Apt", addr.unit_prefix
    assert_equal "Arlington", addr.city
    assert_nil addr.prefix2
    assert_nil addr.postal_code_ext

    addr = StreetAddress::US.parse(@addr2)
    assert_equal "44", addr.number
    assert_equal "22314", addr.postal_code
    assert_nil addr.prefix
    assert_equal "VA", addr.state
    assert_equal "Canal Center", addr.street
    assert_equal "Plz", addr.street_type
    assert_equal "500", addr.unit
    assert_equal "Suite", addr.unit_prefix
    assert_equal "Alexandria", addr.city
    assert_nil addr.street2

    addr = StreetAddress::US.parse(@addr3)
    assert_equal "1600", addr.number
    assert_nil addr.postal_code
    assert_nil addr.prefix
    assert_equal "DC", addr.state
    assert_equal "Pennsylvania", addr.street
    assert_equal "Ave", addr.street_type
    assert_nil addr.unit
    assert_nil addr.unit_prefix
    assert_equal "Washington", addr.city
    assert_nil addr.street2

    addr = StreetAddress::US.parse(@addr4)
    assert_equal "1005", addr.number
    assert_equal "95472", addr.postal_code
    assert_nil addr.prefix
    assert_equal "CA", addr.state
    assert_equal "Gravenstein", addr.street
    assert_equal "Hwy", addr.street_type
    assert_nil addr.unit
    assert_nil addr.unit_prefix
    assert_equal "Sebastopol", addr.city
    assert_nil addr.street2
    assert_equal "N", addr.suffix

    # PO boxes have no house number and are expected to be rejected.
    addr = StreetAddress::US.parse(@addr5)
    assert_nil addr

    addr = StreetAddress::US.parse(@addr6)
    assert_equal("207", addr.unit)

    addr = StreetAddress::US.parse(@int1)
    assert_equal "Los Angeles", addr.city
    assert_equal "CA", addr.state
    assert_equal "Hollywood", addr.street
    assert_equal "Vine", addr.street2
    assert_nil addr.number
    assert_nil addr.postal_code
    assert_equal true, addr.intersection?

    addr = StreetAddress::US.parse(@int2)
    assert_equal "Los Angeles", addr.city
    assert_equal "CA", addr.state
    assert_equal "Hollywood", addr.street
    assert_equal "Vine", addr.street2
    assert_nil addr.number
    assert_nil addr.postal_code
    assert_equal true, addr.intersection?
    assert_equal "Blvd", addr.street_type
    assert_equal "St", addr.street_type2

    addr = StreetAddress::US.parse(@int3)
    assert_equal "San Francisco", addr.city
    assert_equal "CA", addr.state
    assert_equal "Mission", addr.street
    assert_equal "Valencia", addr.street2
    assert_nil addr.number
    assert_nil addr.postal_code
    assert_equal true, addr.intersection?
    assert_equal "St", addr.street_type
    assert_equal "St", addr.street_type2

    # Every format advertised in the library documentation must parse.
    parseable = ["1600 Pennsylvania Ave Washington DC 20006",
                 "1600 Pennsylvania Ave #400, Washington, DC, 20006",
                 "1600 Pennsylvania Ave Washington, DC",
                 "1600 Pennsylvania Ave #400 Washington DC",
                 "1600 Pennsylvania Ave, 20006",
                 "1600 Pennsylvania Ave #400, 20006",
                 "1600 Pennsylvania Ave 20006",
                 "1600 Pennsylvania Ave #400 20006",
                 "Hollywood & Vine, Los Angeles, CA",
                 "Hollywood Blvd and Vine St, Los Angeles, CA",
                 "Mission Street at Valencia Street, San Francisco, CA",
                 "Hollywood & Vine, Los Angeles, CA, 90028",
                 "Hollywood Blvd and Vine St, Los Angeles, CA, 90028",
                 "Mission Street at Valencia Street, San Francisco, CA, 90028"]

    parseable.each do |location|
      assert_not_nil(StreetAddress::US.parse(location), location + " was not parse able")
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: StreetAddress
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-07-02 00:00:00 -04:00
8
+ summary: Ruby port of the perl module Geo::StreetAddress::US to parse one line street addresses
9
+ require_paths:
10
+ - lib
11
+ email: ryand-ruby@zenspider.com
12
+ homepage: http://streetaddress.rubyforge.org
13
+ rubyforge_project: streetaddress
14
+ description: Parses one line addresses and returns a normalized address object. This is a near direct port of the of the perl module Geo::StreetAddress::US originally written by Schuyler D. Erle. For more information see http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Ryan Davis
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - Rakefile
36
+ - lib/street_address.rb
37
+ - test/test_street_address.rb
38
+ test_files:
39
+ - test/test_street_address.rb
40
+ rdoc_options:
41
+ - --main
42
+ - README.txt
43
+ extra_rdoc_files:
44
+ - History.txt
45
+ - Manifest.txt
46
+ - README.txt
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ requirements: []
52
+
53
+ dependencies:
54
+ - !ruby/object:Gem::Dependency
55
+ name: hoe
56
+ version_requirement:
57
+ version_requirements: !ruby/object:Gem::Version::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 1.2.1
62
+ version: