StreetAddress 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ == 1.0.0 / 2007-06-28
2
+
3
+ * Initial Version
4
+
@@ -0,0 +1,6 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/street_address.rb
6
+ test/test_street_address.rb
@@ -0,0 +1,45 @@
1
+ StreetAddress
2
+ by Riderway (Derrek Long, Nicholas Schlueter)
3
+ http://streetaddress.rubyforge.org
4
+
5
+ == DESCRIPTION:
6
+
7
+ Parses one line street addresses and returns a normalized address object.
8
+
9
+ This is a near direct port of the Perl module
10
+ Geo::StreetAddress::US originally written by Schuyler D. Erle.
11
+ For more information see
12
+ http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
13
+
14
+ == SYNOPSIS:
15
+
16
+ Currently parses United States Addresses.
17
+
18
+ === Basic Usage:
19
+
20
+ StreetAddress::US.parse("1600 Pennsylvania Ave, Washington, DC, 20006")
21
+
22
+ == LICENSE:
23
+
24
+ (The MIT License)
25
+
26
+ Copyright (c) 2007 Riderway
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ 'Software'), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,20 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/street_address.rb'
6
+
7
# Gem packaging/release tasks, generated by Hoe from the settings below.
# The description text mirrors the README; the duplicated "of the" has been
# removed so the published gem description reads correctly.
Hoe.new('StreetAddress', StreetAddress::VERSION) do |p|
  p.rubyforge_name = 'streetaddress'
  p.summary = 'Ruby port of the perl module Geo::StreetAddress::US to parse one line street addresses'
  p.description = "Parses one line addresses and returns a normalized address object.

This is a near direct port of the perl module
Geo::StreetAddress::US originally written by Schuyler D. Erle.
For more information see
http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/"
  p.url = "http://streetaddress.rubyforge.org"
  # Release notes: the first two paragraphs of History.txt.
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end
19
+
20
+ # vim: syntax=Ruby
@@ -0,0 +1,813 @@
1
+ =begin rdoc
2
+
3
+ === Usage:
4
+ StreetAddress::US.parse("1600 Pennsylvania Ave, washington, dc")
5
+
6
+ === Valid Address Formats
7
+
8
+ 1600 Pennsylvania Ave Washington DC 20006
9
+ 1600 Pennsylvania Ave #400, Washington, DC, 20006
10
+ 1600 Pennsylvania Ave Washington, DC
11
+ 1600 Pennsylvania Ave #400 Washington DC
12
+ 1600 Pennsylvania Ave, 20006
13
+ 1600 Pennsylvania Ave #400, 20006
14
+ 1600 Pennsylvania Ave 20006
15
+ 1600 Pennsylvania Ave #400 20006
16
+
17
+ === Valid Intersection Formats
18
+
19
+ Hollywood & Vine, Los Angeles, CA
20
+ Hollywood Blvd and Vine St, Los Angeles, CA
21
+ Mission Street at Valencia Street, San Francisco, CA
22
+ Hollywood & Vine, Los Angeles, CA, 90028
23
+ Hollywood Blvd and Vine St, Los Angeles, CA, 90028
24
+ Mission Street at Valencia Street, San Francisco, CA, 90028
25
+
26
+ ==== License
27
+
28
+ Copyright (c) 2007 Riderway (Derrek Long, Nicholas Schlueter)
29
+
30
+ Permission is hereby granted, free of charge, to any person obtaining
31
+ a copy of this software and associated documentation files (the
32
+ "Software"), to deal in the Software without restriction, including
33
+ without limitation the rights to use, copy, modify, merge, publish,
34
+ distribute, sublicense, and/or sell copies of the Software, and to
35
+ permit persons to whom the Software is furnished to do so, subject to
36
+ the following conditions:
37
+
38
+ The above copyright notice and this permission notice shall be
39
+ included in all copies or substantial portions of the Software.
40
+
41
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
44
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
45
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
46
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
47
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
48
+
49
+ ==== Notes
50
+ If parts of the address are omitted from the original string
51
+ the accessor will be nil in StreetAddress::US::Address.
52
+
53
+ Example:
54
+ address = StreetAddress::US.parse("1600 Pennsylvania Ave, washington, dc")
55
+ assert address.postal_code.nil?
56
+
57
+ ==== Acknowledgements
58
+
59
+ This gem is a near direct port of the perl module Geo::StreetAddress::US
60
+ originally written by Schuyler D. Erle. For more information see
61
+ http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
62
+
63
+ =end
64
+
65
+ module StreetAddress
66
+ VERSION = '1.0.0'
67
+ class US
68
+ @@directional = {
69
+ "north" => "N",
70
+ "northeast" => "NE",
71
+ "east" => "E",
72
+ "southeast" => "SE",
73
+ "south" => "S",
74
+ "southwest" => "SW",
75
+ "west" => "W",
76
+ "northwest" => "NW"
77
+ }
78
+ @@direction_code = @@directional.invert
79
+
80
+ @@street_type = {
81
+ "allee" => "aly",
82
+ "alley" => "aly",
83
+ "ally" => "aly",
84
+ "anex" => "anx",
85
+ "annex" => "anx",
86
+ "annx" => "anx",
87
+ "arcade" => "arc",
88
+ "av" => "ave",
89
+ "aven" => "ave",
90
+ "avenu" => "ave",
91
+ "avenue" => "ave",
92
+ "avn" => "ave",
93
+ "avnue" => "ave",
94
+ "bayoo" => "byu",
95
+ "bayou" => "byu",
96
+ "beach" => "bch",
97
+ "bend" => "bnd",
98
+ "bluf" => "blf",
99
+ "bluff" => "blf",
100
+ "bluffs" => "blfs",
101
+ "bot" => "btm",
102
+ "bottm" => "btm",
103
+ "bottom" => "btm",
104
+ "boul" => "blvd",
105
+ "boulevard" => "blvd",
106
+ "boulv" => "blvd",
107
+ "branch" => "br",
108
+ "brdge" => "brg",
109
+ "bridge" => "brg",
110
+ "brnch" => "br",
111
+ "brook" => "brk",
112
+ "brooks" => "brks",
113
+ "burg" => "bg",
114
+ "burgs" => "bgs",
115
+ "bypa" => "byp",
116
+ "bypas" => "byp",
117
+ "bypass" => "byp",
118
+ "byps" => "byp",
119
+ "camp" => "cp",
120
+ "canyn" => "cyn",
121
+ "canyon" => "cyn",
122
+ "cape" => "cpe",
123
+ "causeway" => "cswy",
124
+ "causway" => "cswy",
125
+ "cen" => "ctr",
126
+ "cent" => "ctr",
127
+ "center" => "ctr",
128
+ "centers" => "ctrs",
129
+ "centr" => "ctr",
130
+ "centre" => "ctr",
131
+ "circ" => "cir",
132
+ "circl" => "cir",
133
+ "circle" => "cir",
134
+ "circles" => "cirs",
135
+ "ck" => "crk",
136
+ "cliff" => "clf",
137
+ "cliffs" => "clfs",
138
+ "club" => "clb",
139
+ "cmp" => "cp",
140
+ "cnter" => "ctr",
141
+ "cntr" => "ctr",
142
+ "cnyn" => "cyn",
143
+ "common" => "cmn",
144
+ "corner" => "cor",
145
+ "corners" => "cors",
146
+ "course" => "crse",
147
+ "court" => "ct",
148
+ "courts" => "cts",
149
+ "cove" => "cv",
150
+ "coves" => "cvs",
151
+ "cr" => "crk",
152
+ "crcl" => "cir",
153
+ "crcle" => "cir",
154
+ "crecent" => "cres",
155
+ "creek" => "crk",
156
+ "crescent" => "cres",
157
+ "cresent" => "cres",
158
+ "crest" => "crst",
159
+ "crossing" => "xing",
160
+ "crossroad" => "xrd",
161
+ "crscnt" => "cres",
162
+ "crsent" => "cres",
163
+ "crsnt" => "cres",
164
+ "crssing" => "xing",
165
+ "crssng" => "xing",
166
+ "crt" => "ct",
167
+ "curve" => "curv",
168
+ "dale" => "dl",
169
+ "dam" => "dm",
170
+ "div" => "dv",
171
+ "divide" => "dv",
172
+ "driv" => "dr",
173
+ "drive" => "dr",
174
+ "drives" => "drs",
175
+ "drv" => "dr",
176
+ "dvd" => "dv",
177
+ "estate" => "est",
178
+ "estates" => "ests",
179
+ "exp" => "expy",
180
+ "expr" => "expy",
181
+ "express" => "expy",
182
+ "expressway" => "expy",
183
+ "expw" => "expy",
184
+ "extension" => "ext",
185
+ "extensions" => "exts",
186
+ "extn" => "ext",
187
+ "extnsn" => "ext",
188
+ "falls" => "fls",
189
+ "ferry" => "fry",
190
+ "field" => "fld",
191
+ "fields" => "flds",
192
+ "flat" => "flt",
193
+ "flats" => "flts",
194
+ "ford" => "frd",
195
+ "fords" => "frds",
196
+ "forest" => "frst",
197
+ "forests" => "frst",
198
+ "forg" => "frg",
199
+ "forge" => "frg",
200
+ "forges" => "frgs",
201
+ "fork" => "frk",
202
+ "forks" => "frks",
203
+ "fort" => "ft",
204
+ "freeway" => "fwy",
205
+ "freewy" => "fwy",
206
+ "frry" => "fry",
207
+ "frt" => "ft",
208
+ "frway" => "fwy",
209
+ "frwy" => "fwy",
210
+ "garden" => "gdn",
211
+ "gardens" => "gdns",
212
+ "gardn" => "gdn",
213
+ "gateway" => "gtwy",
214
+ "gatewy" => "gtwy",
215
+ "gatway" => "gtwy",
216
+ "glen" => "gln",
217
+ "glens" => "glns",
218
+ "grden" => "gdn",
219
+ "grdn" => "gdn",
220
+ "grdns" => "gdns",
221
+ "green" => "grn",
222
+ "greens" => "grns",
223
+ "grov" => "grv",
224
+ "grove" => "grv",
225
+ "groves" => "grvs",
226
+ "gtway" => "gtwy",
227
+ "harb" => "hbr",
228
+ "harbor" => "hbr",
229
+ "harbors" => "hbrs",
230
+ "harbr" => "hbr",
231
+ "haven" => "hvn",
232
+ "havn" => "hvn",
233
+ "height" => "hts",
234
+ "heights" => "hts",
235
+ "hgts" => "hts",
236
+ "highway" => "hwy",
237
+ "highwy" => "hwy",
238
+ "hill" => "hl",
239
+ "hills" => "hls",
240
+ "hiway" => "hwy",
241
+ "hiwy" => "hwy",
242
+ "hllw" => "holw",
243
+ "hollow" => "holw",
244
+ "hollows" => "holw",
245
+ "holws" => "holw",
246
+ "hrbor" => "hbr",
247
+ "ht" => "hts",
248
+ "hway" => "hwy",
249
+ "inlet" => "inlt",
250
+ "island" => "is",
251
+ "islands" => "iss",
252
+ "isles" => "isle",
253
+ "islnd" => "is",
254
+ "islnds" => "iss",
255
+ "jction" => "jct",
256
+ "jctn" => "jct",
257
+ "jctns" => "jcts",
258
+ "junction" => "jct",
259
+ "junctions" => "jcts",
260
+ "junctn" => "jct",
261
+ "juncton" => "jct",
262
+ "key" => "ky",
263
+ "keys" => "kys",
264
+ "knol" => "knl",
265
+ "knoll" => "knl",
266
+ "knolls" => "knls",
267
+ "la" => "ln",
268
+ "lake" => "lk",
269
+ "lakes" => "lks",
270
+ "landing" => "lndg",
271
+ "lane" => "ln",
272
+ "lanes" => "ln",
273
+ "ldge" => "ldg",
274
+ "light" => "lgt",
275
+ "lights" => "lgts",
276
+ "lndng" => "lndg",
277
+ "loaf" => "lf",
278
+ "lock" => "lck",
279
+ "locks" => "lcks",
280
+ "lodg" => "ldg",
281
+ "lodge" => "ldg",
282
+ "loops" => "loop",
283
+ "manor" => "mnr",
284
+ "manors" => "mnrs",
285
+ "meadow" => "mdw",
286
+ "meadows" => "mdws",
287
+ "medows" => "mdws",
288
+ "mill" => "ml",
289
+ "mills" => "mls",
290
+ "mission" => "msn",
291
+ "missn" => "msn",
292
+ "mnt" => "mt",
293
+ "mntain" => "mtn",
294
+ "mntn" => "mtn",
295
+ "mntns" => "mtns",
296
+ "motorway" => "mtwy",
297
+ "mount" => "mt",
298
+ "mountain" => "mtn",
299
+ "mountains" => "mtns",
300
+ "mountin" => "mtn",
301
+ "mssn" => "msn",
302
+ "mtin" => "mtn",
303
+ "neck" => "nck",
304
+ "orchard" => "orch",
305
+ "orchrd" => "orch",
306
+ "overpass" => "opas",
307
+ "ovl" => "oval",
308
+ "parks" => "park",
309
+ "parkway" => "pkwy",
310
+ "parkways" => "pkwy",
311
+ "parkwy" => "pkwy",
312
+ "passage" => "psge",
313
+ "paths" => "path",
314
+ "pikes" => "pike",
315
+ "pine" => "pne",
316
+ "pines" => "pnes",
317
+ "pk" => "park",
318
+ "pkway" => "pkwy",
319
+ "pkwys" => "pkwy",
320
+ "pky" => "pkwy",
321
+ "place" => "pl",
322
+ "plain" => "pln",
323
+ "plaines" => "plns",
324
+ "plains" => "plns",
325
+ "plaza" => "plz",
326
+ "plza" => "plz",
327
+ "point" => "pt",
328
+ "points" => "pts",
329
+ "port" => "prt",
330
+ "ports" => "prts",
331
+ "prairie" => "pr",
332
+ "prarie" => "pr",
333
+ "prk" => "park",
334
+ "prr" => "pr",
335
+ "rad" => "radl",
336
+ "radial" => "radl",
337
+ "radiel" => "radl",
338
+ "ranch" => "rnch",
339
+ "ranches" => "rnch",
340
+ "rapid" => "rpd",
341
+ "rapids" => "rpds",
342
+ "rdge" => "rdg",
343
+ "rest" => "rst",
344
+ "ridge" => "rdg",
345
+ "ridges" => "rdgs",
346
+ "river" => "riv",
347
+ "rivr" => "riv",
348
+ "rnchs" => "rnch",
349
+ "road" => "rd",
350
+ "roads" => "rds",
351
+ "route" => "rte",
352
+ "rvr" => "riv",
353
+ "shoal" => "shl",
354
+ "shoals" => "shls",
355
+ "shoar" => "shr",
356
+ "shoars" => "shrs",
357
+ "shore" => "shr",
358
+ "shores" => "shrs",
359
+ "skyway" => "skwy",
360
+ "spng" => "spg",
361
+ "spngs" => "spgs",
362
+ "spring" => "spg",
363
+ "springs" => "spgs",
364
+ "sprng" => "spg",
365
+ "sprngs" => "spgs",
366
+ "spurs" => "spur",
367
+ "sqr" => "sq",
368
+ "sqre" => "sq",
369
+ "sqrs" => "sqs",
370
+ "squ" => "sq",
371
+ "square" => "sq",
372
+ "squares" => "sqs",
373
+ "station" => "sta",
374
+ "statn" => "sta",
375
+ "stn" => "sta",
376
+ "str" => "st",
377
+ "strav" => "stra",
378
+ "strave" => "stra",
379
+ "straven" => "stra",
380
+ "stravenue" => "stra",
381
+ "stravn" => "stra",
382
+ "stream" => "strm",
383
+ "street" => "st",
384
+ "streets" => "sts",
385
+ "streme" => "strm",
386
+ "strt" => "st",
387
+ "strvn" => "stra",
388
+ "strvnue" => "stra",
389
+ "sumit" => "smt",
390
+ "sumitt" => "smt",
391
+ "summit" => "smt",
392
+ "terr" => "ter",
393
+ "terrace" => "ter",
394
+ "throughway" => "trwy",
395
+ "tpk" => "tpke",
396
+ "tr" => "trl",
397
+ "trace" => "trce",
398
+ "traces" => "trce",
399
+ "track" => "trak",
400
+ "tracks" => "trak",
401
+ "trafficway" => "trfy",
402
+ "trail" => "trl",
403
+ "trails" => "trl",
404
+ "trk" => "trak",
405
+ "trks" => "trak",
406
+ "trls" => "trl",
407
+ "trnpk" => "tpke",
408
+ "trpk" => "tpke",
409
+ "tunel" => "tunl",
410
+ "tunls" => "tunl",
411
+ "tunnel" => "tunl",
412
+ "tunnels" => "tunl",
413
+ "tunnl" => "tunl",
414
+ "turnpike" => "tpke",
415
+ "turnpk" => "tpke",
416
+ "underpass" => "upas",
417
+ "union" => "un",
418
+ "unions" => "uns",
419
+ "valley" => "vly",
420
+ "valleys" => "vlys",
421
+ "vally" => "vly",
422
+ "vdct" => "via",
423
+ "viadct" => "via",
424
+ "viaduct" => "via",
425
+ "view" => "vw",
426
+ "views" => "vws",
427
+ "vill" => "vlg",
428
+ "villag" => "vlg",
429
+ "village" => "vlg",
430
+ "villages" => "vlgs",
431
+ "ville" => "vl",
432
+ "villg" => "vlg",
433
+ "villiage" => "vlg",
434
+ "vist" => "vis",
435
+ "vista" => "vis",
436
+ "vlly" => "vly",
437
+ "vst" => "vis",
438
+ "vsta" => "vis",
439
+ "walks" => "walk",
440
+ "well" => "wl",
441
+ "wells" => "wls",
442
+ "wy" => "way"
443
+ }
444
+
445
+ @@street_type_list = {}
446
+ @@street_type.to_a.each{|item| @@street_type_list[item[0]] = true; @@street_type_list[item[1]] = true}
447
+
448
+ @@state_code = {
449
+ "alabama" => "AL",
450
+ "alaska" => "AK",
451
+ "american samoa" => "AS",
452
+ "arizona" => "AZ",
453
+ "arkansas" => "AR",
454
+ "california" => "CA",
455
+ "colorado" => "CO",
456
+ "connecticut" => "CT",
457
+ "delaware" => "DE",
458
+ "district of columbia" => "DC",
459
+ "federated states of micronesia" => "FM",
460
+ "florida" => "FL",
461
+ "georgia" => "GA",
462
+ "guam" => "GU",
463
+ "hawaii" => "HI",
464
+ "idaho" => "ID",
465
+ "illinois" => "IL",
466
+ "indiana" => "IN",
467
+ "iowa" => "IA",
468
+ "kansas" => "KS",
469
+ "kentucky" => "KY",
470
+ "louisiana" => "LA",
471
+ "maine" => "ME",
472
+ "marshall islands" => "MH",
473
+ "maryland" => "MD",
474
+ "massachusetts" => "MA",
475
+ "michigan" => "MI",
476
+ "minnesota" => "MN",
477
+ "mississippi" => "MS",
478
+ "missouri" => "MO",
479
+ "montana" => "MT",
480
+ "nebraska" => "NE",
481
+ "nevada" => "NV",
482
+ "new hampshire" => "NH",
483
+ "new jersey" => "NJ",
484
+ "new mexico" => "NM",
485
+ "new york" => "NY",
486
+ "north carolina" => "NC",
487
+ "north dakota" => "ND",
488
+ "northern mariana islands" => "MP",
489
+ "ohio" => "OH",
490
+ "oklahoma" => "OK",
491
+ "oregon" => "OR",
492
+ "palau" => "PW",
493
+ "pennsylvania" => "PA",
494
+ "puerto rico" => "PR",
495
+ "rhode island" => "RI",
496
+ "south carolina" => "SC",
497
+ "south dakota" => "SD",
498
+ "tennessee" => "TN",
499
+ "texas" => "TX",
500
+ "utah" => "UT",
501
+ "vermont" => "VT",
502
+ "virgin islands" => "VI",
503
+ "virginia" => "VA",
504
+ "washington" => "WA",
505
+ "west virginia" => "WV",
506
+ "wisconsin" => "WI",
507
+ "wyoming" => "WY"
508
+ }
509
+
510
+ @@state_name = @@state_code.invert
511
+
512
+ @@state_fips = {
513
+ "01" => "AL",
514
+ "02" => "AK",
515
+ "04" => "AZ",
516
+ "05" => "AR",
517
+ "06" => "CA",
518
+ "08" => "CO",
519
+ "09" => "CT",
520
+ "10" => "DE",
521
+ "11" => "DC",
522
+ "12" => "FL",
523
+ "13" => "GA",
524
+ "15" => "HI",
525
+ "16" => "ID",
526
+ "17" => "IL",
527
+ "18" => "IN",
528
+ "19" => "IA",
529
+ "20" => "KS",
530
+ "21" => "KY",
531
+ "22" => "LA",
532
+ "23" => "ME",
533
+ "24" => "MD",
534
+ "25" => "MA",
535
+ "26" => "MI",
536
+ "27" => "MN",
537
+ "28" => "MS",
538
+ "29" => "MO",
539
+ "30" => "MT",
540
+ "31" => "NE",
541
+ "32" => "NV",
542
+ "33" => "NH",
543
+ "34" => "NJ",
544
+ "35" => "NM",
545
+ "36" => "NY",
546
+ "37" => "NC",
547
+ "38" => "ND",
548
+ "39" => "OH",
549
+ "40" => "OK",
550
+ "41" => "OR",
551
+ "42" => "PA",
552
+ "44" => "RI",
553
+ "45" => "SC",
554
+ "46" => "SD",
555
+ "47" => "TN",
556
+ "48" => "TX",
557
+ "49" => "UT",
558
+ "50" => "VT",
559
+ "51" => "VA",
560
+ "53" => "WA",
561
+ "54" => "WV",
562
+ "55" => "WI",
563
+ "56" => "WY",
564
+ "72" => "PR",
565
+ "78" => "VI"
566
+ }
567
+
568
+ @@fips_state = @@state_fips.invert
569
+
570
# Regular-expression source fragments (plain strings). They are concatenated
# into the full address / intersection patterns and compiled with
# Regexp::IGNORECASE and Regexp::EXTENDED, so literal whitespace inside the
# fragments below is layout only and is ignored by the regexp engine.

# Every known street type, long form or abbreviation.
@@street_type_regexp = @@street_type_list.keys.join("|")
# House number, optionally hyphenated ("123", "123-45").
@@number_regexp = '\d+-?\d*'
# Fractional part of a house number ("1/2").
@@fraction_regexp = '\d+\/\d+'
# Every state name and two-letter code. Each alternative is escaped so that
# the space in multi-word names ("new york") is matched literally; unescaped
# whitespace would be dropped by Regexp::EXTENDED and the name could never match.
@@state_regexp = @@state_code.to_a.flatten.map { |s| Regexp.escape(s) }.join("|")
# Spelled-out directionals, then the abbreviations (longest first), each in
# dotted ("N.E.") and plain ("NE") form.
@@direct_regexp = @@directional.keys.join("|") + "|" +
  @@directional.values.sort { |a, b| b.length <=> a.length }.map { |x|
    f = x.gsub(/(\w)/, '\1.')
    [Regexp::quote(f), Regexp::quote(x)]
  }.join("|")
# Five-digit ZIP with optional +4 extension; two capture groups.
@@zip_regexp = '(\d{5})(?:-(\d{4}))?'
# Words/symbols that join the two streets of an intersection.
@@corner_regexp = '(?:\band\b|\bat\b|&|\@)'
# Secondary unit: captures the designator (or none for "#") and the unit id.
@@unit_regexp = '(?:(su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|apartment|ro*m|fl|unit|box)\W+|\#\W*)([\w-]+)'
# Street portion; contributes eleven capture groups:
#   1-2   directional used as the street name + type ("South St")
#   3     leading directional (prefix)
#   4-6   street name, type, trailing directional
#   7-8   street name ending in a digit + attached directional ("Route 66N")
#   9-11  street name, optional type, optional trailing directional
@@street_regexp =
  '(?:
    (?:(' + @@direct_regexp + ')\W+
       (' + @@street_type_regexp + ')\b)
    |
    (?:(' + @@direct_regexp + ')\W+)?
    (?:
      ([^,]+)
      (?:[^\w,]+(' + @@street_type_regexp + ')\b)
      (?:[^\w,]+(' + @@direct_regexp + ')\b)?
      |
      ([^,]*\d)
      (' + @@direct_regexp + ')\b
      |
      ([^,]+?)
      (?:[^\w,]+(' + @@street_type_regexp + ')\b)?
      (?:[^\w,]+(' + @@direct_regexp + ')\b)?
    )
  )'
# Optional "city, state" followed by an optional ZIP. The stray "$" that used
# to precede the state alternation (left over from Perl variable
# interpolation) anchored the first alternative to end-of-line and made it
# unmatchable; it has been removed.
@@place_regexp =
  '(?:
    ([^\d,]+?)\W+
    (' + @@state_regexp + ')\W*
  )?
  (?:' + @@zip_regexp + ')?'

# Complete single-line address: number, optional fraction, street,
# optional unit, optional place.
@@address_regexp =
  '\A\W*
  (' + @@number_regexp + ')\W*
  (?:' + @@fraction_regexp + '\W*)?' +
  @@street_regexp + '\W+
  (?:' + @@unit_regexp + '\W+)?' +
  @@place_regexp +
  '\W*\Z'
612
+
613
+ class << self
614
+ =begin rdoc
615
+
616
+ parses either an address or intersection and returns an instance of
617
+ StreetAddress::US::Address or nil if the location cannot be parsed
618
+
619
+ ====example
620
+ StreetAddress::US.parse('1600 Pennsylvania Ave Washington, DC 20006')
621
+ or:
622
+ StreetAddress::US.parse('Hollywood & Vine, Los Angeles, CA')
623
+
624
+ =end
625
# Parses +location+ as an intersection when it contains a corner word
# ("and", "at", "&", "@"), otherwise as a street address.
#
# Returns whatever parse_intersection / parse_address returns: a
# StreetAddress::US::Address, or nil when the text cannot be parsed.
def parse(location)
  corner = Regexp.new(@@corner_regexp, Regexp::IGNORECASE)
  # The match is only used as a yes/no test, so it is evaluated once and
  # its result is not kept.
  if corner.match(location)
    parse_intersection(location)
  else
    parse_address(location)
  end
end
634
+
635
+ =begin rdoc
636
+
637
+     parses only an intersection and returns an instance of
638
+ StreetAddress::US::Address or nil if the intersection cannot be parsed
639
+
640
+ ====example
641
+ address = StreetAddress::US.parse('Hollywood & Vine, Los Angeles, CA')
642
+ assert address.intersection?
643
+
644
+ =end
645
# Parses an intersection such as "Hollywood Blvd and Vine St, Los Angeles, CA".
#
# Returns a normalized StreetAddress::US::Address with street2 populated,
# or nil when +inter+ does not look like an intersection.
#
# Capture-group layout (the street fragment contributes 11 groups per street):
#   1-11   first street, 12-22 second street,
#   23 city, 24 state, 25 ZIP, 26 ZIP+4 extension.
# Within each street only one alternative matches, so the unused groups are
# nil and the "||" chains below pick whichever alternative fired:
#   +0/+1  directional used as street name + type ("South St")
#   +2     prefix
#   +3..+5 name, type, suffix
#   +6/+7  name ending in a digit + attached directional ("Route 66N")
#   +8..+10 name, optional type, optional suffix
def parse_intersection(inter)
  regex = Regexp.new(
    '\A\W*' + @@street_regexp + '\W*?
    \s+' + @@corner_regexp + '\s+' +
    @@street_regexp + '\W+' +
    @@place_regexp + '\W*\Z', Regexp::IGNORECASE | Regexp::EXTENDED)
  match = regex.match(inter)
  return if match.nil?

  normalize_address(
    StreetAddress::US::Address.new(
      :street          => match[4] || match[7] || match[9] || match[1],
      :street_type     => match[5] || match[2],
      :suffix          => match[6] || match[8] || match[11],
      :prefix          => match[3],
      :street2         => match[15] || match[18] || match[20] || match[12],
      :street_type2    => match[16] || match[13],
      :suffix2         => match[17] || match[19] || match[22],
      :prefix2         => match[14],
      :city            => match[23],
      :state           => match[24],
      :postal_code     => match[25],
      :postal_code_ext => match[26]
    )
  )
end
670
+
671
+ =begin rdoc
672
+
673
+     parses only an address and returns an instance of
674
+ StreetAddress::US::Address or nil if the address cannot be parsed
675
+
676
+ ====example
677
+ address = StreetAddress::US.parse('1600 Pennsylvania Ave Washington, DC 20006')
678
+ assert !address.intersection?
679
+
680
+ =end
681
# Parses a single-line street address such as
# "1600 Pennsylvania Ave Washington, DC 20006".
#
# Returns a normalized StreetAddress::US::Address, or nil when +addr+ does
# not match the address pattern.
#
# Capture-group layout of the address pattern:
#   1      house number
#   2-3    directional used as the street name + type ("100 South St")
#   4      prefix
#   5-7    street name, type, suffix
#   8-9    street name ending in a digit + attached directional ("Route 66N")
#   10-12  street name, optional type, optional suffix
#   13-14  unit designator, unit id
#   15 city, 16 state, 17 ZIP, 18 ZIP+4 extension
# Only one street alternative matches, so the other groups are nil.
def parse_address(addr)
  regex = Regexp.new(@@address_regexp, Regexp::IGNORECASE | Regexp::EXTENDED)
  match = regex.match(addr)
  return if match.nil?

  normalize_address(
    StreetAddress::US::Address.new(
      :number          => match[1],
      # Group 8 was previously ignored, which left the street nil for
      # addresses like "123 Route 66N".
      :street          => match[5] || match[8] || match[10] || match[2],
      :street_type     => match[6] || match[3],
      :unit            => match[14],
      :unit_prefix     => match[13],
      :suffix          => match[7] || match[9] || match[12],
      :prefix          => match[4],
      :city            => match[15],
      :state           => match[16],
      :postal_code     => match[17],
      :postal_code_ext => match[18]
    )
  )
end
702
+
703
+       def state_name #:nodoc:
704
+         @@state_name # @@state_code inverted: two-letter code => lowercase full name
705
+       end
706
+
707
+       def fips_state #:nodoc:
708
+         @@fips_state # @@state_fips inverted: two-letter code => FIPS code string
709
+       end
710
+
711
+ private
712
# Normalizes the parsed fields of +addr+ in place and returns +addr+.
#
# State, street types and directionals are replaced through the
# normalize_* helpers; street names and the city get each lowercase
# word-initial letter capitalized; the unit designator is capitalized.
# Fields that are nil are left untouched.
def normalize_address(addr)
  state = addr.state
  addr.state = normalize_state(state) unless state.nil?

  [:street_type, :street_type2].each do |field|
    raw = addr.send(field)
    addr.send("#{field}=", normalize_street_type(raw)) unless raw.nil?
  end

  [:prefix, :suffix, :prefix2, :suffix2].each do |field|
    raw = addr.send(field)
    addr.send("#{field}=", normalize_directional(raw)) unless raw.nil?
  end

  # These strings are edited in place rather than reassigned.
  [addr.street, addr.street2, addr.city].each do |text|
    next if text.nil?
    text.gsub!(/\b([a-z])/) { |initial| initial.capitalize }
  end

  designator = addr.unit_prefix
  designator.capitalize! unless designator.nil?

  addr
end
726
+
727
# Returns the two-letter code for +state+.
#
# Inputs shorter than three characters are taken to be a code already and
# are simply upcased; anything longer is looked up (case-insensitively) in
# the state-name table, yielding nil when the name is not in it.
def normalize_state(state)
  return state.upcase unless state.length >= 3

  @@state_code[state.downcase]
end
734
+
735
# Returns the capitalized standard abbreviation for +s_type+
# (e.g. "Street" => "St", "plaza" => "Plz").
#
# A value that is already a standard abbreviation, or that is not a known
# street type at all, is returned capitalized but otherwise unchanged.
# The argument itself is no longer modified (the previous downcase! altered
# the caller's string and raised on frozen strings).
def normalize_street_type(s_type)
  key = s_type.downcase
  # @@street_type maps long forms to abbreviations; abbreviations themselves
  # appear only in @@street_type_list, hence the "|| key" fallback.
  key = @@street_type[key] || key if @@street_type_list[key]
  key.capitalize
end
740
+
741
# Returns the upper-case abbreviation for a directional
# ("north" => "N", "n.e." => "NE", "sw" => "SW").
#
# Punctuation is stripped first: the parser also accepts dotted forms such
# as "N.E." and "N.", which previously either missed the lookup table
# (yielding nil) or kept their trailing dot.
# Returns nil for a spelled-out word that is not a known directional.
def normalize_directional(dir)
  letters = dir.gsub(/\W/, '')
  if letters.length < 3
    letters.upcase
  else
    @@directional[letters.downcase]
  end
end
748
+ end
749
+
750
+ =begin rdoc
751
+
752
+     This is the class returned by StreetAddress::US::parse, StreetAddress::US::parse_address
753
+ and StreetAddress::US::parse_intersection. If an instance represents an intersection
754
+ the attribute street2 will be populated.
755
+
756
+ =end
757
# Value object holding the parsed parts of an address or intersection.
# Any part missing from the input is nil. An intersection is recognised by
# a non-nil street2.
class Address
  attr_accessor :number, :street, :street_type, :unit, :unit_prefix, :suffix, :prefix,
                :city, :state, :postal_code, :postal_code_ext,
                :street2, :street_type2, :suffix2, :prefix2

  # args:: Hash of attribute name => value; each key must be one of the
  #        accessors above (an unknown key raises NoMethodError).
  def initialize(args)
    args.keys.each { |attrib| send("#{attrib}=", args[attrib]) }
  end

  # FIPS code string for this address's state, or nil if the state is unknown.
  def fip
    StreetAddress::US.fips_state[state]
  end

  # Full state name with each word capitalized ("New York",
  # "District of Columbia"), or nil if the state is unknown.
  def state_name
    s_name = StreetAddress::US.state_name[state]
    return if s_name.nil?

    # The table stores names in lower case; String#capitalize alone would
    # turn "new york" into "New york".
    s_name.split(' ').map { |word| word == 'of' ? word : word.capitalize }.join(' ')
  end

  # True when this object represents an intersection of two streets.
  def intersection?
    !street2.nil?
  end

  # Single-line rendering, e.g.
  #   "2730 S Veitch St Apt 207, Arlington, VA 22206"
  #   "Hollywood Blvd and Vine St, Los Angeles, CA"
  # Missing parts are skipped. The trailing directional is emitted directly
  # after the street type and before the unit.
  def to_s
    line =
      if intersection?
        first = [prefix, street, street_type, suffix].compact.join(" ")
        second = [prefix2, street2, street_type2, suffix2].compact.join(" ")
        first + " and " + second
      else
        [number, prefix, street, street_type, suffix, unit_label].compact.join(" ")
      end
    line += ", " + city unless city.nil?
    line += ", " + state unless state.nil?
    unless postal_code.nil?
      line += " " + postal_code
      line += "-" + postal_code_ext unless postal_code_ext.nil?
    end
    line
  end

  private

  # "Apt 207" when a designator is known, "#207" when only the unit id is,
  # nil when there is no unit.
  def unit_label
    return if unit.nil?

    unit_prefix.nil? ? "#" + unit : unit_prefix + " " + unit
  end
end
812
+ end
813
+ end
@@ -0,0 +1,136 @@
1
+ require 'test/unit/testcase'
2
+ require File.dirname(__FILE__) + '/../lib/street_address'
3
+
4
+
5
# Exercises StreetAddress::US.parse on street addresses and intersections.
#
# Expectations are passed to assert_equal as (expected, actual) so failure
# messages label the two values correctly, and nil expectations use
# assert_nil.
class StreetAddressUs < Test::Unit::TestCase
  def setup
    @addr1 = "2730 S Veitch St Apt 207, Arlington, VA 22206"
    @addr2 = "44 Canal Center Plaza Suite 500, Alexandria, VA 22314"
    @addr3 = "1600 Pennsylvania Ave Washington DC"
    @addr4 = "1005 Gravenstein Hwy N, Sebastopol CA 95472"
    @addr5 = "PO BOX 450, Chicago IL 60657"
    @addr6 = "2730 S Veitch St #207, Arlington, VA 22206"

    @int1 = "Hollywood & Vine, Los Angeles, CA"
    @int2 = "Hollywood Blvd and Vine St, Los Angeles, CA"
    @int3 = "Mission Street at Valencia Street, San Francisco, CA"
  end

  def test_parse
    # A bare corner word is not an intersection.
    assert_nil StreetAddress::US.parse("&")
    assert_nil StreetAddress::US.parse(" and ")

    assert_parsed(@addr1,
                  :number => "2730", :postal_code => "22206", :prefix => "S",
                  :state => "VA", :street => "Veitch", :street_type => "St",
                  :unit => "207", :unit_prefix => "Apt", :city => "Arlington",
                  :prefix2 => nil, :postal_code_ext => nil)

    assert_parsed(@addr2,
                  :number => "44", :postal_code => "22314", :prefix => nil,
                  :state => "VA", :street => "Canal Center", :street_type => "Plz",
                  :unit => "500", :unit_prefix => "Suite", :city => "Alexandria",
                  :street2 => nil)

    assert_parsed(@addr3,
                  :number => "1600", :postal_code => nil, :prefix => nil,
                  :state => "DC", :street => "Pennsylvania", :street_type => "Ave",
                  :unit => nil, :unit_prefix => nil, :city => "Washington",
                  :street2 => nil)

    assert_parsed(@addr4,
                  :number => "1005", :postal_code => "95472", :prefix => nil,
                  :state => "CA", :street => "Gravenstein", :street_type => "Hwy",
                  :unit => nil, :unit_prefix => nil, :city => "Sebastopol",
                  :street2 => nil, :suffix => "N")

    # PO boxes have no house number and are rejected.
    assert_nil StreetAddress::US.parse(@addr5)

    assert_parsed(@addr6, :unit => "207")

    assert_parsed(@int1,
                  :city => "Los Angeles", :state => "CA", :street => "Hollywood",
                  :street2 => "Vine", :number => nil, :postal_code => nil,
                  :intersection? => true)

    assert_parsed(@int2,
                  :city => "Los Angeles", :state => "CA", :street => "Hollywood",
                  :street2 => "Vine", :number => nil, :postal_code => nil,
                  :intersection? => true, :street_type => "Blvd", :street_type2 => "St")

    assert_parsed(@int3,
                  :city => "San Francisco", :state => "CA", :street => "Mission",
                  :street2 => "Valencia", :number => nil, :postal_code => nil,
                  :intersection? => true, :street_type => "St", :street_type2 => "St")

    parseable = ["1600 Pennsylvania Ave Washington DC 20006",
                 "1600 Pennsylvania Ave #400, Washington, DC, 20006",
                 "1600 Pennsylvania Ave Washington, DC",
                 "1600 Pennsylvania Ave #400 Washington DC",
                 "1600 Pennsylvania Ave, 20006",
                 "1600 Pennsylvania Ave #400, 20006",
                 "1600 Pennsylvania Ave 20006",
                 "1600 Pennsylvania Ave #400 20006",
                 "Hollywood & Vine, Los Angeles, CA",
                 "Hollywood Blvd and Vine St, Los Angeles, CA",
                 "Mission Street at Valencia Street, San Francisco, CA",
                 "Hollywood & Vine, Los Angeles, CA, 90028",
                 "Hollywood Blvd and Vine St, Los Angeles, CA, 90028",
                 "Mission Street at Valencia Street, San Francisco, CA, 90028"]

    parseable.each do |location|
      assert_not_nil(StreetAddress::US.parse(location), location + " could not be parsed")
    end
  end

  private

  # Parses +location+ and checks every attribute => value pair in +expected+
  # against the resulting object.
  def assert_parsed(location, expected)
    addr = StreetAddress::US.parse(location)
    assert_not_nil(addr, location + " could not be parsed")

    expected.each do |attribute, value|
      actual = addr.send(attribute)
      message = "#{attribute} of #{location.inspect}"
      if value.nil?
        assert_nil(actual, message)
      else
        assert_equal(value, actual, message)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: StreetAddress
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-07-02 00:00:00 -04:00
8
+ summary: Ruby port of the perl module Geo::StreetAddress::US to parse one line street addresses
9
+ require_paths:
10
+ - lib
11
+ email: ryand-ruby@zenspider.com
12
+ homepage: http://streetaddress.rubyforge.org
13
+ rubyforge_project: streetaddress
14
+ description: Parses one line addresses and returns a normalized address object. This is a near direct port of the perl module Geo::StreetAddress::US originally written by Schuyler D. Erle. For more information see http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Ryan Davis
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - Rakefile
36
+ - lib/street_address.rb
37
+ - test/test_street_address.rb
38
+ test_files:
39
+ - test/test_street_address.rb
40
+ rdoc_options:
41
+ - --main
42
+ - README.txt
43
+ extra_rdoc_files:
44
+ - History.txt
45
+ - Manifest.txt
46
+ - README.txt
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ requirements: []
52
+
53
+ dependencies:
54
+ - !ruby/object:Gem::Dependency
55
+ name: hoe
56
+ version_requirement:
57
+ version_requirements: !ruby/object:Gem::Version::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 1.2.1
62
+ version: