street_sweeper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ require 'bundler/setup'
2
+ Bundler.setup
3
+
4
+ require 'street_sweeper' # and any other gems you need
@@ -0,0 +1,637 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe StreetSweeper do
6
# Fixture table for fully specified addresses: each key is a raw input string
# and each value maps attribute names to the value the parsed address is
# expected to expose. Merged with INFORMAL into ADDRESSES further down.
# Key order inside each entry is kept exactly as authored.
NORMAL = {
  '1005 Gravenstein Hwy 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy N, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Highway North, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', suffix: 'N', street_type: 'Hwy'
  },
  '1005 N Gravenstein Highway, Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', street_type: 'Hwy', prefix: 'N'
  },
  '1005 N Gravenstein Highway, Suite 500, Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', street_type: 'Hwy', prefix: 'N',
    unit_prefix: 'Ste', unit: '500'
  },
  '1005 N Gravenstein Hwy Suite 500 Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', street_type: 'Hwy', prefix: 'N',
    unit_prefix: 'Ste', unit: '500'
  },
  '1005 N Gravenstein Highway, Sebastopol, CA, 95472' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', postal_code: '95472',
    street_type: 'Hwy', prefix: 'N'
  },
  '1005 N Gravenstein Highway Sebastopol CA 95472' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', postal_code: '95472',
    street_type: 'Hwy', prefix: 'N'
  },
  '1005 Gravenstein Hwy N Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy N, Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, N Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'North Sebastopol', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, North Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'North Sebastopol', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', street_type: 'Hwy'
  },
  '115 Broadway San Francisco CA' => {
    street_type: nil, number: '115', street: 'Broadway', state: 'CA', city: 'San Francisco'
  },
  '7800 Mill Station Rd, Sebastopol, CA 95472' => {
    number: '7800', street: 'Mill Station', state: 'CA', city: 'Sebastopol', postal_code: '95472',
    street_type: 'Rd'
  },
  '7800 Mill Station Rd Sebastopol CA 95472' => {
    number: '7800', street: 'Mill Station', state: 'CA', city: 'Sebastopol', postal_code: '95472',
    street_type: 'Rd'
  },
  '1005 State Highway 116 Sebastopol CA 95472' => {
    number: '1005', street: 'State Highway 116', state: 'CA', city: 'Sebastopol', postal_code: '95472',
    street_type: 'Hwy'
  },
  '1600 Pennsylvania Ave. Washington DC' => {
    number: '1600', street: 'Pennsylvania', state: 'DC', city: 'Washington', street_type: 'Ave'
  },
  '1600 Pennsylvania Avenue Washington DC' => {
    number: '1600', street: 'Pennsylvania', state: 'DC', city: 'Washington', street_type: 'Ave'
  },
  '48S 400E, Salt Lake City UT' => {
    street_type: nil, number: '48', street: '400', state: 'UT', city: 'Salt Lake City', suffix: 'E',
    prefix: 'S'
  },
  '550 S 400 E #3206, Salt Lake City UT 84111' => {
    number: '550', street: '400', state: 'UT', unit: '3206', postal_code: '84111', city: 'Salt Lake City',
    suffix: 'E', street_type: nil, unit_prefix: '#', prefix: 'S'
  },
  '6641 N 2200 W Apt D304 Park City, UT 84098' => {
    number: '6641', street: '2200', state: 'UT', unit: 'D304', postal_code: '84098', city: 'Park City',
    suffix: 'W', street_type: nil, unit_prefix: 'Apt', prefix: 'N'
  },
  '100 South St, Philadelphia, PA' => {
    number: '100', street: 'South', state: 'PA', city: 'Philadelphia', street_type: 'St'
  },
  '100 S.E. Washington Ave, Minneapolis, MN' => {
    number: '100', street: 'Washington', state: 'MN', city: 'Minneapolis', street_type: 'Ave', prefix: 'SE'
  },
  '3813 1/2 Some Road, Los Angeles, CA' => {
    number: '3813', street: 'Some', state: 'CA', city: 'Los Angeles', street_type: 'Rd'
  },
  '1 First St, e San Jose CA' => { # lower case city direction
    number: '1', street: '1st', state: 'CA', city: 'East San Jose', street_type: 'St'
  },
  '123 Maple Rochester, New York' => { # space in state name
    street_type: nil, number: '123', street: 'Maple', state: 'NY', city: 'Rochester'
  },
  '233 S Wacker Dr 60606-6306' => { # zip+4 with hyphen
    number: '233', street: 'Wacker', postal_code: '60606', postal_code_ext: '6306', street_type: 'Dr',
    prefix: 'S'
  },
  '233 S Wacker Dr 606066306' => { # zip+4 without hyphen
    number: '233', street: 'Wacker', postal_code: '60606', postal_code_ext: '6306', street_type: 'Dr',
    prefix: 'S'
  },
  'lt42 99 Some Road, Some City LA' => { # no space before sec_unit_num
    unit: '42', city: 'Some City', number: '99', street: 'Some', unit_prefix: 'Lot', street_type: 'Rd',
    state: 'LA'
  },
  '36401 County Road 43, Eaton, CO 80615' => { # numbered County Road
    city: 'Eaton', postal_code: '80615', number: '36401', street: 'County Road 43', street_type: 'Rd',
    state: 'CO'
  },
  "14168 W RIVER RD \nCOLUMBIA STATION, OH 44028-9430" => { # overlapping street type and road name
    city: 'Columbia Station', postal_code: '44028', postal_code_ext: '9430', number: '14168',
    street: 'River', street_type: 'Rd', state: 'OH', prefix: 'W'
  },
  "555 E LAKE AVE \nBELLEFONTAINE, OH 43311-2509" => {
    city: 'Bellefontaine', postal_code: '43311', postal_code_ext: '2509', number: '555',
    street: 'Lake', street_type: 'Ave', state: 'OH', prefix: 'E'
  },
  "19600 N PARK BLVD \nSHAKER HEIGHTS, OH 44122-1825" => {
    city: 'Shaker Heights', postal_code: '44122', postal_code_ext: '1825', number: '19600',
    street: 'Park', street_type: 'Blvd', state: 'OH', prefix: 'N'
  },
  '1234 COUNTY HWY 60E, Town, CO 12345' => {
    city: 'Town', postal_code: '12345', number: '1234', street: 'County Hwy 60', suffix: 'E',
    street_type: 'Hwy', state: 'CO'
  },
  "'45 Quaker Ave, Ste 105'" => { # RT#73397
    number: '45', street: 'Quaker', street_type: 'Ave', unit: '105', unit_prefix: 'Ste'
  },

  # Pre-existing tests from ruby library. These entries also spell out
  # attributes that are expected to be nil.
  '2730 S Veitch St Apt 207, Arlington, VA 22206' => {
    number: '2730', postal_code: '22206', prefix: 'S', state: 'VA', street: 'Veitch', street_type: 'St',
    unit: '207', unit_prefix: 'Apt', city: 'Arlington', prefix2: nil, postal_code_ext: nil
  },
  '44 Canal Center Plaza Suite 500, Alexandria, VA 22314' => {
    number: '44', postal_code: '22314', prefix: nil, state: 'VA', street: 'Canal Center',
    street_type: 'Plz', unit: '500', unit_prefix: 'Ste', city: 'Alexandria', street2: nil
  },
  '1600 Pennsylvania Ave NW Washington DC' => {
    number: '1600', postal_code: nil, prefix: nil, state: 'DC', street: 'Pennsylvania',
    street_type: 'Ave', unit: nil, unit_prefix: nil, city: 'Washington', street2: nil, suffix: 'NW'
  },
  '1005 Gravenstein Hwy N, Sebastopol CA 95472' => {
    number: '1005', postal_code: '95472', prefix: nil, state: 'CA', street: 'Gravenstein',
    street_type: 'Hwy', unit: nil, unit_prefix: nil, city: 'Sebastopol', street2: nil, suffix: 'N'
  },
  '2730 S Veitch St #207, Arlington, VA 22206' => {
    number: '2730', street: 'Veitch', street_type: 'St', unit: '207', unit_prefix: '#', suffix: nil,
    prefix: 'S', city: 'Arlington', state: 'VA', postal_code: '22206', postal_code_ext: nil
  },

  'P.O. BOX 293930, ARLINGTON, VA 22206' => {
    number: nil, street: 'PO Box 293930', street_type: nil, unit: nil, unit_prefix: nil, suffix: nil,
    prefix: nil, city: 'Arlington', state: 'VA', postal_code: '22206', postal_code_ext: nil
  }
}.freeze
389
+
390
# Fixture table for street intersections: each key is a raw input string and
# each value lists the attributes expected for both streets (`street` /
# `street_type` and `street2` / `street_type2`) plus city and state.
# Merged into ALL_ADDRESSES further down.
PARSEABLE_INTERSECTIONS = {
  'Mission & Valencia San Francisco CA' => {
    street_type: nil, street_type2: nil, street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },

  'Mission & Valencia, San Francisco CA' => {
    street_type: nil, street_type2: nil, street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Mission St and Valencia St San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Hollywood Blvd and Vine St Los Angeles, CA' => {
    street_type: 'Blvd', street_type2: 'St', street: 'Hollywood', state: 'CA', city: 'Los Angeles',
    street2: 'Vine'
  },
  'Mission St & Valencia St San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Mission and Valencia Sts San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Mission & Valencia Sts. San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Mission & Valencia Streets San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  },
  'Mission Avenue and Valencia Street San Francisco CA' => {
    street_type: 'Ave', street_type2: 'St', street: 'Mission', state: 'CA', city: 'San Francisco',
    street2: 'Valencia'
  }
}.freeze
465
+
466
# Fixture table for loosely formatted addresses (leading unit, missing city or
# state, surrounding punctuation): each key is a raw input string and each
# value maps attribute names to the expected parsed value.
# Merged with NORMAL into ADDRESSES further down.
INFORMAL = {
  '#42 233 S Wacker Dr 60606' => {
    number: '233', postal_code: '60606', prefix: 'S', state: nil, street: 'Wacker', street_type: 'Dr',
    unit: '42', unit_prefix: '#', city: nil, street2: nil, suffix: nil
  },
  'Apt. 42, 233 S Wacker Dr 60606' => {
    number: '233', postal_code: '60606', prefix: 'S', state: nil, street: 'Wacker', street_type: 'Dr',
    unit: '42', unit_prefix: 'Apt', city: nil, street2: nil, suffix: nil
  },
  '2730 S Veitch St #207' => {
    number: '2730', street: 'Veitch', street_type: 'St', unit: '207', unit_prefix: '#', suffix: nil,
    prefix: 'S', city: nil, state: nil, postal_code: nil
  },
  '321 S. Washington' => { # RT#82146
    street_type: nil, prefix: 'S', street: 'Washington', number: '321'
  },
  '233 S Wacker Dr lobby 60606' => { # unnumbered secondary unit type
    number: '233', street: 'Wacker', postal_code: '60606', street_type: 'Dr', prefix: 'S',
    unit_prefix: 'Lbby'
  },
  '(233 S Wacker Dr lobby 60606)' => { # surrounding punctuation
    number: '233', street: 'Wacker', postal_code: '60606', street_type: 'Dr', prefix: 'S',
    unit_prefix: 'Lbby'
  }
}.freeze
528
+
529
# Inputs for which StreetSweeper.parse_address is expected to return nil
# (see the '#parse_address' examples).
EXPECTED_FAILURES = [
  '1005 N Gravenstein Hwy Sebastopol',
  '1005 N Gravenstein Hwy Sebastopol CZ',
  'Gravenstein Hwy 95472',
  'E1005 Gravenstein Hwy 95472'
].freeze
535
+
536
# Names of the reader methods checked on every parsed address. The fixture
# hashes use the symbol form of these names as keys.
ACCESSIBLE_ATTRIBUTES = %w[
  number street street_type unit unit_prefix suffix
  prefix city state postal_code postal_code_ext
  street2 street_type2 suffix2 prefix2
].freeze
539
+
540
# Every single-address fixture (formal and informal).
# Frozen like the tables it is built from, so examples cannot mutate shared fixtures.
ADDRESSES = NORMAL.merge(INFORMAL).freeze
# Every fixture the parser is expected to handle, intersections included.
ALL_ADDRESSES = ADDRESSES.merge(PARSEABLE_INTERSECTIONS).freeze
542
+
543
# Each input listed in EXPECTED_FAILURES must be rejected by
# StreetSweeper.parse_address, i.e. yield nil.
# NOTE(review): '#' conventionally marks instance methods, while parse_address
# is invoked on StreetSweeper itself; '.parse_address' would match the usual
# RSpec naming. Label left unchanged to keep example names stable.
describe '#parse_address' do
  EXPECTED_FAILURES.each do |address|
    context address do
      it 'returns nil' do
        expect(StreetSweeper.parse_address(address)).to be_nil
      end
    end
  end
end
552
+
553
# Exercises StreetSweeper.parse against every fixture in ALL_ADDRESSES, once
# with default options and once with `informal: true`, plus a few targeted
# examples for individual options.
describe '#parse' do
  # Generates one example per fixture address and per attribute with a
  # non-blank expected value. `informal` is forwarded to
  # compare_expected_to_actual, which passes it to StreetSweeper.parse.
  shared_examples 'fixture address attributes' do |informal|
    ALL_ADDRESSES.each_pair do |address, expected|
      context address do
        ACCESSIBLE_ATTRIBUTES.each do |attribute|
          expected_value = expected[attribute.to_sym]
          # NOTE(review): attributes whose expected value is nil or '' are
          # skipped, so explicit `key: nil` expectations in the fixtures are
          # never asserted here. Confirm whether that is intended.
          next if expected_value.to_s == ''

          it "#{attribute}: #{expected_value}" do
            compare_expected_to_actual(expected, address, attribute, informal)
          end
        end
      end
    end
  end

  context 'with default settings' do
    include_examples 'fixture address attributes', false
  end

  context 'avoid_redundant_street_type: true' do
    context '36401 County Road 43, Eaton, CO 80615' do
      it 'returns street_type: nil' do
        address = '36401 County Road 43, Eaton, CO 80615'
        expected_results = {
          number: '36401',
          street: 'County Road 43',
          city: 'Eaton',
          state: 'CO',
          postal_code: '80615',
          street_type: nil
        }
        parsed_address = StreetSweeper.parse(address, avoid_redundant_street_type: true)
        compare_expected_to_actual_hash(expected_results, parsed_address.to_h, address)
      end
    end
  end

  context 'informal: true' do
    include_examples 'fixture address attributes', true

    it 'receiving a valid standard address, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St, Arlington, VA 222064444', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
      expect(parsed.city).to eq('Arlington')
      expect(parsed.state).to eq('VA')
      expect(parsed.postal_code).to eq('22206')
      expect(parsed.postal_code_ext).to eq('4444')
    end

    it 'receiving a valid informal address, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
    end

    it 'receiving a valid informal address with trailing words, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St in the south of arlington', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
    end
  end
end
626
+
627
# Asserts that every key/value pair in `expected` is matched by `actual`.
# Keys present only in `actual` are ignored.
#
# @param expected [Hash] attribute name => expected value
# @param actual [Hash] parsed attributes, looked up with the same keys
# @param address [String] the raw input; used only in failure messages
def compare_expected_to_actual_hash(expected, actual, address)
  expected.each_pair do |key, value|
    # Several keys are checked inside a single example, so the failure message
    # spells out which address and attribute disagreed.
    failure = "#{address.inspect}: expected #{key} to be #{value.inspect}, got #{actual[key].inspect}"
    expect(actual[key]).to eq(value), failure
  end
end
632
+
633
# Parses `address` with StreetSweeper.parse and asserts that one attribute of
# the result equals the fixture value.
#
# @param expected [Hash] fixture hash keyed by attribute symbol
# @param address [String] raw address to parse
# @param attribute [String] name of the reader to call on the parsed address
# @param informal [Boolean] forwarded as the `informal:` option
# @param redundant [Boolean] forwarded as the `avoid_redundant_street_type:` option
def compare_expected_to_actual(expected, address, attribute, informal = false, redundant = false)
  parsed = StreetSweeper.parse(address, informal: informal, avoid_redundant_street_type: redundant)
  # Fail with a readable message rather than a NoMethodError on nil, in case
  # the parser returns nil for this input.
  expect(parsed).not_to be_nil, "StreetSweeper.parse returned nil for #{address.inspect}"
  # public_send: only the public readers are part of what is being tested.
  expect(parsed.public_send(attribute)).to eq(expected[attribute.to_sym])
end
637
+ end