street_sweeper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ require 'bundler/setup'
2
+ Bundler.setup
3
+
4
+ require 'street_sweeper' # and any other gems you need
@@ -0,0 +1,637 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe StreetSweeper do
6
# Fixture table of fully specified street addresses.
#
# Each key is a raw input string and each value lists, by attribute name, the
# value the spec expects the parser to report for that input. The table is
# merged into ADDRESSES / ALL_ADDRESSES and iterated by the '#parse' examples.
# Those loops skip any attribute whose expected value is nil or blank, so the
# explicit `nil` entries below document intent but are not asserted there.
#
# Both the table and every nested expectation hash are frozen: the fixtures
# are shared by all generated examples, so no example may mutate them.
NORMAL = {
  '1005 Gravenstein Hwy 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy N, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Highway North, 95472' => {
    number: '1005', street: 'Gravenstein', postal_code: '95472', suffix: 'N', street_type: 'Hwy'
  },
  '1005 N Gravenstein Highway, Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    street_type: 'Hwy', prefix: 'N'
  },
  '1005 N Gravenstein Highway, Suite 500, Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    street_type: 'Hwy', prefix: 'N', unit_prefix: 'Ste', unit: '500'
  },
  '1005 N Gravenstein Hwy Suite 500 Sebastopol, CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    street_type: 'Hwy', prefix: 'N', unit_prefix: 'Ste', unit: '500'
  },
  '1005 N Gravenstein Highway, Sebastopol, CA, 95472' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    postal_code: '95472', street_type: 'Hwy', prefix: 'N'
  },
  '1005 N Gravenstein Highway Sebastopol CA 95472' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    postal_code: '95472', street_type: 'Hwy', prefix: 'N'
  },
  '1005 Gravenstein Hwy N Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy N, Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol',
    suffix: 'N', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, N Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'North Sebastopol', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy, North Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'North Sebastopol', street_type: 'Hwy'
  },
  '1005 Gravenstein Hwy Sebastopol CA' => {
    number: '1005', street: 'Gravenstein', state: 'CA', city: 'Sebastopol', street_type: 'Hwy'
  },
  '115 Broadway San Francisco CA' => {
    street_type: nil, number: '115', street: 'Broadway', state: 'CA', city: 'San Francisco'
  },
  '7800 Mill Station Rd, Sebastopol, CA 95472' => {
    number: '7800', street: 'Mill Station', state: 'CA', city: 'Sebastopol',
    postal_code: '95472', street_type: 'Rd'
  },
  '7800 Mill Station Rd Sebastopol CA 95472' => {
    number: '7800', street: 'Mill Station', state: 'CA', city: 'Sebastopol',
    postal_code: '95472', street_type: 'Rd'
  },
  '1005 State Highway 116 Sebastopol CA 95472' => {
    number: '1005', street: 'State Highway 116', state: 'CA', city: 'Sebastopol',
    postal_code: '95472', street_type: 'Hwy'
  },
  '1600 Pennsylvania Ave. Washington DC' => {
    number: '1600', street: 'Pennsylvania', state: 'DC', city: 'Washington', street_type: 'Ave'
  },
  '1600 Pennsylvania Avenue Washington DC' => {
    number: '1600', street: 'Pennsylvania', state: 'DC', city: 'Washington', street_type: 'Ave'
  },
  '48S 400E, Salt Lake City UT' => {
    street_type: nil, number: '48', street: '400', state: 'UT', city: 'Salt Lake City',
    suffix: 'E', prefix: 'S'
  },
  '550 S 400 E #3206, Salt Lake City UT 84111' => {
    number: '550', street: '400', state: 'UT', unit: '3206', postal_code: '84111',
    city: 'Salt Lake City', suffix: 'E', street_type: nil, unit_prefix: '#', prefix: 'S'
  },
  '6641 N 2200 W Apt D304 Park City, UT 84098' => {
    number: '6641', street: '2200', state: 'UT', unit: 'D304', postal_code: '84098',
    city: 'Park City', suffix: 'W', street_type: nil, unit_prefix: 'Apt', prefix: 'N'
  },
  '100 South St, Philadelphia, PA' => {
    number: '100', street: 'South', state: 'PA', city: 'Philadelphia', street_type: 'St'
  },
  '100 S.E. Washington Ave, Minneapolis, MN' => {
    number: '100', street: 'Washington', state: 'MN', city: 'Minneapolis',
    street_type: 'Ave', prefix: 'SE'
  },
  '3813 1/2 Some Road, Los Angeles, CA' => {
    number: '3813', street: 'Some', state: 'CA', city: 'Los Angeles', street_type: 'Rd'
  },
  # lower case city direction
  '1 First St, e San Jose CA' => {
    number: '1', street: '1st', state: 'CA', city: 'East San Jose', street_type: 'St'
  },
  # space in state name
  '123 Maple Rochester, New York' => {
    street_type: nil, number: '123', street: 'Maple', state: 'NY', city: 'Rochester'
  },
  # zip+4 with hyphen
  '233 S Wacker Dr 60606-6306' => {
    number: '233', street: 'Wacker', postal_code: '60606', postal_code_ext: '6306',
    street_type: 'Dr', prefix: 'S'
  },
  # zip+4 without hyphen
  '233 S Wacker Dr 606066306' => {
    number: '233', street: 'Wacker', postal_code: '60606', postal_code_ext: '6306',
    street_type: 'Dr', prefix: 'S'
  },
  # no space before sec_unit_num
  'lt42 99 Some Road, Some City LA' => {
    unit: '42', city: 'Some City', number: '99', street: 'Some',
    unit_prefix: 'Lot', street_type: 'Rd', state: 'LA'
  },
  # numbered County Road
  '36401 County Road 43, Eaton, CO 80615' => {
    city: 'Eaton', postal_code: '80615', number: '36401', street: 'County Road 43',
    street_type: 'Rd', state: 'CO'
  },
  # overlapping street type and road name
  "14168 W RIVER RD \nCOLUMBIA STATION, OH 44028-9430" => {
    city: 'Columbia Station', postal_code: '44028', postal_code_ext: '9430', number: '14168',
    street: 'River', street_type: 'Rd', state: 'OH', prefix: 'W'
  },
  "555 E LAKE AVE \nBELLEFONTAINE, OH 43311-2509" => {
    city: 'Bellefontaine', postal_code: '43311', postal_code_ext: '2509', number: '555',
    street: 'Lake', street_type: 'Ave', state: 'OH', prefix: 'E'
  },
  "19600 N PARK BLVD \nSHAKER HEIGHTS, OH 44122-1825" => {
    city: 'Shaker Heights', postal_code: '44122', postal_code_ext: '1825', number: '19600',
    street: 'Park', street_type: 'Blvd', state: 'OH', prefix: 'N'
  },
  '1234 COUNTY HWY 60E, Town, CO 12345' => {
    city: 'Town', postal_code: '12345', number: '1234', street: 'County Hwy 60',
    suffix: 'E', street_type: 'Hwy', state: 'CO'
  },
  # RT#73397
  "'45 Quaker Ave, Ste 105'" => {
    number: '45', street: 'Quaker', street_type: 'Ave', unit: '105', unit_prefix: 'Ste'
  },
  ##### pre-existing tests from ruby library
  '2730 S Veitch St Apt 207, Arlington, VA 22206' => {
    number: '2730', postal_code: '22206', prefix: 'S', state: 'VA', street: 'Veitch',
    street_type: 'St', unit: '207', unit_prefix: 'Apt', city: 'Arlington',
    prefix2: nil, postal_code_ext: nil
  },
  '44 Canal Center Plaza Suite 500, Alexandria, VA 22314' => {
    number: '44', postal_code: '22314', prefix: nil, state: 'VA', street: 'Canal Center',
    street_type: 'Plz', unit: '500', unit_prefix: 'Ste', city: 'Alexandria', street2: nil
  },
  '1600 Pennsylvania Ave NW Washington DC' => {
    number: '1600', postal_code: nil, prefix: nil, state: 'DC', street: 'Pennsylvania',
    street_type: 'Ave', unit: nil, unit_prefix: nil, city: 'Washington',
    street2: nil, suffix: 'NW'
  },
  '1005 Gravenstein Hwy N, Sebastopol CA 95472' => {
    number: '1005', postal_code: '95472', prefix: nil, state: 'CA', street: 'Gravenstein',
    street_type: 'Hwy', unit: nil, unit_prefix: nil, city: 'Sebastopol',
    street2: nil, suffix: 'N'
  },
  '2730 S Veitch St #207, Arlington, VA 22206' => {
    number: '2730', street: 'Veitch', street_type: 'St', unit: '207', unit_prefix: '#',
    suffix: nil, prefix: 'S', city: 'Arlington', state: 'VA',
    postal_code: '22206', postal_code_ext: nil
  },

  'P.O. BOX 293930, ARLINGTON, VA 22206' => {
    number: nil, street: 'PO Box 293930', street_type: nil, unit: nil, unit_prefix: nil,
    suffix: nil, prefix: nil, city: 'Arlington', state: 'VA',
    postal_code: '22206', postal_code_ext: nil
  }
}.each_value(&:freeze).freeze
389
+
390
# Fixture table of street intersections ("A & B", "A and B") mapped to the
# attributes the spec expects for both streets (street/street_type and
# street2/street_type2) plus city and state.
#
# Merged into ALL_ADDRESSES and iterated by the '#parse' examples, which skip
# attributes whose expected value is nil or blank.
#
# The table and every nested expectation hash are frozen because the fixtures
# are shared by all generated examples.
PARSEABLE_INTERSECTIONS = {
  'Mission & Valencia San Francisco CA' => {
    street_type: nil, street_type2: nil, street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },

  'Mission & Valencia, San Francisco CA' => {
    street_type: nil, street_type2: nil, street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Mission St and Valencia St San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Hollywood Blvd and Vine St Los Angeles, CA' => {
    street_type: 'Blvd', street_type2: 'St', street: 'Hollywood', state: 'CA',
    city: 'Los Angeles', street2: 'Vine'
  },
  'Mission St & Valencia St San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Mission and Valencia Sts San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Mission & Valencia Sts. San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Mission & Valencia Streets San Francisco CA' => {
    street_type: 'St', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  },
  'Mission Avenue and Valencia Street San Francisco CA' => {
    street_type: 'Ave', street_type2: 'St', street: 'Mission', state: 'CA',
    city: 'San Francisco', street2: 'Valencia'
  }
}.each_value(&:freeze).freeze
465
+
466
# Fixture table of loosely formatted addresses (leading unit, missing
# city/state, unnumbered unit type, surrounding punctuation) mapped to the
# attributes the spec expects for them.
#
# Merged into ADDRESSES / ALL_ADDRESSES and iterated by the '#parse' examples,
# which skip attributes whose expected value is nil or blank.
#
# The table and every nested expectation hash are frozen because the fixtures
# are shared by all generated examples.
INFORMAL = {
  '#42 233 S Wacker Dr 60606' => {
    number: '233', postal_code: '60606', prefix: 'S', state: nil, street: 'Wacker',
    street_type: 'Dr', unit: '42', unit_prefix: '#', city: nil, street2: nil, suffix: nil
  },
  'Apt. 42, 233 S Wacker Dr 60606' => {
    number: '233', postal_code: '60606', prefix: 'S', state: nil, street: 'Wacker',
    street_type: 'Dr', unit: '42', unit_prefix: 'Apt', city: nil, street2: nil, suffix: nil
  },
  '2730 S Veitch St #207' => {
    number: '2730', street: 'Veitch', street_type: 'St', unit: '207', unit_prefix: '#',
    suffix: nil, prefix: 'S', city: nil, state: nil, postal_code: nil
  },
  # RT#82146
  '321 S. Washington' => {
    street_type: nil, prefix: 'S', street: 'Washington', number: '321'
  },
  # unnumbered secondary unit type
  '233 S Wacker Dr lobby 60606' => {
    number: '233', street: 'Wacker', postal_code: '60606', street_type: 'Dr',
    prefix: 'S', unit_prefix: 'Lbby'
  },
  # surrounding punctuation
  '(233 S Wacker Dr lobby 60606)' => {
    number: '233', street: 'Wacker', postal_code: '60606', street_type: 'Dr',
    prefix: 'S', unit_prefix: 'Lbby'
  }
}.each_value(&:freeze).freeze
528
+
529
# Inputs that the '#parse_address' examples require
# StreetSweeper.parse_address to reject by returning nil.
EXPECTED_FAILURES = [
  # presumably rejected for lacking a state or postal code -- confirm against the parser
  '1005 N Gravenstein Hwy Sebastopol',
  # presumably rejected because "CZ" is not a known state -- confirm against the parser
  '1005 N Gravenstein Hwy Sebastopol CZ',
  # presumably rejected for lacking a house number -- confirm against the parser
  'Gravenstein Hwy 95472',
  # presumably rejected for the letter fused onto the house number -- confirm against the parser
  'E1005 Gravenstein Hwy 95472'
].freeze
535
+
536
# Names of the attributes that the '#parse' examples read from a parsed
# address. Kept as strings: the examples interpolate them into example
# descriptions and convert them with #to_sym to look up fixture expectations.
ACCESSIBLE_ATTRIBUTES = %w[
  number street street_type unit unit_prefix suffix prefix
  city state postal_code postal_code_ext
  street2 street_type2 suffix2 prefix2
].freeze
539
+
540
# Combined fixture tables. Hash#merge returns a new, unfrozen hash, so the
# results are frozen explicitly to match the other constants in this group.

# Every single-address fixture: NORMAL followed by INFORMAL.
ADDRESSES = NORMAL.merge(INFORMAL).freeze

# Every fixture the '#parse' examples iterate: ADDRESSES followed by the
# intersection fixtures.
ALL_ADDRESSES = ADDRESSES.merge(PARSEABLE_INTERSECTIONS).freeze
542
+
543
# Generates one example per entry of EXPECTED_FAILURES, each asserting that
# StreetSweeper.parse_address returns nil for that input.
describe '#parse_address' do
  EXPECTED_FAILURES.each do |unparseable|
    context unparseable.to_s do
      it 'returns nil' do
        outcome = StreetSweeper.parse_address(unparseable)
        expect(outcome).to be_nil
      end
    end
  end
end
552
+
553
# Examples for StreetSweeper.parse under three configurations: default
# options, avoid_redundant_street_type: true, and informal: true.
#
# The two table-driven contexts create one nested context per fixture address
# and one example per attribute. Attributes whose expected value is nil or
# blank get no example, so nil expectations in the fixtures are not asserted
# by these loops.
describe '#parse' do
  context 'with default settings' do
    ALL_ADDRESSES.each do |address, expected|
      context address.to_s do
        asserted = ACCESSIBLE_ATTRIBUTES.reject { |name| expected[name.to_sym].to_s == '' }
        asserted.each do |attribute|
          it "#{attribute}: #{expected[attribute.to_sym]}" do
            compare_expected_to_actual(expected, address, attribute)
          end
        end
      end
    end
  end

  context 'avoid_redundant_street_type: true' do
    context '36401 County Road 43, Eaton, CO 80615' do
      it 'returns street_type: nil' do
        address = '36401 County Road 43, Eaton, CO 80615'
        result = StreetSweeper.parse(address, avoid_redundant_street_type: true)
        wanted = {
          number: '36401',
          street: 'County Road 43',
          city: 'Eaton',
          state: 'CO',
          postal_code: '80615',
          street_type: nil
        }
        compare_expected_to_actual_hash(wanted, result.to_h, address)
      end
    end
  end

  context 'informal: true' do
    # Same table-driven examples as the default context, parsed informally.
    ALL_ADDRESSES.each do |address, expected|
      context address.to_s do
        asserted = ACCESSIBLE_ATTRIBUTES.reject { |name| expected[name.to_sym].to_s == '' }
        asserted.each do |attribute|
          it "#{attribute}: #{expected[attribute.to_sym]}" do
            compare_expected_to_actual(expected, address, attribute, true)
          end
        end
      end
    end

    it 'receiving a valid standard address, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St, Arlington, VA 222064444', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
      expect(parsed.city).to eq('Arlington')
      expect(parsed.state).to eq('VA')
      expect(parsed.postal_code).to eq('22206')
      expect(parsed.postal_code_ext).to eq('4444')
    end

    it 'receiving a valid informal address, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
    end

    it 'receiving a valid informal address with trailing words, parses the input' do
      parsed = StreetSweeper.parse('2730 S Veitch St in the south of arlington', informal: true)
      expect(parsed.number).to eq('2730')
      expect(parsed.prefix).to eq('S')
      expect(parsed.street).to eq('Veitch')
      expect(parsed.street_type).to eq('St')
    end
  end
end
626
+
627
# Asserts that +actual+ holds the expected value for every key of +expected+.
# Keys present only in +actual+ are ignored; a key missing from +actual+ reads
# as nil, so it satisfies an expected nil.
#
# @param expected [Hash] attribute name => expected value
# @param actual [Hash] attributes produced by the parser
# @param address [String] the input that was parsed; used only to label a
#   failure so the report names the address and attribute that mismatched
# @return [Hash] +expected+ (the receiver of each_pair)
def compare_expected_to_actual_hash(expected, actual, address)
  expected.each_pair do |key, expected_value|
    actual_value = actual[key]
    expect(actual_value).to(
      eq(expected_value),
      "#{address.inspect}: expected #{key} to eq #{expected_value.inspect}, got #{actual_value.inspect}"
    )
  end
end
632
+
633
# Parses +address+ and asserts that one attribute of the result equals the
# fixture's expectation for it.
#
# @param expected [Hash] attribute name (Symbol) => expected value
# @param address [String] input handed to StreetSweeper.parse
# @param attribute [String, Symbol] name of the reader to call on the result
# @param informal [Boolean] forwarded as the informal: option
# @param redundant [Boolean] forwarded as avoid_redundant_street_type:
def compare_expected_to_actual(expected, address, attribute, informal = false, redundant = false)
  parsed = StreetSweeper.parse(address, informal: informal, avoid_redundant_street_type: redundant)
  # public_send respects method visibility, so only the public reader API of
  # the parsed address can satisfy an expectation.
  expect(parsed.public_send(attribute)).to eq(expected[attribute.to_sym])
end
637
+ end