addrforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- addrforge/__init__.py +22 -0
- addrforge/cli.py +71 -0
- addrforge/data.py +626 -0
- addrforge/errors.py +9 -0
- addrforge/format.py +71 -0
- addrforge/lines.py +26 -0
- addrforge/models.py +96 -0
- addrforge/normalize.py +77 -0
- addrforge/parser.py +371 -0
- addrforge/patterns.py +138 -0
- addrforge/validation.py +255 -0
- addrforge-0.1.0.dist-info/METADATA +141 -0
- addrforge-0.1.0.dist-info/RECORD +17 -0
- addrforge-0.1.0.dist-info/WHEEL +5 -0
- addrforge-0.1.0.dist-info/entry_points.txt +2 -0
- addrforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- addrforge-0.1.0.dist-info/top_level.txt +1 -0
addrforge/data.py
ADDED
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
"""Normalization data for US address parsing."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, FrozenSet
|
|
4
|
+
|
|
5
|
+
# Closed set of address-kind labels: street, route, PO box, and a catch-all
# "unknown".
ADDRESS_KINDS: FrozenSet[str] = frozenset(
    ("street", "route", "po_box", "unknown")
)
|
|
6
|
+
|
|
7
|
+
# Two-letter codes treated as a US "state": the 50 states, DC, and the
# AA/AE/AP armed-forces mail codes. No territory codes are listed here.
STATE_ABBREVIATIONS: FrozenSet[str] = frozenset(
    (
        "AL AK AZ AR CA CO CT DE FL GA "
        "HI ID IL IN IA KS KY LA ME MD "
        "MA MI MN MS MO MT NE NV NH NJ "
        "NM NY NC ND OH OK OR PA RI SC "
        "SD TN TX UT VT VA WA WV WI WY "
        "DC AA AE AP"
    ).split()
)
|
|
65
|
+
|
|
66
|
+
# Uppercase country names and aliases for countries other than the US.
# Some entries contain a space ("NEW ZEALAND", "UNITED KINGDOM").
# NOTE(review): presumably used to flag non-US input; confirm against callers.
NON_US_COUNTRY_TERMS: FrozenSet[str] = frozenset(
    [
        "AUSTRALIA", "BRASIL", "BRAZIL", "CANADA",
        "DEUTSCHLAND", "FRANCE", "GERMANY", "INDIA",
        "IRELAND", "ITALY", "JAPAN", "MEXICO",
        "NETHERLANDS", "NEW ZEALAND", "SPAIN",
        "UNITED KINGDOM", "UK",
    ]
)
|
|
87
|
+
|
|
88
|
+
# Two-letter postal abbreviations for Canadian provinces and territories.
CANADIAN_PROVINCES: FrozenSet[str] = frozenset(
    "AB BC MB NB NL NS NT NU ON PE QC SK YT".split()
)
|
|
105
|
+
|
|
106
|
+
# Maps every accepted directional spelling (abbreviation or full word) to its
# one- or two-letter abbreviation. Each abbreviation is inserted immediately
# before its spelled-out form, so iteration order is N, NORTH, S, SOUTH, ...
DIRECTIONALS: Dict[str, str] = {
    spelling: abbreviation
    for abbreviation, full_word in (
        ("N", "NORTH"),
        ("S", "SOUTH"),
        ("E", "EAST"),
        ("W", "WEST"),
        ("NE", "NORTHEAST"),
        ("NW", "NORTHWEST"),
        ("SE", "SOUTHEAST"),
        ("SW", "SOUTHWEST"),
    )
    for spelling in (abbreviation, full_word)
}
|
|
124
|
+
|
|
125
|
+
# Maps secondary-unit spellings to a single designator. Each row below is
# (designator, spellings that map to it); rows and spellings are listed in
# the mapping's insertion order. "NO" and "NUMBER" both map to "UNIT", which
# is why "UNIT" appears as a designator in two separate rows.
UNIT_TYPES: Dict[str, str] = {
    spelling: designator
    for designator, spellings in (
        ("APT", ("APT", "APARTMENT")),
        ("BSMT", ("BSMT", "BASEMENT")),
        ("BLDG", ("BLDG", "BUILDING")),
        ("DEPT", ("DEPT", "DEPARTMENT")),
        ("FL", ("FL", "FLOOR")),
        ("FRNT", ("FRNT", "FRONT")),
        ("HNGR", ("HNGR", "HANGAR")),
        ("KEY", ("KEY",)),
        ("LBBY", ("LBBY", "LOBBY")),
        ("LOT", ("LOT",)),
        ("LOWR", ("LOWR", "LOWER")),
        ("UNIT", ("NO", "NUMBER")),
        ("OFC", ("OFC", "OFFICE")),
        ("PH", ("PH", "PENTHOUSE")),
        ("PIER", ("PIER",)),
        ("REAR", ("REAR",)),
        ("RM", ("RM", "ROOM")),
        ("SIDE", ("SIDE",)),
        ("SLIP", ("SLIP",)),
        ("SPC", ("SPC", "SPACE")),
        ("STOP", ("STOP",)),
        ("STE", ("STE", "SUITE")),
        ("TRLR", ("TRLR", "TRAILER")),
        ("UNIT", ("UNIT",)),
        ("UPPR", ("UPPR", "UPPER")),
    )
    for spelling in spellings
}
|
|
169
|
+
|
|
170
|
+
# Maps street-suffix spellings (full words, common variants and misspellings,
# and the abbreviations themselves) to a single abbreviation.
# NOTE(review): the entries look like USPS Publication 28 Appendix C1 --
# confirm before extending.
# Each key must appear exactly once: a repeated key in a dict literal is
# silently overwritten by the later entry, which can hide a conflicting value.
STREET_SUFFIXES: Dict[str, str] = {
    "ALLEE": "ALY",
    "ALLEY": "ALY",
    "ALY": "ALY",
    "AV": "AVE",
    "AVE": "AVE",
    "AVEN": "AVE",
    "AVENU": "AVE",
    "AVENUE": "AVE",
    "BAYOO": "BYU",
    "BAYOU": "BYU",
    "BCH": "BCH",
    "BEACH": "BCH",
    "BEND": "BND",
    "BND": "BND",
    "BLVD": "BLVD",
    "BOULEVARD": "BLVD",
    "BOT": "BTM",
    "BOTTM": "BTM",
    "BOTTOM": "BTM",
    "BR": "BR",
    "BRANCH": "BR",
    "BRG": "BRG",
    "BRIDGE": "BRG",
    "BROOK": "BRK",
    "BROOKS": "BRKS",
    "BRK": "BRK",
    "BRKS": "BRKS",
    "BURG": "BG",
    "BURGS": "BGS",
    "BYPA": "BYP",
    "BYPAS": "BYP",
    "BYP": "BYP",
    "BYPASS": "BYP",
    "BYU": "BYU",
    "CAMP": "CP",
    "CANYN": "CYN",
    "CANYON": "CYN",
    "CAPE": "CPE",
    "CAUSEWAY": "CSWY",
    "CAUSWAY": "CSWY",
    "CEN": "CTR",
    "CENT": "CTR",
    "CENTER": "CTR",
    "CENTERS": "CTRS",
    "CENTR": "CTR",
    "CENTRE": "CTR",
    "CIRC": "CIR",
    "CIRCL": "CIR",
    "CIR": "CIR",
    "CIRCLE": "CIR",
    "CIRCLES": "CIRS",
    "CIRS": "CIRS",
    "CLIFF": "CLF",
    "CLIFFS": "CLFS",
    "CLF": "CLF",
    "CLFS": "CLFS",
    "CLB": "CLB",
    "CLUB": "CLB",
    "CMP": "CP",
    "COMMON": "CMN",
    "COMMONS": "CMNS",
    "CORNER": "COR",
    "CORNERS": "CORS",
    "COR": "COR",
    "CORS": "CORS",
    "COURSE": "CRSE",
    "CRSE": "CRSE",
    "CP": "CP",
    "CPE": "CPE",
    "CRES": "CRES",
    "CRESCENT": "CRES",
    "CREST": "CRST",
    "CROSSING": "XING",
    "CROSSROAD": "XRD",
    "CROSSROADS": "XRDS",
    "CRST": "CRST",
    "CSWY": "CSWY",
    "CTR": "CTR",
    "CTRS": "CTRS",
    "CT": "CT",
    "COURT": "CT",
    "COURTS": "CTS",
    "CTS": "CTS",
    "CURVE": "CURV",
    "CURV": "CURV",
    "CV": "CV",
    "COVE": "CV",
    "COVES": "CVS",
    "CVS": "CVS",
    "CYN": "CYN",
    "DALE": "DL",
    "DAM": "DM",
    "DIV": "DV",
    "DIVIDE": "DV",
    "DL": "DL",
    "DM": "DM",
    "DR": "DR",
    "DRIV": "DR",
    "DRIVE": "DR",
    "DRIVES": "DRS",
    "DRS": "DRS",
    "DVD": "DV",
    "DV": "DV",
    "EST": "EST",
    "ESTATE": "EST",
    "ESTATES": "ESTS",
    "ESTS": "ESTS",
    "EXP": "EXPY",
    "EXPR": "EXPY",
    "EXPRESS": "EXPY",
    "EXPY": "EXPY",
    "EXPRESSWAY": "EXPY",
    "EXT": "EXT",
    "EXTENSION": "EXT",
    "EXTENSIONS": "EXTS",
    "EXTN": "EXT",
    "EXTNSN": "EXT",
    "EXTS": "EXTS",
    "FALL": "FALL",
    "FALLS": "FLS",
    "FERRY": "FRY",
    "FIELD": "FLD",
    "FIELDS": "FLDS",
    "FLAT": "FLT",
    "FLATS": "FLTS",
    "FLD": "FLD",
    "FLDS": "FLDS",
    "FLS": "FLS",
    "FLT": "FLT",
    "FLTS": "FLTS",
    "FORD": "FRD",
    "FORDS": "FRDS",
    "FOREST": "FRST",
    "FORESTS": "FRST",
    "FORG": "FRG",
    "FORGE": "FRG",
    "FORGES": "FRGS",
    "FORK": "FRK",
    "FORKS": "FRKS",
    "FORT": "FT",
    "FRD": "FRD",
    "FRDS": "FRDS",
    "FRG": "FRG",
    "FRGS": "FRGS",
    "FRK": "FRK",
    "FRKS": "FRKS",
    "FRRY": "FRY",
    "FRST": "FRST",
    "FT": "FT",
    "FRY": "FRY",
    "FREEWAY": "FWY",
    "FWY": "FWY",
    "GARDEN": "GDN",
    "GARDENS": "GDNS",
    "GARDN": "GDN",
    "GATEWAY": "GTWY",
    "GATEWY": "GTWY",
    "GATWAY": "GTWY",
    "GDN": "GDN",
    "GDNS": "GDNS",
    "GTWAY": "GTWY",
    "GTWY": "GTWY",
    "GLEN": "GLN",
    "GLENS": "GLNS",
    "GLN": "GLN",
    "GLNS": "GLNS",
    "GREEN": "GRN",
    "GREENS": "GRNS",
    "GRN": "GRN",
    "GRNS": "GRNS",
    "GRV": "GRV",
    "GROVE": "GRV",
    "GROVES": "GRVS",
    "GRVS": "GRVS",
    "HARB": "HBR",
    "HARBOR": "HBR",
    "HARBORS": "HBRS",
    "HARBR": "HBR",
    "HAVEN": "HVN",
    "HBR": "HBR",
    "HBRS": "HBRS",
    "HEIGHT": "HTS",
    "HEIGHTS": "HTS",
    "HGTS": "HTS",
    "HILL": "HL",
    "HILLS": "HLS",
    "HL": "HL",
    "HLLW": "HOLW",
    "HLS": "HLS",
    "HOLLOW": "HOLW",
    "HOLLOWS": "HOLW",
    "HOLW": "HOLW",
    "HOLWS": "HOLW",
    "HIGHWAY": "HWY",
    "HIGHWY": "HWY",
    "HIWAY": "HWY",
    "HIWY": "HWY",
    "HT": "HTS",
    "HTS": "HTS",
    "HVN": "HVN",
    "HWAY": "HWY",
    "HWY": "HWY",
    "INLET": "INLT",
    "INLT": "INLT",
    "IS": "IS",
    "ISLAND": "IS",
    "ISLANDS": "ISS",
    "ISLE": "ISLE",
    "ISLES": "ISLE",
    "ISS": "ISS",
    "JCT": "JCT",
    "JCTION": "JCT",
    "JCTN": "JCT",
    "JCTNS": "JCTS",
    "JUNCTION": "JCT",
    "JUNCTIONS": "JCTS",
    "JCTS": "JCTS",
    "KEYS": "KYS",
    "KNOL": "KNL",
    "KNOLL": "KNL",
    "KNOLLS": "KNLS",
    "KNL": "KNL",
    "KNLS": "KNLS",
    "KY": "KY",
    "KYS": "KYS",
    "LA": "LN",
    "LANE": "LN",
    "LIGHT": "LGT",
    "LIGHTS": "LGTS",
    "LGT": "LGT",
    "LGTS": "LGTS",
    "LOAF": "LF",
    "LF": "LF",
    "LN": "LN",
    "LOCK": "LCK",
    "LOCKS": "LCKS",
    "LCK": "LCK",
    "LCKS": "LCKS",
    "LDG": "LDG",
    "LDGE": "LDG",
    "LODGE": "LDG",
    "LOOP": "LOOP",
    "LOOPS": "LOOP",
    "MALL": "MALL",
    "MANOR": "MNR",
    "MANORS": "MNRS",
    "MEADOW": "MDW",
    "MEADOWS": "MDWS",
    "MEDOWS": "MDWS",
    "MEWS": "MEWS",
    "MILL": "ML",
    "MILLS": "MLS",
    "MISSION": "MSN",
    "MISSN": "MSN",
    "ML": "ML",
    "MLS": "MLS",
    "MNR": "MNR",
    "MNRS": "MNRS",
    "MNT": "MT",
    "MNTN": "MTN",
    "MNTNS": "MTNS",
    "MOTORWAY": "MTWY",
    "MOUNT": "MT",
    "MOUNTAIN": "MTN",
    "MOUNTAINS": "MTNS",
    "MSN": "MSN",
    "MSSN": "MSN",
    "MT": "MT",
    "MTIN": "MTN",
    "MTN": "MTN",
    "MTNS": "MTNS",
    "MTWY": "MTWY",
    "NECK": "NCK",
    "NCK": "NCK",
    "ORCH": "ORCH",
    "ORCHARD": "ORCH",
    "ORCHRD": "ORCH",
    "OVERPASS": "OPAS",
    "OPAS": "OPAS",
    "PARK": "PARK",
    "PARKS": "PARK",
    "PARKWAY": "PKWY",
    "PARKWAYS": "PKWY",
    "PASS": "PASS",
    "PASSAGE": "PSGE",
    "PATH": "PATH",
    "PATHS": "PATH",
    "PIKE": "PIKE",
    "PIKES": "PIKE",
    "PINE": "PNE",
    "PINES": "PNES",
    "PKWY": "PKWY",
    "PKWYS": "PKWY",
    "PLACE": "PL",
    "PL": "PL",
    "PLAZA": "PLZ",
    "PLZA": "PLZ",
    "PLZ": "PLZ",
    "PNE": "PNE",
    "PNES": "PNES",
    "POINT": "PT",
    "POINTS": "PTS",
    "PORT": "PRT",
    "PORTS": "PRTS",
    "PRAIRIE": "PR",
    "PR": "PR",
    "PRARIE": "PR",
    "PRK": "PARK",
    "PRR": "PR",
    "PRT": "PRT",
    "PRTS": "PRTS",
    "PT": "PT",
    "PTS": "PTS",
    "RAD": "RADL",
    "RADIEL": "RADL",
    "RADIAL": "RADL",
    "RADL": "RADL",
    "RAMP": "RAMP",
    "RANCH": "RNCH",
    "RANCHES": "RNCH",
    "RAPID": "RPD",
    "RAPIDS": "RPDS",
    "RD": "RD",
    "ROAD": "RD",
    "ROADS": "RDS",
    "RDS": "RDS",
    "REST": "RST",
    "RIDGE": "RDG",
    "RIDGES": "RDGS",
    "RDG": "RDG",
    "RDGS": "RDGS",
    "RIVER": "RIV",
    "RIV": "RIV",
    "RIVR": "RIV",
    "RNCH": "RNCH",
    "RNCHS": "RNCH",
    "ROW": "ROW",
    "RPD": "RPD",
    "RPDS": "RPDS",
    "RST": "RST",
    "RVR": "RIV",
    "RUN": "RUN",
    "RUE": "RUE",
    "SHL": "SHL",
    "SHLS": "SHLS",
    "SHOAL": "SHL",
    "SHOALS": "SHLS",
    "SHOAR": "SHR",
    "SHOARS": "SHRS",
    "SHORE": "SHR",
    "SHORES": "SHRS",
    "SHR": "SHR",
    "SHRS": "SHRS",
    "SKYWAY": "SKWY",
    "SKWY": "SKWY",
    "SMT": "SMT",
    "SPG": "SPG",
    "SPGS": "SPGS",
    "SPRING": "SPG",
    "SPRINGS": "SPGS",
    "SPRNG": "SPG",
    "SPRNGS": "SPGS",
    "SPUR": "SPUR",
    "SPURS": "SPUR",
    "SQ": "SQ",
    "SQR": "SQ",
    "SQRE": "SQ",
    "SQRS": "SQS",
    "SQUARE": "SQ",
    "SQUARES": "SQS",
    "SQS": "SQS",
    "STA": "STA",
    "STATION": "STA",
    "STATN": "STA",
    "ST": "ST",
    "STRA": "STRA",
    "STRAV": "STRA",
    "STRAVEN": "STRA",
    "STRAVENUE": "STRA",
    "STRAVN": "STRA",
    "STREAM": "STRM",
    "STREET": "ST",
    "STREETS": "STS",
    "STREME": "STRM",
    "STRM": "STRM",
    "STS": "STS",
    "SUMIT": "SMT",
    "SUMITT": "SMT",
    "SUMMIT": "SMT",
    "TER": "TER",
    "TERRACE": "TER",
    "THROUGHWAY": "TRWY",
    "TRACE": "TRCE",
    "TRACES": "TRCE",
    "TRAIL": "TRL",
    "TRAILER": "TRLR",
    "TRAILS": "TRL",
    "TRCE": "TRCE",
    "TRL": "TRL",
    "TRLR": "TRLR",
    "TRWY": "TRWY",
    "TUNEL": "TUNL",
    "TUNL": "TUNL",
    "TUNLS": "TUNL",
    "TUNNEL": "TUNL",
    "TUNNELS": "TUNL",
    "TUNNL": "TUNL",
    "TURNPIKE": "TPKE",
    "TPKE": "TPKE",
    "TRNPK": "TPKE",
    "UNDERPASS": "UPAS",
    "UNION": "UN",
    "UNIONS": "UNS",
    "UN": "UN",
    "UNS": "UNS",
    "UPAS": "UPAS",
    "VALLEY": "VLY",
    "VALLEYS": "VLYS",
    "VALLY": "VLY",
    "VDCT": "VIA",
    "VIA": "VIA",
    "VIADCT": "VIA",
    "VIADUCT": "VIA",
    "VIEW": "VW",
    "VIEWS": "VWS",
    "VILL": "VLG",
    "VILLAG": "VLG",
    "VILLAGE": "VLG",
    "VILLAGES": "VLGS",
    "VILLE": "VL",
    "VL": "VL",
    "VLG": "VLG",
    "VLGS": "VLGS",
    "VLLY": "VLY",
    "VLY": "VLY",
    "VLYS": "VLYS",
    "VST": "VIS",
    "VSTA": "VIS",
    "VW": "VW",
    "VWS": "VWS",
    "VISTA": "VIS",
    "VIS": "VIS",
    "WALKS": "WALK",
    "WALK": "WALK",
    "WALL": "WALL",
    "WAY": "WAY",
    "WAYS": "WAYS",
    "WELL": "WL",
    "WELLS": "WLS",
    "WL": "WL",
    "WLS": "WLS",
    "XING": "XING",
    "XRD": "XRD",
    "XRDS": "XRDS",
}
|
addrforge/errors.py
ADDED
addrforge/format.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Formatting helpers for standardized address strings."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from .models import ParsedAddress
|
|
6
|
+
from .normalize import collapse_spaces
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _append(parts: List[str], value: Optional[str]) -> None:
|
|
10
|
+
if value:
|
|
11
|
+
parts.append(value)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def format_standardized(address: ParsedAddress) -> str:
    """Join the parsed components into one uppercase, single-line string.

    The leading (primary) components depend on ``address.kind``:

    * ``"po_box"``: ``address.po_box`` as-is when it already reads as a
      complete mailbox phrase, otherwise ``"PO BOX"`` followed by it.
    * ``"route"``: ``address.route``.
    * ``"street"``: number, predirectional, street name, suffix,
      postdirectional.
    * any other kind: no primary components.

    Unit type, unit id, city, state and ZIP code follow for every kind.
    Falsy components are skipped; the result is uppercased and passed
    through ``collapse_spaces``.
    """
    kind = address.kind

    if kind == "po_box":
        box = address.po_box
        if box and _is_complete_mailbox_phrase(box):
            primary = [box]
        else:
            # The literal prefix is emitted even when the box value is empty.
            primary = ["PO BOX", box]
    elif kind == "route":
        primary = [address.route]
    elif kind == "street":
        primary = [
            address.number,
            address.predir,
            address.street_name,
            address.suffix,
            address.postdir,
        ]
    else:
        primary = []

    trailing = [
        address.unit_type,
        address.unit_id,
        address.city,
        address.state,
        address.zip_code,
    ]

    tokens = [token for token in primary + trailing if token]
    return collapse_spaces(" ".join(tokens).upper())
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def primary_line(address: ParsedAddress) -> str:
    """Format only the primary delivery line of *address*.

    A new ``ParsedAddress`` is built from the raw text, kind and
    primary-line fields of *address*, then passed to
    ``format_standardized``. Unit, city, state and ZIP code are not copied,
    so they take whatever defaults ``ParsedAddress`` defines.
    NOTE(review): presumably empty defaults -- confirm in models.py.
    """
    copied_fields = (
        "raw",
        "kind",
        "number",
        "predir",
        "street_name",
        "suffix",
        "postdir",
        "route",
        "po_box",
    )
    primary_only = ParsedAddress(
        **{name: getattr(address, name) for name in copied_fields}
    )
    return format_standardized(primary_only)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def unit_line(address: ParsedAddress) -> str:
    """Format only the secondary unit: unit type followed by unit id.

    Falsy components are skipped; the result is uppercased and passed
    through ``collapse_spaces``. If both are missing, the empty string is
    what gets passed to ``collapse_spaces``.
    """
    tokens = [
        token for token in (address.unit_type, address.unit_id) if token
    ]
    joined = " ".join(tokens)
    return collapse_spaces(joined.upper())
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _is_complete_mailbox_phrase(value: str) -> bool:
|
|
71
|
+
return value.startswith(("RR ", "HC ", "PSC ", "CMR ", "UNIT "))
|