addrforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 addrforge contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: addrforge
3
+ Version: 0.1.0
4
+ Summary: Dependency-free US address parser and normalizer
5
+ Author: addrforge contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/RohitMadhu/addrforge
8
+ Project-URL: Repository, https://github.com/RohitMadhu/addrforge
9
+ Project-URL: Issues, https://github.com/RohitMadhu/addrforge/issues
10
+ Keywords: address,parser,normalization,usps
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Text Processing
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ # addrforge
26
+
27
+ `addrforge` is a lightweight, dependency-free Python package for parsing and standardizing messy US address strings. It returns structured components, USPS-like uppercase normalized strings, mailing-style address lines, and optional geocodability checks through free public providers.
28
+
29
+ The local parser is offline and dependency-free. Optional validation providers may call external public APIs, but they still do not prove USPS deliverability.
30
+
31
+ ## Install
32
+
33
+ ```bash
34
+ pip install addrforge
35
+ ```
36
+
37
+ For editable local development:
38
+
39
+ ```bash
40
+ pip install -e .
41
+ ```
42
+
43
+ ## Quickstart
44
+
45
+ ```python
46
+ from addrforge import explain, parse, split_lines, standardize, validate
47
+
48
+ parsed = parse("123 north main street apartment 4b")
49
+ print(parsed.kind) # street
50
+ print(parsed.number) # 123
51
+ print(parsed.predir) # N
52
+ print(parsed.suffix) # ST
53
+ print(parsed.unit_type) # APT
54
+ print(parsed.standardized) # 123 N MAIN ST APT 4B
55
+ print(parsed.confidence) # 0.99
56
+ print(parsed.parse_notes) # ('missing_place_tail',)
57
+ print(parsed.match_level) # partial
58
+
59
+ print(standardize("PO Box 45, Fairfax, VA 22030"))
60
+ # PO BOX 45 FAIRFAX VA 22030
61
+
62
+ print(explain("Main Street"))
63
+ # ('missing_house_number', 'missing_place_tail')
64
+
65
+ lines = split_lines("123 Main St Apt 4B, Fairfax VA 22030")
66
+ print(lines.line1) # 123 MAIN ST
67
+ print(lines.line2) # APT 4B
68
+
69
+ result = validate("1600 Pennsylvania Ave NW, Washington DC 20500", provider="census")
70
+ print(result.is_valid) # True when Census returns an address-range match
71
+ print(result.is_deliverable) # None; this is not USPS delivery validation
72
+ ```
73
+
74
+ For strict parsing:
75
+
76
+ ```python
77
+ parse("Main Street", strict=True).kind
78
+ # unknown
79
+ ```
80
+
81
+ For JSON:
82
+
83
+ ```python
84
+ print(parse("123 Main St").to_json(indent=2))
85
+ ```
86
+
87
+ From the command line:
88
+
89
+ ```bash
90
+ addrforge "123 north main street apartment 4b"
91
+ addrforge --json "123 Main St, Fairfax VA 22030"
92
+ addrforge --lines "123 Main St Apt 4B, Fairfax VA 22030"
93
+ ```
94
+
95
+ ## Supported Patterns
96
+
97
+ - Standard street addresses such as `123 Main Street`, `123 N Main St`, `12-14 W Elm Rd`, and `1600 Pennsylvania Avenue NW`
98
+ - Named streets without a house number such as `42nd Street`
99
+ - Unit and subaddress designators including `Apt`, `Apartment`, `Suite`, `Ste`, `Unit`, `Room`, `Floor`, `Bldg`, `Lot`, `No. 4`, `#200`, and unit-before-street forms such as `Suite 200 123 Main Street`
100
+ - Highway and route forms including `I-95`, `US 29`, `State Route 7`, `County Road 12`, `Farm to Market Road 1960`, and `Route 66`
101
+ - PO Box forms including `PO Box 45`, `P.O. Box 45`, and `Post Office Box 45`
102
+ - Rural, highway contract, and military mailbox forms such as `RR 2 Box 152`, `HC 67 Box 12`, `PSC 123 Box 456 APO AE 09012`, and `CMR 123 Box 456`
103
+ - Optional city, state, and ZIP tails such as `Fairfax, VA 22030`, `Arlington VA 22201`, and `Washington, DC 20001-1234`
104
+ - Obvious non-US addresses are rejected cleanly with `is_us=False` and `reject_reason`
105
+
106
+ Parsed results include:
107
+
108
+ - `confidence`: heuristic score from `0.0` to `0.99`
109
+ - `match_level`: `exact-ish`, `partial`, `weak`, or `unknown`
110
+ - `components_missing`: component names still needed for mailing-style completeness
111
+ - `warnings`: machine-readable warning codes
112
+ - `parse_notes`: explanation codes for partial or ambiguous parses
113
+ - `is_complete_for_mailing`: whether the parsed components include a primary line, city, state, and ZIP
114
+
115
+ These are heuristics for caller triage, not proof that an address is real.
116
+
117
+ ## Optional Validation Providers
118
+
119
+ `addrforge.validate()` supports two no-registration providers:
120
+
121
+ - `provider="census"` uses the US Census Geocoder. It can indicate whether an address is geocodable against Census address-range data.
122
+ - `provider="nominatim"` uses OpenStreetMap Nominatim. It can indicate whether OSM found a US place/address-like result.
123
+
124
+ Both provider clients are implemented with only the Python standard library. Both services can be called without registration, but they are external services with their own usage policies. Nominatim requires a real identifying User-Agent and low request volume. Neither provider is a USPS/CASS/DPV deliverability validator.
125
+
126
+ ## Non-Goals
127
+
128
+ - USPS/CASS/DPV deliverability checks
129
+ - Misspelling correction
130
+ - International address parsing
131
+ - Replacing specialized address parsers for every edge case
132
+
133
+ ## Development
134
+
135
+ Run the test suite with:
136
+
137
+ ```bash
138
+ PYTHONPATH=src python -m unittest discover
139
+ ```
140
+
141
+ The library targets Python 3.9+ and uses only the Python standard library.
@@ -0,0 +1,117 @@
1
+ # addrforge
2
+
3
+ `addrforge` is a lightweight, dependency-free Python package for parsing and standardizing messy US address strings. It returns structured components, USPS-like uppercase normalized strings, mailing-style address lines, and optional geocodability checks through free public providers.
4
+
5
+ The local parser is offline and dependency-free. Optional validation providers may call external public APIs, but they still do not prove USPS deliverability.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install addrforge
11
+ ```
12
+
13
+ For editable local development:
14
+
15
+ ```bash
16
+ pip install -e .
17
+ ```
18
+
19
+ ## Quickstart
20
+
21
+ ```python
22
+ from addrforge import explain, parse, split_lines, standardize, validate
23
+
24
+ parsed = parse("123 north main street apartment 4b")
25
+ print(parsed.kind) # street
26
+ print(parsed.number) # 123
27
+ print(parsed.predir) # N
28
+ print(parsed.suffix) # ST
29
+ print(parsed.unit_type) # APT
30
+ print(parsed.standardized) # 123 N MAIN ST APT 4B
31
+ print(parsed.confidence) # 0.99
32
+ print(parsed.parse_notes) # ('missing_place_tail',)
33
+ print(parsed.match_level) # partial
34
+
35
+ print(standardize("PO Box 45, Fairfax, VA 22030"))
36
+ # PO BOX 45 FAIRFAX VA 22030
37
+
38
+ print(explain("Main Street"))
39
+ # ('missing_house_number', 'missing_place_tail')
40
+
41
+ lines = split_lines("123 Main St Apt 4B, Fairfax VA 22030")
42
+ print(lines.line1) # 123 MAIN ST
43
+ print(lines.line2) # APT 4B
44
+
45
+ result = validate("1600 Pennsylvania Ave NW, Washington DC 20500", provider="census")
46
+ print(result.is_valid) # True when Census returns an address-range match
47
+ print(result.is_deliverable) # None; this is not USPS delivery validation
48
+ ```
49
+
50
+ For strict parsing:
51
+
52
+ ```python
53
+ parse("Main Street", strict=True).kind
54
+ # unknown
55
+ ```
56
+
57
+ For JSON:
58
+
59
+ ```python
60
+ print(parse("123 Main St").to_json(indent=2))
61
+ ```
62
+
63
+ From the command line:
64
+
65
+ ```bash
66
+ addrforge "123 north main street apartment 4b"
67
+ addrforge --json "123 Main St, Fairfax VA 22030"
68
+ addrforge --lines "123 Main St Apt 4B, Fairfax VA 22030"
69
+ ```
70
+
71
+ ## Supported Patterns
72
+
73
+ - Standard street addresses such as `123 Main Street`, `123 N Main St`, `12-14 W Elm Rd`, and `1600 Pennsylvania Avenue NW`
74
+ - Named streets without a house number such as `42nd Street`
75
+ - Unit and subaddress designators including `Apt`, `Apartment`, `Suite`, `Ste`, `Unit`, `Room`, `Floor`, `Bldg`, `Lot`, `No. 4`, `#200`, and unit-before-street forms such as `Suite 200 123 Main Street`
76
+ - Highway and route forms including `I-95`, `US 29`, `State Route 7`, `County Road 12`, `Farm to Market Road 1960`, and `Route 66`
77
+ - PO Box forms including `PO Box 45`, `P.O. Box 45`, and `Post Office Box 45`
78
+ - Rural, highway contract, and military mailbox forms such as `RR 2 Box 152`, `HC 67 Box 12`, `PSC 123 Box 456 APO AE 09012`, and `CMR 123 Box 456`
79
+ - Optional city, state, and ZIP tails such as `Fairfax, VA 22030`, `Arlington VA 22201`, and `Washington, DC 20001-1234`
80
+ - Obvious non-US addresses are rejected cleanly with `is_us=False` and `reject_reason`
81
+
82
+ Parsed results include:
83
+
84
+ - `confidence`: heuristic score from `0.0` to `0.99`
85
+ - `match_level`: `exact-ish`, `partial`, `weak`, or `unknown`
86
+ - `components_missing`: component names still needed for mailing-style completeness
87
+ - `warnings`: machine-readable warning codes
88
+ - `parse_notes`: explanation codes for partial or ambiguous parses
89
+ - `is_complete_for_mailing`: whether the parsed components include a primary line, city, state, and ZIP
90
+
91
+ These are heuristics for caller triage, not proof that an address is real.
92
+
93
+ ## Optional Validation Providers
94
+
95
+ `addrforge.validate()` supports two no-registration providers:
96
+
97
+ - `provider="census"` uses the US Census Geocoder. It can indicate whether an address is geocodable against Census address-range data.
98
+ - `provider="nominatim"` uses OpenStreetMap Nominatim. It can indicate whether OSM found a US place/address-like result.
99
+
100
+ Both provider clients are implemented with only the Python standard library. Both services can be called without registration, but they are external services with their own usage policies. Nominatim requires a real identifying User-Agent and low request volume. Neither provider is a USPS/CASS/DPV deliverability validator.
101
+
102
+ ## Non-Goals
103
+
104
+ - USPS/CASS/DPV deliverability checks
105
+ - Misspelling correction
106
+ - International address parsing
107
+ - Replacing specialized address parsers for every edge case
108
+
109
+ ## Development
110
+
111
+ Run the test suite with:
112
+
113
+ ```bash
114
+ PYTHONPATH=src python -m unittest discover
115
+ ```
116
+
117
+ The library targets Python 3.9+ and uses only the Python standard library.
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "addrforge"
7
+ version = "0.1.0"
8
+ description = "Dependency-free US address parser and normalizer"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [{name = "addrforge contributors"}]
14
+ keywords = ["address", "parser", "normalization", "usps"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Text Processing",
25
+ ]
26
+ dependencies = []
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/RohitMadhu/addrforge"
30
+ Repository = "https://github.com/RohitMadhu/addrforge"
31
+ Issues = "https://github.com/RohitMadhu/addrforge/issues"
32
+
33
+ [project.scripts]
34
+ addrforge = "addrforge.cli:main"
35
+
36
+ [tool.setuptools.packages.find]
37
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,22 @@
1
+ """Public API for addrforge."""
2
+
3
+ from .lines import split_lines
4
+ from .models import AddressLines, ParsedAddress, ValidationResult
5
+ from .parser import explain, is_probably_address, parse, standardize
6
+ from .validation import CensusGeocoderProvider, NominatimProvider, ValidationProvider, get_provider, validate
7
+
8
# Public names re-exported by ``from addrforge import *``.
# Kept sorted: classes first, then functions, each group alphabetical.
__all__ = [
    "AddressLines",
    "CensusGeocoderProvider",
    "NominatimProvider",
    "ParsedAddress",
    "ValidationProvider",
    "ValidationResult",
    "explain",
    "get_provider",
    "is_probably_address",
    "parse",
    "split_lines",
    "standardize",
    "validate",
]
@@ -0,0 +1,71 @@
1
+ """Command line interface for addrforge."""
2
+
3
+ import argparse
4
+ import sys
5
+ from typing import Optional, Sequence, TextIO
6
+
7
+ from .lines import split_lines
8
+ from .parser import parse
9
+ from .validation import validate
10
+
11
+
12
def main(argv: Optional[Sequence[str]] = None, *, stdout: Optional[TextIO] = None) -> int:
    """Entry point for the ``addrforge`` console script.

    Args:
        argv: Argument list handed to argparse; ``None`` makes argparse fall
            back to the process arguments.
        stdout: Stream that receives the rendered output; ``sys.stdout`` is
            used when it is omitted.

    Returns:
        ``2`` when ``--validate`` was requested and the validation result
        carries an error, otherwise ``0``.
    """

    cli = argparse.ArgumentParser(
        prog="addrforge",
        description="Parse and standardize US address strings.",
    )
    cli.add_argument("address", nargs="*", help="address text; stdin is used when omitted")
    # The three boolean switches share the same shape, so register them together
    # (in the same order, which keeps the generated --help output unchanged).
    for flag, help_text in (
        ("--json", "print JSON instead of plain text"),
        ("--lines", "split into mailing-style address lines"),
        ("--strict", "reject incomplete partial parses"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    cli.add_argument(
        "--validate",
        choices=("census", "nominatim"),
        help="run an optional external geocodability check",
    )
    cli.add_argument("--timeout", type=float, default=5.0, help="validation request timeout in seconds")

    options = cli.parse_args(argv)
    stream = stdout or sys.stdout
    # Positional words are re-joined into one address; with none given, read stdin.
    if options.address:
        address_text = " ".join(options.address).strip()
    else:
        address_text = sys.stdin.read().strip()

    # --validate takes precedence over --lines and the default parse mode.
    if options.validate:
        outcome = validate(address_text, provider=options.validate, timeout=options.timeout)
        rendered = outcome.to_json(indent=2) if options.json else _validation_summary(outcome)
        print(rendered, file=stream)
        return 2 if outcome.error is not None else 0

    if options.lines:
        mailing = split_lines(address_text, strict=options.strict)
        rendered = mailing.to_json(indent=2) if options.json else _line_summary(mailing)
    else:
        record = parse(address_text, strict=options.strict)
        rendered = record.to_json(indent=2) if options.json else record.standardized
    print(rendered, file=stream)
    return 0
44
+
45
+
46
+ def _line_summary(lines: object) -> str:
47
+ parts = [
48
+ getattr(lines, "line1", ""),
49
+ getattr(lines, "line2", ""),
50
+ " ".join(
51
+ part
52
+ for part in (
53
+ getattr(lines, "city", None),
54
+ getattr(lines, "state", None),
55
+ getattr(lines, "zip_code", None),
56
+ )
57
+ if part
58
+ ),
59
+ ]
60
+ return "\n".join(part for part in parts if part)
61
+
62
+
63
+ def _validation_summary(result: object) -> str:
64
+ matched = getattr(result, "matched_address", None) or ""
65
+ status = "matched" if getattr(result, "is_valid", False) else "not matched"
66
+ provider = getattr(result, "provider", "provider")
67
+ return f"{provider}: {status}" + (f"\n{matched}" if matched else "")
68
+
69
+
70
# Allow running this module directly as a script; the integer returned by
# main() is passed to SystemExit and becomes the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())