enterprise-domain-mapper 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. enterprise_domain_mapper-0.1.0/LICENSE +21 -0
  2. enterprise_domain_mapper-0.1.0/PKG-INFO +207 -0
  3. enterprise_domain_mapper-0.1.0/README.md +174 -0
  4. enterprise_domain_mapper-0.1.0/pyproject.toml +65 -0
  5. enterprise_domain_mapper-0.1.0/setup.cfg +4 -0
  6. enterprise_domain_mapper-0.1.0/src/domain_mapper/__init__.py +3 -0
  7. enterprise_domain_mapper-0.1.0/src/domain_mapper/cli.py +192 -0
  8. enterprise_domain_mapper-0.1.0/src/domain_mapper/dns_verifier.py +87 -0
  9. enterprise_domain_mapper-0.1.0/src/domain_mapper/mapper.py +117 -0
  10. enterprise_domain_mapper-0.1.0/src/domain_mapper/models.py +73 -0
  11. enterprise_domain_mapper-0.1.0/src/domain_mapper/output.py +84 -0
  12. enterprise_domain_mapper-0.1.0/src/domain_mapper/sources/__init__.py +7 -0
  13. enterprise_domain_mapper-0.1.0/src/domain_mapper/sources/sec_edgar.py +243 -0
  14. enterprise_domain_mapper-0.1.0/src/domain_mapper/sources/tld_generator.py +177 -0
  15. enterprise_domain_mapper-0.1.0/src/domain_mapper/sources/wikipedia.py +208 -0
  16. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/PKG-INFO +207 -0
  17. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/SOURCES.txt +22 -0
  18. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/dependency_links.txt +1 -0
  19. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/entry_points.txt +2 -0
  20. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/requires.txt +11 -0
  21. enterprise_domain_mapper-0.1.0/src/enterprise_domain_mapper.egg-info/top_level.txt +1 -0
  22. enterprise_domain_mapper-0.1.0/tests/test_models.py +73 -0
  23. enterprise_domain_mapper-0.1.0/tests/test_output.py +69 -0
  24. enterprise_domain_mapper-0.1.0/tests/test_tld_generator.py +76 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GTM Layer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,207 @@
1
+ Metadata-Version: 2.4
2
+ Name: enterprise-domain-mapper
3
+ Version: 0.1.0
4
+ Summary: Map enterprise corporate structures to enrichable domains
5
+ Author-email: GTM Layer <hello@gtmlayer.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/gtmlayer/enterprise-domain-mapper
8
+ Project-URL: Repository, https://github.com/gtmlayer/enterprise-domain-mapper
9
+ Project-URL: Issues, https://github.com/gtmlayer/enterprise-domain-mapper/issues
10
+ Keywords: enterprise,enrichment,abm,domains,subsidiaries,sales
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Office/Business
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: requests>=2.31.0
23
+ Requires-Dist: beautifulsoup4>=4.12.0
24
+ Requires-Dist: click>=8.1.0
25
+ Requires-Dist: rich>=13.0.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
30
+ Requires-Dist: black>=23.0; extra == "dev"
31
+ Requires-Dist: responses>=0.23.0; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # Enterprise Domain Mapper
35
+
36
+ [![CI](https://github.com/gtmlayer/enterprise-domain-mapper/actions/workflows/ci.yml/badge.svg)](https://github.com/gtmlayer/enterprise-domain-mapper/actions/workflows/ci.yml)
37
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
38
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
39
+
40
+ **Map enterprise corporate structures to enrichable domains.** Feed it company names, get back subsidiaries, acquisitions, and regional domains that your enrichment tools are missing.
41
+
42
+ Every sales team doing enterprise ABM hits the same wall: large companies have dozens of subsidiaries, acquisitions, and regional entities, each with their own email domain. Without a complete domain map, tools like Clay and Apollo only find contacts at the parent domain. Entire business units get missed.
43
+
44
+ This tool fixes that.
45
+
46
+ ```
47
+ $ domain-mapper "NTT Data"
48
+
49
+ NTT Data
50
+ ├── NTT DATA Services nttdataservices.com [SEC EDGAR]
51
+ ├── NTT DATA Business Solutions nttdata-solutions.com [Wikipedia]
52
+ ├── Dimension Data dimensiondata.com [Wikipedia]
53
+ ├── NTT DATA Italia nttdata.it [TLD guess ✓ DNS verified]
54
+ ├── NTT DATA UK nttdata.co.uk [TLD guess ✓ DNS verified]
55
+ └── NTT DATA Japan nttdata.co.jp [TLD guess ✓ DNS verified]
56
+
57
+ Found 12 subsidiaries, 18 domains (6 confirmed, 12 guessed, 9 DNS verified)
58
+ ```
59
+
60
+ ## The problem
61
+
62
+ Enterprise accounts don't operate under a single domain. A company like NTT Data has subsidiaries in 50+ countries, each with localised domains. Deloitte has member firms. Boeing has defence subsidiaries that use completely different brands.
63
+
64
+ If you're running enrichment against just `nttdata.com`, you're finding maybe 30% of the contacts you could be reaching. The rest are hiding behind `dimensiondata.com`, `nttdata.co.uk`, `nttdata.it`, and domains you didn't know existed.
65
+
66
+ Building these domain maps manually takes hours per account. We built this tool because we got tired of doing it by hand.
67
+
68
+ ## Quick start
69
+
70
+ ### Installation
71
+
72
+ ```bash
73
+ pip install enterprise-domain-mapper
74
+ ```
75
+
76
+ Or clone and install locally:
77
+
78
+ ```bash
79
+ git clone https://github.com/gtmlayer/enterprise-domain-mapper.git
80
+ cd enterprise-domain-mapper
81
+ pip install -e .
82
+ ```
83
+
84
+ ### Single company lookup
85
+
86
+ ```bash
87
+ domain-mapper "Boeing"
88
+ ```
89
+
90
+ ### Batch mode (CSV input)
91
+
92
+ ```bash
93
+ domain-mapper accounts.csv --output results.csv
94
+ ```
95
+
96
+ Your input CSV just needs a column with company names. The tool auto-detects columns named `company_name`, `company`, `name`, `account`, `account_name`, or `organization` (matched case-insensitively). If you have a domain column (`domain`, `website`, `url`, `parent_domain`, or `company_domain`), it'll use that as the parent domain for TLD guessing.
97
+
98
+ ### With DNS verification
99
+
100
+ ```bash
101
+ domain-mapper accounts.csv --output results.csv --verify-dns
102
+ ```
103
+
104
+ This checks whether guessed domains actually have mail infrastructure (MX records) or at minimum resolve (A records). Adds a few seconds per company but filters out the noise.
105
+
106
+ ## What it does
107
+
108
+ The tool combines three data sources and a verification layer to build comprehensive domain maps:
109
+
110
+ ### 1. SEC EDGAR Exhibit 21 scraper
111
+
112
+ For US-listed companies, SEC filings include Exhibit 21: a legally required list of all subsidiaries. The tool looks up the company's CIK, finds the latest 10-K filing, and parses the subsidiary list with jurisdictions.
113
+
114
+ This is the highest-quality source - it's legally mandated disclosure, so it's comprehensive and current.
115
+
116
+ ### 2. Wikipedia corporate structure parser
117
+
118
+ For non-US companies (or supplementary data), the tool searches Wikipedia for the company page and extracts subsidiary and acquisition data from infoboxes and structured sections.
119
+
120
+ Covers companies globally, though data depth varies by how well-maintained the Wikipedia page is.
121
+
122
+ ### 3. TLD pattern generator
123
+
124
+ Once subsidiaries are identified with their jurisdictions, the tool generates likely domain patterns. A subsidiary in Italy with parent domain `nttdata.com` produces guesses like `nttdata.it`. Covers 70+ countries with their standard corporate TLD patterns (e.g. UK produces `.co.uk` and `.uk`, Japan produces `.co.jp` and `.jp`).
125
+
126
+ ### 4. DNS verification (optional)
127
+
128
+ MX record lookup with A record fallback to confirm guessed domains actually resolve. MX records are the strongest signal - if a domain has mail infrastructure, it's real. A records confirm the domain exists even without mail setup.
129
+
130
+ ## Output format
131
+
132
+ ### Detailed output (default)
133
+
134
+ Nine columns, one row per subsidiary-domain pair:
135
+
136
+ | Column | Description |
137
+ |--------|-------------|
138
+ | `parent_company` | The company you looked up |
139
+ | `parent_domain` | Known parent domain |
140
+ | `subsidiary_name` | Name of the subsidiary or entity |
141
+ | `subsidiary_type` | Subsidiary, acquisition, division, etc. |
142
+ | `jurisdiction` | Country or region |
143
+ | `domain` | Confirmed or guessed domain |
144
+ | `domain_source` | Where it came from (SEC EDGAR, Wikipedia, TLD guess) |
145
+ | `dns_verified` | Whether DNS verification passed |
146
+ | `confidence` | High (confirmed), Medium (guessed + verified), Low (guessed only) |
147
+
148
+ ### Clay import format
149
+
150
+ One row per company with domains consolidated into a single field, ready for direct import into Clay as a data source.
151
+
152
+ ```bash
153
+ domain-mapper accounts.csv --output results.csv --format clay
154
+ ```
155
+
156
+ ## Importing into Clay
157
+
158
+ 1. Run the tool with `--format clay` to get the Clay-optimised output
159
+ 2. In Clay, create a new table or add to an existing one
160
+ 3. Import the CSV - the columns map directly to Clay's expected format
161
+ 4. Use the domain list column with Clay's enrichment tools to find contacts across all mapped domains
162
+
163
+ This is the workflow that sparked the whole tool. We were manually building domain maps for a client's enterprise accounts and realised the process was repeatable enough to automate.
164
+
165
+ ## Example
166
+
167
+ The `examples/` directory of the GitHub repository contains `input_sample.csv` with five test companies to get you started:
168
+
169
+ ```bash
170
+ domain-mapper examples/input_sample.csv --output examples/results.csv --verify-dns
171
+ ```
172
+
173
+ ## Contributing
174
+
175
+ Want to add a new data source? The architecture makes it straightforward:
176
+
177
+ 1. Create a new module in `src/domain_mapper/sources/`
178
+ 2. Implement a class with a `get_subsidiaries(company_name)` method that returns a list of subsidiary objects
179
+ 3. Add it to the orchestrator in `mapper.py`
180
+ 4. Write tests in `tests/`
181
+
182
+ Some data sources we'd love to see contributed:
183
+
184
+ - Companies House (UK company registry)
185
+ - OpenCorporates API
186
+ - Crunchbase (acquisitions data)
187
+ - D&B corporate hierarchies
188
+
189
+ Pull requests welcome. Run `ruff check` and `black` before submitting, and make sure `pytest` passes.
190
+
191
+ ## Tech stack
192
+
193
+ - Python 3.10+
194
+ - `requests` and `beautifulsoup4` for web scraping
195
+ - `click` for the CLI
196
+ - `rich` for terminal output
197
+ - No paid APIs, no API keys required
198
+
199
+ ## Built by GTM Layer
200
+
201
+ [GTM Layer](https://gtmlayer.com) builds revenue systems for B2B sales teams. We work with companies on CRM architecture, enrichment pipelines, signal-driven outbound, and everything in between.
202
+
203
+ This tool came out of real client work - we built it to solve a problem we kept hitting on enterprise ABM engagements. If you're running into similar challenges, [get in touch](https://gtmlayer.com).
204
+
205
+ ## Licence
206
+
207
+ MIT - use it however you want.
@@ -0,0 +1,174 @@
1
+ # Enterprise Domain Mapper
2
+
3
+ [![CI](https://github.com/gtmlayer/enterprise-domain-mapper/actions/workflows/ci.yml/badge.svg)](https://github.com/gtmlayer/enterprise-domain-mapper/actions/workflows/ci.yml)
4
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
6
+
7
+ **Map enterprise corporate structures to enrichable domains.** Feed it company names, get back subsidiaries, acquisitions, and regional domains that your enrichment tools are missing.
8
+
9
+ Every sales team doing enterprise ABM hits the same wall: large companies have dozens of subsidiaries, acquisitions, and regional entities, each with their own email domain. Without a complete domain map, tools like Clay and Apollo only find contacts at the parent domain. Entire business units get missed.
10
+
11
+ This tool fixes that.
12
+
13
+ ```
14
+ $ domain-mapper "NTT Data"
15
+
16
+ NTT Data
17
+ ├── NTT DATA Services nttdataservices.com [SEC EDGAR]
18
+ ├── NTT DATA Business Solutions nttdata-solutions.com [Wikipedia]
19
+ ├── Dimension Data dimensiondata.com [Wikipedia]
20
+ ├── NTT DATA Italia nttdata.it [TLD guess ✓ DNS verified]
21
+ ├── NTT DATA UK nttdata.co.uk [TLD guess ✓ DNS verified]
22
+ └── NTT DATA Japan nttdata.co.jp [TLD guess ✓ DNS verified]
23
+
24
+ Found 12 subsidiaries, 18 domains (6 confirmed, 12 guessed, 9 DNS verified)
25
+ ```
26
+
27
+ ## The problem
28
+
29
+ Enterprise accounts don't operate under a single domain. A company like NTT Data has subsidiaries in 50+ countries, each with localised domains. Deloitte has member firms. Boeing has defence subsidiaries that use completely different brands.
30
+
31
+ If you're running enrichment against just `nttdata.com`, you're finding maybe 30% of the contacts you could be reaching. The rest are hiding behind `dimensiondata.com`, `nttdata.co.uk`, `nttdata.it`, and domains you didn't know existed.
32
+
33
+ Building these domain maps manually takes hours per account. We built this tool because we got tired of doing it by hand.
34
+
35
+ ## Quick start
36
+
37
+ ### Installation
38
+
39
+ ```bash
40
+ pip install enterprise-domain-mapper
41
+ ```
42
+
43
+ Or clone and install locally:
44
+
45
+ ```bash
46
+ git clone https://github.com/gtmlayer/enterprise-domain-mapper.git
47
+ cd enterprise-domain-mapper
48
+ pip install -e .
49
+ ```
50
+
51
+ ### Single company lookup
52
+
53
+ ```bash
54
+ domain-mapper "Boeing"
55
+ ```
56
+
57
+ ### Batch mode (CSV input)
58
+
59
+ ```bash
60
+ domain-mapper accounts.csv --output results.csv
61
+ ```
62
+
63
+ Your input CSV just needs a column with company names. The tool auto-detects columns named `company_name`, `company`, `name`, `account`, `account_name`, or `organization` (matched case-insensitively). If you have a domain column (`domain`, `website`, `url`, `parent_domain`, or `company_domain`), it'll use that as the parent domain for TLD guessing.
64
+
65
+ ### With DNS verification
66
+
67
+ ```bash
68
+ domain-mapper accounts.csv --output results.csv --verify-dns
69
+ ```
70
+
71
+ This checks whether guessed domains actually have mail infrastructure (MX records) or at minimum resolve (A records). Adds a few seconds per company but filters out the noise.
72
+
73
+ ## What it does
74
+
75
+ The tool combines three data sources and a verification layer to build comprehensive domain maps:
76
+
77
+ ### 1. SEC EDGAR Exhibit 21 scraper
78
+
79
+ For US-listed companies, SEC filings include Exhibit 21: a legally required list of all subsidiaries. The tool looks up the company's CIK, finds the latest 10-K filing, and parses the subsidiary list with jurisdictions.
80
+
81
+ This is the highest-quality source - it's legally mandated disclosure, so it's comprehensive and current.
82
+
83
+ ### 2. Wikipedia corporate structure parser
84
+
85
+ For non-US companies (or supplementary data), the tool searches Wikipedia for the company page and extracts subsidiary and acquisition data from infoboxes and structured sections.
86
+
87
+ Covers companies globally, though data depth varies by how well-maintained the Wikipedia page is.
88
+
89
+ ### 3. TLD pattern generator
90
+
91
+ Once subsidiaries are identified with their jurisdictions, the tool generates likely domain patterns. A subsidiary in Italy with parent domain `nttdata.com` produces guesses like `nttdata.it`. Covers 70+ countries with their standard corporate TLD patterns (e.g. UK produces `.co.uk` and `.uk`, Japan produces `.co.jp` and `.jp`).
92
+
93
+ ### 4. DNS verification (optional)
94
+
95
+ MX record lookup with A record fallback to confirm guessed domains actually resolve. MX records are the strongest signal - if a domain has mail infrastructure, it's real. A records confirm the domain exists even without mail setup.
96
+
97
+ ## Output format
98
+
99
+ ### Detailed output (default)
100
+
101
+ Nine columns, one row per subsidiary-domain pair:
102
+
103
+ | Column | Description |
104
+ |--------|-------------|
105
+ | `parent_company` | The company you looked up |
106
+ | `parent_domain` | Known parent domain |
107
+ | `subsidiary_name` | Name of the subsidiary or entity |
108
+ | `subsidiary_type` | Subsidiary, acquisition, division, etc. |
109
+ | `jurisdiction` | Country or region |
110
+ | `domain` | Confirmed or guessed domain |
111
+ | `domain_source` | Where it came from (SEC EDGAR, Wikipedia, TLD guess) |
112
+ | `dns_verified` | Whether DNS verification passed |
113
+ | `confidence` | High (confirmed), Medium (guessed + verified), Low (guessed only) |
114
+
115
+ ### Clay import format
116
+
117
+ One row per company with domains consolidated into a single field, ready for direct import into Clay as a data source.
118
+
119
+ ```bash
120
+ domain-mapper accounts.csv --output results.csv --format clay
121
+ ```
122
+
123
+ ## Importing into Clay
124
+
125
+ 1. Run the tool with `--format clay` to get the Clay-optimised output
126
+ 2. In Clay, create a new table or add to an existing one
127
+ 3. Import the CSV - the columns map directly to Clay's expected format
128
+ 4. Use the domain list column with Clay's enrichment tools to find contacts across all mapped domains
129
+
130
+ This is the workflow that sparked the whole tool. We were manually building domain maps for a client's enterprise accounts and realised the process was repeatable enough to automate.
131
+
132
+ ## Example
133
+
134
+ The `examples/` directory of the GitHub repository contains `input_sample.csv` with five test companies to get you started:
135
+
136
+ ```bash
137
+ domain-mapper examples/input_sample.csv --output examples/results.csv --verify-dns
138
+ ```
139
+
140
+ ## Contributing
141
+
142
+ Want to add a new data source? The architecture makes it straightforward:
143
+
144
+ 1. Create a new module in `src/domain_mapper/sources/`
145
+ 2. Implement a class with a `get_subsidiaries(company_name)` method that returns a list of subsidiary objects
146
+ 3. Add it to the orchestrator in `mapper.py`
147
+ 4. Write tests in `tests/`
148
+
149
+ Some data sources we'd love to see contributed:
150
+
151
+ - Companies House (UK company registry)
152
+ - OpenCorporates API
153
+ - Crunchbase (acquisitions data)
154
+ - D&B corporate hierarchies
155
+
156
+ Pull requests welcome. Run `ruff check` and `black` before submitting, and make sure `pytest` passes.
157
+
158
+ ## Tech stack
159
+
160
+ - Python 3.10+
161
+ - `requests` and `beautifulsoup4` for web scraping
162
+ - `click` for the CLI
163
+ - `rich` for terminal output
164
+ - No paid APIs, no API keys required
165
+
166
+ ## Built by GTM Layer
167
+
168
+ [GTM Layer](https://gtmlayer.com) builds revenue systems for B2B sales teams. We work with companies on CRM architecture, enrichment pipelines, signal-driven outbound, and everything in between.
169
+
170
+ This tool came out of real client work - we built it to solve a problem we kept hitting on enterprise ABM engagements. If you're running into similar challenges, [get in touch](https://gtmlayer.com).
171
+
172
+ ## Licence
173
+
174
+ MIT - use it however you want.
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "enterprise-domain-mapper"
7
+ version = "0.1.0"
8
+ description = "Map enterprise corporate structures to enrichable domains"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ {name = "GTM Layer", email = "hello@gtmlayer.com"},
14
+ ]
15
+ keywords = ["enterprise", "enrichment", "abm", "domains", "subsidiaries", "sales"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Office/Business",
25
+ ]
26
+ dependencies = [
27
+ "requests>=2.31.0",
28
+ "beautifulsoup4>=4.12.0",
29
+ "click>=8.1.0",
30
+ "rich>=13.0.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest>=7.0",
36
+ "pytest-cov>=4.0",
37
+ "ruff>=0.1.0",
38
+ "black>=23.0",
39
+ "responses>=0.23.0",
40
+ ]
41
+
42
+ [project.scripts]
43
+ domain-mapper = "domain_mapper.cli:main"
44
+
45
+ [project.urls]
46
+ Homepage = "https://github.com/gtmlayer/enterprise-domain-mapper"
47
+ Repository = "https://github.com/gtmlayer/enterprise-domain-mapper"
48
+ Issues = "https://github.com/gtmlayer/enterprise-domain-mapper/issues"
49
+
50
+ [tool.setuptools.packages.find]
51
+ where = ["src"]
52
+
53
+ [tool.ruff]
54
+ line-length = 100
55
+ target-version = "py310"
56
+
57
+ [tool.ruff.lint]
58
+ select = ["E", "F", "I", "N", "W"]
59
+
60
+ [tool.black]
61
+ line-length = 100
62
+ target-version = ["py310"]
63
+
64
+ [tool.pytest.ini_options]
65
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """Enterprise Domain Mapper - Map corporate structures to enrichable domains."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,192 @@
1
+ """CLI interface for the Enterprise Domain Mapper."""
2
+
3
+ import csv
4
+ import logging
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+ from rich.console import Console
10
+ from rich.progress import Progress, SpinnerColumn, TextColumn
11
+ from rich.tree import Tree
12
+
13
+ from domain_mapper.mapper import DomainMapper
14
+ from domain_mapper.models import CompanyResult
15
+ from domain_mapper.output import write_clay_csv, write_detailed_csv
16
+
17
+ console = Console()
18
+
19
+ # Column name detection for CSV inputs
20
+ COMPANY_COLUMNS = ["company_name", "company", "name", "account", "account_name", "organization"]
21
+ DOMAIN_COLUMNS = ["domain", "website", "url", "parent_domain", "company_domain"]
22
+
23
+
24
+ def _detect_columns(headers: list[str]) -> tuple[str | None, str | None]:
25
+ """Auto-detect company name and domain columns from CSV headers."""
26
+ headers_lower = [h.lower().strip() for h in headers]
27
+
28
+ company_col = None
29
+ for candidate in COMPANY_COLUMNS:
30
+ if candidate in headers_lower:
31
+ company_col = headers[headers_lower.index(candidate)]
32
+ break
33
+
34
+ domain_col = None
35
+ for candidate in DOMAIN_COLUMNS:
36
+ if candidate in headers_lower:
37
+ domain_col = headers[headers_lower.index(candidate)]
38
+ break
39
+
40
+ return company_col, domain_col
41
+
42
+
43
def _print_tree(result: CompanyResult) -> None:
    """Print a rich tree representation of the mapping results.

    Renders one branch per entry in ``result.domains`` (subsidiary name,
    domain, source label and DNS status), followed by a one-line summary of
    subsidiary and domain counts.

    All text taken from ``result`` is escaped before being embedded in rich
    markup, so values containing square brackets are printed literally
    instead of being interpreted as style tags or raising ``MarkupError``.

    Args:
        result: Mapping result for a single company.
    """
    # Function-scope import: rich is already a dependency of this module and
    # only this helper needs the markup escaper.
    from rich.markup import escape

    tree = Tree(f"[bold]{escape(f'{result.company_name}')}[/bold]")

    for domain in result.domains:
        icon = "[green]✓[/green]" if domain.dns_verified else "[dim]·[/dim]"

        # Escape the bracketed label as a unit: a label such as "[wikipedia]"
        # would otherwise be parsed as a style tag and vanish from the output.
        source_label = escape(f"[{domain.domain_source}]")
        source_tag = f"[dim]{source_label}[/dim]"
        if domain.dns_verified:
            source_tag += " [green]✓ DNS verified[/green]"

        # Pad first, then escape, so the column width is computed on the
        # visible text rather than on any backslashes added by escaping.
        name_cell = escape(f"{domain.subsidiary_name:<30}")
        domain_cell = escape(f"{domain.domain:<25}")
        tree.add(f"{icon} [cyan]{name_cell}[/cyan] {domain_cell} {source_tag}")

    console.print(tree)

    # Summary line
    total = len(result.domains)
    confirmed = len(result.confirmed_domains)
    guessed = len(result.guessed_domains)
    verified = len(result.verified_domains)
    console.print(
        f"\n[dim]Found {len(result.subsidiaries)} subsidiaries, "
        f"{total} domains ({confirmed} confirmed, {guessed} guessed, "
        f"{verified} DNS verified)[/dim]"
    )
71
+
72
+
73
@click.command()
@click.argument("input", type=str)
@click.option("--output", "-o", type=click.Path(), help="Output CSV file path")
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["detailed", "clay"]),
    default="detailed",
    help="Output format: detailed (one row per domain) or clay (one row per company)",
)
@click.option("--verify-dns", is_flag=True, help="Verify guessed domains via DNS lookups")
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
def main(input: str, output: str | None, output_format: str, verify_dns: bool, verbose: bool):
    """Map enterprise companies to their subsidiary domains.

    INPUT can be a company name (e.g. "Boeing") or a CSV file path.
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # developer-facing notes live in comments instead.
    #
    # Flow: configure logging, build the mapper, run either batch (CSV) or
    # single-company mode, then optionally write the results to --output in
    # the requested format.
    if verbose:
        logging.basicConfig(level=logging.INFO, format="%(name)s: %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING)

    mapper = DomainMapper(verify_dns=verify_dns)
    results: list[CompanyResult] = []

    input_path = Path(input)
    if input_path.suffix.lower() == ".csv":
        # An argument ending in ".csv" is always meant as a file. Fail fast on
        # a missing path (or a directory) rather than silently falling through
        # and looking the file name up as if it were a company name.
        if not input_path.is_file():
            console.print(
                f"Error: CSV file not found: {input_path}", style="red", markup=False
            )
            sys.exit(1)

        # Batch mode: CSV input
        results = _process_csv(mapper, input_path, verify_dns)
    else:
        # Single company mode
        console.print(f"\n[bold]Mapping domains for:[/bold] {input}\n")
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Searching SEC EDGAR, Wikipedia, generating TLDs...", total=None)
            result = mapper.map_company(input)
            progress.update(task, completed=True)

        results = [result]
        _print_tree(result)

    # Write output file
    if output:
        output_path = Path(output)
        # newline="" lets the csv writers control line endings themselves.
        with open(output_path, "w", newline="", encoding="utf-8") as f:
            if output_format == "clay":
                write_clay_csv(results, f)
            else:
                write_detailed_csv(results, f)
        console.print(f"\n[green]✓[/green] Results written to {output_path}")
        total_domains = sum(len(r.domains) for r in results)
        console.print(f"[dim]  {len(results)} companies, {total_domains} domains[/dim]")
129
+
130
+
131
def _process_csv(mapper: DomainMapper, csv_path: Path, verify_dns: bool) -> list[CompanyResult]:
    """Process a CSV file of companies.

    Reads ``csv_path``, auto-detects the company-name column (and optionally a
    parent-domain column) from the header row, calls ``mapper.map_company``
    for every row with a non-empty company name, and prints a summary.

    Args:
        mapper: Mapper used to resolve each company.
        csv_path: Path to the input CSV file.
        verify_dns: When true, the summary also reports the DNS-verified count.

    Returns:
        One result per processed row, in file order. Rows with an empty
        company name are skipped.

    Exits the process with status 1 if the file has no header row or no
    recognisable company-name column.
    """
    results = []

    # "utf-8-sig" reads plain UTF-8 too, but also drops a leading byte-order
    # mark. Without it, a BOM-prefixed file (e.g. Excel's "CSV UTF-8" export)
    # yields a first header of "\ufeffcompany_name" and detection fails.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        if not reader.fieldnames:
            console.print("[red]Error: CSV file has no headers[/red]")
            sys.exit(1)

        company_col, domain_col = _detect_columns(list(reader.fieldnames))
        if not company_col:
            console.print(
                f"[red]Error: Could not detect company name column. "
                f"Expected one of: {', '.join(COMPANY_COLUMNS)}[/red]"
            )
            sys.exit(1)

        # Materialise the rows while the file is still open so the total is
        # known for the "[i/n]" progress labels.
        rows = list(reader)

    console.print(f"\n[bold]Processing {len(rows)} companies from {csv_path.name}[/bold]")
    if domain_col:
        console.print(f"[dim]Using '{company_col}' for names, '{domain_col}' for parent domains[/dim]")
    else:
        console.print(f"[dim]Using '{company_col}' for names (no domain column detected)[/dim]")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        for i, row in enumerate(rows):
            # DictReader fills fields missing from a short row with None, and
            # dict.get() only applies its default when the key is absent, so
            # coerce None to "" before stripping.
            company_name = (row.get(company_col) or "").strip()
            parent_domain = (row.get(domain_col) or "").strip() if domain_col else ""

            if not company_name:
                continue

            label = f"[{i + 1}/{len(rows)}] {company_name}"
            task = progress.add_task(f"{label}...", total=None)
            result = mapper.map_company(company_name, parent_domain)
            results.append(result)
            progress.update(
                task,
                completed=True,
                description=f"{label}: {len(result.domains)} domains",
            )

    # Print summary
    total_subs = sum(len(r.subsidiaries) for r in results)
    total_domains = sum(len(r.domains) for r in results)
    total_verified = sum(len(r.verified_domains) for r in results)

    console.print("\n[bold]Summary[/bold]")
    console.print(f"  Companies processed: {len(results)}")
    console.print(f"  Total subsidiaries found: {total_subs}")
    console.print(f"  Total domains mapped: {total_domains}")
    if verify_dns:
        console.print(f"  DNS verified: {total_verified}")

    return results
189
+
190
+
191
+ if __name__ == "__main__":
192
+ main()