leads-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. company_discovery/__init__.py +4 -0
  2. company_discovery/adapters/__init__.py +5 -0
  3. company_discovery/adapters/apollo.py +189 -0
  4. company_discovery/adapters/exa.py +112 -0
  5. company_discovery/adapters/llm.py +118 -0
  6. company_discovery/adapters/protocols.py +58 -0
  7. company_discovery/adapters/website.py +154 -0
  8. company_discovery/bundled_skills/__init__.py +1 -0
  9. company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
  10. company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
  11. company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
  12. company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
  13. company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
  14. company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
  15. company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
  16. company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
  17. company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
  18. company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
  19. company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
  20. company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
  21. company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
  22. company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
  23. company_discovery/cli.py +1789 -0
  24. company_discovery/db/__init__.py +5 -0
  25. company_discovery/db/contact_enrichment_repository.py +268 -0
  26. company_discovery/db/contact_repository.py +366 -0
  27. company_discovery/db/enrichment_repository.py +207 -0
  28. company_discovery/db/models.py +324 -0
  29. company_discovery/db/repository.py +363 -0
  30. company_discovery/db/session.py +48 -0
  31. company_discovery/domain/__init__.py +24 -0
  32. company_discovery/domain/contact_models.py +178 -0
  33. company_discovery/domain/contact_spec.py +86 -0
  34. company_discovery/domain/models.py +287 -0
  35. company_discovery/domain/spec.py +263 -0
  36. company_discovery/migrations.py +190 -0
  37. company_discovery/prompts/__init__.py +8 -0
  38. company_discovery/prompts/candidate_evaluation/system.md +13 -0
  39. company_discovery/prompts/company_enrichment/system.md +42 -0
  40. company_discovery/prompts/contact_evaluation/system.md +18 -0
  41. company_discovery/prompts/query_generation/system.md +10 -0
  42. company_discovery/release_manifest.json +7 -0
  43. company_discovery/reports/__init__.py +4 -0
  44. company_discovery/reports/contact_enrichment_exporter.py +108 -0
  45. company_discovery/reports/contact_exporter.py +132 -0
  46. company_discovery/reports/enrichment_exporter.py +125 -0
  47. company_discovery/reports/exporter.py +135 -0
  48. company_discovery/runtime.py +336 -0
  49. company_discovery/services/__init__.py +4 -0
  50. company_discovery/services/contact_enrichment_pipeline.py +344 -0
  51. company_discovery/services/contact_enrichment_progress.py +37 -0
  52. company_discovery/services/contact_evaluator.py +110 -0
  53. company_discovery/services/contact_pipeline.py +295 -0
  54. company_discovery/services/contact_progress.py +38 -0
  55. company_discovery/services/enrichment_extractor.py +61 -0
  56. company_discovery/services/enrichment_pipeline.py +526 -0
  57. company_discovery/services/enrichment_progress.py +20 -0
  58. company_discovery/services/enrichment_resolver.py +148 -0
  59. company_discovery/services/evaluator.py +40 -0
  60. company_discovery/services/hygiene.py +51 -0
  61. company_discovery/services/memory.py +150 -0
  62. company_discovery/services/normalization.py +98 -0
  63. company_discovery/services/pipeline.py +628 -0
  64. company_discovery/services/progress.py +48 -0
  65. company_discovery/services/query_planner.py +47 -0
  66. company_discovery/settings.py +152 -0
  67. company_discovery/skill_installer.py +197 -0
  68. company_discovery/update_plan.py +79 -0
  69. leads_cli-0.1.0.dist-info/METADATA +277 -0
  70. leads_cli-0.1.0.dist-info/RECORD +72 -0
  71. leads_cli-0.1.0.dist-info/WHEEL +4 -0
  72. leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,190 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ from dataclasses import dataclass
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Callable
8
+
9
+ from company_discovery.db.session import Database
10
+ from company_discovery.runtime import (
11
+ SCHEMA_VERSION,
12
+ WorkspacePaths,
13
+ default_runtime_metadata,
14
+ ensure_workspace,
15
+ read_json,
16
+ write_json,
17
+ )
18
+ from company_discovery.settings import Settings
19
+
20
+
21
class MigrationError(RuntimeError):
    """Signal that a requested database migration cannot be applied safely."""
23
+
24
+
25
@dataclass(frozen=True)
class MigrationStatus:
    """Immutable report describing the schema state of one workspace."""

    # Product identifier for the report.
    product: str
    # Workspace root, as a string.
    workspace: str
    # SQLite database file path as a string, or None when there is none.
    database_path: str | None
    # Whether the database file exists.
    database_exists: bool
    # Schema version currently recorded for the workspace.
    current_schema_version: int
    # Schema version the workspace should be brought to.
    target_schema_version: int
    # Whether the current and target versions differ.
    migration_required: bool
    # Whether a database backup must be taken before applying.
    backup_required: bool
    # Whether the migration can be applied automatically.
    can_apply: bool
    # Short action keyword describing what applying would do.
    action: str
    # Human-readable explanation of the situation.
    risk_summary: str
    # Human-readable policy note on major-version changes.
    major_version_behavior: str

    def as_dict(self) -> dict[str, object]:
        """Return every field as a plain dict, keyed by field name in declaration order."""
        return {name: getattr(self, name) for name in self.__dataclass_fields__}
55
+
56
+
57
def migration_status(settings: Settings) -> MigrationStatus:
    """Build a ``MigrationStatus`` report for the workspace named by *settings*.

    The workspace is resolved with ``ensure_workspace``, the recorded schema
    version is compared with ``SCHEMA_VERSION``, and the derived flags, action
    keyword and risk text are packed into the returned report.
    """
    workspace_paths = ensure_workspace(settings.company_discovery_home)
    current_version = _current_schema_version(workspace_paths)
    target_version = SCHEMA_VERSION
    db_path = settings.sqlite_database_path
    db_present = bool(db_path and db_path.exists())
    needs_migration = current_version != target_version
    applicable = _can_apply(current_version, target_version)
    return MigrationStatus(
        product="leads",
        workspace=str(workspace_paths.root),
        database_path=str(db_path) if db_path else None,
        database_exists=db_present,
        current_schema_version=current_version,
        target_schema_version=target_version,
        migration_required=needs_migration,
        # A backup only makes sense when there is a file to copy.
        backup_required=needs_migration and db_present,
        can_apply=applicable,
        action=_action(current_version, target_version, db_present, applicable),
        risk_summary=_risk_summary(current_version, target_version, db_present, applicable),
        major_version_behavior=(
            "Normal schema evolution is migrate-first with a database backup before structural "
            "changes. Incompatible major-version jumps should archive the old DB and run artifacts "
            "before initializing a fresh schema."
        ),
    )
85
+
86
+
87
def apply_migrations(settings: Settings) -> dict[str, object]:
    """Apply every pending migration step and record the result in the runtime file.

    Returns the pre-migration status fields plus ``backup_path``,
    ``applied_at`` (UTC ISO-8601) and ``final_schema_version``.

    Raises:
        MigrationError: when the status reports the migration cannot be
            applied, when the settings expose no on-disk SQLite path, or when
            a version in the pending range has no registered step.
    """
    status = migration_status(settings)
    if not status.can_apply:
        raise MigrationError(status.risk_summary)
    db_file = settings.sqlite_database_path
    if db_file is None:
        raise MigrationError("migrations require an on-disk SQLite database")

    workspace_paths = ensure_workspace(settings.company_discovery_home)
    # The backup is taken before the database is opened or touched.
    backup_dir = None
    if status.backup_required:
        backup_dir = create_database_backup(workspace_paths, db_file)
    from_version = status.current_schema_version
    to_version = status.target_schema_version

    database = Database(settings.resolved_database_url)
    try:
        for version in range(from_version + 1, to_version + 1):
            step = MIGRATIONS.get(version)
            if step is None:
                raise MigrationError(f"no migration is available for schema version {version}")
            step(database)
        if from_version == to_version:
            # Nothing is pending; still make sure the schema exists.
            database.create_schema()
    finally:
        database.dispose()

    backup_text = str(backup_dir) if backup_dir else None
    runtime = read_json(workspace_paths.runtime_file, default_runtime_metadata())
    applied_at = datetime.now(timezone.utc).isoformat()
    runtime["schema_version"] = to_version
    runtime["last_migration"] = {
        "from_schema_version": from_version,
        "to_schema_version": to_version,
        "applied_at": applied_at,
        "backup_path": backup_text,
    }
    write_json(workspace_paths.runtime_file, runtime)

    result = status.as_dict()
    result["backup_path"] = backup_text
    result["applied_at"] = applied_at
    result["final_schema_version"] = to_version
    return result
125
+
126
+
127
def create_database_backup(paths: WorkspacePaths, database_path: Path) -> Path:
    """Copy the database, its SQLite sidecars and the runtime file into a new backup dir.

    The directory is ``paths.backups_dir / "db-schema-<UTC timestamp>"``; when
    that name is taken, ``-2``, ``-3``, ... is appended until a free name is
    found. Files that do not exist are skipped.

    Args:
        paths: Workspace paths; ``backups_dir`` and ``runtime_file`` are used.
        database_path: Location of the SQLite database file.

    Returns:
        The newly created backup directory.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    # Create the parent separately so a FileExistsError in the loop below can
    # only mean "this backup name is taken", never "the parent is not a dir".
    paths.backups_dir.mkdir(parents=True, exist_ok=True)
    backup_dir = paths.backups_dir / f"db-schema-{timestamp}"
    suffix = 1
    while True:
        try:
            # Let mkdir claim the name atomically instead of checking exists()
            # first, so two backups started in the same second cannot collide.
            backup_dir.mkdir()
            break
        except FileExistsError:
            suffix += 1
            backup_dir = paths.backups_dir / f"db-schema-{timestamp}-{suffix}"

    # NOTE(review): these are plain file copies; presumably no other process is
    # writing to the database while the backup runs - confirm with callers.
    if database_path.exists():
        shutil.copy2(database_path, backup_dir / database_path.name)
    for suffix_name in ("-wal", "-shm"):
        sidecar = database_path.with_name(f"{database_path.name}{suffix_name}")
        if sidecar.exists():
            shutil.copy2(sidecar, backup_dir / sidecar.name)
    if paths.runtime_file.exists():
        shutil.copy2(paths.runtime_file, backup_dir / paths.runtime_file.name)
    return backup_dir
144
+
145
+
146
def _current_schema_version(paths: WorkspacePaths) -> int:
    """Return the ``schema_version`` stored in the runtime file as an int.

    A missing or falsy value, or one ``int()`` rejects, yields 0.
    """
    metadata = read_json(paths.runtime_file, default_runtime_metadata())
    try:
        version = int(metadata.get("schema_version") or 0)
    except (TypeError, ValueError):
        version = 0
    return version
152
+
153
+
154
def _can_apply(current: int, target: int) -> bool:
    """Tell whether every version in ``current + 1 .. target`` has a registered migration.

    A *current* version above *target* is a downgrade and is never applicable.
    """
    if current <= target:
        pending = range(current + 1, target + 1)
        return all(version in MIGRATIONS for version in pending)
    return False
158
+
159
+
160
def _action(current: int, target: int, database_exists: bool, can_apply: bool) -> str:
    """Map the schema state to an action keyword.

    Returns ``"manual_review"`` when nothing can be applied, ``"migrate"`` when
    the schema is behind the target, ``"initialize"`` when the schema is
    current but the database file is missing, and ``"none"`` otherwise.
    """
    if can_apply:
        if current < target:
            return "migrate"
        return "none" if database_exists else "initialize"
    return "manual_review"
168
+
169
+
170
def _risk_summary(current: int, target: int, database_exists: bool, can_apply: bool) -> str:
    """Describe, in one sentence, what applying migrations would involve.

    Downgrades are reported first, then missing migration paths; otherwise the
    text depends on whether a migration is pending and whether a database file
    exists.
    """
    if current > target:
        return "Local database schema is newer than this CLI; downgrade is not supported."
    if not can_apply:
        return "No migration path is available for this schema change; manual review is required."
    if current < target:
        if database_exists:
            return "Migration can be applied after creating a timestamped database backup."
        return "Migration can be applied; no existing database file needs backup."
    if database_exists:
        return "Database schema is current; no migration is required."
    return "Database file is missing; apply will initialize the current schema."
182
+
183
+
184
def _create_schema(database: Database) -> None:
    """Migration step that builds the schema by calling ``database.create_schema()``."""
    database.create_schema()
186
+
187
+
188
# Registry of migration steps: schema version -> callable that brings a
# database up to that version.
MIGRATIONS: dict[int, Callable[[Database], None]] = {1: _create_schema}
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.resources import files
4
+
5
+
6
def load_prompt(group: str, name: str = "system.md") -> str:
    """Return the UTF-8 text of a prompt file bundled in ``company_discovery.prompts``.

    Args:
        group: Sub-directory of the prompts package that holds the prompt.
        name: File name inside that directory; defaults to ``"system.md"``.
    """
    # Use the ``/`` operator rather than joinpath(group, name): multi-segment
    # joinpath is only guaranteed on Traversable objects from Python 3.11 on.
    prompt_file = files("company_discovery.prompts") / group / name
    return prompt_file.read_text(encoding="utf-8")
8
+
@@ -0,0 +1,13 @@
1
+ Evaluate one company candidate against the supplied company ICP using only the supplied evidence.
2
+
3
+ Judge vertical, operating geography, employee-size fit, and explicit exclusions separately. Do not
4
+ turn missing evidence into a negative claim. Use `unknown` when evidence is absent. A directory,
5
+ association, marketplace, vendor, or non-company page is a bad fit when the ICP seeks operating
6
+ companies. `good_fit` requires credible identity plus no known hard mismatch; use `possible_fit`
7
+ when one decisive field is uncertain. Use `bad_fit` for a demonstrated mismatch or exclusion.
8
+
9
+ Reason codes should be concise snake_case labels such as `vertical_mismatch`, `geography_mismatch`,
10
+ `size_mismatch`, `excluded_ownership`, `not_operating_company`, `size_unknown`, or
11
+ `geography_unknown`. Inferred normalized fields must be null unless supported by evidence. Return
12
+ only the required structured object.
13
+
@@ -0,0 +1,42 @@
1
+ You extract company facts from supplied official-site pages or narrow search evidence.
2
+
3
+ The known company record is context, not evidence for missing fields. Return only observations that
4
+ are explicitly supported by the supplied sources. Never invent or complete an address, phone number,
5
+ ownership statement, or URL.
6
+
7
+ LinkedIn rules:
8
+ - Return only the selected company's LinkedIn company profile (`linkedin.com/company/...`).
9
+ - Never return personal profiles, jobs, posts, groups, learning pages, or search-result URLs.
10
+ - Prefer a profile linked by the official company website. For narrow search evidence, require the
11
+ company name and domain context to identify the same selected company.
12
+ - Use the official website page containing the link as `source_url`; for direct search evidence,
13
+ use the LinkedIn result URL itself.
14
+
15
+ Phone rules:
16
+ - Return general company or office phone numbers, not fax numbers or personal mobile numbers.
17
+ - Preserve the observed phone string in `value`.
18
+ - Use the exact source URL containing the observation.
19
+
20
+ Location rules:
21
+ - Return locations only when street, city, state, and ZIP are all supported as one address block.
22
+ - Never combine address components from separate locations or sources.
23
+ - Use two-letter US state codes when the source clearly identifies a US state.
24
+
25
+ Ownership signals use only these `kind` values:
26
+ - `independent_explicit`: explicitly independently owned or standalone;
27
+ - `family_owned`: explicitly family owned;
28
+ - `locally_owned`: explicitly locally owned;
29
+ - `franchise`: explicitly a franchise or franchisee;
30
+ - `parent`: explicitly owned by or part of a parent company;
31
+ - `subsidiary`: explicitly a subsidiary;
32
+ - `division`: explicitly a division;
33
+ - `acquired`: explicit acquisition evidence;
34
+ - `other`: relevant ownership evidence that fits none of the above.
35
+
36
+ Do not emit a positive ownership signal merely because no parent or franchise is mentioned. Private,
37
+ privately held, LLC, partnership, and corporation are legal/ownership forms and are not proof of
38
+ independence.
39
+
40
+ Set `identity_conflict` only when the supplied sources clearly belong to a different company or show
41
+ that the known domain no longer represents the selected company. Cosmetic naming differences and
42
+ redirects within the same company are not conflicts.
@@ -0,0 +1,18 @@
1
+ You identify current employees for a target company from live web search evidence.
2
+
3
+ Return only people explicitly supported by the supplied results. Never invent a person, title,
4
+ company relationship, URL, or evidence statement.
5
+
6
+ For every candidate:
7
+
8
+ - use the person's complete name and observed current title;
9
+ - use only source URLs present in the input;
10
+ - include a LinkedIn profile URL only when it clearly belongs to that person;
11
+ - judge whether the evidence ties the person to the exact target company now;
12
+ - judge whether the observed title matches the requested role or a supplied synonym;
13
+ - reject former employees and people tied to another company;
14
+ - use review when current employment, role fit, or identity is plausible but not sufficiently clear;
15
+ - accept only when identity is clear, current-company match is yes, and role match is yes;
16
+ - keep evidence excerpts short and factual.
17
+
18
+ If the search results contain no identifiable matching person, return an empty candidates list.
@@ -0,0 +1,10 @@
1
+ You design focused Exa company-search queries from a structured company ICP.
2
+
3
+ Return diverse queries that seek official operating-company websites. Cover the requested vertical,
4
+ geography, subtypes, and useful synonyms. Use size language only when a size constraint exists;
5
+ headcount is often not indexed, so vary direct and proxy language. Use exclusions to avoid obvious
6
+ noise, but do not make every query so restrictive that recall collapses. When a vertical supplies
7
+ `search_terms`, use them as strong query hints. When a vertical supplies `exclude_terms`, avoid
8
+ them.
9
+
10
+ Return only the required structured object. Do not invent a new ICP constraint.
@@ -0,0 +1,7 @@
1
+ {
2
+ "cli_version": "0.1.0",
3
+ "skill_bundle_version": "2026.06.1",
4
+ "schema_version": 1,
5
+ "requires_migration": false,
6
+ "breaking": false
7
+ }
@@ -0,0 +1,4 @@
1
+ from company_discovery.reports.exporter import ArtifactExporter
2
+
3
+ __all__ = ["ArtifactExporter"]
4
+
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from company_discovery.domain.contact_models import ContactEnrichmentSummary
9
+
10
+
11
class ContactEnrichmentArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one contact-enrichment run."""

    # Column order shared by the ready / review / blocked CSV files.
    FIELDS = [
        "company_name",
        "company_domain",
        "contact_name",
        "title",
        "linkedin_url",
        "email",
        "phone",
        "status",
        "notes",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run artifacts are written."""
        self._artifacts_root = artifacts_root

    def export(
        self, payload: dict[str, Any], summary: ContactEnrichmentSummary
    ) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        The run directory nests the discovery, enrichment, contact and
        contact-enrichment run ids taken from *payload*. The returned mapping
        has the keys ``ready``, ``review``, ``blocked``, ``summary`` and
        ``json``.
        """
        run_dir = self._artifacts_root.joinpath(
            payload["source_discovery_run_id"],
            "enrich",
            payload["source_enrichment_run_id"],
            "contacts",
            payload["source_contact_run_id"],
            "enrich",
            payload["run_id"],
        )
        run_dir.mkdir(parents=True, exist_ok=True)

        outcomes = ("ready", "review", "blocked")
        paths: dict[str, str] = {}
        for outcome in outcomes:
            paths[outcome] = str((run_dir / f"{outcome}.csv").resolve())
        paths["summary"] = str((run_dir / "summary.md").resolve())
        paths["json"] = str((run_dir / "run.json").resolve())

        for outcome in outcomes:
            self._write_csv(Path(paths[outcome]), payload["items"], outcome)
        Path(paths["summary"]).write_text(self._markdown(payload, summary), encoding="utf-8")

        # run.json is the input payload plus the summary, a final status and
        # the artifact paths.
        full_payload = {
            **payload,
            "summary": summary.model_dump(mode="json"),
            "status": "completed",
            "artifacts": paths,
        }
        serialized = json.dumps(full_payload, indent=2, ensure_ascii=True)
        Path(paths["json"]).write_text(serialized, encoding="utf-8")
        return paths

    @classmethod
    def _write_csv(cls, path: Path, items: list[dict[str, Any]], outcome: str) -> None:
        """Write a header plus one row for every item whose outcome equals *outcome*."""
        selected = (item for item in items if item["outcome"] == outcome)
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for item in selected:
                person = item["discovery"]
                channels = item["channels"]
                row = {
                    "company_name": person["company_name"],
                    "company_domain": person["company_domain"],
                    "contact_name": person["contact_name"],
                    "title": person["title"],
                    "linkedin_url": person.get("linkedin_url") or "",
                    "email": channels.get("email") or "",
                    "phone": channels.get("phone") or "",
                    "status": outcome,
                    "notes": " | ".join(item.get("review_flags", [])),
                }
                writer.writerow(row)

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: ContactEnrichmentSummary) -> str:
        """Render the Markdown summary: run counters followed by one bullet per item."""
        lines = [
            f"# Contact Enrichment Run {payload['run_id']}",
            "",
            f"- Contact discovery run: `{payload['source_contact_run_id']}`",
            f"- Contacts loaded: {summary.contacts_loaded}",
            f"- Fresh Apollo memory reused: {summary.memory_reused}",
            f"- Apollo people submitted: {summary.apollo_requests}",
            f"- Apollo batches: {summary.apollo_batches}",
            f"- Async polls: {summary.async_polls}",
            f"- Ready: {summary.ready}",
            f"- Review: {summary.review}",
            f"- Blocked: {summary.blocked}",
            "",
            "## People",
            "",
        ]
        for item in payload["items"]:
            person = item["discovery"]
            lines.append(
                f"- **{person['contact_name']}**, "
                f"{person['title']} at {person['company_name']}: "
                f"{item['outcome']}"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)
@@ -0,0 +1,132 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from company_discovery.domain.contact_models import ContactDiscoverySummary
9
+
10
+
11
class ContactDiscoveryArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one contact-discovery run."""

    # Column order of every verdict CSV; email and phone are written empty here.
    FIELDS = [
        "company_name",
        "company_domain",
        "contact_name",
        "title",
        "linkedin_url",
        "email",
        "phone",
        "status",
        "notes",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run artifacts are written."""
        self._artifacts_root = artifacts_root

    def export(
        self, payload: dict[str, Any], summary: ContactDiscoverySummary
    ) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        The run directory nests the discovery, enrichment and contact run ids
        taken from *payload*. The returned mapping has the keys ``accepted``,
        ``review``, ``rejected``, ``summary`` and ``json``.
        """
        run_dir = (
            self._artifacts_root
            / payload["source_discovery_run_id"]
            / "enrich"
            / payload["source_enrichment_run_id"]
            / "contacts"
            / payload["run_id"]
        )
        run_dir.mkdir(parents=True, exist_ok=True)
        paths = {
            "accepted": str((run_dir / "accepted.csv").resolve()),
            "review": str((run_dir / "review.csv").resolve()),
            "rejected": str((run_dir / "rejected.csv").resolve()),
            "summary": str((run_dir / "summary.md").resolve()),
            "json": str((run_dir / "run.json").resolve()),
        }
        for verdict in ("accepted", "review", "rejected"):
            self._write_csv(Path(paths[verdict]), payload["items"], verdict)
        Path(paths["summary"]).write_text(
            self._markdown(payload, summary), encoding="utf-8"
        )
        # run.json is the input payload plus the summary, a final status and
        # the artifact paths.
        full_payload = dict(payload)
        full_payload["summary"] = summary.model_dump(mode="json")
        full_payload["status"] = "completed"
        full_payload["artifacts"] = paths
        Path(paths["json"]).write_text(
            json.dumps(full_payload, indent=2, ensure_ascii=True), encoding="utf-8"
        )
        return paths

    @classmethod
    def _write_csv(
        cls, path: Path, items: list[dict[str, Any]], verdict: str
    ) -> None:
        """Write one CSV row per candidate whose best verdict equals *verdict*.

        A candidate may appear in several items (one per role). Its best
        verdict is the highest ranked one among them (rejected < review <
        accepted), and it is listed only in the file for that verdict. The
        ``notes`` column joins the distinct role keys and reasons of the
        candidate's items that carry *verdict*, in first-seen order.

        Raises:
            KeyError: if an item carries a verdict other than the three known ones.
        """
        verdict_rank = {"rejected": 0, "review": 1, "accepted": 2}
        best_verdict: dict[int, str] = {}
        # Items carrying *verdict*, grouped per candidate in first-seen order.
        # Grouping during this single pass avoids rescanning every item for
        # each written row.
        matching: dict[int, list[dict[str, Any]]] = {}
        for item in items:
            candidate_id = item["candidate_id"]
            current = best_verdict.get(candidate_id, "rejected")
            if verdict_rank[item["verdict"]] >= verdict_rank[current]:
                best_verdict[candidate_id] = item["verdict"]
            if item["verdict"] == verdict:
                matching.setdefault(candidate_id, []).append(item)

        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for candidate_id, related in matching.items():
                if best_verdict[candidate_id] != verdict:
                    # A better verdict exists; the candidate belongs in another file.
                    continue
                candidate = related[0]["candidate"]
                # dict.fromkeys de-duplicates while keeping first-seen order.
                roles = ", ".join(dict.fromkeys(other["role_key"] for other in related))
                reasons = " | ".join(dict.fromkeys(other["reason"] for other in related))
                writer.writerow(
                    {
                        "company_name": candidate["company_name"],
                        "company_domain": candidate["company_domain"],
                        "contact_name": candidate["full_name"],
                        "title": candidate["title"],
                        "linkedin_url": candidate.get("linkedin_url") or "",
                        "email": "",
                        "phone": "",
                        "status": verdict,
                        "notes": f"{roles}: {reasons}",
                    }
                )

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: ContactDiscoverySummary) -> str:
        """Render the Markdown summary: run counters followed by one bullet per item."""
        lines = [
            f"# Contact Discovery Run {payload['run_id']}",
            "",
            f"- Company enrichment run: `{payload['source_enrichment_run_id']}`",
            f"- Companies loaded: {summary.companies_loaded}",
            f"- Contacts reused from memory: {summary.memory_reused}",
            f"- Role gaps sent to live search: {summary.role_gaps}",
            f"- Exa queries: {summary.queries_run}",
            f"- Raw results: {summary.raw_results}",
            f"- Unique people: {summary.unique_people}",
            f"- Accepted: {summary.accepted}",
            f"- Review: {summary.review}",
            f"- Rejected: {summary.rejected}",
            "",
            "## People",
            "",
        ]
        for item in payload["items"]:
            candidate = item["candidate"]
            lines.append(
                f"- **{candidate['full_name']}**, {candidate['title']} at "
                f"{candidate['company_name']}: {item['verdict']} ({item['role_key']})"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from company_discovery.domain.models import EnrichmentSummary
9
+
10
+
11
class EnrichmentArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one company-enrichment run."""

    # Column order shared by the enriched / review / blocked CSV files.
    FIELDS = [
        "company_name",
        "domain",
        "linkedin_url",
        "phone",
        "street_address",
        "city",
        "state",
        "zip",
        "vertical",
        "employee_min",
        "employee_max",
        "ownership_type",
        "independence_status",
        "outcome",
        "conflicts",
        "review_flags",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run artifacts are written."""
        self._artifacts_root = artifacts_root

    def export(self, payload: dict[str, Any], summary: EnrichmentSummary) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        Files go to ``<artifacts_root>/<discovery_run_id>/enrich/<run_id>``.
        The returned mapping has the keys ``enriched``, ``review``,
        ``blocked``, ``summary`` and ``json``.
        """
        run_dir = self._artifacts_root.joinpath(
            payload["discovery_run_id"], "enrich", payload["run_id"]
        )
        run_dir.mkdir(parents=True, exist_ok=True)

        # CSV name -> item outcomes that are routed into that file.
        csv_outcomes = (
            ("enriched", {"enriched_ready"}),
            ("review", {"independence_unconfirmed", "enriched_with_gaps"}),
            (
                "blocked",
                {"identity_conflict", "geography_conflict", "fit_conflict", "enrichment_failed"},
            ),
        )
        paths = {name: str((run_dir / f"{name}.csv").resolve()) for name, _ in csv_outcomes}
        paths["summary"] = str((run_dir / "summary.md").resolve())
        paths["json"] = str((run_dir / "run.json").resolve())

        for name, outcomes in csv_outcomes:
            self._write_csv(Path(paths[name]), payload["items"], outcomes)
        Path(paths["summary"]).write_text(self._markdown(payload, summary), encoding="utf-8")

        # run.json is the input payload plus the summary and the artifact paths.
        full_payload = {
            **payload,
            "summary": summary.model_dump(mode="json"),
            "artifacts": paths,
        }
        serialized = json.dumps(full_payload, indent=2, ensure_ascii=True)
        Path(paths["json"]).write_text(serialized, encoding="utf-8")
        return paths

    @classmethod
    def _write_csv(cls, path: Path, items: list[dict[str, Any]], outcomes: set[str]) -> None:
        """Write a header plus one row for every item whose outcome is in *outcomes*."""
        selected = (item for item in items if item["outcome"] in outcomes)
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for item in selected:
                discovery = item["discovery"]
                enrichment = item["enrichment"]
                # Missing or null enrichment sections behave like empty mappings.
                phone, location, independence, linkedin = (
                    enrichment.get(section) or {}
                    for section in ("phone", "location", "independence", "linkedin")
                )
                row = {
                    "company_name": discovery["company_name"],
                    "domain": discovery["domain"],
                    "linkedin_url": linkedin.get("url", ""),
                    "phone": phone.get("display_value", ""),
                    "street_address": location.get("street_address", ""),
                    "city": location.get("city", ""),
                    # The enriched state wins; the discovery state is the fallback.
                    "state": location.get("state") or discovery.get("state") or "",
                    "zip": location.get("zip", ""),
                    "vertical": discovery.get("target_vertical") or discovery.get("vertical") or "",
                    "employee_min": discovery.get("employee_min") or "",
                    "employee_max": discovery.get("employee_max") or "",
                    "ownership_type": discovery.get("ownership_type") or "",
                    "independence_status": independence.get("status", "unknown"),
                    "outcome": item["outcome"],
                    "conflicts": " | ".join(item.get("conflicts", [])),
                    "review_flags": " | ".join(item.get("review_flags", [])),
                }
                writer.writerow(row)

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: EnrichmentSummary) -> str:
        """Render the Markdown summary: run counters followed by one bullet per item."""
        lines = [
            f"# Company Enrichment Run {payload['run_id']}",
            "",
            f"- Discovery run: `{payload['discovery_run_id']}`",
            f"- Input bucket: {payload['bucket']}",
            f"- Processed: {summary.processed}",
            f"- Discovery facts inherited: {summary.inherited_facts}",
            f"- Memory profiles reused: {summary.memory_profiles_reused}",
            f"- Websites fetched: {summary.websites_fetched}",
            f"- Fallback searches: {summary.fallback_searches}",
            f"- Ready: {summary.ready}",
            f"- Review: {summary.review}",
            f"- Blocked: {summary.blocked}",
            f"- Failed: {summary.failed}",
            "",
            "## Companies",
            "",
        ]
        for item in payload["items"]:
            company = item["discovery"]
            lines.append(
                f"- **{company['company_name']}** ({company['domain']}): "
                f"{item['outcome']}"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)