leads-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. company_discovery/__init__.py +4 -0
  2. company_discovery/adapters/__init__.py +5 -0
  3. company_discovery/adapters/apollo.py +189 -0
  4. company_discovery/adapters/exa.py +112 -0
  5. company_discovery/adapters/llm.py +118 -0
  6. company_discovery/adapters/protocols.py +58 -0
  7. company_discovery/adapters/website.py +154 -0
  8. company_discovery/bundled_skills/__init__.py +1 -0
  9. company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
  10. company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
  11. company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
  12. company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
  13. company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
  14. company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
  15. company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
  16. company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
  17. company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
  18. company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
  19. company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
  20. company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
  21. company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
  22. company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
  23. company_discovery/cli.py +1789 -0
  24. company_discovery/db/__init__.py +5 -0
  25. company_discovery/db/contact_enrichment_repository.py +268 -0
  26. company_discovery/db/contact_repository.py +366 -0
  27. company_discovery/db/enrichment_repository.py +207 -0
  28. company_discovery/db/models.py +324 -0
  29. company_discovery/db/repository.py +363 -0
  30. company_discovery/db/session.py +48 -0
  31. company_discovery/domain/__init__.py +24 -0
  32. company_discovery/domain/contact_models.py +178 -0
  33. company_discovery/domain/contact_spec.py +86 -0
  34. company_discovery/domain/models.py +287 -0
  35. company_discovery/domain/spec.py +263 -0
  36. company_discovery/migrations.py +190 -0
  37. company_discovery/prompts/__init__.py +8 -0
  38. company_discovery/prompts/candidate_evaluation/system.md +13 -0
  39. company_discovery/prompts/company_enrichment/system.md +42 -0
  40. company_discovery/prompts/contact_evaluation/system.md +18 -0
  41. company_discovery/prompts/query_generation/system.md +10 -0
  42. company_discovery/release_manifest.json +7 -0
  43. company_discovery/reports/__init__.py +4 -0
  44. company_discovery/reports/contact_enrichment_exporter.py +108 -0
  45. company_discovery/reports/contact_exporter.py +132 -0
  46. company_discovery/reports/enrichment_exporter.py +125 -0
  47. company_discovery/reports/exporter.py +135 -0
  48. company_discovery/runtime.py +336 -0
  49. company_discovery/services/__init__.py +4 -0
  50. company_discovery/services/contact_enrichment_pipeline.py +344 -0
  51. company_discovery/services/contact_enrichment_progress.py +37 -0
  52. company_discovery/services/contact_evaluator.py +110 -0
  53. company_discovery/services/contact_pipeline.py +295 -0
  54. company_discovery/services/contact_progress.py +38 -0
  55. company_discovery/services/enrichment_extractor.py +61 -0
  56. company_discovery/services/enrichment_pipeline.py +526 -0
  57. company_discovery/services/enrichment_progress.py +20 -0
  58. company_discovery/services/enrichment_resolver.py +148 -0
  59. company_discovery/services/evaluator.py +40 -0
  60. company_discovery/services/hygiene.py +51 -0
  61. company_discovery/services/memory.py +150 -0
  62. company_discovery/services/normalization.py +98 -0
  63. company_discovery/services/pipeline.py +628 -0
  64. company_discovery/services/progress.py +48 -0
  65. company_discovery/services/query_planner.py +47 -0
  66. company_discovery/settings.py +152 -0
  67. company_discovery/skill_installer.py +197 -0
  68. company_discovery/update_plan.py +79 -0
  69. leads_cli-0.1.0.dist-info/METADATA +277 -0
  70. leads_cli-0.1.0.dist-info/RECORD +72 -0
  71. leads_cli-0.1.0.dist-info/WHEEL +4 -0
  72. leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,5 @@
1
+ from company_discovery.db.repository import DiscoveryRepository
2
+ from company_discovery.db.session import Database
3
+
4
+ __all__ = ["Database", "DiscoveryRepository"]
5
+
@@ -0,0 +1,268 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime, timedelta
4
+ from typing import Any
5
+ from uuid import uuid4
6
+
7
+ from sqlalchemy import select
8
+ from sqlalchemy.exc import IntegrityError
9
+ from sqlalchemy.orm import joinedload
10
+
11
+ from company_discovery.db.models import (
12
+ ContactCandidateRow,
13
+ ContactDiscoveryRunRow,
14
+ ContactEnrichmentFactRow,
15
+ ContactEnrichmentItemRow,
16
+ ContactEnrichmentRunRow,
17
+ ContactEvaluationRow,
18
+ EnrichmentRunRow,
19
+ )
20
+ from company_discovery.db.session import Database
21
+ from company_discovery.domain.contact_models import (
22
+ ContactChannelProfile,
23
+ ContactEnrichmentItem,
24
+ ContactEnrichmentOutcome,
25
+ ContactEnrichmentSummary,
26
+ )
27
+
28
+
29
class ContactEnrichmentRunNotFoundError(LookupError):
    """Raised when a run referenced by the contact-enrichment repository is missing."""
31
+
32
+
33
+ class ContactEnrichmentRepository:
34
+ RUN_ID_PREFIX = "contact-enrich-"
35
+ CREATE_RUN_ATTEMPTS = 5
36
+
37
+ def __init__(self, database: Database) -> None:
38
+ self.database = database
39
+
40
+ def accepted_contacts(self, contact_run_id: str) -> list[dict[str, Any]]:
41
+ with self.database.session() as session:
42
+ run = session.get(ContactDiscoveryRunRow, contact_run_id)
43
+ if run is None:
44
+ raise ContactEnrichmentRunNotFoundError(
45
+ f"contact discovery run not found: {contact_run_id}"
46
+ )
47
+ if run.status != "completed":
48
+ raise ValueError(f"contact discovery run {contact_run_id} is {run.status}, not completed")
49
+ rows = session.execute(
50
+ select(ContactEvaluationRow, ContactCandidateRow)
51
+ .join(ContactCandidateRow)
52
+ .where(
53
+ ContactEvaluationRow.run_id == contact_run_id,
54
+ ContactEvaluationRow.verdict == "accepted",
55
+ )
56
+ .order_by(ContactEvaluationRow.id)
57
+ ).all()
58
+ contacts: list[dict[str, Any]] = []
59
+ seen: set[int] = set()
60
+ for evaluation, candidate in rows:
61
+ if candidate.id in seen:
62
+ continue
63
+ seen.add(candidate.id)
64
+ roles = [
65
+ row.role_key
66
+ for row in session.scalars(
67
+ select(ContactEvaluationRow).where(
68
+ ContactEvaluationRow.run_id == contact_run_id,
69
+ ContactEvaluationRow.candidate_id == candidate.id,
70
+ ContactEvaluationRow.verdict == "accepted",
71
+ )
72
+ ).all()
73
+ ]
74
+ contacts.append(
75
+ {
76
+ "candidate_id": candidate.id,
77
+ "company_name": candidate.company_name,
78
+ "company_domain": candidate.company_domain,
79
+ "full_name": candidate.full_name,
80
+ "normalized_name": candidate.normalized_name,
81
+ "title": candidate.title,
82
+ "linkedin_url": candidate.linkedin_url,
83
+ "role_keys": list(dict.fromkeys(roles)),
84
+ "discovery_reason": evaluation.reason,
85
+ "source_urls": candidate.source_urls,
86
+ }
87
+ )
88
+ return contacts
89
+
90
+ def create_run(self, contact_run_id: str, options: dict[str, Any]) -> str:
91
+ for _ in range(self.CREATE_RUN_ATTEMPTS):
92
+ try:
93
+ with self.database.session() as session:
94
+ if session.get(ContactDiscoveryRunRow, contact_run_id) is None:
95
+ raise ContactEnrichmentRunNotFoundError(
96
+ f"contact discovery run not found: {contact_run_id}"
97
+ )
98
+ run_id = self._new_run_id()
99
+ session.add(
100
+ ContactEnrichmentRunRow(
101
+ id=run_id,
102
+ contact_discovery_run_id=contact_run_id,
103
+ options_payload=options,
104
+ )
105
+ )
106
+ return run_id
107
+ except IntegrityError:
108
+ continue
109
+ raise RuntimeError("unable to allocate a unique contact enrichment run id")
110
+
111
+ def fresh_item(self, candidate_id: int, freshness_days: int) -> ContactEnrichmentItem | None:
112
+ cutoff = datetime.now(UTC) - timedelta(days=freshness_days)
113
+ with self.database.session() as session:
114
+ fact = session.scalar(
115
+ select(ContactEnrichmentFactRow)
116
+ .where(
117
+ ContactEnrichmentFactRow.candidate_id == candidate_id,
118
+ ContactEnrichmentFactRow.observed_at >= cutoff,
119
+ )
120
+ .order_by(ContactEnrichmentFactRow.observed_at.desc())
121
+ .limit(1)
122
+ )
123
+ if fact is None:
124
+ return None
125
+ candidate = session.get(ContactCandidateRow, candidate_id)
126
+ if candidate is None:
127
+ return None
128
+ discovery = {
129
+ "company_name": candidate.company_name,
130
+ "company_domain": candidate.company_domain,
131
+ "contact_name": candidate.full_name,
132
+ "title": candidate.title,
133
+ "linkedin_url": candidate.linkedin_url,
134
+ "role_keys": [],
135
+ "source_urls": candidate.source_urls,
136
+ }
137
+ return ContactEnrichmentItem(
138
+ candidate_id=candidate_id,
139
+ discovery=discovery,
140
+ channels=ContactChannelProfile.model_validate(fact.channels_payload),
141
+ outcome=ContactEnrichmentOutcome(fact.outcome),
142
+ review_flags=fact.review_flags,
143
+ trace=[{"stage": "memory", "fact_id": fact.id}],
144
+ )
145
+
146
+ def save_item(self, run_id: str, item: ContactEnrichmentItem) -> None:
147
+ with self.database.session() as session:
148
+ self._require_run(session, run_id)
149
+ session.add(
150
+ ContactEnrichmentItemRow(
151
+ run_id=run_id,
152
+ candidate_id=item.candidate_id,
153
+ discovery_snapshot=item.discovery,
154
+ channels_payload=item.channels.model_dump(mode="json"),
155
+ outcome=item.outcome.value,
156
+ review_flags=item.review_flags,
157
+ trace_payload=item.trace,
158
+ )
159
+ )
160
+ session.add(
161
+ ContactEnrichmentFactRow(
162
+ candidate_id=item.candidate_id,
163
+ enrichment_run_id=run_id,
164
+ channels_payload=item.channels.model_dump(mode="json"),
165
+ outcome=item.outcome.value,
166
+ review_flags=item.review_flags,
167
+ observed_at=item.channels.observed_at,
168
+ )
169
+ )
170
+
171
+ def complete_run(
172
+ self, run_id: str, summary: ContactEnrichmentSummary, paths: dict[str, str]
173
+ ) -> None:
174
+ with self.database.session() as session:
175
+ run = self._require_run(session, run_id)
176
+ run.status = "completed"
177
+ run.summary_payload = summary.model_dump(mode="json")
178
+ run.artifact_paths = paths
179
+ run.completed_at = datetime.now(UTC)
180
+
181
+ def fail_run(self, run_id: str, error: Exception) -> None:
182
+ with self.database.session() as session:
183
+ run = self._require_run(session, run_id)
184
+ run.status = "failed"
185
+ run.error_message = str(error)
186
+ run.completed_at = datetime.now(UTC)
187
+
188
+ def set_artifacts(self, run_id: str, paths: dict[str, str]) -> None:
189
+ with self.database.session() as session:
190
+ self._require_run(session, run_id).artifact_paths = paths
191
+
192
+ def get_run(self, run_id: str) -> dict[str, Any]:
193
+ with self.database.session() as session:
194
+ row = session.scalar(
195
+ select(ContactEnrichmentRunRow)
196
+ .options(joinedload(ContactEnrichmentRunRow.items))
197
+ .where(ContactEnrichmentRunRow.id == run_id)
198
+ )
199
+ if row is None:
200
+ raise ContactEnrichmentRunNotFoundError(
201
+ f"contact enrichment run not found: {run_id}"
202
+ )
203
+ contact_run = session.get(ContactDiscoveryRunRow, row.contact_discovery_run_id)
204
+ if contact_run is None:
205
+ raise ContactEnrichmentRunNotFoundError(
206
+ f"source contact discovery run missing: {row.contact_discovery_run_id}"
207
+ )
208
+ company_run = session.get(EnrichmentRunRow, contact_run.enrichment_run_id)
209
+ if company_run is None:
210
+ raise ContactEnrichmentRunNotFoundError(
211
+ f"source company enrichment run missing: {contact_run.enrichment_run_id}"
212
+ )
213
+ return {
214
+ "run_id": row.id,
215
+ "source_contact_run_id": row.contact_discovery_run_id,
216
+ "source_enrichment_run_id": contact_run.enrichment_run_id,
217
+ "source_discovery_run_id": company_run.discovery_run_id,
218
+ "options": row.options_payload,
219
+ "status": row.status,
220
+ "summary": row.summary_payload,
221
+ "artifacts": row.artifact_paths,
222
+ "error": row.error_message,
223
+ "created_at": row.created_at.isoformat(),
224
+ "completed_at": row.completed_at.isoformat() if row.completed_at else None,
225
+ "items": [
226
+ {
227
+ "candidate_id": item.candidate_id,
228
+ "discovery": item.discovery_snapshot,
229
+ "channels": item.channels_payload,
230
+ "outcome": item.outcome,
231
+ "review_flags": item.review_flags,
232
+ "trace": item.trace_payload,
233
+ }
234
+ for item in row.items
235
+ ],
236
+ }
237
+
238
+ def inspect_contact(self, run_id: str, person: str) -> list[dict[str, Any]]:
239
+ normalized = " ".join(
240
+ "".join(char.lower() if char.isalnum() else " " for char in person).split()
241
+ )
242
+ matches = [
243
+ item
244
+ for item in self.get_run(run_id)["items"]
245
+ if " ".join(
246
+ "".join(
247
+ char.lower() if char.isalnum() else " "
248
+ for char in item["discovery"]["contact_name"]
249
+ ).split()
250
+ )
251
+ == normalized
252
+ ]
253
+ if not matches:
254
+ raise LookupError(f"person {person!r} was not found in run {run_id}")
255
+ return matches
256
+
257
+ @staticmethod
258
+ def _require_run(session: Any, run_id: str) -> ContactEnrichmentRunRow:
259
+ row = session.get(ContactEnrichmentRunRow, run_id)
260
+ if row is None:
261
+ raise ContactEnrichmentRunNotFoundError(
262
+ f"contact enrichment run not found: {run_id}"
263
+ )
264
+ return row
265
+
266
+ @classmethod
267
+ def _new_run_id(cls) -> str:
268
+ return f"{cls.RUN_ID_PREFIX}{uuid4().hex[:12]}"
@@ -0,0 +1,366 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime, timedelta
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from uuid import uuid4
7
+
8
+ from sqlalchemy import select
9
+ from sqlalchemy.exc import IntegrityError
10
+ from sqlalchemy.orm import joinedload
11
+
12
+ from company_discovery.db.models import (
13
+ ContactCandidateRow,
14
+ ContactDiscoveryQueryRow,
15
+ ContactDiscoveryRunRow,
16
+ ContactEvaluationRow,
17
+ EnrichmentRunRow,
18
+ )
19
+ from company_discovery.db.session import Database
20
+ from company_discovery.domain.contact_models import (
21
+ ContactCandidate,
22
+ ContactDiscoveryItem,
23
+ ContactDiscoverySummary,
24
+ ContactVerdict,
25
+ EvidenceVerdict,
26
+ )
27
+ from company_discovery.domain.contact_spec import ContactSearchSpec
28
+ from company_discovery.domain.models import ExaSearchResult
29
+
30
+
31
class ContactRunNotFoundError(LookupError):
    """Raised when a contact discovery run or its source enrichment run is missing."""
33
+
34
+
35
class ContactNotFoundError(LookupError):
    """Raised when no contact in a run matches the requested person."""
37
+
38
+
39
+ class ContactDiscoveryRepository:
40
+ RUN_ID_PREFIX = "contact-discover-"
41
+ CREATE_RUN_ATTEMPTS = 5
42
+
43
+ def __init__(self, database: Database) -> None:
44
+ self.database = database
45
+
46
+ def source_companies(self, spec: ContactSearchSpec) -> list[dict[str, Any]]:
47
+ source = spec.company_source
48
+ with self.database.session() as session:
49
+ run = session.get(EnrichmentRunRow, source.enrichment_run_id)
50
+ if run is None:
51
+ raise ContactRunNotFoundError(
52
+ f"company enrichment run not found: {source.enrichment_run_id}"
53
+ )
54
+ if run.status != "completed":
55
+ raise ValueError(
56
+ f"company enrichment run {source.enrichment_run_id} is {run.status}, not completed"
57
+ )
58
+
59
+ allowed = {
60
+ "ready": {"enriched_ready"},
61
+ "review": {"enriched_with_gaps", "independence_unconfirmed"},
62
+ "all": {
63
+ "enriched_ready",
64
+ "enriched_with_gaps",
65
+ "independence_unconfirmed",
66
+ },
67
+ }[source.bucket]
68
+ selected_domains = set(source.domains)
69
+ companies: list[dict[str, Any]] = []
70
+ for item in run.items:
71
+ domain = item.discovery_snapshot["domain"]
72
+ if item.outcome not in allowed:
73
+ continue
74
+ if selected_domains and domain not in selected_domains:
75
+ continue
76
+ companies.append(
77
+ {
78
+ "company_id": item.candidate_id,
79
+ "company_name": item.discovery_snapshot["company_name"],
80
+ "company_domain": domain,
81
+ "vertical": item.discovery_snapshot.get("target_vertical")
82
+ or item.discovery_snapshot.get("vertical"),
83
+ "state": item.discovery_snapshot.get("state"),
84
+ "linkedin_url": (item.enrichment_payload.get("linkedin") or {}).get("url"),
85
+ "company_enrichment_outcome": item.outcome,
86
+ }
87
+ )
88
+
89
+ if selected_domains:
90
+ found = {company["company_domain"] for company in companies}
91
+ missing = sorted(selected_domains - found)
92
+ if missing:
93
+ raise ValueError(
94
+ "requested domains are not available in the selected company bucket: "
95
+ + ", ".join(missing)
96
+ )
97
+ return companies[: spec.company_limit]
98
+
99
+ def create_run(self, spec: ContactSearchSpec, source_spec_path: Path | None) -> str:
100
+ for _ in range(self.CREATE_RUN_ATTEMPTS):
101
+ try:
102
+ with self.database.session() as session:
103
+ run_id = self._new_run_id()
104
+ session.add(
105
+ ContactDiscoveryRunRow(
106
+ id=run_id,
107
+ enrichment_run_id=spec.company_source.enrichment_run_id,
108
+ spec_payload=spec.model_dump(mode="json"),
109
+ source_spec_path=str(source_spec_path.resolve())
110
+ if source_spec_path
111
+ else None,
112
+ )
113
+ )
114
+ return run_id
115
+ except IntegrityError:
116
+ continue
117
+ raise RuntimeError("unable to allocate a unique contact discovery run id")
118
+
119
+ def fresh_contacts(
120
+ self,
121
+ company_domain: str,
122
+ role_key: str,
123
+ freshness_days: int,
124
+ limit: int,
125
+ ) -> list[ContactDiscoveryItem]:
126
+ cutoff = datetime.now(UTC) - timedelta(days=freshness_days)
127
+ with self.database.session() as session:
128
+ rows = session.execute(
129
+ select(ContactEvaluationRow, ContactCandidateRow)
130
+ .join(ContactCandidateRow)
131
+ .where(
132
+ ContactCandidateRow.company_domain == company_domain,
133
+ ContactEvaluationRow.role_key == role_key,
134
+ ContactEvaluationRow.verdict == ContactVerdict.ACCEPTED.value,
135
+ ContactEvaluationRow.created_at >= cutoff,
136
+ )
137
+ .order_by(ContactEvaluationRow.created_at.desc())
138
+ ).all()
139
+ found: list[ContactDiscoveryItem] = []
140
+ seen: set[int] = set()
141
+ for evaluation, candidate in rows:
142
+ if candidate.id in seen:
143
+ continue
144
+ seen.add(candidate.id)
145
+ found.append(self._item(evaluation, candidate, source="memory"))
146
+ if len(found) == limit:
147
+ break
148
+ return found
149
+
150
+ def add_query(
151
+ self,
152
+ run_id: str,
153
+ company_domain: str,
154
+ role_key: str,
155
+ query: str,
156
+ results: list[ExaSearchResult],
157
+ cost_dollars: float,
158
+ ) -> None:
159
+ with self.database.session() as session:
160
+ self._require_run(session, run_id)
161
+ session.add(
162
+ ContactDiscoveryQueryRow(
163
+ run_id=run_id,
164
+ company_domain=company_domain,
165
+ role_key=role_key,
166
+ query_text=query,
167
+ result_count=len(results),
168
+ cost_dollars=cost_dollars,
169
+ raw_results=[result.model_dump(mode="json") for result in results],
170
+ )
171
+ )
172
+
173
+ def upsert_candidate(self, candidate: ContactCandidate) -> int:
174
+ with self.database.session() as session:
175
+ row = session.scalar(
176
+ select(ContactCandidateRow).where(
177
+ ContactCandidateRow.company_domain == candidate.company_domain,
178
+ ContactCandidateRow.identity_key == candidate.identity_key,
179
+ )
180
+ )
181
+ if row is None and candidate.linkedin_url:
182
+ row = session.scalar(
183
+ select(ContactCandidateRow).where(
184
+ ContactCandidateRow.company_domain == candidate.company_domain,
185
+ ContactCandidateRow.normalized_name == candidate.normalized_name,
186
+ ContactCandidateRow.linkedin_url.is_(None),
187
+ )
188
+ )
189
+ if row is None:
190
+ row = ContactCandidateRow(
191
+ company_candidate_id=candidate.company_id,
192
+ company_name=candidate.company_name,
193
+ company_domain=candidate.company_domain,
194
+ full_name=candidate.full_name,
195
+ normalized_name=candidate.normalized_name,
196
+ identity_key=candidate.identity_key,
197
+ title=candidate.title,
198
+ linkedin_url=candidate.linkedin_url,
199
+ source_urls=candidate.source_urls,
200
+ evidence=candidate.evidence,
201
+ first_seen_at=candidate.first_seen_at,
202
+ last_seen_at=candidate.last_seen_at,
203
+ )
204
+ session.add(row)
205
+ session.flush()
206
+ else:
207
+ row.company_name = candidate.company_name
208
+ row.full_name = candidate.full_name
209
+ row.identity_key = candidate.identity_key
210
+ row.title = candidate.title
211
+ row.linkedin_url = candidate.linkedin_url or row.linkedin_url
212
+ row.source_urls = list(dict.fromkeys([*row.source_urls, *candidate.source_urls]))
213
+ row.evidence = list(dict.fromkeys([*row.evidence, *candidate.evidence]))
214
+ row.last_seen_at = datetime.now(UTC)
215
+ return row.id
216
+
217
+ def record_item(self, run_id: str, item: ContactDiscoveryItem) -> None:
218
+ with self.database.session() as session:
219
+ self._require_run(session, run_id)
220
+ session.add(
221
+ ContactEvaluationRow(
222
+ run_id=run_id,
223
+ candidate_id=item.candidate_id,
224
+ role_key=item.role_key,
225
+ verdict=item.verdict.value,
226
+ reason=item.reason,
227
+ current_company_match=item.current_company_match.value,
228
+ role_match=item.role_match.value,
229
+ identity_clear=item.identity_clear,
230
+ source=item.source,
231
+ )
232
+ )
233
+
234
+ def complete_run(
235
+ self, run_id: str, summary: ContactDiscoverySummary, paths: dict[str, str]
236
+ ) -> None:
237
+ with self.database.session() as session:
238
+ row = self._require_run(session, run_id)
239
+ row.status = "completed"
240
+ row.summary_payload = summary.model_dump(mode="json")
241
+ row.artifact_paths = paths
242
+ row.completed_at = datetime.now(UTC)
243
+
244
+ def fail_run(self, run_id: str, error: Exception) -> None:
245
+ with self.database.session() as session:
246
+ row = self._require_run(session, run_id)
247
+ row.status = "failed"
248
+ row.error_message = str(error)
249
+ row.completed_at = datetime.now(UTC)
250
+
251
+ def set_artifacts(self, run_id: str, paths: dict[str, str]) -> None:
252
+ with self.database.session() as session:
253
+ self._require_run(session, run_id).artifact_paths = paths
254
+
255
+ def get_run(self, run_id: str) -> dict[str, Any]:
256
+ with self.database.session() as session:
257
+ row = session.execute(
258
+ select(ContactDiscoveryRunRow)
259
+ .options(
260
+ joinedload(ContactDiscoveryRunRow.queries),
261
+ joinedload(ContactDiscoveryRunRow.evaluations).joinedload(
262
+ ContactEvaluationRow.candidate
263
+ ),
264
+ )
265
+ .where(ContactDiscoveryRunRow.id == run_id)
266
+ ).unique().scalar_one_or_none()
267
+ if row is None:
268
+ raise ContactRunNotFoundError(f"contact discovery run not found: {run_id}")
269
+ enrichment_run = session.get(EnrichmentRunRow, row.enrichment_run_id)
270
+ if enrichment_run is None:
271
+ raise ContactRunNotFoundError(
272
+ "source company enrichment run not found for contact discovery run "
273
+ f"{run_id}: {row.enrichment_run_id}"
274
+ )
275
+ return {
276
+ "run_id": row.id,
277
+ "source_enrichment_run_id": row.enrichment_run_id,
278
+ "source_discovery_run_id": enrichment_run.discovery_run_id,
279
+ "spec": row.spec_payload,
280
+ "source_spec_path": row.source_spec_path,
281
+ "status": row.status,
282
+ "summary": row.summary_payload,
283
+ "artifacts": row.artifact_paths,
284
+ "error": row.error_message,
285
+ "created_at": row.created_at.isoformat(),
286
+ "completed_at": row.completed_at.isoformat() if row.completed_at else None,
287
+ "queries": [
288
+ {
289
+ "company_domain": query.company_domain,
290
+ "role_key": query.role_key,
291
+ "query": query.query_text,
292
+ "result_count": query.result_count,
293
+ "cost_dollars": query.cost_dollars,
294
+ "raw_results": query.raw_results,
295
+ }
296
+ for query in row.queries
297
+ ],
298
+ "items": [
299
+ self._item(evaluation, evaluation.candidate).model_dump(mode="json")
300
+ for evaluation in row.evaluations
301
+ ],
302
+ }
303
+
304
+ def inspect_contact(self, run_id: str, person: str) -> list[dict[str, Any]]:
305
+ normalized = normalize_person_name(person)
306
+ payload = self.get_run(run_id)
307
+ matches = [
308
+ item
309
+ for item in payload["items"]
310
+ if item["candidate"]["normalized_name"] == normalized
311
+ ]
312
+ if not matches:
313
+ raise ContactNotFoundError(f"person {person!r} was not found in run {run_id}")
314
+ return matches
315
+
316
+ @staticmethod
317
+ def _item(
318
+ evaluation: ContactEvaluationRow,
319
+ candidate: ContactCandidateRow,
320
+ source: str | None = None,
321
+ ) -> ContactDiscoveryItem:
322
+ return ContactDiscoveryItem(
323
+ candidate_id=candidate.id,
324
+ candidate=ContactCandidate(
325
+ company_id=candidate.company_candidate_id,
326
+ company_name=candidate.company_name,
327
+ company_domain=candidate.company_domain,
328
+ full_name=candidate.full_name,
329
+ normalized_name=candidate.normalized_name,
330
+ identity_key=candidate.identity_key,
331
+ title=candidate.title,
332
+ linkedin_url=candidate.linkedin_url,
333
+ source_urls=candidate.source_urls,
334
+ evidence=candidate.evidence,
335
+ first_seen_at=candidate.first_seen_at,
336
+ last_seen_at=candidate.last_seen_at,
337
+ ),
338
+ role_key=evaluation.role_key,
339
+ verdict=ContactVerdict(evaluation.verdict),
340
+ reason=evaluation.reason,
341
+ current_company_match=EvidenceVerdict(evaluation.current_company_match),
342
+ role_match=EvidenceVerdict(evaluation.role_match),
343
+ identity_clear=evaluation.identity_clear,
344
+ source=source or evaluation.source,
345
+ )
346
+
347
+ @staticmethod
348
+ def _require_run(session: Any, run_id: str) -> ContactDiscoveryRunRow:
349
+ row = session.get(ContactDiscoveryRunRow, run_id)
350
+ if row is None:
351
+ raise ContactRunNotFoundError(f"contact discovery run not found: {run_id}")
352
+ return row
353
+
354
+ @classmethod
355
+ def _new_run_id(cls) -> str:
356
+ return f"{cls.RUN_ID_PREFIX}{uuid4().hex[:12]}"
357
+
358
+
359
def normalize_person_name(value: str) -> str:
    """Return ``value`` lower-cased with non-alphanumerics collapsed to single spaces.

    Every character that is not alphanumeric becomes a space; the result is
    then split on whitespace and re-joined, so leading, trailing and repeated
    separators disappear.
    """
    spaced = "".join(ch.lower() if ch.isalnum() else " " for ch in value)
    tokens = spaced.split()
    return " ".join(tokens)
361
+
362
+
363
def contact_identity_key(normalized_name: str, linkedin_url: str | None) -> str:
    """Return the identity key for a contact.

    With a non-empty ``linkedin_url`` the key is ``"linkedin:"`` plus the
    lower-cased URL, cut at the first ``?`` and stripped of trailing slashes.
    Otherwise the key is ``"name:"`` plus ``normalized_name``.
    """
    if not linkedin_url:
        return f"name:{normalized_name}"
    without_query = linkedin_url.lower().partition("?")[0]
    profile = without_query.rstrip("/")
    return f"linkedin:{profile}"