leads-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. company_discovery/__init__.py +4 -0
  2. company_discovery/adapters/__init__.py +5 -0
  3. company_discovery/adapters/apollo.py +189 -0
  4. company_discovery/adapters/exa.py +112 -0
  5. company_discovery/adapters/llm.py +118 -0
  6. company_discovery/adapters/protocols.py +58 -0
  7. company_discovery/adapters/website.py +154 -0
  8. company_discovery/bundled_skills/__init__.py +1 -0
  9. company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
  10. company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
  11. company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
  12. company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
  13. company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
  14. company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
  15. company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
  16. company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
  17. company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
  18. company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
  19. company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
  20. company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
  21. company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
  22. company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
  23. company_discovery/cli.py +1789 -0
  24. company_discovery/db/__init__.py +5 -0
  25. company_discovery/db/contact_enrichment_repository.py +268 -0
  26. company_discovery/db/contact_repository.py +366 -0
  27. company_discovery/db/enrichment_repository.py +207 -0
  28. company_discovery/db/models.py +324 -0
  29. company_discovery/db/repository.py +363 -0
  30. company_discovery/db/session.py +48 -0
  31. company_discovery/domain/__init__.py +24 -0
  32. company_discovery/domain/contact_models.py +178 -0
  33. company_discovery/domain/contact_spec.py +86 -0
  34. company_discovery/domain/models.py +287 -0
  35. company_discovery/domain/spec.py +263 -0
  36. company_discovery/migrations.py +190 -0
  37. company_discovery/prompts/__init__.py +8 -0
  38. company_discovery/prompts/candidate_evaluation/system.md +13 -0
  39. company_discovery/prompts/company_enrichment/system.md +42 -0
  40. company_discovery/prompts/contact_evaluation/system.md +18 -0
  41. company_discovery/prompts/query_generation/system.md +10 -0
  42. company_discovery/release_manifest.json +7 -0
  43. company_discovery/reports/__init__.py +4 -0
  44. company_discovery/reports/contact_enrichment_exporter.py +108 -0
  45. company_discovery/reports/contact_exporter.py +132 -0
  46. company_discovery/reports/enrichment_exporter.py +125 -0
  47. company_discovery/reports/exporter.py +135 -0
  48. company_discovery/runtime.py +336 -0
  49. company_discovery/services/__init__.py +4 -0
  50. company_discovery/services/contact_enrichment_pipeline.py +344 -0
  51. company_discovery/services/contact_enrichment_progress.py +37 -0
  52. company_discovery/services/contact_evaluator.py +110 -0
  53. company_discovery/services/contact_pipeline.py +295 -0
  54. company_discovery/services/contact_progress.py +38 -0
  55. company_discovery/services/enrichment_extractor.py +61 -0
  56. company_discovery/services/enrichment_pipeline.py +526 -0
  57. company_discovery/services/enrichment_progress.py +20 -0
  58. company_discovery/services/enrichment_resolver.py +148 -0
  59. company_discovery/services/evaluator.py +40 -0
  60. company_discovery/services/hygiene.py +51 -0
  61. company_discovery/services/memory.py +150 -0
  62. company_discovery/services/normalization.py +98 -0
  63. company_discovery/services/pipeline.py +628 -0
  64. company_discovery/services/progress.py +48 -0
  65. company_discovery/services/query_planner.py +47 -0
  66. company_discovery/settings.py +152 -0
  67. company_discovery/skill_installer.py +197 -0
  68. company_discovery/update_plan.py +79 -0
  69. leads_cli-0.1.0.dist-info/METADATA +277 -0
  70. leads_cli-0.1.0.dist-info/RECORD +72 -0
  71. leads_cli-0.1.0.dist-info/WHEEL +4 -0
  72. leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,363 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import UTC, datetime
5
+ from typing import Any
6
+ from uuid import uuid4
7
+
8
+ from sqlalchemy import Select, func, select
9
+ from sqlalchemy.exc import IntegrityError
10
+ from sqlalchemy.orm import joinedload
11
+
12
+ from company_discovery.db.models import (
13
+ CandidateEvaluationRow,
14
+ CompanyCandidateRow,
15
+ DiscoveryQueryRow,
16
+ DiscoveryRunRow,
17
+ RawResultRow,
18
+ )
19
+ from company_discovery.db.session import Database
20
+ from company_discovery.domain.models import (
21
+ CandidateBucket,
22
+ CandidateEvaluation,
23
+ ExaSearchResult,
24
+ NormalizedCandidate,
25
+ RunSummary,
26
+ )
27
+ from company_discovery.domain.spec import CompanySearchSpec
28
+
29
+
30
@dataclass(frozen=True)
class MemoryRecord:
    """Immutable snapshot of a stored candidate plus its most recent evaluation.

    Instances are built by ``DiscoveryRepository.memory_records``. When a
    candidate has no evaluation row, the ``latest_*`` fields are ``None``
    (``latest_reason_codes`` is an empty tuple).
    """

    # Primary key of the candidate row.
    candidate_id: int
    # Candidate re-validated from the row's stored normalized payload.
    candidate: NormalizedCandidate
    # ``fit_outcome`` column of the latest evaluation row, if any.
    latest_fit: str | None
    # ``bucket`` column of the latest evaluation row, if any.
    latest_bucket: str | None
    # ``reason`` column of the latest evaluation row, if any.
    latest_reason: str | None
    # ``reason_codes`` of the latest evaluation row, frozen into a tuple.
    latest_reason_codes: tuple[str, ...]
    # Full evaluation re-validated from the stored evaluation payload.
    latest_evaluation: CandidateEvaluation | None
    # True when any evaluation of this candidate landed in the SELECTED bucket.
    ever_selected: bool
    # Spec of the run that produced the latest evaluation, when known.
    latest_spec: CompanySearchSpec | None = None
41
+
42
+
43
class RunNotFoundError(LookupError):
    """Raised when a discovery run id has no matching row."""
45
+
46
+
47
class CandidateNotFoundError(LookupError):
    """Raised when a candidate id or domain cannot be resolved to a stored row."""
49
+
50
+
51
+ class DiscoveryRepository:
52
+ RUN_ID_PREFIX = "company-discover-"
53
+ CREATE_RUN_ATTEMPTS = 5
54
+
55
+ def __init__(self, database: Database) -> None:
56
+ self.database = database
57
+
58
+ def create_run(self, spec: CompanySearchSpec, source_spec_path: str | None = None) -> str:
59
+ for _ in range(self.CREATE_RUN_ATTEMPTS):
60
+ try:
61
+ run_id = self._new_run_id()
62
+ with self.database.session() as session:
63
+ session.add(
64
+ DiscoveryRunRow(
65
+ id=run_id,
66
+ spec_payload=spec.model_dump(mode="json"),
67
+ source_spec_path=source_spec_path,
68
+ status="running",
69
+ )
70
+ )
71
+ return run_id
72
+ except IntegrityError:
73
+ continue
74
+ raise RuntimeError("unable to allocate a unique company discovery run id")
75
+
76
+ @classmethod
77
+ def _new_run_id(cls) -> str:
78
+ return f"{cls.RUN_ID_PREFIX}{uuid4().hex[:12]}"
79
+
80
+ def complete_run(
81
+ self,
82
+ run_id: str,
83
+ summary: RunSummary,
84
+ artifact_paths: dict[str, str],
85
+ ) -> None:
86
+ with self.database.session() as session:
87
+ row = self._require_run(session, run_id)
88
+ row.status = "completed"
89
+ row.summary_payload = summary.model_dump(mode="json")
90
+ row.artifact_paths = artifact_paths
91
+ row.completed_at = datetime.now(UTC)
92
+
93
+ def fail_run(self, run_id: str, error: Exception) -> None:
94
+ with self.database.session() as session:
95
+ row = self._require_run(session, run_id)
96
+ row.status = "failed"
97
+ row.error_message = str(error)
98
+ row.completed_at = datetime.now(UTC)
99
+
100
+ def set_artifacts(self, run_id: str, artifact_paths: dict[str, str]) -> None:
101
+ with self.database.session() as session:
102
+ row = self._require_run(session, run_id)
103
+ row.artifact_paths = artifact_paths
104
+
105
+ def add_query(self, run_id: str, order: int, text: str, rationale: str = "") -> int:
106
+ with self.database.session() as session:
107
+ row = DiscoveryQueryRow(
108
+ run_id=run_id,
109
+ query_order=order,
110
+ query_text=text,
111
+ rationale=rationale,
112
+ )
113
+ session.add(row)
114
+ session.flush()
115
+ return row.id
116
+
117
+ def save_query_results(
118
+ self,
119
+ run_id: str,
120
+ query_id: int,
121
+ results: list[ExaSearchResult],
122
+ cost_dollars: float,
123
+ ) -> None:
124
+ with self.database.session() as session:
125
+ query = session.get(DiscoveryQueryRow, query_id)
126
+ if query is None or query.run_id != run_id:
127
+ raise LookupError(f"query {query_id} does not belong to run {run_id}")
128
+ query.result_count = len(results)
129
+ query.cost_dollars = cost_dollars
130
+ session.add_all(
131
+ RawResultRow(
132
+ run_id=run_id,
133
+ query_id=query_id,
134
+ result_position=result.position,
135
+ observed_url=result.url,
136
+ observed_title=result.title,
137
+ raw_payload=result.model_dump(mode="json"),
138
+ )
139
+ for result in results
140
+ )
141
+
142
+ def upsert_candidate(self, candidate: NormalizedCandidate) -> int:
143
+ with self.database.session() as session:
144
+ row = session.scalar(
145
+ select(CompanyCandidateRow).where(CompanyCandidateRow.domain == candidate.domain)
146
+ )
147
+ payload = candidate.model_dump(mode="json")
148
+ if row is None:
149
+ row = CompanyCandidateRow(
150
+ canonical_name=candidate.company_name,
151
+ domain=candidate.domain,
152
+ dedupe_key=candidate.dedupe_key,
153
+ normalized_payload=payload,
154
+ vertical=candidate.vertical,
155
+ country=candidate.country,
156
+ state=candidate.state,
157
+ employee_min=candidate.employee_min,
158
+ employee_max=candidate.employee_max,
159
+ ownership_type=candidate.ownership_type,
160
+ excluded=candidate.excluded,
161
+ first_seen_at=candidate.first_seen_at,
162
+ last_seen_at=candidate.last_seen_at,
163
+ )
164
+ session.add(row)
165
+ session.flush()
166
+ else:
167
+ merged = self._merge_candidate_payload(row.normalized_payload, payload)
168
+ row.canonical_name = candidate.company_name or row.canonical_name
169
+ row.normalized_payload = merged
170
+ row.last_seen_at = candidate.last_seen_at
171
+ row.excluded = row.excluded or candidate.excluded
172
+ return row.id
173
+
174
+ def record_evaluation(
175
+ self,
176
+ run_id: str,
177
+ candidate_id: int,
178
+ evaluation: CandidateEvaluation,
179
+ bucket: CandidateBucket,
180
+ source: str,
181
+ ) -> None:
182
+ now = datetime.now(UTC)
183
+ with self.database.session() as session:
184
+ candidate = session.get(CompanyCandidateRow, candidate_id)
185
+ if candidate is None:
186
+ raise CandidateNotFoundError(f"candidate not found: {candidate_id}")
187
+ existing = session.scalar(
188
+ select(CandidateEvaluationRow).where(
189
+ CandidateEvaluationRow.run_id == run_id,
190
+ CandidateEvaluationRow.candidate_id == candidate_id,
191
+ )
192
+ )
193
+ if existing is not None:
194
+ raise ValueError(f"candidate {candidate.domain} already evaluated in run {run_id}")
195
+ session.add(
196
+ CandidateEvaluationRow(
197
+ run_id=run_id,
198
+ candidate_id=candidate_id,
199
+ evaluation_payload=evaluation.model_dump(mode="json"),
200
+ fit_outcome=evaluation.fit.value,
201
+ bucket=bucket.value,
202
+ reason=evaluation.reason,
203
+ reason_codes=evaluation.reason_codes,
204
+ source=source,
205
+ created_at=now,
206
+ )
207
+ )
208
+ self._apply_inferences(candidate, evaluation)
209
+ candidate.prior_bucket = bucket.value
210
+ candidate.prior_reason = evaluation.reason
211
+ candidate.last_evaluated_at = now
212
+
213
+ def memory_records(self) -> list[MemoryRecord]:
214
+ latest = (
215
+ select(
216
+ CandidateEvaluationRow.candidate_id,
217
+ func.max(CandidateEvaluationRow.id).label("latest_id"),
218
+ )
219
+ .group_by(CandidateEvaluationRow.candidate_id)
220
+ .subquery()
221
+ )
222
+ latest_evaluations: Select[tuple[CompanyCandidateRow, CandidateEvaluationRow | None]] = (
223
+ select(CompanyCandidateRow, CandidateEvaluationRow)
224
+ .outerjoin(latest, latest.c.candidate_id == CompanyCandidateRow.id)
225
+ .outerjoin(
226
+ CandidateEvaluationRow,
227
+ CandidateEvaluationRow.id == latest.c.latest_id,
228
+ )
229
+ .order_by(CompanyCandidateRow.last_seen_at.desc())
230
+ )
231
+ with self.database.session() as session:
232
+ rows = session.execute(latest_evaluations).all()
233
+ selected_ids = set(
234
+ session.scalars(
235
+ select(CandidateEvaluationRow.candidate_id).where(
236
+ CandidateEvaluationRow.bucket == CandidateBucket.SELECTED.value
237
+ )
238
+ ).all()
239
+ )
240
+ run_specs = {
241
+ row.id: CompanySearchSpec.model_validate(row.spec_payload)
242
+ for row in session.scalars(select(DiscoveryRunRow)).all()
243
+ }
244
+ return [
245
+ MemoryRecord(
246
+ candidate_id=candidate_row.id,
247
+ candidate=NormalizedCandidate.model_validate(candidate_row.normalized_payload),
248
+ latest_fit=evaluation.fit_outcome if evaluation else None,
249
+ latest_bucket=evaluation.bucket if evaluation else None,
250
+ latest_reason=evaluation.reason if evaluation else None,
251
+ latest_reason_codes=tuple(evaluation.reason_codes) if evaluation else (),
252
+ latest_evaluation=(
253
+ CandidateEvaluation.model_validate(evaluation.evaluation_payload)
254
+ if evaluation
255
+ else None
256
+ ),
257
+ ever_selected=candidate_row.id in selected_ids,
258
+ latest_spec=run_specs.get(evaluation.run_id) if evaluation else None,
259
+ )
260
+ for candidate_row, evaluation in rows
261
+ ]
262
+
263
+ def get_run(self, run_id: str) -> dict[str, Any]:
264
+ with self.database.session() as session:
265
+ row = session.scalar(
266
+ select(DiscoveryRunRow)
267
+ .options(joinedload(DiscoveryRunRow.queries))
268
+ .where(DiscoveryRunRow.id == run_id)
269
+ )
270
+ if row is None:
271
+ raise RunNotFoundError(f"run not found: {run_id}")
272
+ evaluations = session.execute(
273
+ select(CandidateEvaluationRow, CompanyCandidateRow)
274
+ .join(CompanyCandidateRow)
275
+ .where(CandidateEvaluationRow.run_id == run_id)
276
+ .order_by(CandidateEvaluationRow.id)
277
+ ).all()
278
+ return {
279
+ "run_id": row.id,
280
+ "status": row.status,
281
+ "spec": row.spec_payload,
282
+ "summary": row.summary_payload,
283
+ "artifacts": row.artifact_paths,
284
+ "error": row.error_message,
285
+ "created_at": row.created_at.isoformat(),
286
+ "completed_at": row.completed_at.isoformat() if row.completed_at else None,
287
+ "queries": [query.query_text for query in row.queries],
288
+ "candidates": [
289
+ {
290
+ "candidate_id": candidate.id,
291
+ "company": candidate.normalized_payload,
292
+ "evaluation": evaluation.evaluation_payload,
293
+ "bucket": evaluation.bucket,
294
+ "source": evaluation.source,
295
+ }
296
+ for evaluation, candidate in evaluations
297
+ ],
298
+ }
299
+
300
+ def inspect_candidate(self, run_id: str, domain: str) -> dict[str, Any]:
301
+ with self.database.session() as session:
302
+ result = session.execute(
303
+ select(CandidateEvaluationRow, CompanyCandidateRow)
304
+ .join(CompanyCandidateRow)
305
+ .where(
306
+ CandidateEvaluationRow.run_id == run_id,
307
+ CompanyCandidateRow.domain == domain,
308
+ )
309
+ ).first()
310
+ if result is None:
311
+ raise CandidateNotFoundError(f"domain {domain} was not evaluated in run {run_id}")
312
+ evaluation, candidate = result
313
+ raw_hits = session.scalars(
314
+ select(RawResultRow).where(
315
+ RawResultRow.run_id == run_id,
316
+ RawResultRow.observed_url.contains(domain),
317
+ )
318
+ ).all()
319
+ return {
320
+ "company": candidate.normalized_payload,
321
+ "evaluation": evaluation.evaluation_payload,
322
+ "bucket": evaluation.bucket,
323
+ "source": evaluation.source,
324
+ "raw_hits": [hit.raw_payload for hit in raw_hits],
325
+ }
326
+
327
+ @staticmethod
328
+ def _require_run(session: Any, run_id: str) -> DiscoveryRunRow:
329
+ row = session.get(DiscoveryRunRow, run_id)
330
+ if row is None:
331
+ raise RunNotFoundError(f"run not found: {run_id}")
332
+ return row
333
+
334
+ @staticmethod
335
+ def _merge_candidate_payload(current: dict[str, Any], incoming: dict[str, Any]) -> dict[str, Any]:
336
+ merged = dict(current)
337
+ for key, value in incoming.items():
338
+ if key == "sightings":
339
+ existing = {item["url"]: item for item in merged.get("sightings", [])}
340
+ existing.update({item["url"]: item for item in value})
341
+ merged[key] = list(existing.values())
342
+ elif value is not None and value != []:
343
+ merged[key] = value
344
+ return merged
345
+
346
+ @staticmethod
347
+ def _apply_inferences(candidate: CompanyCandidateRow, evaluation: CandidateEvaluation) -> None:
348
+ updates = {
349
+ "vertical": evaluation.inferred_vertical,
350
+ "country": evaluation.inferred_country,
351
+ "state": evaluation.inferred_state,
352
+ "employee_min": evaluation.inferred_employee_min,
353
+ "employee_max": evaluation.inferred_employee_max,
354
+ "ownership_type": evaluation.inferred_ownership_type,
355
+ }
356
+ payload = dict(candidate.normalized_payload)
357
+ for field, value in updates.items():
358
+ if value is not None:
359
+ setattr(candidate, field, value)
360
+ payload[field] = value
361
+ candidate.excluded = evaluation.excluded.value == "yes"
362
+ payload["excluded"] = candidate.excluded
363
+ candidate.normalized_payload = payload
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+
3
+ from contextlib import contextmanager
4
+ from pathlib import Path
5
+ from typing import Iterator
6
+
7
+ from sqlalchemy import create_engine, event
8
+ from sqlalchemy.orm import Session, sessionmaker
9
+
10
+ from company_discovery.db.models import Base
11
+
12
+
13
class Database:
    """Owns the SQLAlchemy engine and hands out transactional sessions."""

    def __init__(self, url: str) -> None:
        """Create the engine for ``url``.

        For file-based SQLite URLs (``sqlite:///...``, including
        ``sqlite+driver:///...``) the parent directory of the database file is
        created first. For any SQLite URL a connect listener enables
        foreign-key enforcement on each new connection.
        """
        scheme, separator, location = url.partition(":///")
        if separator and scheme.startswith("sqlite"):
            # Drop a "?query" suffix so slashes inside it cannot be mistaken
            # for path components.
            file_path = location.partition("?")[0]
            Path(file_path).parent.mkdir(parents=True, exist_ok=True)
        self.engine = create_engine(url, future=True)
        if url.startswith("sqlite"):
            event.listen(self.engine, "connect", self._enable_sqlite_foreign_keys)
        self._session_factory = sessionmaker(
            bind=self.engine,
            class_=Session,
            expire_on_commit=False,
        )

    @staticmethod
    def _enable_sqlite_foreign_keys(dbapi_connection: object, _: object) -> None:
        """Connect-event hook: run ``PRAGMA foreign_keys=ON`` on the new DBAPI connection."""
        cursor = dbapi_connection.cursor()  # type: ignore[attr-defined]
        try:
            cursor.execute("PRAGMA foreign_keys=ON")
        finally:
            # Close the cursor even if the PRAGMA fails.
            cursor.close()

    def create_schema(self) -> None:
        """Create all tables declared on ``Base.metadata``."""
        Base.metadata.create_all(self.engine)

    @contextmanager
    def session(self) -> Iterator[Session]:
        """Yield a session that commits on success and rolls back on any exception.

        The exception is re-raised after rollback; the session is always closed.
        """
        session = self._session_factory()
        try:
            yield session
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def dispose(self) -> None:
        """Dispose of the engine."""
        self.engine.dispose()
@@ -0,0 +1,24 @@
1
+ from company_discovery.domain.models import (
2
+ CandidateBucket,
3
+ CandidateEvaluation,
4
+ ExaSearchResult,
5
+ FitVerdict,
6
+ MatchVerdict,
7
+ NormalizedCandidate,
8
+ QueryPlan,
9
+ RunResult,
10
+ )
11
+ from company_discovery.domain.spec import CompanySearchSpec
12
+
13
+ __all__ = [
14
+ "CandidateBucket",
15
+ "CandidateEvaluation",
16
+ "CompanySearchSpec",
17
+ "ExaSearchResult",
18
+ "FitVerdict",
19
+ "MatchVerdict",
20
+ "NormalizedCandidate",
21
+ "QueryPlan",
22
+ "RunResult",
23
+ ]
24
+
@@ -0,0 +1,178 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+ from enum import StrEnum
5
+ from typing import Any
6
+
7
+ from pydantic import Field, field_validator
8
+
9
+ from company_discovery.domain.models import DomainModel, ExaSearchResult
10
+
11
+
12
class ContactVerdict(StrEnum):
    """Three-way verdict attached to a contact: accepted, review, or rejected."""

    ACCEPTED = "accepted"
    REVIEW = "review"
    REJECTED = "rejected"
16
+
17
+
18
class EvidenceVerdict(StrEnum):
    """Four-level answer (yes / likely / unknown / no).

    Used in this module as the type of the ``current_company_match`` and
    ``role_match`` fields.
    """

    YES = "yes"
    LIKELY = "likely"
    UNKNOWN = "unknown"
    NO = "no"
23
+
24
+
25
class ContactAssessment(DomainModel):
    """Assessment of one person: identity fields, supporting evidence, and a verdict.

    ``source_urls`` and ``evidence`` must each contain at least one entry;
    ``full_name`` and ``title`` are whitespace-normalized by ``normalize_text``.
    """

    full_name: str = Field(min_length=3)
    title: str = Field(min_length=2)
    linkedin_url: str | None = None
    source_urls: list[str] = Field(min_length=1)
    evidence: list[str] = Field(min_length=1)
    current_company_match: EvidenceVerdict
    role_match: EvidenceVerdict
    identity_clear: bool
    verdict: ContactVerdict
    reason: str = Field(min_length=3)

    @field_validator("full_name", "title")
    @classmethod
    def normalize_text(cls, value: str) -> str:
        """Collapse every run of whitespace to one space and trim both ends."""
        # NOTE(review): no ``mode`` is given, so this presumably runs after the
        # ``min_length`` checks above; a whitespace-padded value could pass the
        # length check and then shrink below it here -- confirm against the
        # DomainModel configuration.
        return " ".join(value.split())
41
+
42
+
43
class ContactAssessmentBatch(DomainModel):
    """Container for at most 30 ``ContactAssessment`` entries (empty by default)."""

    candidates: list[ContactAssessment] = Field(default_factory=list, max_length=30)
45
+
46
+
47
class ContactCandidate(DomainModel):
    """A person associated with a company, with the evidence that surfaced them.

    ``first_seen_at`` and ``last_seen_at`` default to the current UTC time at
    instantiation when not supplied.
    """

    company_id: int
    company_name: str
    company_domain: str
    full_name: str
    normalized_name: str
    identity_key: str
    title: str
    linkedin_url: str | None = None
    source_urls: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
    first_seen_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
    last_seen_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
60
+
61
+
62
class ContactDiscoveryItem(DomainModel):
    """One contact candidate paired with a role key, its verdict and the supporting checks."""

    candidate_id: int
    candidate: ContactCandidate
    role_key: str
    verdict: ContactVerdict
    reason: str
    current_company_match: EvidenceVerdict
    role_match: EvidenceVerdict
    identity_clear: bool
    source: str
72
+
73
+
74
class ContactDiscoverySummary(DomainModel):
    """Integer counters describing a contact discovery run; every counter defaults to 0."""

    companies_loaded: int = 0
    memory_reused: int = 0
    role_gaps: int = 0
    queries_run: int = 0
    raw_results: int = 0
    unique_people: int = 0
    accepted: int = 0
    review: int = 0
    rejected: int = 0
84
+
85
+
86
class ContactDiscoveryResult(DomainModel):
    """Result of a contact discovery run: ids, summary counters, items and artifact paths."""

    run_id: str
    source_enrichment_run_id: str
    summary: ContactDiscoverySummary
    items: list[ContactDiscoveryItem]
    artifact_paths: dict[str, str] = Field(default_factory=dict)
92
+
93
+
94
class ContactSearchBatch(DomainModel):
    """Search results grouped under one company and one role key with its labels."""

    company_name: str
    company_domain: str
    role_key: str
    role_labels: list[str]
    results: list[ExaSearchResult]
100
+
101
+
102
class ContactEnrichmentOutcome(StrEnum):
    """Outcome assigned to an enriched contact: ready, review, or blocked."""

    READY = "ready"
    REVIEW = "review"
    BLOCKED = "blocked"
106
+
107
+
108
class ApolloPersonRequest(DomainModel):
    """Identifying details of one person, keyed by ``candidate_id``, for an Apollo lookup."""

    candidate_id: int
    first_name: str
    last_name: str
    full_name: str
    company_name: str
    company_domain: str
    linkedin_url: str | None = None
116
+
117
+
118
class ApolloPersonMatch(DomainModel):
    """Apollo lookup result for one candidate.

    Only ``candidate_id`` and ``person_found`` are required; every other field
    is optional and defaults to ``None`` (or an empty list / dict).
    """

    candidate_id: int
    person_found: bool
    full_name: str | None = None
    linkedin_url: str | None = None
    title: str | None = None
    organization_name: str | None = None
    organization_domain: str | None = None
    email: str | None = None
    email_status: str | None = None
    phones: list[str] = Field(default_factory=list)
    apollo_person_id: str | None = None
    # presumably the unprocessed provider record -- verify against the Apollo adapter
    raw: dict[str, Any] = Field(default_factory=dict)
131
+
132
+
133
class ApolloBatchResult(DomainModel):
    """Matches returned for a batch, with an optional request id and a ``pending`` flag."""

    matches: list[ApolloPersonMatch] = Field(default_factory=list)
    request_id: str | None = None
    pending: bool = False
137
+
138
+
139
class ContactChannelProfile(DomainModel):
    """Email / phone channel data for a contact plus the Apollo attributes that came with it.

    All fields are optional; ``observed_at`` defaults to the current UTC time
    at instantiation.
    """

    email_requested: bool = False
    phone_requested: bool = False
    email: str | None = None
    email_status: str | None = None
    phone: str | None = None
    apollo_person_id: str | None = None
    apollo_linkedin_url: str | None = None
    apollo_company_name: str | None = None
    apollo_company_domain: str | None = None
    apollo_title: str | None = None
    observed_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
151
+
152
+
153
class ContactEnrichmentItem(DomainModel):
    """Enrichment record for one candidate: discovery data, channels, outcome, flags and trace."""

    candidate_id: int
    # Free-form mapping; its schema is not defined in this module.
    discovery: dict[str, Any]
    channels: ContactChannelProfile
    outcome: ContactEnrichmentOutcome
    review_flags: list[str] = Field(default_factory=list)
    trace: list[dict[str, Any]] = Field(default_factory=list)
160
+
161
+
162
class ContactEnrichmentSummary(DomainModel):
    """Integer counters describing a contact enrichment run; every counter defaults to 0."""

    contacts_loaded: int = 0
    memory_reused: int = 0
    apollo_requests: int = 0
    apollo_batches: int = 0
    async_polls: int = 0
    ready: int = 0
    review: int = 0
    blocked: int = 0
171
+
172
+
173
class ContactEnrichmentResult(DomainModel):
    """Result of a contact enrichment run: ids, summary counters, items and artifact paths."""

    run_id: str
    source_contact_run_id: str
    summary: ContactEnrichmentSummary
    items: list[ContactEnrichmentItem]
    artifact_paths: dict[str, str] = Field(default_factory=dict)
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Literal
7
+
8
+ from pydantic import Field, field_validator, model_validator
9
+
10
+ from company_discovery.domain.models import DomainModel
11
+ from company_discovery.services.normalization import canonical_domain
12
+
13
+
14
+ ROLE_KEY_PATTERN = re.compile(r"^[a-z][a-z0-9_]{1,63}$")
15
+
16
+
17
class ContactCompanySource(DomainModel):
    """Names an enrichment run, a bucket filter and an optional list of company domains."""

    enrichment_run_id: str = Field(min_length=1)
    bucket: Literal["ready", "review", "all"] = "ready"
    domains: list[str] = Field(default_factory=list)

    @field_validator("domains")
    @classmethod
    def normalize_domains(cls, values: list[str]) -> list[str]:
        """Canonicalize each domain and drop duplicates, keeping first-seen order.

        Raises:
            ValueError: if ``canonical_domain`` returns ``None`` for an entry.
        """
        # A dict keeps insertion order, so it doubles as an ordered set.
        unique: dict[str, None] = {}
        for raw in values:
            cleaned = canonical_domain(raw)
            if cleaned is None:
                raise ValueError(f"invalid company domain: {raw}")
            unique.setdefault(cleaned, None)
        return list(unique)
33
+
34
+
35
class ContactRoleTarget(DomainModel):
    """A role to look for: a normalized key, one to twelve labels, and a per-company cap."""

    key: str = Field(min_length=2, max_length=64)
    labels: list[str] = Field(min_length=1, max_length=12)
    max_per_company: int = Field(default=1, ge=1, le=10)

    @field_validator("key")
    @classmethod
    def validate_key(cls, value: str) -> str:
        """Trim and lowercase the key, turn hyphens and spaces into underscores, then validate it.

        Raises:
            ValueError: if the normalized key does not match ``ROLE_KEY_PATTERN``.
        """
        separators_to_underscore = str.maketrans("- ", "__")
        candidate = value.strip().lower().translate(separators_to_underscore)
        if ROLE_KEY_PATTERN.fullmatch(candidate) is None:
            raise ValueError("role key must use lowercase letters, numbers, and underscores")
        return candidate

    @field_validator("labels")
    @classmethod
    def normalize_labels(cls, values: list[str]) -> list[str]:
        """Lowercase and whitespace-collapse each label, dropping duplicates in first-seen order.

        Raises:
            ValueError: if a normalized label is shorter than two characters.
        """
        # A dict keeps insertion order, so it doubles as an ordered set.
        unique: dict[str, None] = {}
        for raw in values:
            cleaned = " ".join(raw.lower().split())
            if len(cleaned) < 2:
                raise ValueError("role labels cannot be empty")
            unique.setdefault(cleaned, None)
        return list(unique)
59
+
60
+
61
class ContactSearchSpec(DomainModel):
    """Version-1 contact search spec: company source, role targets, limits and matching flags."""

    version: Literal[1] = 1
    company_source: ContactCompanySource
    roles: list[ContactRoleTarget] = Field(min_length=1, max_length=20)
    company_limit: int | None = Field(default=None, ge=1, le=1000)
    contact_limit: int | None = Field(default=None, ge=1, le=10000)
    current_only: bool = True
    require_role_match: bool = True
    memory_freshness_days: int = Field(default=30, ge=1, le=365)

    @model_validator(mode="after")
    def validate_unique_roles(self) -> "ContactSearchSpec":
        """Reject specs in which two role targets share the same key.

        Raises:
            ValueError: if any role key occurs more than once.
        """
        distinct_keys = {role.key for role in self.roles}
        if len(distinct_keys) != len(self.roles):
            raise ValueError("role keys must be unique")
        return self

    @classmethod
    def from_file(cls, path: Path) -> "ContactSearchSpec":
        """Load and validate a spec from a UTF-8 JSON file.

        Raises:
            ValueError: if the file does not exist or does not contain valid JSON.
        """
        try:
            raw_text = path.read_text(encoding="utf-8")
        except FileNotFoundError as exc:
            raise ValueError(f"spec file does not exist: {path}") from exc
        try:
            payload = json.loads(raw_text)
        except json.JSONDecodeError as exc:
            raise ValueError(f"invalid JSON: {exc}") from exc
        return cls.model_validate(payload)