leads-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- company_discovery/__init__.py +4 -0
- company_discovery/adapters/__init__.py +5 -0
- company_discovery/adapters/apollo.py +189 -0
- company_discovery/adapters/exa.py +112 -0
- company_discovery/adapters/llm.py +118 -0
- company_discovery/adapters/protocols.py +58 -0
- company_discovery/adapters/website.py +154 -0
- company_discovery/bundled_skills/__init__.py +1 -0
- company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
- company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
- company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
- company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
- company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
- company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
- company_discovery/cli.py +1789 -0
- company_discovery/db/__init__.py +5 -0
- company_discovery/db/contact_enrichment_repository.py +268 -0
- company_discovery/db/contact_repository.py +366 -0
- company_discovery/db/enrichment_repository.py +207 -0
- company_discovery/db/models.py +324 -0
- company_discovery/db/repository.py +363 -0
- company_discovery/db/session.py +48 -0
- company_discovery/domain/__init__.py +24 -0
- company_discovery/domain/contact_models.py +178 -0
- company_discovery/domain/contact_spec.py +86 -0
- company_discovery/domain/models.py +287 -0
- company_discovery/domain/spec.py +263 -0
- company_discovery/migrations.py +190 -0
- company_discovery/prompts/__init__.py +8 -0
- company_discovery/prompts/candidate_evaluation/system.md +13 -0
- company_discovery/prompts/company_enrichment/system.md +42 -0
- company_discovery/prompts/contact_evaluation/system.md +18 -0
- company_discovery/prompts/query_generation/system.md +10 -0
- company_discovery/release_manifest.json +7 -0
- company_discovery/reports/__init__.py +4 -0
- company_discovery/reports/contact_enrichment_exporter.py +108 -0
- company_discovery/reports/contact_exporter.py +132 -0
- company_discovery/reports/enrichment_exporter.py +125 -0
- company_discovery/reports/exporter.py +135 -0
- company_discovery/runtime.py +336 -0
- company_discovery/services/__init__.py +4 -0
- company_discovery/services/contact_enrichment_pipeline.py +344 -0
- company_discovery/services/contact_enrichment_progress.py +37 -0
- company_discovery/services/contact_evaluator.py +110 -0
- company_discovery/services/contact_pipeline.py +295 -0
- company_discovery/services/contact_progress.py +38 -0
- company_discovery/services/enrichment_extractor.py +61 -0
- company_discovery/services/enrichment_pipeline.py +526 -0
- company_discovery/services/enrichment_progress.py +20 -0
- company_discovery/services/enrichment_resolver.py +148 -0
- company_discovery/services/evaluator.py +40 -0
- company_discovery/services/hygiene.py +51 -0
- company_discovery/services/memory.py +150 -0
- company_discovery/services/normalization.py +98 -0
- company_discovery/services/pipeline.py +628 -0
- company_discovery/services/progress.py +48 -0
- company_discovery/services/query_planner.py +47 -0
- company_discovery/settings.py +152 -0
- company_discovery/skill_installer.py +197 -0
- company_discovery/update_plan.py +79 -0
- leads_cli-0.1.0.dist-info/METADATA +277 -0
- leads_cli-0.1.0.dist-info/RECORD +72 -0
- leads_cli-0.1.0.dist-info/WHEEL +4 -0
- leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime, timedelta
|
|
4
|
+
from typing import Any
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from sqlalchemy import select
|
|
8
|
+
from sqlalchemy.exc import IntegrityError
|
|
9
|
+
from sqlalchemy.orm import joinedload
|
|
10
|
+
|
|
11
|
+
from company_discovery.db.models import (
|
|
12
|
+
ContactCandidateRow,
|
|
13
|
+
ContactDiscoveryRunRow,
|
|
14
|
+
ContactEnrichmentFactRow,
|
|
15
|
+
ContactEnrichmentItemRow,
|
|
16
|
+
ContactEnrichmentRunRow,
|
|
17
|
+
ContactEvaluationRow,
|
|
18
|
+
EnrichmentRunRow,
|
|
19
|
+
)
|
|
20
|
+
from company_discovery.db.session import Database
|
|
21
|
+
from company_discovery.domain.contact_models import (
|
|
22
|
+
ContactChannelProfile,
|
|
23
|
+
ContactEnrichmentItem,
|
|
24
|
+
ContactEnrichmentOutcome,
|
|
25
|
+
ContactEnrichmentSummary,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ContactEnrichmentRunNotFoundError(LookupError):
    """Signal that a run needed by the contact enrichment repository is missing.

    Raised for a missing contact enrichment run as well as for a missing
    source contact discovery or company enrichment run.
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ContactEnrichmentRepository:
|
|
34
|
+
RUN_ID_PREFIX = "contact-enrich-"
|
|
35
|
+
CREATE_RUN_ATTEMPTS = 5
|
|
36
|
+
|
|
37
|
+
def __init__(self, database: Database) -> None:
|
|
38
|
+
self.database = database
|
|
39
|
+
|
|
40
|
+
def accepted_contacts(self, contact_run_id: str) -> list[dict[str, Any]]:
|
|
41
|
+
with self.database.session() as session:
|
|
42
|
+
run = session.get(ContactDiscoveryRunRow, contact_run_id)
|
|
43
|
+
if run is None:
|
|
44
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
45
|
+
f"contact discovery run not found: {contact_run_id}"
|
|
46
|
+
)
|
|
47
|
+
if run.status != "completed":
|
|
48
|
+
raise ValueError(f"contact discovery run {contact_run_id} is {run.status}, not completed")
|
|
49
|
+
rows = session.execute(
|
|
50
|
+
select(ContactEvaluationRow, ContactCandidateRow)
|
|
51
|
+
.join(ContactCandidateRow)
|
|
52
|
+
.where(
|
|
53
|
+
ContactEvaluationRow.run_id == contact_run_id,
|
|
54
|
+
ContactEvaluationRow.verdict == "accepted",
|
|
55
|
+
)
|
|
56
|
+
.order_by(ContactEvaluationRow.id)
|
|
57
|
+
).all()
|
|
58
|
+
contacts: list[dict[str, Any]] = []
|
|
59
|
+
seen: set[int] = set()
|
|
60
|
+
for evaluation, candidate in rows:
|
|
61
|
+
if candidate.id in seen:
|
|
62
|
+
continue
|
|
63
|
+
seen.add(candidate.id)
|
|
64
|
+
roles = [
|
|
65
|
+
row.role_key
|
|
66
|
+
for row in session.scalars(
|
|
67
|
+
select(ContactEvaluationRow).where(
|
|
68
|
+
ContactEvaluationRow.run_id == contact_run_id,
|
|
69
|
+
ContactEvaluationRow.candidate_id == candidate.id,
|
|
70
|
+
ContactEvaluationRow.verdict == "accepted",
|
|
71
|
+
)
|
|
72
|
+
).all()
|
|
73
|
+
]
|
|
74
|
+
contacts.append(
|
|
75
|
+
{
|
|
76
|
+
"candidate_id": candidate.id,
|
|
77
|
+
"company_name": candidate.company_name,
|
|
78
|
+
"company_domain": candidate.company_domain,
|
|
79
|
+
"full_name": candidate.full_name,
|
|
80
|
+
"normalized_name": candidate.normalized_name,
|
|
81
|
+
"title": candidate.title,
|
|
82
|
+
"linkedin_url": candidate.linkedin_url,
|
|
83
|
+
"role_keys": list(dict.fromkeys(roles)),
|
|
84
|
+
"discovery_reason": evaluation.reason,
|
|
85
|
+
"source_urls": candidate.source_urls,
|
|
86
|
+
}
|
|
87
|
+
)
|
|
88
|
+
return contacts
|
|
89
|
+
|
|
90
|
+
def create_run(self, contact_run_id: str, options: dict[str, Any]) -> str:
|
|
91
|
+
for _ in range(self.CREATE_RUN_ATTEMPTS):
|
|
92
|
+
try:
|
|
93
|
+
with self.database.session() as session:
|
|
94
|
+
if session.get(ContactDiscoveryRunRow, contact_run_id) is None:
|
|
95
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
96
|
+
f"contact discovery run not found: {contact_run_id}"
|
|
97
|
+
)
|
|
98
|
+
run_id = self._new_run_id()
|
|
99
|
+
session.add(
|
|
100
|
+
ContactEnrichmentRunRow(
|
|
101
|
+
id=run_id,
|
|
102
|
+
contact_discovery_run_id=contact_run_id,
|
|
103
|
+
options_payload=options,
|
|
104
|
+
)
|
|
105
|
+
)
|
|
106
|
+
return run_id
|
|
107
|
+
except IntegrityError:
|
|
108
|
+
continue
|
|
109
|
+
raise RuntimeError("unable to allocate a unique contact enrichment run id")
|
|
110
|
+
|
|
111
|
+
def fresh_item(self, candidate_id: int, freshness_days: int) -> ContactEnrichmentItem | None:
|
|
112
|
+
cutoff = datetime.now(UTC) - timedelta(days=freshness_days)
|
|
113
|
+
with self.database.session() as session:
|
|
114
|
+
fact = session.scalar(
|
|
115
|
+
select(ContactEnrichmentFactRow)
|
|
116
|
+
.where(
|
|
117
|
+
ContactEnrichmentFactRow.candidate_id == candidate_id,
|
|
118
|
+
ContactEnrichmentFactRow.observed_at >= cutoff,
|
|
119
|
+
)
|
|
120
|
+
.order_by(ContactEnrichmentFactRow.observed_at.desc())
|
|
121
|
+
.limit(1)
|
|
122
|
+
)
|
|
123
|
+
if fact is None:
|
|
124
|
+
return None
|
|
125
|
+
candidate = session.get(ContactCandidateRow, candidate_id)
|
|
126
|
+
if candidate is None:
|
|
127
|
+
return None
|
|
128
|
+
discovery = {
|
|
129
|
+
"company_name": candidate.company_name,
|
|
130
|
+
"company_domain": candidate.company_domain,
|
|
131
|
+
"contact_name": candidate.full_name,
|
|
132
|
+
"title": candidate.title,
|
|
133
|
+
"linkedin_url": candidate.linkedin_url,
|
|
134
|
+
"role_keys": [],
|
|
135
|
+
"source_urls": candidate.source_urls,
|
|
136
|
+
}
|
|
137
|
+
return ContactEnrichmentItem(
|
|
138
|
+
candidate_id=candidate_id,
|
|
139
|
+
discovery=discovery,
|
|
140
|
+
channels=ContactChannelProfile.model_validate(fact.channels_payload),
|
|
141
|
+
outcome=ContactEnrichmentOutcome(fact.outcome),
|
|
142
|
+
review_flags=fact.review_flags,
|
|
143
|
+
trace=[{"stage": "memory", "fact_id": fact.id}],
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def save_item(self, run_id: str, item: ContactEnrichmentItem) -> None:
|
|
147
|
+
with self.database.session() as session:
|
|
148
|
+
self._require_run(session, run_id)
|
|
149
|
+
session.add(
|
|
150
|
+
ContactEnrichmentItemRow(
|
|
151
|
+
run_id=run_id,
|
|
152
|
+
candidate_id=item.candidate_id,
|
|
153
|
+
discovery_snapshot=item.discovery,
|
|
154
|
+
channels_payload=item.channels.model_dump(mode="json"),
|
|
155
|
+
outcome=item.outcome.value,
|
|
156
|
+
review_flags=item.review_flags,
|
|
157
|
+
trace_payload=item.trace,
|
|
158
|
+
)
|
|
159
|
+
)
|
|
160
|
+
session.add(
|
|
161
|
+
ContactEnrichmentFactRow(
|
|
162
|
+
candidate_id=item.candidate_id,
|
|
163
|
+
enrichment_run_id=run_id,
|
|
164
|
+
channels_payload=item.channels.model_dump(mode="json"),
|
|
165
|
+
outcome=item.outcome.value,
|
|
166
|
+
review_flags=item.review_flags,
|
|
167
|
+
observed_at=item.channels.observed_at,
|
|
168
|
+
)
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
def complete_run(
|
|
172
|
+
self, run_id: str, summary: ContactEnrichmentSummary, paths: dict[str, str]
|
|
173
|
+
) -> None:
|
|
174
|
+
with self.database.session() as session:
|
|
175
|
+
run = self._require_run(session, run_id)
|
|
176
|
+
run.status = "completed"
|
|
177
|
+
run.summary_payload = summary.model_dump(mode="json")
|
|
178
|
+
run.artifact_paths = paths
|
|
179
|
+
run.completed_at = datetime.now(UTC)
|
|
180
|
+
|
|
181
|
+
def fail_run(self, run_id: str, error: Exception) -> None:
|
|
182
|
+
with self.database.session() as session:
|
|
183
|
+
run = self._require_run(session, run_id)
|
|
184
|
+
run.status = "failed"
|
|
185
|
+
run.error_message = str(error)
|
|
186
|
+
run.completed_at = datetime.now(UTC)
|
|
187
|
+
|
|
188
|
+
def set_artifacts(self, run_id: str, paths: dict[str, str]) -> None:
|
|
189
|
+
with self.database.session() as session:
|
|
190
|
+
self._require_run(session, run_id).artifact_paths = paths
|
|
191
|
+
|
|
192
|
+
def get_run(self, run_id: str) -> dict[str, Any]:
|
|
193
|
+
with self.database.session() as session:
|
|
194
|
+
row = session.scalar(
|
|
195
|
+
select(ContactEnrichmentRunRow)
|
|
196
|
+
.options(joinedload(ContactEnrichmentRunRow.items))
|
|
197
|
+
.where(ContactEnrichmentRunRow.id == run_id)
|
|
198
|
+
)
|
|
199
|
+
if row is None:
|
|
200
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
201
|
+
f"contact enrichment run not found: {run_id}"
|
|
202
|
+
)
|
|
203
|
+
contact_run = session.get(ContactDiscoveryRunRow, row.contact_discovery_run_id)
|
|
204
|
+
if contact_run is None:
|
|
205
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
206
|
+
f"source contact discovery run missing: {row.contact_discovery_run_id}"
|
|
207
|
+
)
|
|
208
|
+
company_run = session.get(EnrichmentRunRow, contact_run.enrichment_run_id)
|
|
209
|
+
if company_run is None:
|
|
210
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
211
|
+
f"source company enrichment run missing: {contact_run.enrichment_run_id}"
|
|
212
|
+
)
|
|
213
|
+
return {
|
|
214
|
+
"run_id": row.id,
|
|
215
|
+
"source_contact_run_id": row.contact_discovery_run_id,
|
|
216
|
+
"source_enrichment_run_id": contact_run.enrichment_run_id,
|
|
217
|
+
"source_discovery_run_id": company_run.discovery_run_id,
|
|
218
|
+
"options": row.options_payload,
|
|
219
|
+
"status": row.status,
|
|
220
|
+
"summary": row.summary_payload,
|
|
221
|
+
"artifacts": row.artifact_paths,
|
|
222
|
+
"error": row.error_message,
|
|
223
|
+
"created_at": row.created_at.isoformat(),
|
|
224
|
+
"completed_at": row.completed_at.isoformat() if row.completed_at else None,
|
|
225
|
+
"items": [
|
|
226
|
+
{
|
|
227
|
+
"candidate_id": item.candidate_id,
|
|
228
|
+
"discovery": item.discovery_snapshot,
|
|
229
|
+
"channels": item.channels_payload,
|
|
230
|
+
"outcome": item.outcome,
|
|
231
|
+
"review_flags": item.review_flags,
|
|
232
|
+
"trace": item.trace_payload,
|
|
233
|
+
}
|
|
234
|
+
for item in row.items
|
|
235
|
+
],
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
def inspect_contact(self, run_id: str, person: str) -> list[dict[str, Any]]:
|
|
239
|
+
normalized = " ".join(
|
|
240
|
+
"".join(char.lower() if char.isalnum() else " " for char in person).split()
|
|
241
|
+
)
|
|
242
|
+
matches = [
|
|
243
|
+
item
|
|
244
|
+
for item in self.get_run(run_id)["items"]
|
|
245
|
+
if " ".join(
|
|
246
|
+
"".join(
|
|
247
|
+
char.lower() if char.isalnum() else " "
|
|
248
|
+
for char in item["discovery"]["contact_name"]
|
|
249
|
+
).split()
|
|
250
|
+
)
|
|
251
|
+
== normalized
|
|
252
|
+
]
|
|
253
|
+
if not matches:
|
|
254
|
+
raise LookupError(f"person {person!r} was not found in run {run_id}")
|
|
255
|
+
return matches
|
|
256
|
+
|
|
257
|
+
@staticmethod
|
|
258
|
+
def _require_run(session: Any, run_id: str) -> ContactEnrichmentRunRow:
|
|
259
|
+
row = session.get(ContactEnrichmentRunRow, run_id)
|
|
260
|
+
if row is None:
|
|
261
|
+
raise ContactEnrichmentRunNotFoundError(
|
|
262
|
+
f"contact enrichment run not found: {run_id}"
|
|
263
|
+
)
|
|
264
|
+
return row
|
|
265
|
+
|
|
266
|
+
@classmethod
|
|
267
|
+
def _new_run_id(cls) -> str:
|
|
268
|
+
return f"{cls.RUN_ID_PREFIX}{uuid4().hex[:12]}"
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime, timedelta
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import select
|
|
9
|
+
from sqlalchemy.exc import IntegrityError
|
|
10
|
+
from sqlalchemy.orm import joinedload
|
|
11
|
+
|
|
12
|
+
from company_discovery.db.models import (
|
|
13
|
+
ContactCandidateRow,
|
|
14
|
+
ContactDiscoveryQueryRow,
|
|
15
|
+
ContactDiscoveryRunRow,
|
|
16
|
+
ContactEvaluationRow,
|
|
17
|
+
EnrichmentRunRow,
|
|
18
|
+
)
|
|
19
|
+
from company_discovery.db.session import Database
|
|
20
|
+
from company_discovery.domain.contact_models import (
|
|
21
|
+
ContactCandidate,
|
|
22
|
+
ContactDiscoveryItem,
|
|
23
|
+
ContactDiscoverySummary,
|
|
24
|
+
ContactVerdict,
|
|
25
|
+
EvidenceVerdict,
|
|
26
|
+
)
|
|
27
|
+
from company_discovery.domain.contact_spec import ContactSearchSpec
|
|
28
|
+
from company_discovery.domain.models import ExaSearchResult
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ContactRunNotFoundError(LookupError):
    """Signal that a contact discovery run or its source company enrichment run is missing."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ContactNotFoundError(LookupError):
    """Signal that no item of a contact discovery run matches the requested person."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ContactDiscoveryRepository:
|
|
40
|
+
RUN_ID_PREFIX = "contact-discover-"
|
|
41
|
+
CREATE_RUN_ATTEMPTS = 5
|
|
42
|
+
|
|
43
|
+
def __init__(self, database: Database) -> None:
|
|
44
|
+
self.database = database
|
|
45
|
+
|
|
46
|
+
def source_companies(self, spec: ContactSearchSpec) -> list[dict[str, Any]]:
|
|
47
|
+
source = spec.company_source
|
|
48
|
+
with self.database.session() as session:
|
|
49
|
+
run = session.get(EnrichmentRunRow, source.enrichment_run_id)
|
|
50
|
+
if run is None:
|
|
51
|
+
raise ContactRunNotFoundError(
|
|
52
|
+
f"company enrichment run not found: {source.enrichment_run_id}"
|
|
53
|
+
)
|
|
54
|
+
if run.status != "completed":
|
|
55
|
+
raise ValueError(
|
|
56
|
+
f"company enrichment run {source.enrichment_run_id} is {run.status}, not completed"
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
allowed = {
|
|
60
|
+
"ready": {"enriched_ready"},
|
|
61
|
+
"review": {"enriched_with_gaps", "independence_unconfirmed"},
|
|
62
|
+
"all": {
|
|
63
|
+
"enriched_ready",
|
|
64
|
+
"enriched_with_gaps",
|
|
65
|
+
"independence_unconfirmed",
|
|
66
|
+
},
|
|
67
|
+
}[source.bucket]
|
|
68
|
+
selected_domains = set(source.domains)
|
|
69
|
+
companies: list[dict[str, Any]] = []
|
|
70
|
+
for item in run.items:
|
|
71
|
+
domain = item.discovery_snapshot["domain"]
|
|
72
|
+
if item.outcome not in allowed:
|
|
73
|
+
continue
|
|
74
|
+
if selected_domains and domain not in selected_domains:
|
|
75
|
+
continue
|
|
76
|
+
companies.append(
|
|
77
|
+
{
|
|
78
|
+
"company_id": item.candidate_id,
|
|
79
|
+
"company_name": item.discovery_snapshot["company_name"],
|
|
80
|
+
"company_domain": domain,
|
|
81
|
+
"vertical": item.discovery_snapshot.get("target_vertical")
|
|
82
|
+
or item.discovery_snapshot.get("vertical"),
|
|
83
|
+
"state": item.discovery_snapshot.get("state"),
|
|
84
|
+
"linkedin_url": (item.enrichment_payload.get("linkedin") or {}).get("url"),
|
|
85
|
+
"company_enrichment_outcome": item.outcome,
|
|
86
|
+
}
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
if selected_domains:
|
|
90
|
+
found = {company["company_domain"] for company in companies}
|
|
91
|
+
missing = sorted(selected_domains - found)
|
|
92
|
+
if missing:
|
|
93
|
+
raise ValueError(
|
|
94
|
+
"requested domains are not available in the selected company bucket: "
|
|
95
|
+
+ ", ".join(missing)
|
|
96
|
+
)
|
|
97
|
+
return companies[: spec.company_limit]
|
|
98
|
+
|
|
99
|
+
def create_run(self, spec: ContactSearchSpec, source_spec_path: Path | None) -> str:
|
|
100
|
+
for _ in range(self.CREATE_RUN_ATTEMPTS):
|
|
101
|
+
try:
|
|
102
|
+
with self.database.session() as session:
|
|
103
|
+
run_id = self._new_run_id()
|
|
104
|
+
session.add(
|
|
105
|
+
ContactDiscoveryRunRow(
|
|
106
|
+
id=run_id,
|
|
107
|
+
enrichment_run_id=spec.company_source.enrichment_run_id,
|
|
108
|
+
spec_payload=spec.model_dump(mode="json"),
|
|
109
|
+
source_spec_path=str(source_spec_path.resolve())
|
|
110
|
+
if source_spec_path
|
|
111
|
+
else None,
|
|
112
|
+
)
|
|
113
|
+
)
|
|
114
|
+
return run_id
|
|
115
|
+
except IntegrityError:
|
|
116
|
+
continue
|
|
117
|
+
raise RuntimeError("unable to allocate a unique contact discovery run id")
|
|
118
|
+
|
|
119
|
+
def fresh_contacts(
|
|
120
|
+
self,
|
|
121
|
+
company_domain: str,
|
|
122
|
+
role_key: str,
|
|
123
|
+
freshness_days: int,
|
|
124
|
+
limit: int,
|
|
125
|
+
) -> list[ContactDiscoveryItem]:
|
|
126
|
+
cutoff = datetime.now(UTC) - timedelta(days=freshness_days)
|
|
127
|
+
with self.database.session() as session:
|
|
128
|
+
rows = session.execute(
|
|
129
|
+
select(ContactEvaluationRow, ContactCandidateRow)
|
|
130
|
+
.join(ContactCandidateRow)
|
|
131
|
+
.where(
|
|
132
|
+
ContactCandidateRow.company_domain == company_domain,
|
|
133
|
+
ContactEvaluationRow.role_key == role_key,
|
|
134
|
+
ContactEvaluationRow.verdict == ContactVerdict.ACCEPTED.value,
|
|
135
|
+
ContactEvaluationRow.created_at >= cutoff,
|
|
136
|
+
)
|
|
137
|
+
.order_by(ContactEvaluationRow.created_at.desc())
|
|
138
|
+
).all()
|
|
139
|
+
found: list[ContactDiscoveryItem] = []
|
|
140
|
+
seen: set[int] = set()
|
|
141
|
+
for evaluation, candidate in rows:
|
|
142
|
+
if candidate.id in seen:
|
|
143
|
+
continue
|
|
144
|
+
seen.add(candidate.id)
|
|
145
|
+
found.append(self._item(evaluation, candidate, source="memory"))
|
|
146
|
+
if len(found) == limit:
|
|
147
|
+
break
|
|
148
|
+
return found
|
|
149
|
+
|
|
150
|
+
def add_query(
|
|
151
|
+
self,
|
|
152
|
+
run_id: str,
|
|
153
|
+
company_domain: str,
|
|
154
|
+
role_key: str,
|
|
155
|
+
query: str,
|
|
156
|
+
results: list[ExaSearchResult],
|
|
157
|
+
cost_dollars: float,
|
|
158
|
+
) -> None:
|
|
159
|
+
with self.database.session() as session:
|
|
160
|
+
self._require_run(session, run_id)
|
|
161
|
+
session.add(
|
|
162
|
+
ContactDiscoveryQueryRow(
|
|
163
|
+
run_id=run_id,
|
|
164
|
+
company_domain=company_domain,
|
|
165
|
+
role_key=role_key,
|
|
166
|
+
query_text=query,
|
|
167
|
+
result_count=len(results),
|
|
168
|
+
cost_dollars=cost_dollars,
|
|
169
|
+
raw_results=[result.model_dump(mode="json") for result in results],
|
|
170
|
+
)
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
def upsert_candidate(self, candidate: ContactCandidate) -> int:
|
|
174
|
+
with self.database.session() as session:
|
|
175
|
+
row = session.scalar(
|
|
176
|
+
select(ContactCandidateRow).where(
|
|
177
|
+
ContactCandidateRow.company_domain == candidate.company_domain,
|
|
178
|
+
ContactCandidateRow.identity_key == candidate.identity_key,
|
|
179
|
+
)
|
|
180
|
+
)
|
|
181
|
+
if row is None and candidate.linkedin_url:
|
|
182
|
+
row = session.scalar(
|
|
183
|
+
select(ContactCandidateRow).where(
|
|
184
|
+
ContactCandidateRow.company_domain == candidate.company_domain,
|
|
185
|
+
ContactCandidateRow.normalized_name == candidate.normalized_name,
|
|
186
|
+
ContactCandidateRow.linkedin_url.is_(None),
|
|
187
|
+
)
|
|
188
|
+
)
|
|
189
|
+
if row is None:
|
|
190
|
+
row = ContactCandidateRow(
|
|
191
|
+
company_candidate_id=candidate.company_id,
|
|
192
|
+
company_name=candidate.company_name,
|
|
193
|
+
company_domain=candidate.company_domain,
|
|
194
|
+
full_name=candidate.full_name,
|
|
195
|
+
normalized_name=candidate.normalized_name,
|
|
196
|
+
identity_key=candidate.identity_key,
|
|
197
|
+
title=candidate.title,
|
|
198
|
+
linkedin_url=candidate.linkedin_url,
|
|
199
|
+
source_urls=candidate.source_urls,
|
|
200
|
+
evidence=candidate.evidence,
|
|
201
|
+
first_seen_at=candidate.first_seen_at,
|
|
202
|
+
last_seen_at=candidate.last_seen_at,
|
|
203
|
+
)
|
|
204
|
+
session.add(row)
|
|
205
|
+
session.flush()
|
|
206
|
+
else:
|
|
207
|
+
row.company_name = candidate.company_name
|
|
208
|
+
row.full_name = candidate.full_name
|
|
209
|
+
row.identity_key = candidate.identity_key
|
|
210
|
+
row.title = candidate.title
|
|
211
|
+
row.linkedin_url = candidate.linkedin_url or row.linkedin_url
|
|
212
|
+
row.source_urls = list(dict.fromkeys([*row.source_urls, *candidate.source_urls]))
|
|
213
|
+
row.evidence = list(dict.fromkeys([*row.evidence, *candidate.evidence]))
|
|
214
|
+
row.last_seen_at = datetime.now(UTC)
|
|
215
|
+
return row.id
|
|
216
|
+
|
|
217
|
+
def record_item(self, run_id: str, item: ContactDiscoveryItem) -> None:
|
|
218
|
+
with self.database.session() as session:
|
|
219
|
+
self._require_run(session, run_id)
|
|
220
|
+
session.add(
|
|
221
|
+
ContactEvaluationRow(
|
|
222
|
+
run_id=run_id,
|
|
223
|
+
candidate_id=item.candidate_id,
|
|
224
|
+
role_key=item.role_key,
|
|
225
|
+
verdict=item.verdict.value,
|
|
226
|
+
reason=item.reason,
|
|
227
|
+
current_company_match=item.current_company_match.value,
|
|
228
|
+
role_match=item.role_match.value,
|
|
229
|
+
identity_clear=item.identity_clear,
|
|
230
|
+
source=item.source,
|
|
231
|
+
)
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
def complete_run(
|
|
235
|
+
self, run_id: str, summary: ContactDiscoverySummary, paths: dict[str, str]
|
|
236
|
+
) -> None:
|
|
237
|
+
with self.database.session() as session:
|
|
238
|
+
row = self._require_run(session, run_id)
|
|
239
|
+
row.status = "completed"
|
|
240
|
+
row.summary_payload = summary.model_dump(mode="json")
|
|
241
|
+
row.artifact_paths = paths
|
|
242
|
+
row.completed_at = datetime.now(UTC)
|
|
243
|
+
|
|
244
|
+
def fail_run(self, run_id: str, error: Exception) -> None:
|
|
245
|
+
with self.database.session() as session:
|
|
246
|
+
row = self._require_run(session, run_id)
|
|
247
|
+
row.status = "failed"
|
|
248
|
+
row.error_message = str(error)
|
|
249
|
+
row.completed_at = datetime.now(UTC)
|
|
250
|
+
|
|
251
|
+
def set_artifacts(self, run_id: str, paths: dict[str, str]) -> None:
|
|
252
|
+
with self.database.session() as session:
|
|
253
|
+
self._require_run(session, run_id).artifact_paths = paths
|
|
254
|
+
|
|
255
|
+
def get_run(self, run_id: str) -> dict[str, Any]:
|
|
256
|
+
with self.database.session() as session:
|
|
257
|
+
row = session.execute(
|
|
258
|
+
select(ContactDiscoveryRunRow)
|
|
259
|
+
.options(
|
|
260
|
+
joinedload(ContactDiscoveryRunRow.queries),
|
|
261
|
+
joinedload(ContactDiscoveryRunRow.evaluations).joinedload(
|
|
262
|
+
ContactEvaluationRow.candidate
|
|
263
|
+
),
|
|
264
|
+
)
|
|
265
|
+
.where(ContactDiscoveryRunRow.id == run_id)
|
|
266
|
+
).unique().scalar_one_or_none()
|
|
267
|
+
if row is None:
|
|
268
|
+
raise ContactRunNotFoundError(f"contact discovery run not found: {run_id}")
|
|
269
|
+
enrichment_run = session.get(EnrichmentRunRow, row.enrichment_run_id)
|
|
270
|
+
if enrichment_run is None:
|
|
271
|
+
raise ContactRunNotFoundError(
|
|
272
|
+
"source company enrichment run not found for contact discovery run "
|
|
273
|
+
f"{run_id}: {row.enrichment_run_id}"
|
|
274
|
+
)
|
|
275
|
+
return {
|
|
276
|
+
"run_id": row.id,
|
|
277
|
+
"source_enrichment_run_id": row.enrichment_run_id,
|
|
278
|
+
"source_discovery_run_id": enrichment_run.discovery_run_id,
|
|
279
|
+
"spec": row.spec_payload,
|
|
280
|
+
"source_spec_path": row.source_spec_path,
|
|
281
|
+
"status": row.status,
|
|
282
|
+
"summary": row.summary_payload,
|
|
283
|
+
"artifacts": row.artifact_paths,
|
|
284
|
+
"error": row.error_message,
|
|
285
|
+
"created_at": row.created_at.isoformat(),
|
|
286
|
+
"completed_at": row.completed_at.isoformat() if row.completed_at else None,
|
|
287
|
+
"queries": [
|
|
288
|
+
{
|
|
289
|
+
"company_domain": query.company_domain,
|
|
290
|
+
"role_key": query.role_key,
|
|
291
|
+
"query": query.query_text,
|
|
292
|
+
"result_count": query.result_count,
|
|
293
|
+
"cost_dollars": query.cost_dollars,
|
|
294
|
+
"raw_results": query.raw_results,
|
|
295
|
+
}
|
|
296
|
+
for query in row.queries
|
|
297
|
+
],
|
|
298
|
+
"items": [
|
|
299
|
+
self._item(evaluation, evaluation.candidate).model_dump(mode="json")
|
|
300
|
+
for evaluation in row.evaluations
|
|
301
|
+
],
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
def inspect_contact(self, run_id: str, person: str) -> list[dict[str, Any]]:
|
|
305
|
+
normalized = normalize_person_name(person)
|
|
306
|
+
payload = self.get_run(run_id)
|
|
307
|
+
matches = [
|
|
308
|
+
item
|
|
309
|
+
for item in payload["items"]
|
|
310
|
+
if item["candidate"]["normalized_name"] == normalized
|
|
311
|
+
]
|
|
312
|
+
if not matches:
|
|
313
|
+
raise ContactNotFoundError(f"person {person!r} was not found in run {run_id}")
|
|
314
|
+
return matches
|
|
315
|
+
|
|
316
|
+
@staticmethod
|
|
317
|
+
def _item(
|
|
318
|
+
evaluation: ContactEvaluationRow,
|
|
319
|
+
candidate: ContactCandidateRow,
|
|
320
|
+
source: str | None = None,
|
|
321
|
+
) -> ContactDiscoveryItem:
|
|
322
|
+
return ContactDiscoveryItem(
|
|
323
|
+
candidate_id=candidate.id,
|
|
324
|
+
candidate=ContactCandidate(
|
|
325
|
+
company_id=candidate.company_candidate_id,
|
|
326
|
+
company_name=candidate.company_name,
|
|
327
|
+
company_domain=candidate.company_domain,
|
|
328
|
+
full_name=candidate.full_name,
|
|
329
|
+
normalized_name=candidate.normalized_name,
|
|
330
|
+
identity_key=candidate.identity_key,
|
|
331
|
+
title=candidate.title,
|
|
332
|
+
linkedin_url=candidate.linkedin_url,
|
|
333
|
+
source_urls=candidate.source_urls,
|
|
334
|
+
evidence=candidate.evidence,
|
|
335
|
+
first_seen_at=candidate.first_seen_at,
|
|
336
|
+
last_seen_at=candidate.last_seen_at,
|
|
337
|
+
),
|
|
338
|
+
role_key=evaluation.role_key,
|
|
339
|
+
verdict=ContactVerdict(evaluation.verdict),
|
|
340
|
+
reason=evaluation.reason,
|
|
341
|
+
current_company_match=EvidenceVerdict(evaluation.current_company_match),
|
|
342
|
+
role_match=EvidenceVerdict(evaluation.role_match),
|
|
343
|
+
identity_clear=evaluation.identity_clear,
|
|
344
|
+
source=source or evaluation.source,
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
@staticmethod
|
|
348
|
+
def _require_run(session: Any, run_id: str) -> ContactDiscoveryRunRow:
|
|
349
|
+
row = session.get(ContactDiscoveryRunRow, run_id)
|
|
350
|
+
if row is None:
|
|
351
|
+
raise ContactRunNotFoundError(f"contact discovery run not found: {run_id}")
|
|
352
|
+
return row
|
|
353
|
+
|
|
354
|
+
@classmethod
|
|
355
|
+
def _new_run_id(cls) -> str:
|
|
356
|
+
return f"{cls.RUN_ID_PREFIX}{uuid4().hex[:12]}"
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def normalize_person_name(value: str) -> str:
    """Return ``value`` lower-cased with punctuation and extra whitespace removed.

    Alphanumeric characters are lower-cased, every other character acts as
    a word separator, and the remaining words are joined by single spaces.
    """
    pieces: list[str] = []
    for symbol in value:
        pieces.append(symbol.lower() if symbol.isalnum() else " ")
    words = "".join(pieces).split()
    return " ".join(words)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def contact_identity_key(normalized_name: str, linkedin_url: str | None) -> str:
    """Return the identity key for a contact.

    With a LinkedIn URL the key is ``linkedin:`` plus the lower-cased URL,
    cut at the first ``?`` and stripped of trailing slashes. Without one
    (``None`` or empty) the key is ``name:`` plus ``normalized_name``.
    """
    if not linkedin_url:
        return f"name:{normalized_name}"
    without_query, _, _ = linkedin_url.lower().partition("?")
    return "linkedin:" + without_query.rstrip("/")
|