leads-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- company_discovery/__init__.py +4 -0
- company_discovery/adapters/__init__.py +5 -0
- company_discovery/adapters/apollo.py +189 -0
- company_discovery/adapters/exa.py +112 -0
- company_discovery/adapters/llm.py +118 -0
- company_discovery/adapters/protocols.py +58 -0
- company_discovery/adapters/website.py +154 -0
- company_discovery/bundled_skills/__init__.py +1 -0
- company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
- company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
- company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
- company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
- company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
- company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
- company_discovery/cli.py +1789 -0
- company_discovery/db/__init__.py +5 -0
- company_discovery/db/contact_enrichment_repository.py +268 -0
- company_discovery/db/contact_repository.py +366 -0
- company_discovery/db/enrichment_repository.py +207 -0
- company_discovery/db/models.py +324 -0
- company_discovery/db/repository.py +363 -0
- company_discovery/db/session.py +48 -0
- company_discovery/domain/__init__.py +24 -0
- company_discovery/domain/contact_models.py +178 -0
- company_discovery/domain/contact_spec.py +86 -0
- company_discovery/domain/models.py +287 -0
- company_discovery/domain/spec.py +263 -0
- company_discovery/migrations.py +190 -0
- company_discovery/prompts/__init__.py +8 -0
- company_discovery/prompts/candidate_evaluation/system.md +13 -0
- company_discovery/prompts/company_enrichment/system.md +42 -0
- company_discovery/prompts/contact_evaluation/system.md +18 -0
- company_discovery/prompts/query_generation/system.md +10 -0
- company_discovery/release_manifest.json +7 -0
- company_discovery/reports/__init__.py +4 -0
- company_discovery/reports/contact_enrichment_exporter.py +108 -0
- company_discovery/reports/contact_exporter.py +132 -0
- company_discovery/reports/enrichment_exporter.py +125 -0
- company_discovery/reports/exporter.py +135 -0
- company_discovery/runtime.py +336 -0
- company_discovery/services/__init__.py +4 -0
- company_discovery/services/contact_enrichment_pipeline.py +344 -0
- company_discovery/services/contact_enrichment_progress.py +37 -0
- company_discovery/services/contact_evaluator.py +110 -0
- company_discovery/services/contact_pipeline.py +295 -0
- company_discovery/services/contact_progress.py +38 -0
- company_discovery/services/enrichment_extractor.py +61 -0
- company_discovery/services/enrichment_pipeline.py +526 -0
- company_discovery/services/enrichment_progress.py +20 -0
- company_discovery/services/enrichment_resolver.py +148 -0
- company_discovery/services/evaluator.py +40 -0
- company_discovery/services/hygiene.py +51 -0
- company_discovery/services/memory.py +150 -0
- company_discovery/services/normalization.py +98 -0
- company_discovery/services/pipeline.py +628 -0
- company_discovery/services/progress.py +48 -0
- company_discovery/services/query_planner.py +47 -0
- company_discovery/settings.py +152 -0
- company_discovery/skill_installer.py +197 -0
- company_discovery/update_plan.py +79 -0
- leads_cli-0.1.0.dist-info/METADATA +277 -0
- leads_cli-0.1.0.dist-info/RECORD +72 -0
- leads_cli-0.1.0.dist-info/WHEEL +4 -0
- leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from enum import StrEnum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DomainModel(BaseModel):
    # Shared pydantic base for the models declared in this module.
    # - extra="forbid": unknown input keys are a validation error.
    # - str_strip_whitespace=True: str values are trimmed during validation.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FitVerdict(StrEnum):
|
|
15
|
+
GOOD = "good_fit"
|
|
16
|
+
POSSIBLE = "possible_fit"
|
|
17
|
+
BAD = "bad_fit"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MatchVerdict(StrEnum):
|
|
21
|
+
YES = "yes"
|
|
22
|
+
LIKELY = "likely"
|
|
23
|
+
UNKNOWN = "unknown"
|
|
24
|
+
NO = "no"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ExclusionVerdict(StrEnum):
|
|
28
|
+
YES = "yes"
|
|
29
|
+
POSSIBLE = "possible"
|
|
30
|
+
NO = "no"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CandidateBucket(StrEnum):
|
|
34
|
+
SELECTED = "selected"
|
|
35
|
+
RESERVE = "reserve"
|
|
36
|
+
REJECTED = "rejected"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class QueryPlan(DomainModel):
    # A list of query strings plus the free-text rationale for them.
    # The list must hold between 1 and 20 entries.
    queries: list[str] = Field(max_length=20, min_length=1)
    rationale: str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ExaSearchResult(DomainModel):
    # One search hit together with the query that produced it.

    # Required fields.
    query: str
    position: int = Field(ge=1)  # must be >= 1
    title: str
    url: str

    # Optional fields; absent values default to None.
    text: str | None = None
    published_date: str | None = None
    exa_id: str | None = None

    # Free-form mapping; a fresh empty dict is created per instance.
    raw: dict[str, Any] = Field(default_factory=dict)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SourceSighting(DomainModel):
    # A single source record: the query, the URL and title found, and optional extras.

    # Required fields.
    query: str
    url: str
    title: str

    # Optional fields; absent values default to None.
    text: str | None = None
    exa_id: str | None = None

    # Free-form mapping; a fresh empty dict is created per instance.
    raw: dict[str, Any] = Field(default_factory=dict)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class NormalizedCandidate(DomainModel):
    # A candidate company record with identity, optional attributes and sightings.

    # Identity fields: all three must be non-empty strings.
    company_name: str = Field(min_length=1)
    domain: str = Field(min_length=1)
    dedupe_key: str = Field(min_length=1)

    # Optional descriptive attributes.
    vertical: str | None = None
    country: str | None = None
    state: str | None = None
    # Employee bounds, each >= 1 when given. No min <= max check is done here.
    employee_min: int | None = Field(None, ge=1)
    employee_max: int | None = Field(None, ge=1)
    ownership_type: str | None = None
    excluded: bool = False

    # Per-instance list of sightings (new empty list by default).
    sightings: list[SourceSighting] = Field(default_factory=list)

    # Timezone-aware UTC timestamps, each defaulting to "now" at construction.
    # The two defaults are evaluated separately, so they may differ slightly.
    first_seen_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
    last_seen_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class CandidateEvaluation(DomainModel):
    # Evaluation verdicts for one candidate, with cross-field consistency rules.
    # Most fields have no default: nullable ones must still be present in the input.

    company_name: str
    domain: str

    # Verdicts.
    fit: FitVerdict
    vertical_match: MatchVerdict
    geography_match: MatchVerdict
    size_match: MatchVerdict
    excluded: ExclusionVerdict

    # Justification.
    reason: str = Field(min_length=1)
    reason_codes: list[str]
    evidence: list[str]

    # Inferred attributes: required keys whose value may be None.
    inferred_vertical: str | None
    inferred_country: str | None
    inferred_state: str | None
    inferred_employee_min: int | None = Field(ge=1)
    inferred_employee_max: int | None = Field(ge=1)
    inferred_ownership_type: str | None

    # The only field with a default.
    target_vertical: str | None = None

    @model_validator(mode="after")
    def validate_consistency(self) -> "CandidateEvaluation":
        """Reject verdict combinations that contradict each other.

        Raises:
            ValueError: if ``fit`` is good while any match verdict is "no" or
                ``excluded`` is anything other than "no"; if ``excluded`` is
                "yes" while ``fit`` is not bad; or if the inferred employee
                minimum is greater than the inferred maximum.
        """
        match_verdicts = (self.vertical_match, self.geography_match, self.size_match)
        has_hard_mismatch = MatchVerdict.NO in match_verdicts
        has_exclusion = self.excluded != ExclusionVerdict.NO

        if self.fit == FitVerdict.GOOD:
            if has_hard_mismatch or has_exclusion:
                raise ValueError("good_fit cannot contain a hard mismatch or exclusion")

        if self.excluded == ExclusionVerdict.YES and self.fit != FitVerdict.BAD:
            raise ValueError("an excluded candidate must be bad_fit")

        lower = self.inferred_employee_min
        upper = self.inferred_employee_max
        if lower is not None and upper is not None and lower > upper:
            raise ValueError("inferred employee minimum cannot exceed maximum")

        return self
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class BucketedCandidate(DomainModel):
    # Pairs a candidate with its evaluation and the bucket it was assigned to.
    candidate: NormalizedCandidate
    evaluation: CandidateEvaluation
    bucket: CandidateBucket
    source: str  # free-form string; allowed values are not constrained here
    target_vertical: str | None = None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class RunSummary(DomainModel):
    # Integer counters for one run; every counter starts at 0.

    # memory_* counters.
    memory_matched: int = 0
    memory_reused: int = 0
    memory_rechecked: int = 0
    memory_skipped: int = 0

    # Search-side counters.
    external_gap: int = 0
    queries_generated: int = 0
    raw_results: int = 0
    unique_candidates: int = 0
    hygiene_rejected: int = 0

    # Counters named after the CandidateBucket values.
    selected: int = 0
    reserve: int = 0
    rejected: int = 0
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class RunResult(DomainModel):
    # Result payload of a run: id, summary counters, queries and bucketed candidates.
    run_id: str
    summary: RunSummary
    queries: list[str]
    candidates: list[BucketedCandidate]
    # str -> str mapping; a fresh empty dict is created per instance.
    artifact_paths: dict[str, str] = Field(default_factory=dict)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class IndependenceStatus(StrEnum):
|
|
152
|
+
YES = "yes"
|
|
153
|
+
NO = "no"
|
|
154
|
+
UNKNOWN = "unknown"
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class InheritedFieldStatus(StrEnum):
|
|
158
|
+
INHERITED = "inherited"
|
|
159
|
+
CONFIRMED = "confirmed"
|
|
160
|
+
CONFLICT = "conflict"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class EnrichmentOutcome(StrEnum):
|
|
164
|
+
READY = "enriched_ready"
|
|
165
|
+
GAPS = "enriched_with_gaps"
|
|
166
|
+
INDEPENDENCE_UNCONFIRMED = "independence_unconfirmed"
|
|
167
|
+
IDENTITY_CONFLICT = "identity_conflict"
|
|
168
|
+
GEOGRAPHY_CONFLICT = "geography_conflict"
|
|
169
|
+
FIT_CONFLICT = "fit_conflict"
|
|
170
|
+
FAILED = "enrichment_failed"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class WebsitePage(DomainModel):
    # One web page: URL and text are required, the rest have defaults.
    url: str
    title: str = ""
    text: str
    page_type: str = "other"  # free-form string; allowed values are not constrained here
    # Per-instance list (new empty list by default).
    linkedin_urls: list[str] = Field(default_factory=list)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class PhoneObservation(DomainModel):
    # A phone value with an optional label and the URL it is attributed to.
    value: str
    label: str | None = None
    source_url: str
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class LocationObservation(DomainModel):
    # A postal address with an optional label and the URL it is attributed to.

    # Address parts; only the country has a default.
    street_address: str
    city: str
    state: str
    zip: str
    country: str = "US"

    label: str | None = None
    source_url: str
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class OwnershipSignal(DomainModel):
    # An ownership-related statement, its kind, and the URL it is attributed to.
    kind: str  # plain string here; not validated against an enum in this model
    statement: str
    source_url: str
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class LinkedInObservation(DomainModel):
    # A LinkedIn URL together with the URL it is attributed to.
    url: str
    source_url: str
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class EnrichmentExtraction(DomainModel):
    # Container of observations; every field is optional and has a default.

    # Identity information.
    observed_company_name: str | None = None
    identity_conflict: bool = False
    identity_conflict_reason: str | None = None

    # Observation lists; each instance gets its own new empty list.
    phones: list[PhoneObservation] = Field(default_factory=list)
    locations: list[LocationObservation] = Field(default_factory=list)
    ownership_signals: list[OwnershipSignal] = Field(default_factory=list)
    linkedin_profiles: list[LinkedInObservation] = Field(default_factory=list)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class PhoneFact(DomainModel):
    # A phone record with its display form, provenance and observation time.
    value: str
    display_value: str
    source: str
    source_url: str
    # Timezone-aware UTC timestamp, defaulting to "now" at construction.
    observed_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class LocationFact(DomainModel):
    # A postal address with provenance and observation time.

    # Address parts; only the country has a default.
    street_address: str
    city: str
    state: str
    zip: str
    country: str = "US"

    # Provenance.
    source: str
    source_url: str

    # Timezone-aware UTC timestamp, defaulting to "now" at construction.
    observed_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class IndependenceFact(DomainModel):
    # An independence status with supporting string lists and observation time.
    status: IndependenceStatus

    # Supporting lists; each instance gets its own new empty list.
    evidence: list[str] = Field(default_factory=list)
    source_urls: list[str] = Field(default_factory=list)
    signal_kinds: list[str] = Field(default_factory=list)

    # Timezone-aware UTC timestamp, defaulting to "now" at construction.
    observed_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class LinkedInFact(DomainModel):
    # A LinkedIn URL with provenance and observation time.
    url: str
    source: str
    source_url: str
    # Timezone-aware UTC timestamp, defaulting to "now" at construction.
    observed_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class EnrichmentProfile(DomainModel):
    # Up to one fact of each kind; any of them may be missing (None).
    phone: PhoneFact | None = None
    location: LocationFact | None = None
    independence: IndependenceFact | None = None
    linkedin: LinkedInFact | None = None
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class EnrichmentItem(DomainModel):
    # Enrichment record for one company id.

    # Required fields.
    company_id: int
    discovery: dict[str, Any]  # free-form mapping; its schema is not defined here
    enrichment: EnrichmentProfile
    inherited_status: dict[str, InheritedFieldStatus]
    outcome: EnrichmentOutcome

    # Optional lists; each instance gets its own new empty list.
    conflicts: list[str] = Field(default_factory=list)
    review_flags: list[str] = Field(default_factory=list)
    trace: list[dict[str, Any]] = Field(default_factory=list)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class EnrichmentSummary(DomainModel):
    # Integer counters for one enrichment run; every counter starts at 0.

    # Work counters.
    processed: int = 0
    inherited_facts: int = 0
    memory_profiles_reused: int = 0
    websites_fetched: int = 0
    fallback_searches: int = 0

    # Result counters.
    ready: int = 0
    review: int = 0
    blocked: int = 0
    failed: int = 0
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class EnrichmentRunResult(DomainModel):
    # Result payload of an enrichment run, linked to a discovery run by id.
    run_id: str
    discovery_run_id: str
    summary: EnrichmentSummary
    items: list[EnrichmentItem]
    # str -> str mapping; a fresh empty dict is created per instance.
    artifact_paths: dict[str, str] = Field(default_factory=dict)
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator, model_validator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Two-letter codes accepted as US states: the 50 states plus DC.
US_STATE_CODES = frozenset(
    (
        "AL AK AZ AR CA CO CT DE FL GA "
        "HI ID IL IN IA KS KY LA ME MD "
        "MA MI MN MS MO MT NE NV NH NJ "
        "NM NY NC ND OH OK OR PA RI SC "
        "SD TN TX UT VT VA WA WV WI WY "
        "DC"
    ).split()
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class StrictModel(BaseModel):
    # Shared pydantic base for the spec models declared in this module.
    # - extra="forbid": unknown input keys are a validation error.
    # - str_strip_whitespace=True: str values are trimmed during validation.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class NoveltyMode(StrEnum):
|
|
29
|
+
UNUSED_MEMORY = "unused_memory"
|
|
30
|
+
ONLY_NEW = "only_new"
|
|
31
|
+
FULL_MEMORY = "full_memory"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BalanceMode(StrEnum):
|
|
35
|
+
SOFT = "soft"
|
|
36
|
+
STRICT = "strict"
|
|
37
|
+
NONE = "none"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class VerticalSpec(StrictModel):
    # One vertical entry of a search spec.
    # Input aliases accepted: seed_terms -> search_terms, anti_terms -> exclude_terms,
    # mode -> legacy_mode.

    # Lowercase slug: starts with [a-z0-9], then [a-z0-9_-]*.
    key: str = Field(min_length=1, pattern=r"^[a-z0-9][a-z0-9_-]*$")
    label: str = Field(min_length=1)
    search_terms: list[str] = Field(
        default_factory=list,
        validation_alias=AliasChoices("search_terms", "seed_terms"),
    )
    exclude_terms: list[str] = Field(
        default_factory=list,
        validation_alias=AliasChoices("exclude_terms", "anti_terms"),
    )
    # Accepted on input but left out of serialized output (exclude=True).
    legacy_mode: str | None = Field(
        default=None,
        exclude=True,
        validation_alias=AliasChoices("legacy_mode", "mode"),
    )

    @field_validator("key", mode="before")
    @classmethod
    def normalize_key(cls, value: object) -> object:
        """Trim and lowercase a string key before the pattern check runs.

        This validator sees the raw input. Non-string values are returned
        unchanged so that the field's own ``str`` validation reports them as a
        normal validation error, instead of ``.strip()`` raising an
        ``AttributeError`` that would escape validation.
        """
        if not isinstance(value, str):
            return value
        return value.strip().lower()

    @field_validator("search_terms", "exclude_terms")
    @classmethod
    def normalize_terms(cls, values: list[str]) -> list[str]:
        """Trim and lowercase each term, drop blanks, keep the first of each duplicate."""
        return list(dict.fromkeys(value.strip().lower() for value in values if value.strip()))

    @field_validator("legacy_mode")
    @classmethod
    def normalize_legacy_mode(cls, value: str | None) -> str | None:
        """Lowercase the legacy mode and restrict it to ``known`` / ``exploratory``.

        Raises:
            ValueError: if a non-None value is neither ``known`` nor ``exploratory``.
        """
        if value is None:
            return value
        normalized = value.strip().lower()
        if normalized not in {"known", "exploratory"}:
            raise ValueError("legacy vertical mode must be known or exploratory")
        return normalized
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class GeographySpec(StrictModel):
    # Geographic scope: a two-character country code plus an optional list of states.
    country: str = Field("US", min_length=2, max_length=2)
    states: list[str] = Field(default_factory=list)

    @field_validator("country")
    @classmethod
    def normalize_country(cls, value: str) -> str:
        """Return the country code in upper case."""
        upper_country = value.upper()
        return upper_country

    @field_validator("states")
    @classmethod
    def normalize_states(cls, values: list[str]) -> list[str]:
        """Upper-case every state code and keep the first of each duplicate."""
        ordered_unique: dict[str, None] = {}
        for code in values:
            ordered_unique.setdefault(code.upper(), None)
        return list(ordered_unique)

    @model_validator(mode="after")
    def validate_states(self) -> "GeographySpec":
        """Check state codes against ``US_STATE_CODES`` when the country is ``US``.

        States are not checked for any other country.

        Raises:
            ValueError: listing, in sorted order, every code that is not a US state code.
        """
        if self.country != "US":
            return self
        invalid = sorted({code for code in self.states if code not in US_STATE_CODES})
        if invalid:
            raise ValueError(f"invalid US state codes: {', '.join(invalid)}")
        return self
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class CompanySizeSpec(StrictModel):
    # Optional employee-count bounds; each bound must be >= 1 when given.
    employee_min: int | None = Field(None, ge=1)
    employee_max: int | None = Field(None, ge=1)

    @model_validator(mode="after")
    def validate_range(self) -> "CompanySizeSpec":
        """Ensure the minimum does not exceed the maximum when both are set.

        Raises:
            ValueError: if both bounds are set and ``employee_min > employee_max``.
        """
        lower, upper = self.employee_min, self.employee_max
        if lower is None or upper is None:
            return self
        if lower > upper:
            raise ValueError("employee_min cannot exceed employee_max")
        return self

    @property
    def is_unbounded(self) -> bool:
        """True when neither bound is set."""
        return all(bound is None for bound in (self.employee_min, self.employee_max))
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class IncludeSpec(StrictModel):
    # Two string lists, both normalized by the validator below.
    keywords: list[str] = Field(default_factory=list)
    subtypes: list[str] = Field(default_factory=list)

    @field_validator("keywords", "subtypes")
    @classmethod
    def normalize_values(cls, values: list[str]) -> list[str]:
        """Trim and lowercase each entry, drop blanks, keep the first of each duplicate."""
        ordered_unique: dict[str, None] = {}
        for raw in values:
            cleaned = raw.strip().lower()
            if cleaned:
                ordered_unique.setdefault(cleaned, None)
        return list(ordered_unique)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class OwnershipSignalKind(StrEnum):
|
|
131
|
+
FAMILY_OWNED = "family_owned"
|
|
132
|
+
FRANCHISE = "franchise"
|
|
133
|
+
PARENT = "parent"
|
|
134
|
+
SUBSIDIARY = "subsidiary"
|
|
135
|
+
DIVISION = "division"
|
|
136
|
+
ACQUIRED = "acquired"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class StructuredExcludeSpec(StrictModel):
    # Exclusions expressed as OwnershipSignalKind values.
    ownership_signals: list[OwnershipSignalKind] = Field(default_factory=list)

    @field_validator("ownership_signals", mode="before")
    @classmethod
    def normalize_ownership_signals(cls, values: object) -> object:
        """Trim/lowercase string entries and drop duplicates, keeping first occurrences.

        This validator sees the raw input. Anything that is not a list is
        returned unchanged so the field's own validation can reject it.
        Duplicates are found by equality rather than hashing, so an unhashable
        element (for example a nested dict) is passed on and reported as a
        normal validation error instead of raising ``TypeError`` here.
        """
        if not isinstance(values, list):
            return values
        deduped: list[object] = []
        for value in values:
            candidate = value.strip().lower() if isinstance(value, str) else value
            if candidate not in deduped:
                deduped.append(candidate)
        return deduped
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class ExcludeSpec(StrictModel):
    # Exclusion criteria: three normalized string lists plus a structured part.
    keywords: list[str] = Field(default_factory=list)
    ownership_types: list[str] = Field(default_factory=list)
    # NOTE(review): entries are lowercased like the other lists; if these are
    # meant to be regular expressions, confirm that lowercasing is intended.
    company_patterns: list[str] = Field(default_factory=list)
    structured: StructuredExcludeSpec = Field(default_factory=StructuredExcludeSpec)

    @field_validator("keywords", "ownership_types", "company_patterns")
    @classmethod
    def normalize_values(cls, values: list[str]) -> list[str]:
        """Trim and lowercase each entry, drop blanks, keep the first of each duplicate."""
        ordered_unique: dict[str, None] = {}
        for raw in values:
            cleaned = raw.strip().lower()
            if cleaned:
                ordered_unique.setdefault(cleaned, None)
        return list(ordered_unique)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class ExternalSearchSpec(StrictModel):
    # Search limits: exa_searches in 1..20 (default 8),
    # results_per_search in 1..100 (default 5).
    exa_searches: int = Field(8, ge=1, le=20)
    results_per_search: int = Field(5, ge=1, le=100)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class CompanySearchSpec(StrictModel):
    # Top-level company search spec (schema version 1).
    # "vertical" is accepted as an input alias for "verticals", and a single
    # vertical object is accepted in place of a list.

    version: Literal[1]
    count: int = Field(ge=1, le=1000)
    verticals: list[VerticalSpec] = Field(
        min_length=1,
        validation_alias=AliasChoices("verticals", "vertical"),
    )
    geography: GeographySpec = Field(default_factory=GeographySpec)
    company_size: CompanySizeSpec = Field(default_factory=CompanySizeSpec)
    include: IncludeSpec = Field(default_factory=IncludeSpec)
    exclude: ExcludeSpec = Field(default_factory=ExcludeSpec)
    novelty_mode: NoveltyMode = NoveltyMode.UNUSED_MEMORY
    reserve_ratio: float = Field(default=0.5, ge=0, le=2)
    balance_mode: BalanceMode = BalanceMode.SOFT
    external_search: ExternalSearchSpec = Field(default_factory=ExternalSearchSpec)

    @field_validator("verticals", mode="before")
    @classmethod
    def accept_single_vertical(cls, value: object) -> object:
        """Wrap a single vertical object (a dict) in a list; pass anything else through."""
        return [value] if isinstance(value, dict) else value

    @field_validator("novelty_mode", mode="before")
    @classmethod
    def migrate_legacy_novelty_modes(cls, value: object) -> object:
        """Map the legacy ``prefer_new`` / ``allow_known`` names to current modes.

        Any other value is returned unchanged. The lookup is only attempted for
        strings: an unhashable raw value (such as a list) would otherwise raise
        ``TypeError`` from the dict lookup instead of a normal validation error.
        """
        # Keep persisted v1 specs and old agent-generated files runnable.
        if not isinstance(value, str):
            return value
        return {
            "prefer_new": NoveltyMode.UNUSED_MEMORY,
            "allow_known": NoveltyMode.FULL_MEMORY,
        }.get(value, value)

    @field_validator("verticals")
    @classmethod
    def unique_verticals(cls, values: list[VerticalSpec]) -> list[VerticalSpec]:
        """Reject specs in which two verticals share the same key.

        Raises:
            ValueError: if any vertical key occurs more than once.
        """
        keys = [vertical.key for vertical in values]
        if len(keys) != len(set(keys)):
            raise ValueError("vertical keys must be unique")
        return values

    @classmethod
    def from_file(cls, path: Path) -> "CompanySearchSpec":
        """Load and validate a spec from a UTF-8 JSON file.

        Raises:
            ValueError: if the file does not exist or is not valid JSON.
                Validation errors from ``model_validate`` and other I/O errors
                are not converted here.
        """
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError as exc:
            raise ValueError(f"spec file does not exist: {path}") from exc
        except json.JSONDecodeError as exc:
            raise ValueError(f"spec is not valid JSON: {exc}") from exc
        return cls.model_validate(payload)

    @property
    def is_national(self) -> bool:
        """True when no states are listed in the geography."""
        return not self.geography.states

    @property
    def reserve_count(self) -> int:
        """Return ``count * reserve_ratio`` rounded up to a whole number.

        The product is computed exactly from the ratio's shortest decimal form,
        so binary float error cannot push an exact result over the next integer
        (``100 * 0.07`` is ``7.000000000000001`` as a float, which would
        otherwise round up to 8). A ratio of 0 yields 0.
        """
        # Local import keeps this block self-contained; it is cheap after the first call.
        from fractions import Fraction

        return math.ceil(Fraction(str(self.reserve_ratio)) * self.count)

    @property
    def vertical(self) -> VerticalSpec:
        """Return the only vertical of a single-vertical spec.

        Raises:
            ValueError: if the spec does not hold exactly one vertical.
        """
        if len(self.verticals) != 1:
            raise ValueError("single-vertical operation requires exactly one vertical")
        return self.verticals[0]

    def lane_spec(self, vertical: VerticalSpec, count: int) -> "CompanySearchSpec":
        """Return a copy restricted to ``vertical`` with ``count`` clamped to at least 1.

        ``model_copy(update=...)`` does not re-run validation on the new values.
        """
        return self.model_copy(update={"verticals": [vertical], "count": max(1, count)})

    @property
    def vertical_quotas(self) -> dict[str, int]:
        """Split ``count`` across the verticals as evenly as possible.

        Every vertical gets the integer share; the remainder is handed out one
        each to the first verticals in list order, so the quotas sum to ``count``.
        """
        base, remainder = divmod(self.count, len(self.verticals))
        return {
            vertical.key: base + (1 if index < remainder else 0)
            for index, vertical in enumerate(self.verticals)
        }

    @property
    def missing_constraints(self) -> list[str]:
        """List notes for constraints the spec leaves open.

        A note is added when no states are set, when no size bound is set, and
        when every exclusion list is empty.
        """
        missing: list[str] = []
        if self.is_national:
            missing.append("national search mode used")
        if self.company_size.is_unbounded:
            missing.append("no size filter applied")
        if not any(
            (
                self.exclude.keywords,
                self.exclude.ownership_types,
                self.exclude.company_patterns,
                self.exclude.structured.ownership_signals,
            )
        ):
            missing.append("no custom exclusions applied")
        return missing
|