leads-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- company_discovery/__init__.py +4 -0
- company_discovery/adapters/__init__.py +5 -0
- company_discovery/adapters/apollo.py +189 -0
- company_discovery/adapters/exa.py +112 -0
- company_discovery/adapters/llm.py +118 -0
- company_discovery/adapters/protocols.py +58 -0
- company_discovery/adapters/website.py +154 -0
- company_discovery/bundled_skills/__init__.py +1 -0
- company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
- company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
- company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
- company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
- company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
- company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
- company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
- company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
- company_discovery/cli.py +1789 -0
- company_discovery/db/__init__.py +5 -0
- company_discovery/db/contact_enrichment_repository.py +268 -0
- company_discovery/db/contact_repository.py +366 -0
- company_discovery/db/enrichment_repository.py +207 -0
- company_discovery/db/models.py +324 -0
- company_discovery/db/repository.py +363 -0
- company_discovery/db/session.py +48 -0
- company_discovery/domain/__init__.py +24 -0
- company_discovery/domain/contact_models.py +178 -0
- company_discovery/domain/contact_spec.py +86 -0
- company_discovery/domain/models.py +287 -0
- company_discovery/domain/spec.py +263 -0
- company_discovery/migrations.py +190 -0
- company_discovery/prompts/__init__.py +8 -0
- company_discovery/prompts/candidate_evaluation/system.md +13 -0
- company_discovery/prompts/company_enrichment/system.md +42 -0
- company_discovery/prompts/contact_evaluation/system.md +18 -0
- company_discovery/prompts/query_generation/system.md +10 -0
- company_discovery/release_manifest.json +7 -0
- company_discovery/reports/__init__.py +4 -0
- company_discovery/reports/contact_enrichment_exporter.py +108 -0
- company_discovery/reports/contact_exporter.py +132 -0
- company_discovery/reports/enrichment_exporter.py +125 -0
- company_discovery/reports/exporter.py +135 -0
- company_discovery/runtime.py +336 -0
- company_discovery/services/__init__.py +4 -0
- company_discovery/services/contact_enrichment_pipeline.py +344 -0
- company_discovery/services/contact_enrichment_progress.py +37 -0
- company_discovery/services/contact_evaluator.py +110 -0
- company_discovery/services/contact_pipeline.py +295 -0
- company_discovery/services/contact_progress.py +38 -0
- company_discovery/services/enrichment_extractor.py +61 -0
- company_discovery/services/enrichment_pipeline.py +526 -0
- company_discovery/services/enrichment_progress.py +20 -0
- company_discovery/services/enrichment_resolver.py +148 -0
- company_discovery/services/evaluator.py +40 -0
- company_discovery/services/hygiene.py +51 -0
- company_discovery/services/memory.py +150 -0
- company_discovery/services/normalization.py +98 -0
- company_discovery/services/pipeline.py +628 -0
- company_discovery/services/progress.py +48 -0
- company_discovery/services/query_planner.py +47 -0
- company_discovery/settings.py +152 -0
- company_discovery/skill_installer.py +197 -0
- company_discovery/update_plan.py +79 -0
- leads_cli-0.1.0.dist-info/METADATA +277 -0
- leads_cli-0.1.0.dist-info/RECORD +72 -0
- leads_cli-0.1.0.dist-info/WHEEL +4 -0
- leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
from company_discovery.db.session import Database
|
|
10
|
+
from company_discovery.runtime import (
|
|
11
|
+
SCHEMA_VERSION,
|
|
12
|
+
WorkspacePaths,
|
|
13
|
+
default_runtime_metadata,
|
|
14
|
+
ensure_workspace,
|
|
15
|
+
read_json,
|
|
16
|
+
write_json,
|
|
17
|
+
)
|
|
18
|
+
from company_discovery.settings import Settings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MigrationError(RuntimeError):
    """Signal that a schema migration was refused or could not be carried out.

    ``apply_migrations`` raises this when the status check reports the migration
    cannot be applied, when no on-disk SQLite database is configured, or when a
    required migration step is missing.
    """
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class MigrationStatus:
    """Immutable snapshot of a workspace's schema state and the suggested next step."""

    # Product identifier reported to callers.
    product: str
    # Workspace root directory, as a string.
    workspace: str
    # SQLite database path as a string, or None when no on-disk path is configured.
    database_path: str | None
    # Whether the database file is present on disk.
    database_exists: bool
    # Schema version recorded for the workspace.
    current_schema_version: int
    # Schema version this build of the CLI expects.
    target_schema_version: int
    # True when the recorded and expected schema versions differ.
    migration_required: bool
    # True when a migration is required and there is an existing database to protect.
    backup_required: bool
    # True when the migration can be applied automatically.
    can_apply: bool
    # Short machine-readable action label.
    action: str
    # Human-readable explanation of the situation.
    risk_summary: str
    # Human-readable description of the policy for major-version jumps.
    major_version_behavior: str

    def as_dict(self) -> dict[str, object]:
        """Return every field as a plain dict, keyed by field name in declaration order."""
        keys = (
            "product",
            "workspace",
            "database_path",
            "database_exists",
            "current_schema_version",
            "target_schema_version",
            "migration_required",
            "backup_required",
            "can_apply",
            "action",
            "risk_summary",
            "major_version_behavior",
        )
        return {key: getattr(self, key) for key in keys}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def migration_status(settings: Settings) -> MigrationStatus:
    """Inspect the workspace and describe whether and how its schema can be migrated.

    Ensures the workspace exists, reads the recorded schema version, compares it with
    ``SCHEMA_VERSION`` and checks whether the SQLite database file is present.

    Args:
        settings: Supplies the workspace home and the SQLite database path.

    Returns:
        A ``MigrationStatus`` describing the current state and recommended action.
    """
    workspace_paths = ensure_workspace(settings.company_discovery_home)
    current_version = _current_schema_version(workspace_paths)
    target_version = SCHEMA_VERSION
    db_path = settings.sqlite_database_path
    db_present = bool(db_path and db_path.exists())
    needs_migration = current_version != target_version
    appliable = _can_apply(current_version, target_version)
    recommended = _action(current_version, target_version, db_present, appliable)
    summary = _risk_summary(current_version, target_version, db_present, appliable)
    policy = (
        "Normal schema evolution is migrate-first with a database backup before structural "
        "changes. Incompatible major-version jumps should archive the old DB and run artifacts "
        "before initializing a fresh schema."
    )
    return MigrationStatus(
        product="leads",
        workspace=str(workspace_paths.root),
        database_path=str(db_path) if db_path else None,
        database_exists=db_present,
        current_schema_version=current_version,
        target_schema_version=target_version,
        migration_required=needs_migration,
        # Only an existing database file needs protecting before a schema change.
        backup_required=needs_migration and db_present,
        can_apply=appliable,
        action=recommended,
        risk_summary=summary,
        major_version_behavior=policy,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def apply_migrations(settings: Settings) -> dict[str, object]:
    """Apply every pending migration step and record the result in the runtime file.

    When a backup is required the database is backed up first. Each registered step
    between the current and target versions then runs in ascending order. If the
    schema is already current, ``create_schema`` is invoked on the database instead.

    Args:
        settings: Supplies the workspace home, SQLite path and database URL.

    Returns:
        The pre-migration status as a dict, extended with ``backup_path``,
        ``applied_at`` (UTC ISO timestamp) and ``final_schema_version``.

    Raises:
        MigrationError: If the migration cannot be applied, no on-disk SQLite
            database is configured, or a required step is not registered.
    """
    status = migration_status(settings)
    if not status.can_apply:
        raise MigrationError(status.risk_summary)
    db_path = settings.sqlite_database_path
    if db_path is None:
        raise MigrationError("migrations require an on-disk SQLite database")

    workspace_paths = ensure_workspace(settings.company_discovery_home)
    backup_path = None
    if status.backup_required:
        backup_path = create_database_backup(workspace_paths, db_path)

    from_version = status.current_schema_version
    to_version = status.target_schema_version
    database = Database(settings.resolved_database_url)
    try:
        for version in range(from_version + 1, to_version + 1):
            step = MIGRATIONS.get(version)
            if step is None:
                raise MigrationError(f"no migration is available for schema version {version}")
            step(database)
        if from_version == to_version:
            # Nothing to migrate: make sure the current schema exists.
            database.create_schema()
    finally:
        # Release the database handle whether or not a step failed.
        database.dispose()

    runtime = read_json(workspace_paths.runtime_file, default_runtime_metadata())
    applied_at = datetime.now(timezone.utc).isoformat()
    backup_text = str(backup_path) if backup_path else None
    runtime["schema_version"] = to_version
    runtime["last_migration"] = {
        "from_schema_version": from_version,
        "to_schema_version": to_version,
        "applied_at": applied_at,
        "backup_path": backup_text,
    }
    write_json(workspace_paths.runtime_file, runtime)

    result = status.as_dict()
    result.update(
        backup_path=backup_text,
        applied_at=applied_at,
        final_schema_version=to_version,
    )
    return result
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def create_database_backup(paths: WorkspacePaths, database_path: Path) -> Path:
    """Copy the database, its SQLite sidecar files and the runtime file into a new directory.

    The directory is named ``db-schema-<UTC timestamp>`` under ``paths.backups_dir``.
    If that name is taken, a numeric suffix starting at ``-2`` is appended. Files
    that do not exist are skipped.

    Args:
        paths: Workspace paths providing ``backups_dir`` and ``runtime_file``.
        database_path: Location of the SQLite database file.

    Returns:
        The newly created backup directory.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    destination = paths.backups_dir / f"db-schema-{stamp}"
    attempt = 1
    while destination.exists():
        attempt += 1
        destination = paths.backups_dir / f"db-schema-{stamp}-{attempt}"
    destination.mkdir(parents=True)

    # Database first, then its "-wal"/"-shm" sidecars, then the runtime file.
    sources = [database_path]
    sources.extend(
        database_path.with_name(database_path.name + tail) for tail in ("-wal", "-shm")
    )
    sources.append(paths.runtime_file)
    for source in sources:
        if source.exists():
            shutil.copy2(source, destination / source.name)
    return destination
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _current_schema_version(paths: WorkspacePaths) -> int:
    """Return the schema version recorded in the workspace runtime file.

    A missing, falsy or non-integer ``schema_version`` entry is reported as ``0``.
    """
    metadata = read_json(paths.runtime_file, default_runtime_metadata())
    try:
        version = int(metadata.get("schema_version") or 0)
    except (TypeError, ValueError):
        # An unparseable value is treated the same as no recorded version.
        version = 0
    return version
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _can_apply(current: int, target: int) -> bool:
    """Return True when every step from ``current + 1`` through ``target`` is registered.

    A ``current`` newer than ``target`` is never applicable. Equal versions are
    trivially applicable because no steps are needed.
    """
    if current > target:
        return False
    for version in range(current + 1, target + 1):
        if version not in MIGRATIONS:
            return False
    return True
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _action(current: int, target: int, database_exists: bool, can_apply: bool) -> str:
    """Pick the action label for a schema state.

    Returns ``"manual_review"`` when the migration cannot be applied, ``"migrate"``
    when the schema is behind the target, ``"initialize"`` when the schema is
    current but the database file is missing, and ``"none"`` otherwise.
    """
    if not can_apply:
        return "manual_review"
    if current < target:
        return "migrate"
    return "none" if database_exists else "initialize"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _risk_summary(current: int, target: int, database_exists: bool, can_apply: bool) -> str:
    """Return a one-sentence, human-readable explanation of the schema state."""
    if current > target:
        return "Local database schema is newer than this CLI; downgrade is not supported."
    if not can_apply:
        return "No migration path is available for this schema change; manual review is required."
    if current < target:
        # Upgrade case: wording depends on whether there is a file to back up.
        if database_exists:
            return "Migration can be applied after creating a timestamped database backup."
        return "Migration can be applied; no existing database file needs backup."
    # Versions match from here on.
    if database_exists:
        return "Database schema is current; no migration is required."
    return "Database file is missing; apply will initialize the current schema."
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _create_schema(database: Database) -> None:
    """Migration step that delegates to ``database.create_schema()``."""
    database.create_schema()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# Registry of forward migration steps, keyed by the schema version each step brings the
# database to. Steps are looked up by version when migrations are checked and applied.
MIGRATIONS: dict[int, Callable[[Database], None]] = {1: _create_schema}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Evaluate one company candidate against the supplied company ICP using only the supplied evidence.
|
|
2
|
+
|
|
3
|
+
Judge vertical, operating geography, employee-size fit, and explicit exclusions separately. Do not
|
|
4
|
+
turn missing evidence into a negative claim. Use `unknown` when evidence is absent. A directory,
|
|
5
|
+
association, marketplace, vendor, or non-company page is a bad fit when the ICP seeks operating
|
|
6
|
+
companies. `good_fit` requires credible identity plus no known hard mismatch; use `possible_fit`
|
|
7
|
+
when one decisive field is uncertain. Use `bad_fit` for a demonstrated mismatch or exclusion.
|
|
8
|
+
|
|
9
|
+
Reason codes should be concise snake_case labels such as `vertical_mismatch`, `geography_mismatch`,
|
|
10
|
+
`size_mismatch`, `excluded_ownership`, `not_operating_company`, `size_unknown`, or
|
|
11
|
+
`geography_unknown`. Inferred normalized fields must be null unless supported by evidence. Return
|
|
12
|
+
only the required structured object.
|
|
13
|
+
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
You extract company facts from supplied official-site pages or narrow search evidence.
|
|
2
|
+
|
|
3
|
+
The known company record is context, not evidence for missing fields. Return only observations that
|
|
4
|
+
are explicitly supported by the supplied sources. Never invent or complete an address, phone number,
|
|
5
|
+
ownership statement, or URL.
|
|
6
|
+
|
|
7
|
+
LinkedIn rules:
|
|
8
|
+
- Return only the selected company's LinkedIn company profile (`linkedin.com/company/...`).
|
|
9
|
+
- Never return personal profiles, jobs, posts, groups, learning pages, or search-result URLs.
|
|
10
|
+
- Prefer a profile linked by the official company website. For narrow search evidence, require the
|
|
11
|
+
company name and domain context to identify the same selected company.
|
|
12
|
+
- Use the official website page containing the link as `source_url`; for direct search evidence,
|
|
13
|
+
use the LinkedIn result URL itself.
|
|
14
|
+
|
|
15
|
+
Phone rules:
|
|
16
|
+
- Return general company or office phone numbers, not fax numbers or personal mobile numbers.
|
|
17
|
+
- Preserve the observed phone string in `value`.
|
|
18
|
+
- Use the exact source URL containing the observation.
|
|
19
|
+
|
|
20
|
+
Location rules:
|
|
21
|
+
- Return locations only when street, city, state, and ZIP are all supported as one address block.
|
|
22
|
+
- Never combine address components from separate locations or sources.
|
|
23
|
+
- Use two-letter US state codes when the source clearly identifies a US state.
|
|
24
|
+
|
|
25
|
+
Ownership signals use only these `kind` values:
|
|
26
|
+
- `independent_explicit`: explicitly independently owned or standalone;
|
|
27
|
+
- `family_owned`: explicitly family owned;
|
|
28
|
+
- `locally_owned`: explicitly locally owned;
|
|
29
|
+
- `franchise`: explicitly a franchise or franchisee;
|
|
30
|
+
- `parent`: explicitly owned by or part of a parent company;
|
|
31
|
+
- `subsidiary`: explicitly a subsidiary;
|
|
32
|
+
- `division`: explicitly a division;
|
|
33
|
+
- `acquired`: explicit acquisition evidence;
|
|
34
|
+
- `other`: relevant ownership evidence that fits none of the above.
|
|
35
|
+
|
|
36
|
+
Do not emit a positive ownership signal merely because no parent or franchise is mentioned. Private,
|
|
37
|
+
privately held, LLC, partnership, and corporation are legal/ownership forms and are not proof of
|
|
38
|
+
independence.
|
|
39
|
+
|
|
40
|
+
Set `identity_conflict` only when the supplied sources clearly belong to a different company or show
|
|
41
|
+
that the known domain no longer represents the selected company. Cosmetic naming differences and
|
|
42
|
+
redirects within the same company are not conflicts.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
You identify current employees for a target company from live web search evidence.
|
|
2
|
+
|
|
3
|
+
Return only people explicitly supported by the supplied results. Never invent a person, title,
|
|
4
|
+
company relationship, URL, or evidence statement.
|
|
5
|
+
|
|
6
|
+
For every candidate:
|
|
7
|
+
|
|
8
|
+
- use the person's complete name and observed current title;
|
|
9
|
+
- use only source URLs present in the input;
|
|
10
|
+
- include a LinkedIn profile URL only when it clearly belongs to that person;
|
|
11
|
+
- judge whether the evidence ties the person to the exact target company now;
|
|
12
|
+
- judge whether the observed title matches the requested role or a supplied synonym;
|
|
13
|
+
- reject former employees and people tied to another company;
|
|
14
|
+
- use review when current employment, role fit, or identity is plausible but not sufficiently clear;
|
|
15
|
+
- accept only when identity is clear, current-company match is yes, and role match is yes;
|
|
16
|
+
- keep evidence excerpts short and factual.
|
|
17
|
+
|
|
18
|
+
If the search results contain no identifiable matching person, return an empty candidates list.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
You design focused Exa company-search queries from a structured company ICP.
|
|
2
|
+
|
|
3
|
+
Return diverse queries that seek official operating-company websites. Cover the requested vertical,
|
|
4
|
+
geography, subtypes, and useful synonyms. Use size language only when a size constraint exists;
|
|
5
|
+
headcount is often not indexed, so vary direct and proxy language. Use exclusions to avoid obvious
|
|
6
|
+
noise, but do not make every query so restrictive that recall collapses. When a vertical supplies
|
|
7
|
+
`search_terms`, use them as strong query hints. When a vertical supplies `exclude_terms`, avoid
|
|
8
|
+
them.
|
|
9
|
+
|
|
10
|
+
Return only the required structured object. Do not invent a new ICP constraint.
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from company_discovery.domain.contact_models import ContactEnrichmentSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ContactEnrichmentArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one contact-enrichment run."""

    # Column order shared by every CSV this exporter writes.
    FIELDS = [
        "company_name",
        "company_domain",
        "contact_name",
        "title",
        "linkedin_url",
        "email",
        "phone",
        "status",
        "notes",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run directories are created."""
        self._artifacts_root = artifacts_root

    def export(
        self, payload: dict[str, Any], summary: ContactEnrichmentSummary
    ) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        The run directory is nested under the source discovery, enrichment and
        contact run ids from ``payload``. One CSV is written per outcome
        (``ready``, ``review``, ``blocked``), plus ``summary.md`` and ``run.json``.

        Returns:
            Mapping of artifact key to absolute path string.
        """
        segments = (
            payload["source_discovery_run_id"],
            "enrich",
            payload["source_enrichment_run_id"],
            "contacts",
            payload["source_contact_run_id"],
            "enrich",
            payload["run_id"],
        )
        run_dir = self._artifacts_root.joinpath(*segments)
        run_dir.mkdir(parents=True, exist_ok=True)

        outcomes = ("ready", "review", "blocked")
        paths: dict[str, str] = {}
        for outcome in outcomes:
            paths[outcome] = str((run_dir / f"{outcome}.csv").resolve())
        paths["summary"] = str((run_dir / "summary.md").resolve())
        paths["json"] = str((run_dir / "run.json").resolve())

        for outcome in outcomes:
            self._write_csv(Path(paths[outcome]), payload["items"], outcome)
        Path(paths["summary"]).write_text(self._markdown(payload, summary), encoding="utf-8")

        # run.json carries the input payload plus summary, final status and artifact paths.
        full_payload = {
            **payload,
            "summary": summary.model_dump(mode="json"),
            "status": "completed",
            "artifacts": paths,
        }
        serialized = json.dumps(full_payload, indent=2, ensure_ascii=True)
        Path(paths["json"]).write_text(serialized, encoding="utf-8")
        return paths

    @classmethod
    def _write_csv(cls, path: Path, items: list[dict[str, Any]], outcome: str) -> None:
        """Write a header plus one row for each item whose ``outcome`` equals ``outcome``."""
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for item in items:
                if item["outcome"] == outcome:
                    writer.writerow(cls._row(item, outcome))

    @staticmethod
    def _row(item: dict[str, Any], outcome: str) -> dict[str, Any]:
        """Flatten one item into a CSV row; absent optional values become empty strings."""
        discovery = item["discovery"]
        channels = item["channels"]
        return {
            "company_name": discovery["company_name"],
            "company_domain": discovery["company_domain"],
            "contact_name": discovery["contact_name"],
            "title": discovery["title"],
            "linkedin_url": discovery.get("linkedin_url") or "",
            "email": channels.get("email") or "",
            "phone": channels.get("phone") or "",
            "status": outcome,
            "notes": " | ".join(item.get("review_flags", [])),
        }

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: ContactEnrichmentSummary) -> str:
        """Render the run summary as Markdown: counters first, then one bullet per person."""
        lines = [
            f"# Contact Enrichment Run {payload['run_id']}",
            "",
            f"- Contact discovery run: `{payload['source_contact_run_id']}`",
            f"- Contacts loaded: {summary.contacts_loaded}",
            f"- Fresh Apollo memory reused: {summary.memory_reused}",
            f"- Apollo people submitted: {summary.apollo_requests}",
            f"- Apollo batches: {summary.apollo_batches}",
            f"- Async polls: {summary.async_polls}",
            f"- Ready: {summary.ready}",
            f"- Review: {summary.review}",
            f"- Blocked: {summary.blocked}",
            "",
            "## People",
            "",
        ]
        for item in payload["items"]:
            person = item["discovery"]
            lines.append(
                f"- **{person['contact_name']}**, "
                f"{person['title']} at {person['company_name']}: "
                f"{item['outcome']}"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from company_discovery.domain.contact_models import ContactDiscoverySummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ContactDiscoveryArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one contact-discovery run."""

    # Column order shared by every CSV this exporter writes.
    FIELDS = [
        "company_name",
        "company_domain",
        "contact_name",
        "title",
        "linkedin_url",
        "email",
        "phone",
        "status",
        "notes",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run directories are created."""
        self._artifacts_root = artifacts_root

    def export(
        self, payload: dict[str, Any], summary: ContactDiscoverySummary
    ) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        The run directory is nested under the source discovery and enrichment run
        ids from ``payload``. One CSV is written per verdict (``accepted``,
        ``review``, ``rejected``), plus ``summary.md`` and ``run.json``.

        Returns:
            Mapping of artifact key to absolute path string.
        """
        run_dir = (
            self._artifacts_root
            / payload["source_discovery_run_id"]
            / "enrich"
            / payload["source_enrichment_run_id"]
            / "contacts"
            / payload["run_id"]
        )
        run_dir.mkdir(parents=True, exist_ok=True)
        paths = {
            "accepted": str((run_dir / "accepted.csv").resolve()),
            "review": str((run_dir / "review.csv").resolve()),
            "rejected": str((run_dir / "rejected.csv").resolve()),
            "summary": str((run_dir / "summary.md").resolve()),
            "json": str((run_dir / "run.json").resolve()),
        }
        for verdict in ("accepted", "review", "rejected"):
            self._write_csv(Path(paths[verdict]), payload["items"], verdict)
        Path(paths["summary"]).write_text(
            self._markdown(payload, summary), encoding="utf-8"
        )
        # run.json carries the input payload plus summary, final status and artifact paths.
        full_payload = dict(payload)
        full_payload["summary"] = summary.model_dump(mode="json")
        full_payload["status"] = "completed"
        full_payload["artifacts"] = paths
        Path(paths["json"]).write_text(
            json.dumps(full_payload, indent=2, ensure_ascii=True), encoding="utf-8"
        )
        return paths

    @classmethod
    def _write_csv(
        cls, path: Path, items: list[dict[str, Any]], verdict: str
    ) -> None:
        """Write one row per candidate whose best verdict across all items is ``verdict``.

        A candidate may appear in several items (one per evaluated role). It is
        listed only in the CSV for its highest-ranked verdict
        (``accepted`` > ``review`` > ``rejected``). The row's notes combine the
        distinct role keys and reasons of that candidate's items carrying that
        verdict, in first-seen order.
        """
        verdict_rank = {"rejected": 0, "review": 1, "accepted": 2}
        best_verdict: dict[int, str] = {}
        for item in items:
            candidate_id = item["candidate_id"]
            current = best_verdict.get(candidate_id, "rejected")
            if verdict_rank[item["verdict"]] >= verdict_rank[current]:
                best_verdict[candidate_id] = item["verdict"]

        # Group the matching items per candidate in a single pass. Dict insertion
        # order keeps rows in order of each candidate's first matching item.
        grouped: dict[int, list[dict[str, Any]]] = {}
        for item in items:
            candidate_id = item["candidate_id"]
            if item["verdict"] == verdict and best_verdict[candidate_id] == verdict:
                grouped.setdefault(candidate_id, []).append(item)

        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for related in grouped.values():
                candidate = related[0]["candidate"]
                # dict.fromkeys de-duplicates while preserving first-seen order.
                roles = ", ".join(dict.fromkeys(other["role_key"] for other in related))
                reasons = " | ".join(dict.fromkeys(other["reason"] for other in related))
                writer.writerow(
                    {
                        "company_name": candidate["company_name"],
                        "company_domain": candidate["company_domain"],
                        "contact_name": candidate["full_name"],
                        "title": candidate["title"],
                        "linkedin_url": candidate.get("linkedin_url") or "",
                        # This exporter always leaves email and phone blank.
                        "email": "",
                        "phone": "",
                        "status": verdict,
                        "notes": f"{roles}: {reasons}",
                    }
                )

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: ContactDiscoverySummary) -> str:
        """Render the run summary as Markdown: counters first, then one bullet per item."""
        lines = [
            f"# Contact Discovery Run {payload['run_id']}",
            "",
            f"- Company enrichment run: `{payload['source_enrichment_run_id']}`",
            f"- Companies loaded: {summary.companies_loaded}",
            f"- Contacts reused from memory: {summary.memory_reused}",
            f"- Role gaps sent to live search: {summary.role_gaps}",
            f"- Exa queries: {summary.queries_run}",
            f"- Raw results: {summary.raw_results}",
            f"- Unique people: {summary.unique_people}",
            f"- Accepted: {summary.accepted}",
            f"- Review: {summary.review}",
            f"- Rejected: {summary.rejected}",
            "",
            "## People",
            "",
        ]
        for item in payload["items"]:
            candidate = item["candidate"]
            lines.append(
                f"- **{candidate['full_name']}**, {candidate['title']} at "
                f"{candidate['company_name']}: {item['verdict']} ({item['role_key']})"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from company_discovery.domain.models import EnrichmentSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EnrichmentArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts for one company-enrichment run."""

    # Column order shared by every CSV this exporter writes.
    FIELDS = [
        "company_name",
        "domain",
        "linkedin_url",
        "phone",
        "street_address",
        "city",
        "state",
        "zip",
        "vertical",
        "employee_min",
        "employee_max",
        "ownership_type",
        "independence_status",
        "outcome",
        "conflicts",
        "review_flags",
    ]

    def __init__(self, artifacts_root: Path) -> None:
        """Remember the directory under which run directories are created."""
        self._artifacts_root = artifacts_root

    def export(self, payload: dict[str, Any], summary: EnrichmentSummary) -> dict[str, str]:
        """Write all artifacts for the run and return their resolved paths.

        Items are split across ``enriched.csv``, ``review.csv`` and ``blocked.csv``
        by their ``outcome`` value. ``summary.md`` and ``run.json`` are written
        alongside them.

        Returns:
            Mapping of artifact key to absolute path string.
        """
        run_dir = self._artifacts_root / payload["discovery_run_id"] / "enrich" / payload["run_id"]
        run_dir.mkdir(parents=True, exist_ok=True)
        paths = {
            "enriched": str((run_dir / "enriched.csv").resolve()),
            "review": str((run_dir / "review.csv").resolve()),
            "blocked": str((run_dir / "blocked.csv").resolve()),
            "summary": str((run_dir / "summary.md").resolve()),
            "json": str((run_dir / "run.json").resolve()),
        }
        self._write_csv(Path(paths["enriched"]), payload["items"], {"enriched_ready"})
        self._write_csv(
            Path(paths["review"]),
            payload["items"],
            {"independence_unconfirmed", "enriched_with_gaps"},
        )
        self._write_csv(
            Path(paths["blocked"]),
            payload["items"],
            {"identity_conflict", "geography_conflict", "fit_conflict", "enrichment_failed"},
        )
        Path(paths["summary"]).write_text(self._markdown(payload, summary), encoding="utf-8")
        # run.json carries the input payload plus the summary and artifact paths.
        full_payload = dict(payload)
        full_payload["summary"] = summary.model_dump(mode="json")
        full_payload["artifacts"] = paths
        Path(paths["json"]).write_text(
            json.dumps(full_payload, indent=2, ensure_ascii=True), encoding="utf-8"
        )
        return paths

    @staticmethod
    def _blank_if_none(value: Any) -> Any:
        """Return ``""`` for ``None`` and every other value, including ``0``, unchanged."""
        return "" if value is None else value

    @classmethod
    def _write_csv(cls, path: Path, items: list[dict[str, Any]], outcomes: set[str]) -> None:
        """Write a header plus one row for each item whose ``outcome`` is in ``outcomes``.

        Absent or null enrichment sub-records (phone, location, independence,
        linkedin) are treated as empty, so their columns come out blank.
        """
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=cls.FIELDS)
            writer.writeheader()
            for item in items:
                if item["outcome"] not in outcomes:
                    continue
                discovery = item["discovery"]
                enrichment = item["enrichment"]
                phone = enrichment.get("phone") or {}
                location = enrichment.get("location") or {}
                independence = enrichment.get("independence") or {}
                linkedin = enrichment.get("linkedin") or {}
                writer.writerow(
                    {
                        "company_name": discovery["company_name"],
                        "domain": discovery["domain"],
                        "linkedin_url": linkedin.get("url", ""),
                        "phone": phone.get("display_value", ""),
                        "street_address": location.get("street_address", ""),
                        "city": location.get("city", ""),
                        # Prefer the enriched state, then the discovery-time state.
                        "state": location.get("state") or discovery.get("state") or "",
                        "zip": location.get("zip", ""),
                        "vertical": discovery.get("target_vertical") or discovery.get("vertical") or "",
                        # Numeric bounds: only a missing value is blanked, so a real
                        # 0 is kept rather than dropped as falsy.
                        "employee_min": cls._blank_if_none(discovery.get("employee_min")),
                        "employee_max": cls._blank_if_none(discovery.get("employee_max")),
                        "ownership_type": discovery.get("ownership_type") or "",
                        "independence_status": independence.get("status", "unknown"),
                        "outcome": item["outcome"],
                        "conflicts": " | ".join(item.get("conflicts", [])),
                        "review_flags": " | ".join(item.get("review_flags", [])),
                    }
                )

    @staticmethod
    def _markdown(payload: dict[str, Any], summary: EnrichmentSummary) -> str:
        """Render the run summary as Markdown: counters first, then one bullet per company."""
        lines = [
            f"# Company Enrichment Run {payload['run_id']}",
            "",
            f"- Discovery run: `{payload['discovery_run_id']}`",
            f"- Input bucket: {payload['bucket']}",
            f"- Processed: {summary.processed}",
            f"- Discovery facts inherited: {summary.inherited_facts}",
            f"- Memory profiles reused: {summary.memory_profiles_reused}",
            f"- Websites fetched: {summary.websites_fetched}",
            f"- Fallback searches: {summary.fallback_searches}",
            f"- Ready: {summary.ready}",
            f"- Review: {summary.review}",
            f"- Blocked: {summary.blocked}",
            f"- Failed: {summary.failed}",
            "",
            "## Companies",
            "",
        ]
        for item in payload["items"]:
            lines.append(
                f"- **{item['discovery']['company_name']}** ({item['discovery']['domain']}): "
                f"{item['outcome']}"
            )
        # The trailing empty entry makes the document end with a newline.
        lines.append("")
        return "\n".join(lines)
|