leads-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. company_discovery/__init__.py +4 -0
  2. company_discovery/adapters/__init__.py +5 -0
  3. company_discovery/adapters/apollo.py +189 -0
  4. company_discovery/adapters/exa.py +112 -0
  5. company_discovery/adapters/llm.py +118 -0
  6. company_discovery/adapters/protocols.py +58 -0
  7. company_discovery/adapters/website.py +154 -0
  8. company_discovery/bundled_skills/__init__.py +1 -0
  9. company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
  10. company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
  11. company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
  12. company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
  13. company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
  14. company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
  15. company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
  16. company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
  17. company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
  18. company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
  19. company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
  20. company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
  21. company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
  22. company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
  23. company_discovery/cli.py +1789 -0
  24. company_discovery/db/__init__.py +5 -0
  25. company_discovery/db/contact_enrichment_repository.py +268 -0
  26. company_discovery/db/contact_repository.py +366 -0
  27. company_discovery/db/enrichment_repository.py +207 -0
  28. company_discovery/db/models.py +324 -0
  29. company_discovery/db/repository.py +363 -0
  30. company_discovery/db/session.py +48 -0
  31. company_discovery/domain/__init__.py +24 -0
  32. company_discovery/domain/contact_models.py +178 -0
  33. company_discovery/domain/contact_spec.py +86 -0
  34. company_discovery/domain/models.py +287 -0
  35. company_discovery/domain/spec.py +263 -0
  36. company_discovery/migrations.py +190 -0
  37. company_discovery/prompts/__init__.py +8 -0
  38. company_discovery/prompts/candidate_evaluation/system.md +13 -0
  39. company_discovery/prompts/company_enrichment/system.md +42 -0
  40. company_discovery/prompts/contact_evaluation/system.md +18 -0
  41. company_discovery/prompts/query_generation/system.md +10 -0
  42. company_discovery/release_manifest.json +7 -0
  43. company_discovery/reports/__init__.py +4 -0
  44. company_discovery/reports/contact_enrichment_exporter.py +108 -0
  45. company_discovery/reports/contact_exporter.py +132 -0
  46. company_discovery/reports/enrichment_exporter.py +125 -0
  47. company_discovery/reports/exporter.py +135 -0
  48. company_discovery/runtime.py +336 -0
  49. company_discovery/services/__init__.py +4 -0
  50. company_discovery/services/contact_enrichment_pipeline.py +344 -0
  51. company_discovery/services/contact_enrichment_progress.py +37 -0
  52. company_discovery/services/contact_evaluator.py +110 -0
  53. company_discovery/services/contact_pipeline.py +295 -0
  54. company_discovery/services/contact_progress.py +38 -0
  55. company_discovery/services/enrichment_extractor.py +61 -0
  56. company_discovery/services/enrichment_pipeline.py +526 -0
  57. company_discovery/services/enrichment_progress.py +20 -0
  58. company_discovery/services/enrichment_resolver.py +148 -0
  59. company_discovery/services/evaluator.py +40 -0
  60. company_discovery/services/hygiene.py +51 -0
  61. company_discovery/services/memory.py +150 -0
  62. company_discovery/services/normalization.py +98 -0
  63. company_discovery/services/pipeline.py +628 -0
  64. company_discovery/services/progress.py +48 -0
  65. company_discovery/services/query_planner.py +47 -0
  66. company_discovery/settings.py +152 -0
  67. company_discovery/skill_installer.py +197 -0
  68. company_discovery/update_plan.py +79 -0
  69. leads_cli-0.1.0.dist-info/METADATA +277 -0
  70. leads_cli-0.1.0.dist-info/RECORD +72 -0
  71. leads_cli-0.1.0.dist-info/WHEEL +4 -0
  72. leads_cli-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,135 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from company_discovery.domain.models import RunSummary
9
+ from company_discovery.domain.spec import CompanySearchSpec
10
+
11
+
12
class ArtifactExporter:
    """Write the CSV, Markdown and JSON artifacts of a discovery run.

    Every file for a run is placed in ``<artifacts_root>/<run_id>/``: one CSV
    per candidate bucket, ``summary.md`` and ``run.json``.
    """

    # Candidate buckets, in the order they are exported and reported.
    _BUCKETS = ("selected", "reserve", "rejected")

    # Column order of the per-bucket CSV files.
    _CSV_FIELDS = (
        "company_name",
        "domain",
        "vertical",
        "target_vertical",
        "country",
        "state",
        "employee_min",
        "employee_max",
        "ownership_type",
        "fit",
        "reason",
        "reason_codes",
        "evidence",
        "source",
    )

    def __init__(self, artifacts_root: Path) -> None:
        self._artifacts_root = artifacts_root

    def export(self, run_payload: dict[str, Any], summary: RunSummary) -> dict[str, str]:
        """Write all artifacts for one run and return their absolute paths.

        Args:
            run_payload: Run data; must contain ``run_id``, ``candidates``,
                ``spec`` and ``status``.
            summary: Run counters rendered into the Markdown and JSON output.

        Returns:
            Mapping of artifact name (each bucket, ``summary``, ``json``) to
            the resolved path of the file that was written.
        """
        run_id = run_payload["run_id"]
        run_dir = self._artifacts_root / run_id
        run_dir.mkdir(parents=True, exist_ok=True)
        paths: dict[str, str] = {
            bucket: str((run_dir / f"{bucket}.csv").resolve())
            for bucket in self._BUCKETS
        }
        paths["summary"] = str((run_dir / "summary.md").resolve())
        paths["json"] = str((run_dir / "run.json").resolve())
        for bucket in self._BUCKETS:
            self._write_csv(Path(paths[bucket]), run_payload["candidates"], bucket)

        report_path = Path(paths["summary"])
        report_path.write_text(self._markdown(run_payload, summary), encoding="utf-8")

        json_path = Path(paths["json"])
        # Shallow copy so the caller's payload is not given extra keys.
        full_payload = dict(run_payload)
        full_payload["summary"] = summary.model_dump(mode="json")
        full_payload["artifacts"] = paths
        json_path.write_text(
            json.dumps(full_payload, indent=2, ensure_ascii=True), encoding="utf-8"
        )
        return paths

    @staticmethod
    def _blank_if_none(value: Any) -> Any:
        """Map ``None`` to an empty CSV cell while keeping falsy values such as 0."""
        return "" if value is None else value

    @staticmethod
    def _write_csv(path: Path, candidates: list[dict[str, Any]], bucket: str) -> None:
        """Write the candidates belonging to ``bucket`` to ``path`` as CSV.

        A header row is always written, even when no candidate matches.
        """
        blank = ArtifactExporter._blank_if_none
        rows = [item for item in candidates if item["bucket"] == bucket]
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=list(ArtifactExporter._CSV_FIELDS))
            writer.writeheader()
            for item in rows:
                company = item["company"]
                evaluation = item["evaluation"]
                writer.writerow(
                    {
                        "company_name": company["company_name"],
                        "domain": company["domain"],
                        "vertical": company.get("vertical") or "",
                        "target_vertical": evaluation.get("target_vertical") or "",
                        "country": company.get("country") or "",
                        "state": company.get("state") or "",
                        # ``or ""`` would erase a legitimate employee count of 0.
                        "employee_min": blank(company.get("employee_min")),
                        "employee_max": blank(company.get("employee_max")),
                        "ownership_type": company.get("ownership_type") or "",
                        "fit": evaluation["fit"],
                        "reason": evaluation["reason"],
                        # The key may be present with an explicit None value.
                        "reason_codes": "; ".join(
                            str(code) for code in evaluation.get("reason_codes") or []
                        ),
                        "evidence": " | ".join(
                            str(entry) for entry in evaluation.get("evidence") or []
                        ),
                        "source": item["source"],
                    }
                )

    @staticmethod
    def _markdown(run_payload: dict[str, Any], summary: RunSummary) -> str:
        """Render the Markdown run report: spec header, counters, then one section per bucket."""
        spec = run_payload["spec"]
        lines = [
            f"# Company Discovery Run {run_payload['run_id']}",
            "",
            f"- Status: {run_payload['status']}",
            "- Verticals: "
            + ", ".join(
                f"{vertical['label']} (`{vertical['key']}`)"
                for vertical in spec["verticals"]
            ),
            f"- Balance mode: {spec.get('balance_mode', 'soft')}",
            f"- Country: {spec['geography']['country']}",
            f"- States: {', '.join(spec['geography']['states']) or 'all'}",
            f"- Requested: {spec['count']}",
            "",
            "## Results",
            "",
            f"- Memory matched: {summary.memory_matched}",
            f"- Memory reused: {summary.memory_reused}",
            f"- External gap: {summary.external_gap}",
            f"- Queries generated: {summary.queries_generated}",
            f"- Raw results: {summary.raw_results}",
            f"- Unique candidates: {summary.unique_candidates}",
            f"- Selected: {summary.selected}",
            f"- Reserve: {summary.reserve}",
            f"- Rejected: {summary.rejected}",
            "",
        ]
        # Re-validate the raw spec dict to read its ``missing_constraints``.
        normalized_spec = CompanySearchSpec.model_validate(spec)
        if normalized_spec.missing_constraints:
            lines.extend(
                ["## Open Modes", ""]
                + [f"- {condition}" for condition in normalized_spec.missing_constraints]
                + [""]
            )
        for bucket in ArtifactExporter._BUCKETS:
            title = bucket.title()
            lines.extend([f"## {title}", ""])
            items = [item for item in run_payload["candidates"] if item["bucket"] == bucket]
            if not items:
                lines.extend(["None.", ""])
                continue
            for item in items:
                company = item["company"]
                evaluation = item["evaluation"]
                lines.append(
                    f"- **{company['company_name']}** ({company['domain']}): {evaluation['reason']}"
                )
            lines.append("")
        return "\n".join(lines)
@@ -0,0 +1,336 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from logging.handlers import RotatingFileHandler
6
+ import sys
7
+ import tomllib
8
+ from copy import deepcopy
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from platformdirs import user_data_dir
15
+
16
# Application directory names; default_workspace_root() picks one per platform.
PRODUCT_NAME = "leads"
DISPLAY_NAME = "Leads"

# Recorded in the runtime metadata written by default_runtime_metadata().
SCHEMA_VERSION = 1
SKILL_BUNDLE_VERSION = "2026.06.1"

# Name of the logger that configure_workspace_logging() attaches a file handler to.
LOGGER_NAME = "company_discovery"

# File name of the workspace pointer stored under the default root's config directory.
WORKSPACE_POINTER_FILE = "workspace.json"


# Seed content for a new workspace's config.toml (see ensure_workspace()).
DEFAULT_CONFIG: dict[str, Any] = {
    "llm": {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "model": "gpt-5-mini",
        "response_format": "auto",
    },
    "providers": {
        "exa": {"enabled": False, "base_url": "https://api.exa.ai"},
        "apollo": {
            "enabled": False,
            "base_url": "https://api.apollo.io",
            "webhook_url": "",
        },
    },
}

# Seed content for a new workspace's secrets.toml; every key starts out blank.
DEFAULT_SECRETS: dict[str, Any] = {
    "llm": {"api_key": ""},
    "providers": {
        "exa": {"api_key": ""},
        "apollo": {"api_key": ""},
    },
}
51
+
52
+
53
@dataclass(frozen=True)
class WorkspacePaths:
    """Immutable bundle of the filesystem locations that make up one workspace.

    Instances are built by ``workspace_paths()``, which defines where each
    entry sits relative to ``root``.
    """

    # Workspace root directory.
    root: Path

    # Configuration directory and the files kept in it.
    config_dir: Path
    config_file: Path
    secrets_file: Path
    runtime_file: Path

    # Data directory and the database file inside it.
    data_dir: Path
    database_file: Path

    # Run output directory.
    runs_dir: Path

    # Spec directory and its company/contact subdirectories.
    specs_dir: Path
    company_specs_dir: Path
    contact_specs_dir: Path

    # Backup and log directories.
    backups_dir: Path
    logs_dir: Path

    # Skills directory, the bundle directory and the installs record.
    skills_dir: Path
    skill_bundle_dir: Path
    skill_installs_file: Path
71
+
72
+
73
def default_workspace_root() -> Path:
    """Return the OS-appropriate default workspace root for leads."""
    # Linux uses the lowercase product name; other platforms use the display name.
    if sys.platform.startswith("linux"):
        app_name = PRODUCT_NAME
    else:
        app_name = DISPLAY_NAME
    data_dir = user_data_dir(app_name, appauthor=False, roaming=True)
    return Path(data_dir)
77
+
78
+
79
def workspace_pointer_file() -> Path:
    """Return the path of the workspace pointer file under the default root's ``config`` directory."""
    config_dir = default_workspace_root() / "config"
    return config_dir / WORKSPACE_POINTER_FILE
81
+
82
+
83
def read_workspace_pointer() -> Path | None:
    """Return the workspace root recorded in the pointer file, if any.

    Returns:
        The recorded root with ``~`` expanded, or ``None`` when the pointer
        file is missing, is not a JSON object, or has no non-blank string
        under ``workspace_root``.
    """
    payload = read_json(workspace_pointer_file(), {})
    # A hand-edited pointer may hold valid JSON that is not an object (for
    # example a list); treat it as "no pointer" rather than failing on .get().
    if not isinstance(payload, dict):
        return None
    raw = payload.get("workspace_root")
    if not isinstance(raw, str) or not raw.strip():
        return None
    return Path(raw).expanduser()
89
+
90
+
91
def write_workspace_pointer(root: Path) -> Path:
    """Record ``root`` as the active workspace and return the pointer file path.

    The root is stored as a resolved path with ``~`` expanded, together with
    the product name and a UTC timestamp.
    """
    pointer = workspace_pointer_file()
    resolved_root = root.expanduser().resolve()
    payload = {
        "product": PRODUCT_NAME,
        "workspace_root": str(resolved_root),
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    write_json(pointer, payload)
    return pointer
102
+
103
+
104
def workspace_paths(root: Path) -> WorkspacePaths:
    """Compute every workspace location beneath ``root`` without touching the filesystem.

    ``~`` in ``root`` is expanded; nothing is created or checked for existence.
    """
    base = root.expanduser()
    config = base.joinpath("config")
    data = base.joinpath("data")
    specs = base.joinpath("specs")
    skills = base.joinpath("skills")
    return WorkspacePaths(
        root=base,
        config_dir=config,
        config_file=config.joinpath("config.toml"),
        secrets_file=config.joinpath("secrets.toml"),
        runtime_file=config.joinpath("runtime.json"),
        data_dir=data,
        database_file=data.joinpath("company_memory.db"),
        runs_dir=base.joinpath("runs"),
        specs_dir=specs,
        company_specs_dir=specs.joinpath("companies"),
        contact_specs_dir=specs.joinpath("contacts"),
        backups_dir=base.joinpath("backups"),
        logs_dir=base.joinpath("logs"),
        skills_dir=skills,
        skill_bundle_dir=skills.joinpath("bundle"),
        skill_installs_file=skills.joinpath("installs.json"),
    )
128
+
129
+
130
def default_runtime_metadata() -> dict[str, Any]:
    """Build the initial runtime metadata for a new workspace.

    The creation timestamp is taken in UTC at call time; no skill bundle or
    installs are recorded yet.
    """
    created_at = datetime.now(timezone.utc).isoformat()
    metadata: dict[str, Any] = {
        "product": PRODUCT_NAME,
        "created_at": created_at,
        "schema_version": SCHEMA_VERSION,
        "skill_bundle_version": None,
        "installs": [],
    }
    return metadata
138
+
139
+
140
def ensure_workspace(root: Path) -> WorkspacePaths:
    """Create the workspace directory tree and seed any missing default files.

    Existing files are never overwritten. Returns the computed paths.
    """
    paths = workspace_paths(root)

    required_dirs = [
        paths.root,
        paths.config_dir,
        paths.data_dir,
        paths.runs_dir,
        paths.company_specs_dir,
        paths.contact_specs_dir,
        paths.backups_dir,
        paths.logs_dir,
        paths.skills_dir,
        paths.skill_bundle_dir,
    ]
    for directory in required_dirs:
        directory.mkdir(parents=True, exist_ok=True)

    config_file = paths.config_file
    if not config_file.exists():
        write_toml(config_file, DEFAULT_CONFIG)

    secrets_file = paths.secrets_file
    if not secrets_file.exists():
        write_toml(secrets_file, DEFAULT_SECRETS)
        # Best effort: restrict the new secrets file to its owner.
        try:
            secrets_file.chmod(0o600)
        except OSError:
            pass

    runtime_file = paths.runtime_file
    if not runtime_file.exists():
        write_json(runtime_file, default_runtime_metadata())

    installs_file = paths.skill_installs_file
    if not installs_file.exists():
        write_json(installs_file, {"skill_bundle_version": None, "installs": []})

    return paths
168
+
169
+
170
def configure_workspace_logging(root: Path) -> Path:
    """Attach a rotating file handler for ``<logs_dir>/leads.log`` to the package logger.

    The workspace is created if needed. Calling this again for the same
    workspace is a no-op; calling it for a different workspace replaces the
    previously attached rotating handler.

    Returns:
        The (unresolved) log file path inside the workspace.
    """
    paths = ensure_workspace(root)
    log_file = paths.logs_dir / "leads.log"
    logger = logging.getLogger(LOGGER_NAME)
    logger.setLevel(logging.INFO)
    logger.propagate = False
    target = str(log_file.resolve())
    for handler in list(logger.handlers):
        if getattr(handler, "baseFilename", None) == target:
            return log_file
        if isinstance(handler, RotatingFileHandler):
            logger.removeHandler(handler)
            handler.close()
    # Open the handler on the resolved path: FileHandler stores
    # os.path.abspath(filename) in baseFilename, which does not follow
    # symlinks, so passing the unresolved path would never match ``target``
    # above and the handler would be rebuilt on every call.
    handler = RotatingFileHandler(target, maxBytes=1_000_000, backupCount=3, encoding="utf-8")
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    )
    logger.addHandler(handler)
    logger.info("workspace logging initialized")
    return log_file
190
+
191
+
192
+ def read_toml(path: Path) -> dict[str, Any]:
193
+ if not path.exists():
194
+ return {}
195
+ with path.open("rb") as handle:
196
+ return tomllib.load(handle)
197
+
198
+
199
def write_toml(path: Path, data: dict[str, Any]) -> None:
    """Serialize ``data`` as TOML into ``path``, creating parent directories first."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    rendered = _dump_toml(data)
    path.write_text(rendered, encoding="utf-8")
202
+
203
+
204
def read_json(path: Path, default: dict[str, Any] | None = None) -> dict[str, Any]:
    """Load JSON from ``path``.

    When the file does not exist, a deep copy of ``default`` is returned
    (an empty dict if ``default`` is falsy), so callers may mutate the result.
    """
    if path.exists():
        text = path.read_text(encoding="utf-8")
        return json.loads(text)
    fallback = default if default else {}
    return deepcopy(fallback)
208
+
209
+
210
def write_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented, key-sorted JSON ending in a newline.

    Parent directories are created when missing.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(data, indent=2, sort_keys=True)
    path.write_text(f"{text}\n", encoding="utf-8")
213
+
214
+
215
def load_local_settings(root: Path) -> dict[str, Any]:
    """Flatten the workspace's config and secrets files into one settings dict.

    Only values that are present are included; blank API keys and a blank
    Apollo webhook URL are treated as absent.
    """
    paths = workspace_paths(root)
    config = read_toml(paths.config_file)
    secrets = read_toml(paths.secrets_file)
    entries: list[tuple[str, Any]] = []

    llm_config = config.get("llm", {})
    llm_secret = secrets.get("llm", {})
    entries += [
        ("llm_provider", llm_config.get("provider")),
        ("llm_base_url", llm_config.get("base_url")),
        ("llm_model", llm_config.get("model")),
        ("llm_response_format", llm_config.get("response_format")),
        ("llm_api_key", _blank_to_none(llm_secret.get("api_key"))),
    ]

    provider_config = config.get("providers", {})
    provider_secret = secrets.get("providers", {})

    exa_config = provider_config.get("exa", {})
    exa_secret = provider_secret.get("exa", {})
    entries += [
        ("exa_base_url", exa_config.get("base_url")),
        ("exa_api_key", _blank_to_none(exa_secret.get("api_key"))),
    ]

    apollo_config = provider_config.get("apollo", {})
    apollo_secret = provider_secret.get("apollo", {})
    entries += [
        ("apollo_base_url", apollo_config.get("base_url")),
        ("apollo_webhook_url", _blank_to_none(apollo_config.get("webhook_url"))),
        ("apollo_api_key", _blank_to_none(apollo_secret.get("api_key"))),
    ]

    # Drop absent values so callers only see what was actually configured.
    return {name: value for name, value in entries if value is not None}
242
+
243
+
244
def update_config_value(root: Path, key: str, value: Any, *, secret: bool = False) -> Path:
    """Set one dotted ``key`` in the workspace's config or secrets file.

    Args:
        root: Workspace root; the workspace is created if needed.
        key: Dotted path such as ``"llm.model"``.
        value: New value. For non-secret keys, strings that look like
            booleans or numbers are converted; secrets are stored verbatim.
        secret: Write to the secrets file instead of the config file.

    Returns:
        The path of the file that was updated.

    Raises:
        ValueError: If ``key`` contains an empty segment, or passes through
            an existing scalar value.
    """
    segments = key.split(".")
    # An empty segment would be written as an empty bare key, leaving a TOML
    # file that can no longer be parsed.
    if not all(segment.strip() for segment in segments):
        raise ValueError(f"Invalid config key {key!r}: empty key segment")
    paths = ensure_workspace(root)
    target = paths.secrets_file if secret else paths.config_file
    data = read_toml(target)
    # Secrets are opaque strings: coercing them would turn a key such as
    # "00123" into the integer 123, or "true" into a boolean.
    stored = value if secret else _coerce_value(value)
    set_nested_value(data, segments, stored)
    write_toml(target, data)
    if secret:
        # Best effort: keep the secrets file owner-only.
        try:
            target.chmod(0o600)
        except OSError:
            pass
    return target
256
+
257
+
258
def set_nested_value(data: dict[str, Any], keys: list[str], value: Any) -> None:
    """Assign ``value`` at the nested path ``keys`` inside ``data``, in place.

    Missing intermediate dicts are created.

    Raises:
        ValueError: If an intermediate key already holds a non-dict value.
    """
    node = data
    for key in keys[:-1]:
        child = node.setdefault(key, {})
        if isinstance(child, dict):
            node = child
            continue
        raise ValueError(f"Cannot set nested key through scalar value at {key!r}")
    leaf = keys[-1]
    node[leaf] = value
266
+
267
+
268
def merge_dicts(base: dict[str, Any], updates: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of ``base`` with ``updates`` merged in recursively.

    When both sides hold a dict under the same key the two are merged;
    otherwise the value from ``updates`` replaces the one from ``base``.
    ``base`` is left untouched.
    """
    result = deepcopy(base)
    for name in updates:
        incoming = updates[name]
        existing = result.get(name)
        both_tables = isinstance(incoming, dict) and isinstance(existing, dict)
        result[name] = merge_dicts(existing, incoming) if both_tables else incoming
    return result
276
+
277
+
278
+ def _dump_toml(data: dict[str, Any]) -> str:
279
+ lines: list[str] = []
280
+ scalars = {key: value for key, value in data.items() if not isinstance(value, dict)}
281
+ for key, value in scalars.items():
282
+ lines.append(f"{key} = {_format_toml_value(value)}")
283
+ if scalars:
284
+ lines.append("")
285
+ _write_toml_sections(lines, [], {key: value for key, value in data.items() if isinstance(value, dict)})
286
+ return "\n".join(lines).rstrip() + "\n"
287
+
288
+
289
+ def _write_toml_sections(lines: list[str], prefix: list[str], sections: dict[str, Any]) -> None:
290
+ for section, values in sections.items():
291
+ path = [*prefix, section]
292
+ scalars = {key: value for key, value in values.items() if not isinstance(value, dict)}
293
+ nested = {key: value for key, value in values.items() if isinstance(value, dict)}
294
+ if scalars:
295
+ if lines and lines[-1] != "":
296
+ lines.append("")
297
+ lines.append(f"[{'.'.join(path)}]")
298
+ for key, value in scalars.items():
299
+ lines.append(f"{key} = {_format_toml_value(value)}")
300
+ if nested:
301
+ _write_toml_sections(lines, path, nested)
302
+
303
+
304
+ def _format_toml_value(value: Any) -> str:
305
+ if isinstance(value, bool):
306
+ return "true" if value else "false"
307
+ if isinstance(value, int | float):
308
+ return str(value)
309
+ return json.dumps("" if value is None else str(value))
310
+
311
+
312
def _coerce_value(value: Any) -> Any:
    """Convert a string that looks like a bool, int or float to that type.

    Non-string input is returned unchanged. A string matching none of those
    forms is returned exactly as given, including surrounding whitespace.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    lowered = text.lower()
    if lowered in {"true", "false"}:
        return lowered == "true"
    # Try the narrower numeric type first so "42" stays an int.
    for caster in (int, float):
        try:
            return caster(text)
        except ValueError:
            continue
    return value
326
+
327
+
328
def _copy(values: dict[str, Any], key: str, value: Any) -> None:
    """Store ``value`` under ``key`` in ``values`` unless it is ``None``."""
    if value is None:
        return
    values[key] = value
331
+
332
+
333
def _blank_to_none(value: Any) -> Any:
    """Return ``None`` for the empty string and every other value unchanged."""
    return None if value == "" else value
@@ -0,0 +1,4 @@
1
+ from company_discovery.services.pipeline import DiscoveryPipeline
2
+
3
+ __all__ = ["DiscoveryPipeline"]
4
+