leads-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. leads_cli-0.1.0/.gitignore +10 -0
  2. leads_cli-0.1.0/NOTES/README.md +31 -0
  3. leads_cli-0.1.0/PKG-INFO +277 -0
  4. leads_cli-0.1.0/README.md +254 -0
  5. leads_cli-0.1.0/install.ps1 +83 -0
  6. leads_cli-0.1.0/install.sh +68 -0
  7. leads_cli-0.1.0/pyproject.toml +62 -0
  8. leads_cli-0.1.0/src/company_discovery/__init__.py +4 -0
  9. leads_cli-0.1.0/src/company_discovery/adapters/__init__.py +5 -0
  10. leads_cli-0.1.0/src/company_discovery/adapters/apollo.py +189 -0
  11. leads_cli-0.1.0/src/company_discovery/adapters/exa.py +112 -0
  12. leads_cli-0.1.0/src/company_discovery/adapters/llm.py +118 -0
  13. leads_cli-0.1.0/src/company_discovery/adapters/protocols.py +58 -0
  14. leads_cli-0.1.0/src/company_discovery/adapters/website.py +154 -0
  15. leads_cli-0.1.0/src/company_discovery/bundled_skills/__init__.py +1 -0
  16. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-discovery-operator/SKILL.md +72 -0
  17. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-discovery-operator/agents/openai.yaml +4 -0
  18. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-enrichment-operator/SKILL.md +94 -0
  19. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-enrichment-operator/agents/openai.yaml +4 -0
  20. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-search-spec-writer/SKILL.md +109 -0
  21. leads_cli-0.1.0/src/company_discovery/bundled_skills/company-search-spec-writer/agents/openai.yaml +4 -0
  22. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-discovery-operator/SKILL.md +80 -0
  23. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-discovery-operator/agents/openai.yaml +4 -0
  24. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-enrichment-operator/SKILL.md +86 -0
  25. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-enrichment-operator/agents/openai.yaml +4 -0
  26. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-search-spec-writer/SKILL.md +86 -0
  27. leads_cli-0.1.0/src/company_discovery/bundled_skills/contact-search-spec-writer/agents/openai.yaml +4 -0
  28. leads_cli-0.1.0/src/company_discovery/bundled_skills/leads-update-operator/SKILL.md +60 -0
  29. leads_cli-0.1.0/src/company_discovery/bundled_skills/leads-update-operator/agents/openai.yaml +4 -0
  30. leads_cli-0.1.0/src/company_discovery/cli.py +1789 -0
  31. leads_cli-0.1.0/src/company_discovery/db/__init__.py +5 -0
  32. leads_cli-0.1.0/src/company_discovery/db/contact_enrichment_repository.py +268 -0
  33. leads_cli-0.1.0/src/company_discovery/db/contact_repository.py +366 -0
  34. leads_cli-0.1.0/src/company_discovery/db/enrichment_repository.py +207 -0
  35. leads_cli-0.1.0/src/company_discovery/db/models.py +324 -0
  36. leads_cli-0.1.0/src/company_discovery/db/repository.py +363 -0
  37. leads_cli-0.1.0/src/company_discovery/db/session.py +48 -0
  38. leads_cli-0.1.0/src/company_discovery/domain/__init__.py +24 -0
  39. leads_cli-0.1.0/src/company_discovery/domain/contact_models.py +178 -0
  40. leads_cli-0.1.0/src/company_discovery/domain/contact_spec.py +86 -0
  41. leads_cli-0.1.0/src/company_discovery/domain/models.py +287 -0
  42. leads_cli-0.1.0/src/company_discovery/domain/spec.py +263 -0
  43. leads_cli-0.1.0/src/company_discovery/migrations.py +190 -0
  44. leads_cli-0.1.0/src/company_discovery/prompts/__init__.py +8 -0
  45. leads_cli-0.1.0/src/company_discovery/prompts/candidate_evaluation/system.md +13 -0
  46. leads_cli-0.1.0/src/company_discovery/prompts/company_enrichment/system.md +42 -0
  47. leads_cli-0.1.0/src/company_discovery/prompts/contact_evaluation/system.md +18 -0
  48. leads_cli-0.1.0/src/company_discovery/prompts/query_generation/system.md +10 -0
  49. leads_cli-0.1.0/src/company_discovery/release_manifest.json +7 -0
  50. leads_cli-0.1.0/src/company_discovery/reports/__init__.py +4 -0
  51. leads_cli-0.1.0/src/company_discovery/reports/contact_enrichment_exporter.py +108 -0
  52. leads_cli-0.1.0/src/company_discovery/reports/contact_exporter.py +132 -0
  53. leads_cli-0.1.0/src/company_discovery/reports/enrichment_exporter.py +125 -0
  54. leads_cli-0.1.0/src/company_discovery/reports/exporter.py +135 -0
  55. leads_cli-0.1.0/src/company_discovery/runtime.py +336 -0
  56. leads_cli-0.1.0/src/company_discovery/services/__init__.py +4 -0
  57. leads_cli-0.1.0/src/company_discovery/services/contact_enrichment_pipeline.py +344 -0
  58. leads_cli-0.1.0/src/company_discovery/services/contact_enrichment_progress.py +37 -0
  59. leads_cli-0.1.0/src/company_discovery/services/contact_evaluator.py +110 -0
  60. leads_cli-0.1.0/src/company_discovery/services/contact_pipeline.py +295 -0
  61. leads_cli-0.1.0/src/company_discovery/services/contact_progress.py +38 -0
  62. leads_cli-0.1.0/src/company_discovery/services/enrichment_extractor.py +61 -0
  63. leads_cli-0.1.0/src/company_discovery/services/enrichment_pipeline.py +526 -0
  64. leads_cli-0.1.0/src/company_discovery/services/enrichment_progress.py +20 -0
  65. leads_cli-0.1.0/src/company_discovery/services/enrichment_resolver.py +148 -0
  66. leads_cli-0.1.0/src/company_discovery/services/evaluator.py +40 -0
  67. leads_cli-0.1.0/src/company_discovery/services/hygiene.py +51 -0
  68. leads_cli-0.1.0/src/company_discovery/services/memory.py +150 -0
  69. leads_cli-0.1.0/src/company_discovery/services/normalization.py +98 -0
  70. leads_cli-0.1.0/src/company_discovery/services/pipeline.py +628 -0
  71. leads_cli-0.1.0/src/company_discovery/services/progress.py +48 -0
  72. leads_cli-0.1.0/src/company_discovery/services/query_planner.py +47 -0
  73. leads_cli-0.1.0/src/company_discovery/settings.py +152 -0
  74. leads_cli-0.1.0/src/company_discovery/skill_installer.py +197 -0
  75. leads_cli-0.1.0/src/company_discovery/update_plan.py +79 -0
@@ -0,0 +1,10 @@
1
+ .company-discovery/
2
+ .leads/
3
+ .env
4
+ .pytest_cache/
5
+ .coverage
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.egg-info/
9
+ build/
10
+ dist/
@@ -0,0 +1,31 @@
1
+ # Notes Index
2
+
3
+ Design and rebuild notes for the company-side tool live in this folder.
4
+
5
+ ## Discovery and architecture
6
+
7
+ - `00-company-tool-current-state.md`
8
+ - `01-company-tool-goal.md`
9
+ - `02-rebuild-principles.md`
10
+ - `03-proposed-company-architecture.md`
11
+ - `04-search-and-qualification-design.md`
12
+ - `05-memory-backlog-redesign.md`
13
+ - `06-rebuild-roadmap.md`
14
+ - `07-simplified-implementation-plan.md`
15
+ - `09-structured-exclusion-discovery.md`
16
+
17
+ ## Enrichment
18
+
19
+ - `08-company-enrichment-redesign.md`
20
+
21
+ ## Contacts
22
+
23
+ - `10-contact-discovery-and-apollo-enrichment.md`
24
+
25
+ ## Code quality
26
+
27
+ - `11-codebase-review-2026-06-22.md`
28
+
29
+ ## Packaging and operations
30
+
31
+ - `12-publishing-onboarding-and-update-plan.md`
@@ -0,0 +1,277 @@
1
+ Metadata-Version: 2.4
2
+ Name: leads-cli
3
+ Version: 0.1.0
4
+ Summary: Agent-first company and contact discovery for outbound lead generation.
5
+ Author: Paolo Auletta
6
+ License: Proprietary
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: httpx>=0.27.2
9
+ Requires-Dist: platformdirs>=4.3.6
10
+ Requires-Dist: pydantic-settings>=2.7.1
11
+ Requires-Dist: pydantic>=2.10.6
12
+ Requires-Dist: questionary>=2.1.0
13
+ Requires-Dist: rich>=13.9.4
14
+ Requires-Dist: sqlalchemy>=2.0.38
15
+ Requires-Dist: tldextract>=5.1.3
16
+ Requires-Dist: typer>=0.15.1
17
+ Provides-Extra: dev
18
+ Requires-Dist: build>=1.2.2; extra == 'dev'
19
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
20
+ Requires-Dist: pytest>=8.3.4; extra == 'dev'
21
+ Requires-Dist: twine>=5.1.1; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Leads
25
+
26
+ An agent-first, memory-first company and contact research engine. Strict JSON specs drive
27
+ deterministic memory retrieval, focused Exa searches, structured LLM evaluation, targeted
28
+ official-site enrichment, persistence, and reviewable CSV/Markdown/JSON artifacts.
29
+
30
+ Design and rebuild notes live in [`NOTES/`](./NOTES/README.md).
31
+
32
+ ## Install
33
+
34
+ The canonical install path is `pipx`. The package is published as `leads-cli` because `leads`
35
+ is already taken on PyPI, but it still installs the `leads` command. The installer scripts are
36
+ thin convenience wrappers around `pipx install leads-cli` or `pipx upgrade leads-cli`, followed
37
+ by `leads init`.
38
+
39
+ ### macOS and Linux
40
+
41
+ ```bash
42
+ curl -fsSL https://raw.githubusercontent.com/paoloauletta/leads/main/install.sh | bash
43
+ ```
44
+
45
+ ### Windows PowerShell
46
+
47
+ ```powershell
48
+ irm https://raw.githubusercontent.com/paoloauletta/leads/main/install.ps1 | iex
49
+ ```
50
+
51
+ ### Direct pipx install
52
+
53
+ ```bash
54
+ pipx install leads-cli
55
+ leads init
56
+ ```
57
+
58
+ Use `LEADS_SKIP_INIT=1` with either installer when you want to install first and run onboarding
59
+ later.
60
+
61
+ ## Onboarding
62
+
63
+ Run:
64
+
65
+ ```bash
66
+ leads init
67
+ ```
68
+
69
+ The wizard creates one local workspace, stores config and secrets, initializes the SQLite database,
70
+ and installs bundled skills into the agent targets you choose, such as Codex, Claude Code, or
71
+ OpenCode. After setup, use one of those agents to create a spec, run discovery, and summarize the
72
+ selected leads.
73
+
74
+ Runtime data defaults to the OS-appropriate Leads application data folder. Override it with
75
+ `LEADS_HOME=/path/to/data` when needed.
76
+
77
+ `LLM_RESPONSE_FORMAT=auto` uses strict JSON Schema with OpenAI and validated JSON Object mode
78
+ with DeepSeek or other compatible providers. Override it only when a provider documents support
79
+ for a different mode.
80
+
81
+ ## Workspace Layout
82
+
83
+ `leads init` creates one workspace root with these top-level directories:
84
+
85
+ ```text
86
+ backups/
87
+ config/
88
+ data/
89
+ logs/
90
+ runs/
91
+ skills/
92
+ specs/
93
+ ```
94
+
95
+ `config/` contains local settings, secrets, and runtime metadata. `data/company_memory.db` is the
96
+ SQLite memory database. `specs/companies/` and `specs/contacts/` are where agent-created specs
97
+ belong. `runs/` contains discovery and enrichment artifacts. `backups/` stores migration and reset
98
+ backups. `skills/` stores bundled skill copies and install metadata. `logs/leads.log` is a CLI
99
+ diagnostic log for troubleshooting; it is not lead evidence or a run artifact.
100
+
101
+ ## Commands
102
+
103
+ ```bash
104
+ leads init
105
+ leads doctor
106
+ leads init-db
107
+ leads version
108
+ leads update --check
109
+ leads migrate --check
110
+ leads config show
111
+ leads skills status
112
+ leads companies discover --spec company_search_spec.json
113
+ leads companies enrich DISCOVERY_RUN_ID
114
+ leads companies show-run RUN_ID
115
+ leads companies inspect RUN_ID --domain example.com
116
+ leads companies export RUN_ID
117
+ leads companies rerun RUN_ID
118
+ leads companies show-enrichment ENRICHMENT_RUN_ID
119
+ leads companies inspect-enrichment ENRICHMENT_RUN_ID --domain example.com
120
+ leads companies export-enrichment ENRICHMENT_RUN_ID
121
+ leads contacts validate-spec --spec contact_search_spec.json
122
+ leads contacts discover --spec contact_search_spec.json
123
+ leads contacts enrich CONTACT_DISCOVERY_RUN_ID
124
+ leads contacts show-run CONTACT_DISCOVERY_RUN_ID
125
+ leads contacts inspect CONTACT_DISCOVERY_RUN_ID --person "Jane Smith"
126
+ leads contacts export CONTACT_DISCOVERY_RUN_ID
127
+ leads contacts show-enrichment CONTACT_ENRICHMENT_RUN_ID
128
+ leads contacts inspect-enrichment CONTACT_ENRICHMENT_RUN_ID --person "Jane Smith"
129
+ leads contacts export-enrichment CONTACT_ENRICHMENT_RUN_ID
130
+ ```
131
+
132
+ `leads init-db` creates `company_memory.db` and its schema. If the database already exists, it
133
+ asks before resetting it. An accepted reset moves the existing `runs/` directory to a timestamped
134
+ archive such as `runs-previousdb-20260622T184500Z/`, then creates a new empty `runs/` directory.
135
+
136
+ `leads migrate --check` is read-only. `leads migrate --apply` creates a timestamped backup before
137
+ supported structural schema changes and refuses unknown migration paths.
138
+
139
+ Use `--verbose` on `discover` to print generated queries and candidate-level decisions.
140
+
141
+ ## Development Setup
142
+
143
+ ```bash
144
+ python -m venv .venv
145
+ .venv/bin/pip install -e '.[dev]'
146
+ .venv/bin/leads init
147
+ ```
148
+
149
+ For a local smoke test, create or copy a company spec, configure provider keys during onboarding,
150
+ then run:
151
+
152
+ ```bash
153
+ leads companies discover --spec company_search_spec.json
154
+ ```
155
+
156
+ ## Multiple verticals
157
+
158
+ Use `verticals` to request OR semantics: companies may match construction, healthcare, or
159
+ engineering; they do not need to match all three. Each vertical gets an independent memory scan,
160
+ gap calculation, Exa query plan, and evaluation lane.
161
+
162
+ Each vertical now uses one simple shape: `key`, `label`, and optional query hints. Use
163
+ `search_terms` when the label alone is too broad or niche, and `exclude_terms` when a vertical
164
+ needs a few search-time negatives. Old specs that still contain `mode`, `seed_terms`, or
165
+ `anti_terms` remain readable and normalize to the new shape.
166
+
167
+ `balance_mode` controls final selection. `soft` (the default) fills an equal quality-gated floor
168
+ per vertical, then reallocates unused slots to good companies from stronger lanes. `strict` keeps
169
+ equal caps and may return fewer companies. `none` selects good companies in discovery order.
170
+
171
+ The legacy single `vertical` object remains accepted for existing specs.
172
+
173
+ ## Memory policy
174
+
175
+ `novelty_mode` controls whether saved companies can enter a run:
176
+
177
+ - `unused_memory` (default) searches memory first and only considers companies never selected before.
178
+ - `only_new` skips memory candidates and removes externally rediscovered domains already in memory.
179
+ - `full_memory` searches all matching memory, including companies selected in previous runs.
180
+
181
+ Old `prefer_new` and `allow_known` specs remain readable and normalize to `unused_memory` and
182
+ `full_memory`, respectively.
183
+
184
+ ## Enrichment
185
+
186
+ Enrichment is always a separate command run after discovery completes:
187
+
188
+ ```bash
189
+ leads companies discover --spec company_search_spec.json
190
+ leads companies enrich DISCOVERY_RUN_ID
191
+ ```
192
+
193
+ It consumes selected companies directly from the completed discovery run. It retains company
194
+ name, root domain, target vertical, geography, employee estimate, ownership type, and discovery
195
+ evidence, then finds only the missing LinkedIn company profile, phone, complete in-scope address,
196
+ and independence status.
197
+
198
+ Each enrichment execution gets a random run ID such as `company-enrich-a1b2c3d4e5f6`. That ID is
199
+ used both for CLI follow-up commands and for the enrich artifact folder under the source discovery run.
200
+
201
+ Fresh enrichment facts are reused by company/domain before any website request. The bounded website
202
+ pass reads the homepage and best contact/location/about pages; unresolved fields can use a narrow
203
+ Exa corroboration search. Output is split into `enriched.csv`, `review.csv`, and `blocked.csv`, while
204
+ the enrichment `run.json` keeps field provenance, conflicts, and the per-company trace.
205
+
206
+ LinkedIn enrichment first checks company-profile links exposed by the official website, including
207
+ footer icon links. Only `/company/...` URLs are accepted; personal profiles, jobs, and posts are
208
+ discarded. If the official site has no profile link, enrichment performs a narrow LinkedIn company
209
+ search. The normalized URL and its source page are saved in enrichment memory and exported as
210
+ `linkedin_url`.
211
+
212
+ By default, complete profiles with unknown independence remain in review. Add
213
+ `--allow-unknown-independence` only when that uncertainty is acceptable. Generic values such as
214
+ `privately_held` never count as proof of independence.
215
+
216
+ To exclude family businesses during enrichment, add this to the discovery spec:
217
+
218
+ ```json
219
+ "exclude": {
220
+ "structured": {"ownership_signals": ["family_owned"]}
221
+ }
222
+ ```
223
+
224
+ Enrichment still records the company as independent, but sends it to `blocked.csv` with a
225
+ `fit_conflict` and `excluded_family_owned` flag. The ownership signal is retained in enrichment
226
+ memory, so the same rule applies when a later run reuses fresh facts.
227
+
228
+ ## Contact Discovery
229
+
230
+ Contact discovery is a separate phase after company enrichment. It starts from a completed
231
+ `company-enrich-<id>`, uses only its ready companies by default, and finds current people matching
232
+ structured role targets.
233
+
234
+ ```bash
235
+ cp examples/contact_search_spec.json contact_search_spec.json
236
+ leads contacts validate-spec --spec contact_search_spec.json
237
+ leads contacts discover --spec contact_search_spec.json
238
+ ```
239
+
240
+ For every company and role, the command reuses accepted contact memory from the last 30 days, then
241
+ uses one Exa people-index query plus one official-domain evidence query for each remaining
242
+ per-company gap. The LLM evaluates identity, current employment at the exact target company, and
243
+ requested-title fit. A model cannot force an acceptance when those explicit checks are not
244
+ satisfied.
245
+
246
+ Artifacts are split into `accepted.csv`, `review.csv`, and `rejected.csv`. All three use the same
247
+ client-facing columns:
248
+
249
+ ```text
250
+ company_name, company_domain, contact_name, title, linkedin_url,
251
+ email, phone, status, notes
252
+ ```
253
+
254
+ `email` and `phone` are intentionally blank during discovery. Full queries, raw Exa results,
255
+ evidence, role keys, verdict details, and memory/live source decisions are retained in `run.json`.
256
+
257
+ ## Contact Enrichment
258
+
259
+ Contact enrichment is a separate Apollo-backed command after contact discovery:
260
+
261
+ ```bash
262
+ leads contacts enrich contact-discover-a1b2c3d4e5f6
263
+ ```
264
+
265
+ Only accepted contacts enter enrichment. Live-web discovery remains authoritative for the person's
266
+ identity, current company, title, role, and LinkedIn URL; Apollo can add email and phone channels
267
+ but cannot overwrite those facts. Exact identity and company/email-domain checks classify each
268
+ person as `ready`, `review`, or `blocked`, with raw Apollo trace and flags retained in `run.json`.
269
+
270
+ Apollo bulk requests are sent in groups of 10. Phone and waterfall requests are asynchronous, so
271
+ the default email-and-phone command requires `APOLLO_WEBHOOK_URL` and polls Apollo's request result
272
+ until completion. Use `--no-phone` for an email-only run when a webhook is unavailable. Fresh
273
+ Apollo results are reused for 14 days unless `--refresh` is supplied.
274
+
275
+ Artifacts live below the source contact run in
276
+ `contacts/contact-discover-<id>/enrich/contact-enrich-<id>/` and retain the same compact
277
+ client columns used by discovery. Output is split into `ready.csv`, `review.csv`, and `blocked.csv`.
@@ -0,0 +1,254 @@
1
+ # Leads
2
+
3
+ An agent-first, memory-first company and contact research engine. Strict JSON specs drive
4
+ deterministic memory retrieval, focused Exa searches, structured LLM evaluation, targeted
5
+ official-site enrichment, persistence, and reviewable CSV/Markdown/JSON artifacts.
6
+
7
+ Design and rebuild notes live in [`NOTES/`](./NOTES/README.md).
8
+
9
+ ## Install
10
+
11
+ The canonical install path is `pipx`. The package is published as `leads-cli` because `leads`
12
+ is already taken on PyPI, but it still installs the `leads` command. The installer scripts are
13
+ thin convenience wrappers around `pipx install leads-cli` or `pipx upgrade leads-cli`, followed
14
+ by `leads init`.
15
+
16
+ ### macOS and Linux
17
+
18
+ ```bash
19
+ curl -fsSL https://raw.githubusercontent.com/paoloauletta/leads/main/install.sh | bash
20
+ ```
21
+
22
+ ### Windows PowerShell
23
+
24
+ ```powershell
25
+ irm https://raw.githubusercontent.com/paoloauletta/leads/main/install.ps1 | iex
26
+ ```
27
+
28
+ ### Direct pipx install
29
+
30
+ ```bash
31
+ pipx install leads-cli
32
+ leads init
33
+ ```
34
+
35
+ Use `LEADS_SKIP_INIT=1` with either installer when you want to install first and run onboarding
36
+ later.
37
+
38
+ ## Onboarding
39
+
40
+ Run:
41
+
42
+ ```bash
43
+ leads init
44
+ ```
45
+
46
+ The wizard creates one local workspace, stores config and secrets, initializes the SQLite database,
47
+ and installs bundled skills into the agent targets you choose, such as Codex, Claude Code, or
48
+ OpenCode. After setup, use one of those agents to create a spec, run discovery, and summarize the
49
+ selected leads.
50
+
51
+ Runtime data defaults to the OS-appropriate Leads application data folder. Override it with
52
+ `LEADS_HOME=/path/to/data` when needed.
53
+
54
+ `LLM_RESPONSE_FORMAT=auto` uses strict JSON Schema with OpenAI and validated JSON Object mode
55
+ with DeepSeek or other compatible providers. Override it only when a provider documents support
56
+ for a different mode.
57
+
58
+ ## Workspace Layout
59
+
60
+ `leads init` creates one workspace root with these top-level directories:
61
+
62
+ ```text
63
+ backups/
64
+ config/
65
+ data/
66
+ logs/
67
+ runs/
68
+ skills/
69
+ specs/
70
+ ```
71
+
72
+ `config/` contains local settings, secrets, and runtime metadata. `data/company_memory.db` is the
73
+ SQLite memory database. `specs/companies/` and `specs/contacts/` are where agent-created specs
74
+ belong. `runs/` contains discovery and enrichment artifacts. `backups/` stores migration and reset
75
+ backups. `skills/` stores bundled skill copies and install metadata. `logs/leads.log` is a CLI
76
+ diagnostic log for troubleshooting; it is not lead evidence or a run artifact.
77
+
78
+ ## Commands
79
+
80
+ ```bash
81
+ leads init
82
+ leads doctor
83
+ leads init-db
84
+ leads version
85
+ leads update --check
86
+ leads migrate --check
87
+ leads config show
88
+ leads skills status
89
+ leads companies discover --spec company_search_spec.json
90
+ leads companies enrich DISCOVERY_RUN_ID
91
+ leads companies show-run RUN_ID
92
+ leads companies inspect RUN_ID --domain example.com
93
+ leads companies export RUN_ID
94
+ leads companies rerun RUN_ID
95
+ leads companies show-enrichment ENRICHMENT_RUN_ID
96
+ leads companies inspect-enrichment ENRICHMENT_RUN_ID --domain example.com
97
+ leads companies export-enrichment ENRICHMENT_RUN_ID
98
+ leads contacts validate-spec --spec contact_search_spec.json
99
+ leads contacts discover --spec contact_search_spec.json
100
+ leads contacts enrich CONTACT_DISCOVERY_RUN_ID
101
+ leads contacts show-run CONTACT_DISCOVERY_RUN_ID
102
+ leads contacts inspect CONTACT_DISCOVERY_RUN_ID --person "Jane Smith"
103
+ leads contacts export CONTACT_DISCOVERY_RUN_ID
104
+ leads contacts show-enrichment CONTACT_ENRICHMENT_RUN_ID
105
+ leads contacts inspect-enrichment CONTACT_ENRICHMENT_RUN_ID --person "Jane Smith"
106
+ leads contacts export-enrichment CONTACT_ENRICHMENT_RUN_ID
107
+ ```
108
+
109
+ `leads init-db` creates `company_memory.db` and its schema. If the database already exists, it
110
+ asks before resetting it. An accepted reset moves the existing `runs/` directory to a timestamped
111
+ archive such as `runs-previousdb-20260622T184500Z/`, then creates a new empty `runs/` directory.
112
+
113
+ `leads migrate --check` is read-only. `leads migrate --apply` creates a timestamped backup before
114
+ supported structural schema changes and refuses unknown migration paths.
115
+
116
+ Use `--verbose` on `discover` to print generated queries and candidate-level decisions.
117
+
118
+ ## Development Setup
119
+
120
+ ```bash
121
+ python -m venv .venv
122
+ .venv/bin/pip install -e '.[dev]'
123
+ .venv/bin/leads init
124
+ ```
125
+
126
+ For a local smoke test, create or copy a company spec, configure provider keys during onboarding,
127
+ then run:
128
+
129
+ ```bash
130
+ leads companies discover --spec company_search_spec.json
131
+ ```
132
+
133
+ ## Multiple verticals
134
+
135
+ Use `verticals` to request OR semantics: companies may match construction, healthcare, or
136
+ engineering; they do not need to match all three. Each vertical gets an independent memory scan,
137
+ gap calculation, Exa query plan, and evaluation lane.
138
+
139
+ Each vertical now uses one simple shape: `key`, `label`, and optional query hints. Use
140
+ `search_terms` when the label alone is too broad or niche, and `exclude_terms` when a vertical
141
+ needs a few search-time negatives. Old specs that still contain `mode`, `seed_terms`, or
142
+ `anti_terms` remain readable and normalize to the new shape.
143
+
144
+ `balance_mode` controls final selection. `soft` (the default) fills an equal quality-gated floor
145
+ per vertical, then reallocates unused slots to good companies from stronger lanes. `strict` keeps
146
+ equal caps and may return fewer companies. `none` selects good companies in discovery order.
147
+
148
+ The legacy single `vertical` object remains accepted for existing specs.
149
+
150
+ ## Memory policy
151
+
152
+ `novelty_mode` controls whether saved companies can enter a run:
153
+
154
+ - `unused_memory` (default) searches memory first and only considers companies never selected before.
155
+ - `only_new` skips memory candidates and removes externally rediscovered domains already in memory.
156
+ - `full_memory` searches all matching memory, including companies selected in previous runs.
157
+
158
+ Old `prefer_new` and `allow_known` specs remain readable and normalize to `unused_memory` and
159
+ `full_memory`, respectively.
160
+
161
+ ## Enrichment
162
+
163
+ Enrichment is always a separate command run after discovery completes:
164
+
165
+ ```bash
166
+ leads companies discover --spec company_search_spec.json
167
+ leads companies enrich DISCOVERY_RUN_ID
168
+ ```
169
+
170
+ It consumes selected companies directly from the completed discovery run. It retains company
171
+ name, root domain, target vertical, geography, employee estimate, ownership type, and discovery
172
+ evidence, then finds only the missing LinkedIn company profile, phone, complete in-scope address,
173
+ and independence status.
174
+
175
+ Each enrichment execution gets a random run ID such as `company-enrich-a1b2c3d4e5f6`. That ID is
175
+ used both for CLI follow-up commands and for the enrich artifact folder under the source discovery run.
177
+
178
+ Fresh enrichment facts are reused by company/domain before any website request. The bounded website
179
+ pass reads the homepage and best contact/location/about pages; unresolved fields can use a narrow
180
+ Exa corroboration search. Output is split into `enriched.csv`, `review.csv`, and `blocked.csv`, while
181
+ the enrichment `run.json` keeps field provenance, conflicts, and the per-company trace.
182
+
183
+ LinkedIn enrichment first checks company-profile links exposed by the official website, including
184
+ footer icon links. Only `/company/...` URLs are accepted; personal profiles, jobs, and posts are
185
+ discarded. If the official site has no profile link, enrichment performs a narrow LinkedIn company
186
+ search. The normalized URL and its source page are saved in enrichment memory and exported as
187
+ `linkedin_url`.
188
+
189
+ By default, complete profiles with unknown independence remain in review. Add
190
+ `--allow-unknown-independence` only when that uncertainty is acceptable. Generic values such as
191
+ `privately_held` never count as proof of independence.
192
+
193
+ To exclude family businesses during enrichment, add this to the discovery spec:
194
+
195
+ ```json
196
+ "exclude": {
197
+ "structured": {"ownership_signals": ["family_owned"]}
198
+ }
199
+ ```
200
+
201
+ Enrichment still records the company as independent, but sends it to `blocked.csv` with a
202
+ `fit_conflict` and `excluded_family_owned` flag. The ownership signal is retained in enrichment
203
+ memory, so the same rule applies when a later run reuses fresh facts.
204
+
205
+ ## Contact Discovery
206
+
207
+ Contact discovery is a separate phase after company enrichment. It starts from a completed
208
+ `company-enrich-<id>`, uses only its ready companies by default, and finds current people matching
209
+ structured role targets.
210
+
211
+ ```bash
212
+ cp examples/contact_search_spec.json contact_search_spec.json
213
+ leads contacts validate-spec --spec contact_search_spec.json
214
+ leads contacts discover --spec contact_search_spec.json
215
+ ```
216
+
217
+ For every company and role, the command reuses accepted contact memory from the last 30 days, then
218
+ uses one Exa people-index query plus one official-domain evidence query for each remaining
219
+ per-company gap. The LLM evaluates identity, current employment at the exact target company, and
220
+ requested-title fit. A model cannot force an acceptance when those explicit checks are not
221
+ satisfied.
222
+
223
+ Artifacts are split into `accepted.csv`, `review.csv`, and `rejected.csv`. All three use the same
224
+ client-facing columns:
225
+
226
+ ```text
227
+ company_name, company_domain, contact_name, title, linkedin_url,
228
+ email, phone, status, notes
229
+ ```
230
+
231
+ `email` and `phone` are intentionally blank during discovery. Full queries, raw Exa results,
232
+ evidence, role keys, verdict details, and memory/live source decisions are retained in `run.json`.
233
+
234
+ ## Contact Enrichment
235
+
236
+ Contact enrichment is a separate Apollo-backed command after contact discovery:
237
+
238
+ ```bash
239
+ leads contacts enrich contact-discover-a1b2c3d4e5f6
240
+ ```
241
+
242
+ Only accepted contacts enter enrichment. Live-web discovery remains authoritative for the person's
243
+ identity, current company, title, role, and LinkedIn URL; Apollo can add email and phone channels
244
+ but cannot overwrite those facts. Exact identity and company/email-domain checks classify each
245
+ person as `ready`, `review`, or `blocked`, with raw Apollo trace and flags retained in `run.json`.
246
+
247
+ Apollo bulk requests are sent in groups of 10. Phone and waterfall requests are asynchronous, so
248
+ the default email-and-phone command requires `APOLLO_WEBHOOK_URL` and polls Apollo's request result
249
+ until completion. Use `--no-phone` for an email-only run when a webhook is unavailable. Fresh
250
+ Apollo results are reused for 14 days unless `--refresh` is supplied.
251
+
252
+ Artifacts live below the source contact run in
253
+ `contacts/contact-discover-<id>/enrich/contact-enrich-<id>/` and retain the same compact
254
+ client columns used by discovery. Output is split into `ready.csv`, `review.csv`, and `blocked.csv`.
@@ -0,0 +1,83 @@
1
# Promote non-terminating PowerShell errors to terminating ones for the rest of the script.
$ErrorActionPreference = "Stop"

# Package to install: "leads-cli" unless LEADS_PACKAGE_NAME is set to a non-empty value.
$PackageName = "leads-cli"
if ($env:LEADS_PACKAGE_NAME) {
    $PackageName = $env:LEADS_PACKAGE_NAME
}

# Onboarding is skipped only when LEADS_SKIP_INIT is exactly "1".
$SkipInit = ($env:LEADS_SKIP_INIT -eq "1")
5
+
6
function Test-Command {
    # Report whether $Name resolves to a command in the current session.
    param([string]$Name)
    # A failed lookup is silenced and yields nothing, which casts to $false below.
    $resolved = Get-Command -Name $Name -ErrorAction SilentlyContinue
    return [bool]$resolved
}
10
+
11
function Invoke-Python {
    # Run the first available Python launcher (py, python, python3) with the caller's
    # arguments forwarded verbatim. Throws when none of the three can be found.
    #
    # This is deliberately a simple function that forwards the automatic $args array.
    # Declaring [Parameter(ValueFromRemainingArguments)] makes a function "advanced", and
    # advanced functions reject unquoted single-dash tokens they do not declare, so a call
    # such as `Invoke-Python -m pip install ...` would fail during parameter binding
    # instead of passing -m through to Python.
    if (Test-Command "py") {
        # The Windows py launcher takes -3 to select a Python 3 interpreter.
        & py -3 @args
        return
    }
    foreach ($candidate in @("python", "python3")) {
        if (Test-Command $candidate) {
            & $candidate @args
            return
        }
    }
    throw "Python 3 is required to install $PackageName."
}
27
+
28
function Invoke-Pipx {
    # Run pipx with the given arguments. Uses the pipx executable when one resolves in the
    # session; otherwise runs pipx as a module through Invoke-Python.
    param([Parameter(ValueFromRemainingArguments = $true)][string[]]$Arguments)
    if (Test-Command "pipx") {
        & pipx @Arguments
        return
    }
    # "-m" is quoted so PowerShell forwards it as a plain argument. Unquoted, it is parsed
    # as a parameter name for Invoke-Python, which has no such parameter.
    Invoke-Python "-m" pipx @Arguments
}
36
+
37
function Find-Leads {
    # Locate the leads executable. Returns the Source of a resolvable `leads` command,
    # else the path $HOME\.local\bin\leads.exe when that file exists, else $null.
    $resolved = Get-Command "leads" -ErrorAction SilentlyContinue
    if ($resolved) {
        return $resolved.Source
    }
    $fallback = Join-Path $HOME ".local\bin\leads.exe"
    if (-not (Test-Path $fallback)) {
        return $null
    }
    return $fallback
}
48
+
49
Write-Host "Installing $PackageName with pipx..."
if (-not (Test-Command "pipx")) {
    # "-m" is quoted so PowerShell forwards it as a plain argument instead of trying to
    # bind it as a parameter name of Invoke-Python.
    Invoke-Python "-m" pip install --user pipx
    try {
        Invoke-Python "-m" pipx ensurepath
    } catch {
        Write-Host "pipx installed. Your shell may need to be restarted for PATH changes."
    }
}

# Decide between install and upgrade. `pipx list --short` prints one
# "<package> <version>" line per installed package, so only the first
# whitespace-separated token of each line is compared with the package name.
# Comparing whole lines would never match and the upgrade branch would be unreachable.
$installed = $false
try {
    $installedNames = @(Invoke-Pipx list --short 2>$null) |
        ForEach-Object { ("$_".Trim() -split '\s+')[0] }
    $installed = $installedNames -contains $PackageName
} catch {
    # Any failure while listing is treated as "not installed" so a fresh install is attempted.
    $installed = $false
}

if ($installed) {
    Invoke-Pipx upgrade $PackageName
} else {
    Invoke-Pipx install $PackageName
}

if ($SkipInit) {
    Write-Host "Installed $PackageName. Run 'leads init' when you are ready."
    exit 0
}

# Run onboarding with the installed executable when it can be located; otherwise fall
# back to a one-off `pipx run`.
$leads = Find-Leads
if ($leads) {
    & $leads init
} else {
    Write-Host "Could not find 'leads' on PATH yet; running the package through pipx once."
    Invoke-Pipx run --spec $PackageName leads init
}