openleads 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
automation.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ Back-compat shim. The outreach companion moved to ``openleads.campaign`` in v2.0.
3
+
4
+ python automation.py # dry-run preview (was the old behavior)
5
+ python automation.py --live # send
6
+
7
+ Prefer: ``openleads campaign`` / ``openleads campaign --live``.
8
+ """
9
+ import sys
10
+
11
+ from openleads.campaign import main
12
+
13
# Script entry point: hand the command-line arguments to the relocated
# campaign ``main`` and exit with the status it returns.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
lead_engine.py ADDED
@@ -0,0 +1,48 @@
1
+ """
2
+ Back-compat shim for OpenLeads v1.
3
+
4
+ The engine moved into the installable ``openleads`` package in v2.0. This module
5
+ re-exports the v1 public helpers and forwards the old CLI to ``openleads find``
6
+ so existing scripts keep working. Prefer the new entry point:
7
+
8
+ openleads find "20 founders" # or: python -m openleads find ...
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import sys
13
+
14
+ # Re-export the v1 public API from its new homes (behavior unchanged).
15
+ from openleads.emails.permute import ( # noqa: F401
16
+ candidate_emails,
17
+ domain_of,
18
+ name_parts,
19
+ )
20
+ from openleads.emails.resolve import find_email # noqa: F401
21
+ from openleads.sources.yc import ( # noqa: F401
22
+ pick_exec,
23
+ split_location,
24
+ )
25
+
26
+
27
+ def _translate(argv: list[str]) -> list[str]:
28
+ """Map v1 flags onto the new ``find`` subcommand."""
29
+ out: list[str] = []
30
+ for a in argv:
31
+ if a == "--no-write":
32
+ out += ["--out", "-"] # v1 'print only' ≈ write CSV to stdout
33
+ else:
34
+ out.append(a)
35
+ return out
36
+
37
+
38
def main() -> int:
    """Print a deprecation notice on stderr, then run ``openleads find``.

    The process arguments are translated with ``_translate`` and prefixed
    with the ``find`` subcommand. Returns whatever the package CLI returns.
    """
    notice = (
        "[deprecation] `lead_engine.py` is now a shim. Use `openleads find ...` "
        "(or `python -m openleads find ...`).\n"
    )
    sys.stderr.write(notice)
    # Imported only after the notice is written, as in the original flow.
    from openleads.cli import main as cli_main

    forwarded = ["find", *_translate(sys.argv[1:])]
    return cli_main(forwarded)
45
+
46
+
47
# Script entry point: run the shim and exit with its return value.
if __name__ == "__main__":
    sys.exit(main())
openleads/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """
2
+ OpenLeads — the free, open-source Apollo alternative.
3
+
4
+ A universal `entity -> verified email` engine fed by a registry of pluggable,
5
+ free, keyless public data sources. Find founders, developers, doctors,
6
+ researchers — anyone — and verify their email over SMTP, using only public data.
7
+
8
+ Core library is 100% Python standard library (zero runtime dependencies).
9
+ The pretty chat TUI lives behind the optional ``[chat]`` extra; sending behind
10
+ ``[campaign]``.
11
+
12
+ Public API:
13
+ from openleads import Query, Lead, Entity, EmailResult
14
+ from openleads.engine import build_leads
15
+ from openleads.sources import get_registry
16
+ """
17
+
18
# Package version string.
# NOTE(review): this is assigned before the model import below, presumably so
# that code imported from here can already read it - confirm before reordering.
__version__ = "2.0.0"

from openleads.models import EmailResult, Entity, Lead, Query, SourceInfo

# Names exported by ``from openleads import *``.
__all__ = ["Entity", "EmailResult", "Lead", "Query", "SourceInfo", "__version__"]
openleads/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Enable ``python -m openleads``."""
2
+ from openleads.cli import main
3
+
4
if __name__ == "__main__":
    # Propagate the CLI's return value as the process exit status. A bare
    # ``main()`` call discards it, so ``python -m openleads`` would exit 0
    # even when the CLI reports a failure code.
    raise SystemExit(main())
openleads/_http.py ADDED
@@ -0,0 +1,54 @@
1
+ """
2
+ Tiny stdlib HTTP helpers shared by sources. Optionally dataset-cached.
3
+
4
+ Keeps every source free of urllib boilerplate while honoring the cache (so a
5
+ large dataset like the YC dump is fetched once per day, not per run).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import urllib.error
11
+ import urllib.request
12
+
13
+ from openleads.config import USER_AGENT
14
+
15
+
16
def _open(url: str, headers: dict | None = None, timeout: int = 60) -> str:
    """Fetch ``url`` with a GET request and return the body as text.

    The package ``USER_AGENT`` is always sent; entries in ``headers`` are
    merged on top of it. The body is decoded as UTF-8 and undecodable bytes
    are dropped. Errors raised by ``urllib`` propagate to the caller.
    """
    merged = {"User-Agent": USER_AGENT, **(headers or {})}
    request = urllib.request.Request(url, headers=merged)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        payload = response.read()
    return payload.decode("utf-8", "ignore")
23
+
24
+
25
def get_text(url: str, headers: dict | None = None, timeout: int = 60,
             cache=None, ttl_ns: str | None = None) -> str | None:
    """GET a URL as text, optionally through the cache.

    Args:
        url: Address to fetch; also used as the cache key.
        headers: Extra request headers passed on to ``_open``.
        timeout: Timeout in seconds passed on to ``_open``.
        cache: Object exposing ``get(ns, key)`` and ``set(ns, key, value)``,
            or None to skip caching.
        ttl_ns: Cache namespace. Caching is active only when both ``cache``
            and ``ttl_ns`` are given.

    Returns:
        The response text, or None if the fetch failed. Failures are not
        cached.
    """
    # Imported here so the fix needs no change to the module import block.
    import http.client

    use_cache = bool(cache and ttl_ns)
    if use_cache:
        hit = cache.get(ttl_ns, url)
        if hit is not None:
            return hit
    try:
        text = _open(url, headers, timeout)
    except (OSError, ValueError, http.client.HTTPException):
        # OSError already covers URLError, HTTPError and socket timeouts.
        # HTTPException adds protocol-level failures such as IncompleteRead
        # and InvalidURL, which are not OSError subclasses and used to escape
        # despite the "None on any error" contract.
        return None
    if use_cache:
        cache.set(ttl_ns, url, text)
    return text
39
+
40
+
41
def get_json(url: str, headers: dict | None = None, timeout: int = 60,
             cache=None, ttl_ns: str | None = None):
    """GET a URL and parse the body as JSON, optionally through the cache.

    Args:
        url: Address to fetch; also used as the cache key.
        headers: Extra request headers passed on to ``_open``.
        timeout: Timeout in seconds passed on to ``_open``.
        cache: Object exposing ``get(ns, key)`` and ``set(ns, key, value)``,
            or None to skip caching.
        ttl_ns: Cache namespace. Caching is active only when both ``cache``
            and ``ttl_ns`` are given.

    Returns:
        The decoded JSON value, or None if the fetch or the parse failed.
        Failures are not cached.
    """
    # Imported here so the fix needs no change to the module import block.
    import http.client

    use_cache = bool(cache and ttl_ns)
    if use_cache:
        hit = cache.get(ttl_ns, url)
        if hit is not None:
            return hit
    try:
        data = json.loads(_open(url, headers, timeout))
    except (OSError, ValueError, http.client.HTTPException):
        # ValueError covers malformed JSON; OSError covers URLError, HTTPError
        # and timeouts. HTTPException adds protocol-level failures such as
        # IncompleteRead, which are not OSError subclasses.
        return None
    if use_cache:
        cache.set(ttl_ns, url, data)
    return data
openleads/cache/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """SQLite-backed cache so domains/mailservers/datasets aren't re-probed each run."""
2
+ from openleads.cache.store import Cache
3
+
4
# Public names of this package: a star-import exposes only ``Cache``.
__all__ = ["Cache"]
openleads/cache/store.py ADDED
@@ -0,0 +1,85 @@
1
+ """
2
+ A tiny, dependency-free cache built on stdlib ``sqlite3``.
3
+
4
+ Values are JSON-serialized and stored per namespace with a per-namespace TTL:
5
+
6
+ * ``mx`` — MX lookup results, 7 days
7
+ * ``verify`` — SMTP verification outcomes, 14 days
8
+ * ``dataset`` — large source fetches (e.g. the YC dump), 1 day
9
+
10
+ A cache hit short-circuits the network, which is both a big speedup on re-runs
11
+ and the polite thing to do to mail servers. Disable with ``--no-cache``.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import sqlite3
17
+ import time
18
+
19
+ from openleads.config import cache_path
20
+
21
DAY = 86400  # seconds in one day
# Time-to-live per namespace, in seconds. Unknown namespaces fall back to DAY.
DEFAULT_TTLS = {"mx": 7 * DAY, "verify": 14 * DAY, "dataset": 1 * DAY}


class Cache:
    """JSON-value cache stored in one SQLite table, with a TTL per namespace.

    Can be used as a context manager so the connection is always closed::

        with Cache() as cache:
            cache.set("mx", "example.com", ["mx1.example.com"])
    """

    def __init__(self, path=None, ttls: dict | None = None):
        """Open the cache database, creating the table if it is missing.

        Args:
            path: Database location; ``cache_path()`` is used when falsy.
            ttls: Per-namespace overrides (seconds) merged over
                ``DEFAULT_TTLS``.
        """
        self.path = str(path) if path else str(cache_path())
        self.ttls = dict(DEFAULT_TTLS)
        if ttls:
            self.ttls.update(ttls)
        self._conn = sqlite3.connect(self.path)
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS cache ("
            " ns TEXT NOT NULL, k TEXT NOT NULL, v TEXT NOT NULL,"
            " ts REAL NOT NULL, PRIMARY KEY (ns, k))"
        )
        self._conn.commit()

    def __enter__(self) -> "Cache":
        """Return the cache itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the connection on leaving the block; exceptions propagate."""
        self.close()

    def ttl_for(self, ns: str) -> int:
        """Return the TTL in seconds for ``ns`` (one day if not configured)."""
        return self.ttls.get(ns, DAY)

    def get(self, ns: str, key: str):
        """Return the cached value for (ns, key) if fresh, else None.

        An expired row is deleted on the way out. A row whose stored JSON
        cannot be decoded is reported as a miss.
        """
        row = self._conn.execute(
            "SELECT v, ts FROM cache WHERE ns=? AND k=?", (ns, key)
        ).fetchone()
        if row is None:
            return None
        value_json, ts = row
        if time.time() - ts > self.ttl_for(ns):
            self._conn.execute("DELETE FROM cache WHERE ns=? AND k=?", (ns, key))
            self._conn.commit()
            return None
        try:
            return json.loads(value_json)
        except (ValueError, TypeError):
            return None

    def set(self, ns: str, key: str, value) -> None:
        """Store ``value`` (JSON-serialized) under (ns, key), stamped with the current time."""
        self._conn.execute(
            "INSERT OR REPLACE INTO cache (ns, k, v, ts) VALUES (?, ?, ?, ?)",
            (ns, key, json.dumps(value), time.time()),
        )
        self._conn.commit()

    def clear(self) -> int:
        """Delete all cached rows. Returns how many were removed."""
        n = self._conn.execute("SELECT COUNT(*) FROM cache").fetchone()[0]
        self._conn.execute("DELETE FROM cache")
        self._conn.commit()
        return n

    def info(self) -> dict:
        """Counts per namespace, for ``openleads cache info``."""
        rows = self._conn.execute(
            "SELECT ns, COUNT(*) FROM cache GROUP BY ns"
        ).fetchall()
        return {"path": self.path, "counts": {ns: c for ns, c in rows}}

    def close(self) -> None:
        """Close the connection. Best-effort: database errors are ignored."""
        try:
            self._conn.close()
        except sqlite3.Error:
            # Only database-level failures are suppressed; anything else is
            # a programming error and should surface.
            pass
openleads/campaign.py ADDED
@@ -0,0 +1,283 @@
1
+ """
2
+ Cold-email companion (optional). Turns a leads file into personalized outreach.
3
+
4
+ This is the *only* part of OpenLeads that touches paid-optional services and your
5
+ mailbox — it's opt-in and dry-run by default. The core lead engine never sends
6
+ anything.
7
+
8
+ Install with the extra: ``pip install 'openleads[campaign]'``
9
+ Configure via environment (see ``.env.example``):
10
+
11
+ OPENROUTER_API_KEY free/cheap LLM for drafting
12
+ SMTP_USER, SMTP_PASS your mailbox
13
+ SMTP_HOST, SMTP_PORT default mail.example.com:465 (SSL)
14
+ SENDER_NAME your name in the From header
15
+ CAMPAIGN_ORG who you represent
16
+ CAMPAIGN_CONTEXT a few lines pitching what you're reaching out about
17
+
18
+ Run: ``openleads campaign`` (dry run) · ``openleads campaign --live`` (send)
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import csv
23
+ import os
24
+ import re
25
+ import smtplib
26
+ import time
27
+ from datetime import datetime
28
+ from email.mime.multipart import MIMEMultipart
29
+ from email.mime.text import MIMEText
30
+ from email.utils import formatdate, make_msgid
31
+
32
# Chat-completions endpoint that ``call_llm`` posts to.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# --- pure text helpers (unit-tested, no I/O) ------------------------------- #
# Matches a short bracketed or braced span: an opening "[" or "{", up to 50
# characters that are neither "]" nor "}", then a closing "]" or "}".
PLACEHOLDER_RE = re.compile(r"[\[\{][^\]\}]{0,50}[\]\}]")  # [anything] or {anything}
36
+
37
+
38
def clean_dashes(text: str) -> str:
    """Normalize exotic Unicode punctuation/spaces to plain ASCII (outreach style).

    Em and en dashes become commas, the other dash-like characters become
    ``-``, curly quotes become straight quotes and the ellipsis character
    becomes three dots. Two regex passes then handle the characters embedded
    literally in their patterns.
    """
    repl = {
        "—": ",", "–": ",",
        "‑": "-", "‐": "-", "−": "-",
        "’": "'", "‘": "'",
        "“": '"', "”": '"',
        "…": "...",
    }
    for k, v in repl.items():
        text = text.replace(k, v)
    # NOTE(review): this character class holds literal non-ASCII characters
    # that render like spaces; each match becomes one plain space. Confirm the
    # exact code points before editing this line.
    text = re.sub(r"[  -    ]", " ", text)
    # NOTE(review): this class appears to hold literal zero-width characters,
    # which are removed outright - confirm the exact code points.
    text = re.sub(r"[​‌‍]", "", text)
    return text
52
+
53
+
54
def has_placeholder(text: str) -> bool:
    """Report whether ``text`` contains a span matched by ``PLACEHOLDER_RE``."""
    match = PLACEHOLDER_RE.search(text)
    return match is not None
56
+
57
+
58
def strip_placeholders(text: str) -> str:
    """Remove every placeholder span and the whitespace just before it, then trim."""
    pattern = r"\s*" + PLACEHOLDER_RE.pattern
    cleaned = re.sub(pattern, "", text)
    return cleaned.strip()
60
+
61
+
62
# A salutation word must end at a word boundary, so that "Hiring ...",
# "History ..." or "Dearly ..." is not mistaken for a greeting.
_GREETING_RE = re.compile(r"(?:hi|hey|hello|dear)\b", re.IGNORECASE)


def format_body(body: str, first_name: str) -> str:
    """Guarantee a greeting line followed by a blank line.

    Args:
        body: Draft email text; None or empty is treated as "".
        first_name: Name used in the generated greeting; falls back to
            "there" when missing or blank.

    Returns:
        The body with indentation after newlines removed, runs of three or
        more newlines collapsed to one blank line, and a greeting on the
        first line. If the draft already opens with hi/hey/hello/dear as a
        whole word, a blank line is ensured after it; otherwise
        ``Hey <name>,`` is prepended.
    """
    body = (body or "").strip()
    body = re.sub(r"\n[ \t]+", "\n", body)
    body = re.sub(r"\n{3,}", "\n\n", body)
    lines = body.split("\n")
    if _GREETING_RE.match(lines[0].strip()):
        # Existing greeting: only make sure a blank line separates it from
        # the first paragraph.
        if len(lines) > 1 and lines[1].strip():
            lines.insert(1, "")
        body = "\n".join(lines)
    else:
        name = (first_name or "").strip() or "there"
        body = f"Hey {name},\n\n{body}"
    return re.sub(r"\n{3,}", "\n\n", body).strip()
77
+
78
+
79
def parse_response(response: str, company: str) -> tuple[str, str]:
    """Split an LLM reply into ``(subject, body)``.

    When the reply contains both ``SUBJECT:`` and ``EMAIL:`` markers, the
    subject is the first line before ``EMAIL:`` (with the ``SUBJECT:`` marker
    removed) and the body is everything after ``EMAIL:``. Otherwise the whole
    reply is the body.

    The subject falls back to ``Quick note for <company>`` when the markers
    are missing or when the parsed subject is empty, so an empty subject is
    never returned.
    """
    fallback_subject = f"Quick note for {company}".strip()
    if "SUBJECT:" in response and "EMAIL:" in response:
        head, _, tail = response.partition("EMAIL:")
        subject = head.replace("SUBJECT:", "").strip().split("\n")[0].strip()
        body = tail.strip()
    else:
        subject = ""
        body = response.strip()
    return subject or fallback_subject, body
88
+
89
+
90
+ # --- config (lazy: read only when actually running) ------------------------ #
91
+ def _load_env():
92
+ try:
93
+ from dotenv import load_dotenv
94
+ load_dotenv()
95
+ except Exception:
96
+ pass
97
+
98
+
99
def _env(name: str, default: str = "") -> str:
    """Return environment variable ``name``; unset and empty both yield ``default``."""
    return os.environ.get(name) or default


def _env_int(name: str, default: int) -> int:
    """Read an integer setting, exiting with a readable error if it is not a number."""
    raw = _env(name).strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        raise SystemExit(f"[error] {name} must be a whole number, got {raw!r}.") from None


def _config() -> dict:
    """Collect campaign settings from the environment (after loading ``.env``).

    A variable that is set but empty (e.g. ``SMTP_PORT=`` left blank in a
    copied ``.env``) is treated like an unset one and takes its default,
    instead of crashing ``int("")`` or overriding the default with "".

    Returns:
        A dict with the keys api_key, model, smtp_user, smtp_pass, smtp_host,
        smtp_port, sender, org, context and max_leads.

    Raises:
        SystemExit: if SMTP_PORT or CAMPAIGN_MAX is set to a non-integer.
    """
    _load_env()
    return {
        "api_key": _env("OPENROUTER_API_KEY"),
        "model": _env("OPENROUTER_MODEL", "openai/gpt-oss-120b:free"),
        # PRIVATEMAIL_* are read as fallbacks for the SMTP credentials.
        "smtp_user": _env("SMTP_USER") or _env("PRIVATEMAIL_USER"),
        "smtp_pass": _env("SMTP_PASS") or _env("PRIVATEMAIL_PASS"),
        "smtp_host": _env("SMTP_HOST", "mail.example.com"),
        "smtp_port": _env_int("SMTP_PORT", 465),
        "sender": _env("SENDER_NAME", "Me"),
        "org": _env("CAMPAIGN_ORG", "our team"),
        "context": _env("CAMPAIGN_CONTEXT",
                        "We're reaching out about a potential collaboration."),
        "max_leads": _env_int("CAMPAIGN_MAX", 60),
    }
114
+
115
+
116
def build_prompt(lead: dict, cfg: dict) -> str:
    """Build the LLM prompt for one lead.

    Args:
        lead: Mapping read with the keys first_name, last_name, title,
            company, industry, city, country and linkedin_url.
        cfg: Mapping that must provide org, context and sender.

    Returns:
        The prompt text: the lead's details, the sender's context, the
        required ``SUBJECT:`` / ``EMAIL:`` output format and the writing
        rules. ``{{braces}}`` in the template renders as literal braces.
    """
    # "there" stands in when the first name is missing or empty.
    first = lead.get("first_name") or "there"
    company = lead.get("company", "")
    # strip(", ") drops the dangling separator when city and/or country is empty.
    loc = f"{lead.get('city','')}, {lead.get('country','')}".strip(", ")
    li = lead.get("linkedin_url") or "not available"
    return f"""Act like a world-class cold emailer with 20+ years of experience, writing on behalf of {cfg['org']}.

LEAD (use these REAL values; never write a placeholder):
- First name: {first}
- Full name: {lead.get('first_name','')} {lead.get('last_name','')}
- Title: {lead.get('title','')}
- Company: {company}
- Industry: {lead.get('industry','')}
- Location: {loc}
- LinkedIn: {li}

ABOUT {cfg['org']}:
{cfg['context']}

OUTPUT ONLY this exact format:

SUBJECT: <one short subject line>

EMAIL:
Hey {first},

<paragraph one>

<paragraph two>

Best,
{cfg['sender']}

RULES:
- Write the ACTUAL name "{first}" and company "{company}". NEVER output [brackets] or {{braces}} placeholders. If unsure of a detail, leave it out.
- Blank line between greeting, each paragraph, and the signature.
- Under 120 words, punchy, human, specific. No em dashes (use commas)."""
153
+
154
+
155
def get_leads(csv_path: str, max_leads: int) -> list[dict]:
    """Read leads that have an email address from a CSV file.

    Args:
        csv_path: Path of the leads CSV.
        max_leads: The result is truncated with ``leads[:max_leads]``.

    Returns:
        One dict per row with a non-empty ``Email`` column, with the keys
        first_name, last_name, email, title, company, industry, city,
        country and linkedin_url. All values are stripped strings.

    Raises:
        SystemExit: if ``csv_path`` does not exist.
    """
    if not os.path.exists(csv_path):
        raise SystemExit(f"[error] {csv_path} not found. Run `openleads find` first.")

    def cell(row: dict, *columns: str) -> str:
        """Return the first non-empty value among ``columns``, stripped."""
        for column in columns:
            value = row.get(column)
            if value:
                return value.strip()
        return ""

    leads = []
    # "utf-8-sig" reads plain UTF-8 as well, but also drops the byte-order
    # mark that spreadsheet tools add on save. With plain "utf-8" the BOM
    # stays glued to the first header name and that column is silently lost.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            email = cell(row, "Email")
            if not email:
                continue
            leads.append({
                "first_name": cell(row, "First Name"),
                "last_name": cell(row, "Last Name"),
                "email": email,
                "title": cell(row, "Title"),
                "company": cell(row, "Organization Name", "Company"),
                "industry": cell(row, "Industry"),
                "city": cell(row, "City"),
                "country": cell(row, "Country"),
                "linkedin_url": cell(row, "LinkedIn Url"),
            })
    return leads[:max_leads]
176
+
177
+
178
def call_llm(prompt: str, cfg: dict, max_tokens: int = 700) -> str:
    """Send ``prompt`` to the OpenRouter chat endpoint and return the reply text.

    Args:
        prompt: Text sent as a single user message.
        cfg: Mapping providing ``api_key`` and ``model``.
        max_tokens: Completion token limit sent with the request.

    Returns:
        The first choice's message content, stripped, with any
        ``<think>...</think>`` section removed.

    Raises:
        RuntimeError: if all five attempts were answered with HTTP 429.
        requests.HTTPError: for any other error status.
    """
    import requests
    headers = {"Authorization": f"Bearer {cfg['api_key']}", "Content-Type": "application/json"}
    body = {"model": cfg["model"], "max_tokens": max_tokens, "temperature": 0.85,
            "messages": [{"role": "user", "content": prompt}]}
    max_attempts = 5
    for attempt in range(max_attempts):
        res = requests.post(OPENROUTER_URL, headers=headers, json=body, timeout=60)
        if res.status_code != 429:
            res.raise_for_status()
            break
        # Back off before the next try, but not after the last one: sleeping
        # there would only delay the error raised below by 75 seconds.
        if attempt < max_attempts - 1:
            time.sleep(15 * (attempt + 1))
    else:
        raise RuntimeError("rate limited after 5 retries")
    raw = res.json()["choices"][0]["message"]["content"].strip()
    raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
    return raw
195
+
196
+
197
def generate(lead: dict, cfg: dict) -> dict:
    """Draft a subject and body for ``lead``, retrying while placeholders remain.

    Up to three drafts are requested. The first one whose subject and body
    are both free of placeholders is returned as-is. If none qualifies, the
    placeholders are stripped from the last draft, with ``Note for
    <company>`` as the subject when stripping leaves it empty.

    Returns:
        A dict with the keys ``subject`` and ``body``.
    """
    first_name = lead.get("first_name", "")
    prompt = build_prompt(lead, cfg)
    subject, body = "", ""
    for _ in range(3):
        reply = call_llm(prompt, cfg)
        raw_subject, raw_body = parse_response(reply, lead.get("company", ""))
        subject = clean_dashes(raw_subject)
        body = format_body(clean_dashes(raw_body), first_name)
        if not (has_placeholder(subject) or has_placeholder(body)):
            return {"subject": subject, "body": body}
    # Every draft still had a placeholder: salvage the last one.
    return {
        "subject": strip_placeholders(subject) or f"Note for {lead.get('company','')}",
        "body": format_body(strip_placeholders(body), first_name),
    }
211
+
212
+
213
def send_email(lead_email: str, subject: str, body: str, cfg: dict) -> None:
    """Send one plain-text email over SMTP with implicit SSL.

    Args:
        lead_email: Recipient address.
        subject: Subject line.
        body: Plain-text message body.
        cfg: Mapping providing sender, smtp_user, smtp_pass, smtp_host and
            smtp_port.

    Raises:
        ValueError: if the recipient or subject contains a line break.
        smtplib.SMTPException, OSError: on connection, login or send failure.
    """
    # Local import so the fix needs no change to the module import block.
    from email.utils import formataddr

    # The recipient comes from a CSV and the subject from an LLM. A CR or LF
    # in either would let the value inject extra headers, so refuse it before
    # anything is built or sent.
    for label, value in (("recipient", lead_email), ("subject", subject)):
        if "\r" in value or "\n" in value:
            raise ValueError(f"{label} contains a line break")

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    # formataddr quotes or encodes the display name when it contains commas,
    # quotes or non-ASCII characters; plain names render as before.
    msg["From"] = formataddr((cfg["sender"], cfg["smtp_user"]))
    msg["To"] = lead_email
    msg["Date"] = formatdate(localtime=True)
    msg["Message-ID"] = make_msgid(domain=cfg["smtp_user"].split("@")[-1] or "localhost")
    msg.attach(MIMEText(body, "plain"))
    # The timeout keeps a stalled server from hanging the whole campaign.
    with smtplib.SMTP_SSL(cfg["smtp_host"], cfg["smtp_port"], timeout=60) as server:
        server.login(cfg["smtp_user"], cfg["smtp_pass"])
        server.sendmail(cfg["smtp_user"], lead_email, msg.as_string())
224
+
225
+
226
def _write_campaign_log(results: list[dict]) -> str:
    """Write one row per processed lead to ``campaign_<timestamp>.csv`` and return the file name."""
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out = f"campaign_{ts}.csv"
    fields = ["first_name", "last_name", "email", "title", "company",
              "subject", "body", "status", "timestamp"]
    with open(out, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fields)
        w.writeheader()
        for r in results:
            w.writerow({k: r.get(k, "") for k in fields})
    return out


def run_campaign(dry_run: bool = True, leads_path: str = "leads.csv") -> int:
    """Draft (and optionally send) one email per lead, then write a CSV log.

    Args:
        dry_run: When True (the default) drafts are only printed; when False
            each draft is also sent with ``send_email``.
        leads_path: Path of the leads CSV passed to ``get_leads``.

    Returns:
        0 once the run completes. Per-lead failures are recorded in the log
        with an ``error: ...`` status and do not stop the run.

    Raises:
        SystemExit: if no OpenRouter API key is configured.
    """
    cfg = _config()
    if not cfg["api_key"]:
        raise SystemExit("[error] set OPENROUTER_API_KEY (see .env.example).")
    leads = get_leads(leads_path, cfg["max_leads"])
    print("=" * 60)
    print(f"  OpenLeads campaign  ·  model: {cfg['model']}")
    print(f"  mode: {'DRY RUN (no send)' if dry_run else 'LIVE SEND'}  ·  leads: {len(leads)}")
    print("=" * 60 + "\n")

    results = []
    try:
        for i, lead in enumerate(leads, 1):
            print(f"[{i}/{len(leads)}] {lead['first_name']} {lead['last_name']} "
                  f"| {lead['title']} @ {lead['company']} <{lead['email']}>")
            try:
                gen = generate(lead, cfg)
                print(f"  subject: {gen['subject']}\n  ---\n{gen['body']}\n")
                status = "preview"
                if not dry_run:
                    send_email(lead["email"], gen["subject"], gen["body"], cfg)
                    status = "sent"
                    print("  sent!\n")
                results.append({**lead, **gen, "status": status,
                                "timestamp": datetime.now().isoformat()})
            except Exception as e:
                print(f"  error: {e}\n")
                results.append({**lead, "subject": "", "body": "",
                                "status": f"error: {e}", "timestamp": datetime.now().isoformat()})
            # Pause between leads, but not after the last one.
            if i < len(leads):
                time.sleep(4)
    finally:
        # Written even when the run is interrupted (e.g. Ctrl-C), so there is
        # always a record of which leads were already emailed.
        out = _write_campaign_log(results)
        processed = sum(1 for r in results if r["status"] in ("sent", "preview"))
        print(f"[log] {out}  ·  {processed} processed")
    return 0
268
+
269
+
270
def main(argv=None) -> int:
    """Parse the campaign command-line options and run the campaign.

    ``argv`` is the argument list to parse; None means no arguments, and
    empty-string entries are dropped before parsing. ``--live`` (alias
    ``--send``) turns off the default dry run; ``--leads`` selects the CSV.
    Returns the value of ``run_campaign``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="openleads campaign",
        description="Personalized cold-email companion (opt-in).",
    )
    parser.add_argument("--live", "--send", action="store_true", dest="live",
                        help="actually send (default is a dry-run preview)")
    parser.add_argument("--leads", default="leads.csv", help="leads CSV path")

    raw_args = [] if argv is None else argv
    options = parser.parse_args(list(filter(None, raw_args)))
    return run_campaign(dry_run=not options.live, leads_path=options.leads)
280
+
281
+
282
# Script entry point: pass the command-line arguments (program name
# excluded) to ``main`` and exit with its return value.
if __name__ == "__main__":
    _script_args = __import__("sys").argv[1:]
    raise SystemExit(main(_script_args))