openleads 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- automation.py +14 -0
- lead_engine.py +48 -0
- openleads/__init__.py +22 -0
- openleads/__main__.py +5 -0
- openleads/_http.py +54 -0
- openleads/cache/__init__.py +4 -0
- openleads/cache/store.py +85 -0
- openleads/campaign.py +283 -0
- openleads/chat.py +291 -0
- openleads/cli.py +211 -0
- openleads/config.py +59 -0
- openleads/emails/__init__.py +15 -0
- openleads/emails/mx.py +84 -0
- openleads/emails/permute.py +87 -0
- openleads/emails/resolve.py +156 -0
- openleads/emails/smtp_verify.py +68 -0
- openleads/engine.py +83 -0
- openleads/intent.py +208 -0
- openleads/models.py +160 -0
- openleads/sources/__init__.py +88 -0
- openleads/sources/base.py +42 -0
- openleads/sources/github.py +92 -0
- openleads/sources/npi.py +100 -0
- openleads/sources/openalex.py +92 -0
- openleads/sources/producthunt.py +90 -0
- openleads/sources/yc.py +160 -0
- openleads/ui.py +43 -0
- openleads/writers.py +67 -0
- openleads-2.0.0.dist-info/METADATA +256 -0
- openleads-2.0.0.dist-info/RECORD +34 -0
- openleads-2.0.0.dist-info/WHEEL +5 -0
- openleads-2.0.0.dist-info/entry_points.txt +2 -0
- openleads-2.0.0.dist-info/licenses/LICENSE +133 -0
- openleads-2.0.0.dist-info/top_level.txt +3 -0
automation.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Back-compat shim. The outreach companion moved to ``openleads.campaign`` in v2.0.
|
|
3
|
+
|
|
4
|
+
python automation.py # dry-run preview (was the old behavior)
|
|
5
|
+
python automation.py --live # send
|
|
6
|
+
|
|
7
|
+
Prefer: ``openleads campaign`` / ``openleads campaign --live``.
|
|
8
|
+
"""
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
from openleads.campaign import main
|
|
12
|
+
|
|
13
|
+
# Script entry point: hand the command-line arguments (without the program
# name) to the campaign ``main`` and exit with whatever it returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
|
lead_engine.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Back-compat shim for OpenLeads v1.
|
|
3
|
+
|
|
4
|
+
The engine moved into the installable ``openleads`` package in v2.0. This module
|
|
5
|
+
re-exports the v1 public helpers and forwards the old CLI to ``openleads find``
|
|
6
|
+
so existing scripts keep working. Prefer the new entry point:
|
|
7
|
+
|
|
8
|
+
openleads find "20 founders" # or: python -m openleads find ...
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Re-export the v1 public API from its new homes (behavior unchanged).
|
|
15
|
+
from openleads.emails.permute import ( # noqa: F401
|
|
16
|
+
candidate_emails,
|
|
17
|
+
domain_of,
|
|
18
|
+
name_parts,
|
|
19
|
+
)
|
|
20
|
+
from openleads.emails.resolve import find_email # noqa: F401
|
|
21
|
+
from openleads.sources.yc import ( # noqa: F401
|
|
22
|
+
pick_exec,
|
|
23
|
+
split_location,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _translate(argv: list[str]) -> list[str]:
|
|
28
|
+
"""Map v1 flags onto the new ``find`` subcommand."""
|
|
29
|
+
out: list[str] = []
|
|
30
|
+
for a in argv:
|
|
31
|
+
if a == "--no-write":
|
|
32
|
+
out += ["--out", "-"] # v1 'print only' ≈ write CSV to stdout
|
|
33
|
+
else:
|
|
34
|
+
out.append(a)
|
|
35
|
+
return out
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main() -> int:
    """Print a deprecation notice, then run ``openleads find`` with the old arguments.

    The arguments after the program name are passed through ``_translate``
    and prefixed with ``find``. Returns whatever the package CLI returns.
    """
    notice = (
        "[deprecation] `lead_engine.py` is now a shim. Use `openleads find ...` "
        "(or `python -m openleads find ...`).\n"
    )
    sys.stderr.write(notice)

    # Imported here rather than at module import time, after the notice is written.
    from openleads.cli import main as cli_main

    forwarded = ["find", *_translate(sys.argv[1:])]
    return cli_main(forwarded)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Script entry point: run the shim's ``main`` and use its return value as
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
openleads/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenLeads — the free, open-source Apollo alternative.
|
|
3
|
+
|
|
4
|
+
A universal `entity -> verified email` engine fed by a registry of pluggable,
|
|
5
|
+
free, keyless public data sources. Find founders, developers, doctors,
|
|
6
|
+
researchers — anyone — and verify their email over SMTP, using only public data.
|
|
7
|
+
|
|
8
|
+
Core library is 100% Python standard library (zero runtime dependencies).
|
|
9
|
+
The pretty chat TUI lives behind the optional ``[chat]`` extra; sending behind
|
|
10
|
+
``[campaign]``.
|
|
11
|
+
|
|
12
|
+
Public API:
|
|
13
|
+
from openleads import Query, Lead, Entity, EmailResult
|
|
14
|
+
from openleads.engine import build_leads
|
|
15
|
+
from openleads.sources import get_registry
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
__version__ = "2.0.0"
|
|
19
|
+
|
|
20
|
+
from openleads.models import EmailResult, Entity, Lead, Query, SourceInfo
|
|
21
|
+
|
|
22
|
+
__all__ = ["Entity", "EmailResult", "Lead", "Query", "SourceInfo", "__version__"]
|
openleads/__main__.py
ADDED
openleads/_http.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tiny stdlib HTTP helpers shared by sources. Optionally dataset-cached.
|
|
3
|
+
|
|
4
|
+
Keeps every source free of urllib boilerplate while honoring the cache (so a
|
|
5
|
+
large dataset like the YC dump is fetched once per day, not per run).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import urllib.error
|
|
11
|
+
import urllib.request
|
|
12
|
+
|
|
13
|
+
from openleads.config import USER_AGENT
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _open(url: str, headers: dict | None = None, timeout: int = 60) -> str:
    """Fetch ``url`` and return the response body as text.

    The package ``USER_AGENT`` is always sent; entries in ``headers`` are
    merged on top of it. The body is decoded as UTF-8 and undecodable bytes
    are dropped. Errors from ``urllib`` are not caught here.
    """
    merged = {"User-Agent": USER_AGENT, **(headers or {})}
    request = urllib.request.Request(url, headers=merged)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        payload = response.read()
    return payload.decode("utf-8", "ignore")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_text(url: str, headers: dict | None = None, timeout: int = 60,
             cache=None, ttl_ns: str | None = None) -> str | None:
    """GET a URL as text. Returns None on any error. Cached under ``ttl_ns`` if given.

    Args:
        url: Address to fetch.
        headers: Extra request headers passed on to ``_open``.
        timeout: Socket timeout in seconds.
        cache: Optional object with ``get(ns, key)`` / ``set(ns, key, value)``.
        ttl_ns: Cache namespace; caching only happens when both ``cache``
            and ``ttl_ns`` are truthy.

    Returns:
        The response text, a cached value for ``(ttl_ns, url)``, or ``None``
        when the fetch fails.
    """
    # Function-scope import so the module's import block stays untouched.
    import http.client

    use_cache = bool(cache and ttl_ns)
    if use_cache:
        hit = cache.get(ttl_ns, url)
        if hit is not None:
            return hit
    try:
        text = _open(url, headers, timeout)
    # OSError already covers URLError/HTTPError and socket timeouts.
    # HTTPException (e.g. IncompleteRead on a truncated body) is NOT an
    # OSError, so it must be listed for "None on any error" to hold.
    except (OSError, ValueError, http.client.HTTPException):
        return None
    if use_cache:
        cache.set(ttl_ns, url, text)
    return text
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_json(url: str, headers: dict | None = None, timeout: int = 60,
             cache=None, ttl_ns: str | None = None):
    """GET a URL and parse JSON. Returns None on any error. Cached under ``ttl_ns`` if given.

    Args:
        url: Address to fetch.
        headers: Extra request headers passed on to ``_open``.
        timeout: Socket timeout in seconds.
        cache: Optional object with ``get(ns, key)`` / ``set(ns, key, value)``.
        ttl_ns: Cache namespace; caching only happens when both ``cache``
            and ``ttl_ns`` are truthy.

    Returns:
        The decoded JSON value, a cached value for ``(ttl_ns, url)``, or
        ``None`` when the fetch or the parse fails.
    """
    # Function-scope import so the module's import block stays untouched.
    import http.client

    use_cache = bool(cache and ttl_ns)
    if use_cache:
        hit = cache.get(ttl_ns, url)
        if hit is not None:
            return hit
    try:
        data = json.loads(_open(url, headers, timeout))
    # OSError covers URLError/HTTPError; ValueError covers JSONDecodeError.
    # HTTPException (e.g. IncompleteRead on a truncated body) is NOT an
    # OSError, so it must be listed for "None on any error" to hold.
    except (OSError, ValueError, http.client.HTTPException):
        return None
    if use_cache:
        cache.set(ttl_ns, url, data)
    return data
|
openleads/cache/store.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A tiny, dependency-free cache built on stdlib ``sqlite3``.
|
|
3
|
+
|
|
4
|
+
Values are JSON-serialized and stored per namespace with a per-namespace TTL:
|
|
5
|
+
|
|
6
|
+
* ``mx`` — MX lookup results, 7 days
|
|
7
|
+
* ``verify`` — SMTP verification outcomes, 14 days
|
|
8
|
+
* ``dataset`` — large source fetches (e.g. the YC dump), 1 day
|
|
9
|
+
|
|
10
|
+
A cache hit short-circuits the network, which is both a big speedup on re-runs
|
|
11
|
+
and the polite thing to do to mail servers. Disable with ``--no-cache``.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import sqlite3
|
|
17
|
+
import time
|
|
18
|
+
|
|
19
|
+
from openleads.config import cache_path
|
|
20
|
+
|
|
21
|
+
DAY = 86400  # seconds in one day
DEFAULT_TTLS = {"mx": 7 * DAY, "verify": 14 * DAY, "dataset": 1 * DAY}


class Cache:
    """SQLite-backed key/value cache with a time-to-live per namespace.

    Values are stored as JSON text together with the time they were written.
    An entry older than its namespace's TTL is treated as missing and is
    deleted on access. Namespaces without a configured TTL use one day.

    The object can be used as a context manager so the database connection
    is closed when the ``with`` block ends.
    """

    def __init__(self, path=None, ttls: dict | None = None):
        """Open (and create if needed) the cache database.

        Args:
            path: Database location; falsy means ``cache_path()``.
            ttls: Per-namespace TTL overrides in seconds, merged over
                ``DEFAULT_TTLS``.
        """
        self.path = str(path) if path else str(cache_path())
        self.ttls = dict(DEFAULT_TTLS)
        if ttls:
            self.ttls.update(ttls)
        self._conn = sqlite3.connect(self.path)
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS cache ("
            " ns TEXT NOT NULL, k TEXT NOT NULL, v TEXT NOT NULL,"
            " ts REAL NOT NULL, PRIMARY KEY (ns, k))"
        )
        self._conn.commit()

    def __enter__(self) -> "Cache":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        # Returning None lets any exception from the ``with`` body propagate.
        self.close()

    def ttl_for(self, ns: str) -> int:
        """Return the TTL in seconds for ``ns`` (one day if not configured)."""
        return self.ttls.get(ns, DAY)

    def get(self, ns: str, key: str):
        """Return the cached value for (ns, key) if fresh, else None."""
        row = self._conn.execute(
            "SELECT v, ts FROM cache WHERE ns=? AND k=?", (ns, key)
        ).fetchone()
        if not row:
            return None
        value_json, ts = row
        if time.time() - ts > self.ttl_for(ns):
            # Expired: drop the row so it does not linger in the database.
            self._conn.execute("DELETE FROM cache WHERE ns=? AND k=?", (ns, key))
            self._conn.commit()
            return None
        try:
            return json.loads(value_json)
        except (ValueError, TypeError):
            # An undecodable payload is reported as a miss.
            return None

    def set(self, ns: str, key: str, value) -> None:
        """Store ``value`` (JSON-serialized) under (ns, key), stamped with the current time."""
        self._conn.execute(
            "INSERT OR REPLACE INTO cache (ns, k, v, ts) VALUES (?, ?, ?, ?)",
            (ns, key, json.dumps(value), time.time()),
        )
        self._conn.commit()

    def clear(self) -> int:
        """Delete all cached rows. Returns how many were removed."""
        n = self._conn.execute("SELECT COUNT(*) FROM cache").fetchone()[0]
        self._conn.execute("DELETE FROM cache")
        self._conn.commit()
        return n

    def info(self) -> dict:
        """Counts per namespace, for ``openleads cache info``."""
        rows = self._conn.execute(
            "SELECT ns, COUNT(*) FROM cache GROUP BY ns"
        ).fetchall()
        return {"path": self.path, "counts": dict(rows)}

    def close(self) -> None:
        """Close the database connection; sqlite errors while closing are ignored."""
        try:
            self._conn.close()
        except sqlite3.Error:
            pass
|
openleads/campaign.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cold-email companion (optional). Turns a leads file into personalized outreach.
|
|
3
|
+
|
|
4
|
+
This is the *only* part of OpenLeads that touches paid-optional services and your
|
|
5
|
+
mailbox — it's opt-in and dry-run by default. The core lead engine never sends
|
|
6
|
+
anything.
|
|
7
|
+
|
|
8
|
+
Install with the extra: ``pip install 'openleads[campaign]'``
|
|
9
|
+
Configure via environment (see ``.env.example``):
|
|
10
|
+
|
|
11
|
+
OPENROUTER_API_KEY free/cheap LLM for drafting
|
|
12
|
+
SMTP_USER, SMTP_PASS your mailbox
|
|
13
|
+
SMTP_HOST, SMTP_PORT default mail.example.com:465 (SSL)
|
|
14
|
+
SENDER_NAME your name in the From header
|
|
15
|
+
CAMPAIGN_ORG who you represent
|
|
16
|
+
CAMPAIGN_CONTEXT a few lines pitching what you're reaching out about
|
|
17
|
+
|
|
18
|
+
Run: ``openleads campaign`` (dry run) · ``openleads campaign --live`` (send)
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import csv
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import smtplib
|
|
26
|
+
import time
|
|
27
|
+
from datetime import datetime
|
|
28
|
+
from email.mime.multipart import MIMEMultipart
|
|
29
|
+
from email.mime.text import MIMEText
|
|
30
|
+
from email.utils import formatdate, make_msgid
|
|
31
|
+
|
|
32
|
+
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
33
|
+
|
|
34
|
+
# --- pure text helpers (unit-tested, no I/O) ------------------------------- #
|
|
35
|
+
PLACEHOLDER_RE = re.compile(r"[\[\{][^\]\}]{0,50}[\]\}]") # [anything] or {anything}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Lookup tables for ``clean_dashes``. Every code point is written as an escape
# so that invisible characters cannot be lost by an editor, a diff viewer or a
# copy/paste.
_ASCII_PUNCTUATION = str.maketrans({
    "\u2014": ",",    # em dash
    "\u2013": ",",    # en dash
    "\u2011": "-",    # non-breaking hyphen
    "\u2010": "-",    # hyphen
    "\u2212": "-",    # minus sign
    "\u2019": "'",    # right single quotation mark
    "\u2018": "'",    # left single quotation mark
    "\u201c": '"',    # left double quotation mark
    "\u201d": '"',    # right double quotation mark
    "\u2026": "...",  # horizontal ellipsis
})
# NOTE(review): the original character classes were written with literal
# invisible characters, so their exact contents are not recoverable here. The
# two sets below are the common non-ASCII spaces and zero-width marks; confirm
# against the upstream source.
_EXOTIC_SPACE_RE = re.compile("[\u00a0\u2000-\u200a\u202f\u205f\u3000]")
_ZERO_WIDTH_RE = re.compile("[\u200b-\u200d\u2060\ufeff]")


def clean_dashes(text: str) -> str:
    """Normalize exotic Unicode punctuation/spaces to plain ASCII (outreach style).

    Dashes become commas, typographic hyphens and quotes become their ASCII
    forms, the ellipsis becomes three dots, non-ASCII spaces become a normal
    space, and zero-width characters are removed.
    """
    # One pass is enough: every replacement is pure ASCII, so no output of
    # one mapping can be the input of another.
    text = text.translate(_ASCII_PUNCTUATION)
    text = _EXOTIC_SPACE_RE.sub(" ", text)
    return _ZERO_WIDTH_RE.sub("", text)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def has_placeholder(text: str) -> bool:
    """Report whether ``text`` contains anything matched by ``PLACEHOLDER_RE``."""
    return PLACEHOLDER_RE.search(text) is not None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def strip_placeholders(text: str) -> str:
    """Delete every ``PLACEHOLDER_RE`` match plus any whitespace directly before it.

    The result is stripped of leading and trailing whitespace.
    """
    pattern = r"\s*" + PLACEHOLDER_RE.pattern
    cleaned = re.sub(pattern, "", text)
    return cleaned.strip()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def format_body(body: str, first_name: str) -> str:
    """Guarantee a greeting line and blank lines between paragraphs.

    Args:
        body: Draft email text; ``None`` or empty is treated as empty.
        first_name: Used for the generated greeting; blank falls back to
            ``"there"``.

    Returns:
        The body with indentation after line breaks removed, runs of three or
        more newlines collapsed to one blank line, and either the existing
        greeting followed by a blank line or a prepended ``Hey <name>,``.
    """
    body = (body or "").strip()
    body = re.sub(r"\n[ \t]+", "\n", body)
    body = re.sub(r"\n{3,}", "\n\n", body)
    lines = body.split("\n")
    # Match the greeting as a whole word. A bare prefix test would treat
    # "Hiring ..." or "History ..." as if the draft opened with "hi".
    greeted = re.match(r"(?:hi|hey|hello|dear)\b", lines[0].strip(), re.IGNORECASE)
    if greeted:
        if len(lines) > 1 and lines[1].strip() != "":
            lines.insert(1, "")
        body = "\n".join(lines)
    else:
        name = (first_name or "").strip() or "there"
        body = f"Hey {name},\n\n{body}"
    return re.sub(r"\n{3,}", "\n\n", body).strip()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def parse_response(response: str, company: str) -> tuple[str, str]:
    """Split a model reply into ``(subject, body)``.

    The expected shape is ``SUBJECT: ...`` followed later by ``EMAIL: ...``.
    The subject is the first non-blank line after ``SUBJECT:``; the body is
    everything after ``EMAIL:``. Text before ``SUBJECT:`` is ignored, so a
    reply that opens with a chatty preamble does not leak it into the subject.

    If the two markers are not present in that order, the whole reply is used
    as the body and the subject defaults to ``Quick note for <company>``.
    """
    subject_tag, email_tag = "SUBJECT:", "EMAIL:"
    subject_at = response.find(subject_tag)
    email_at = response.find(email_tag, subject_at + 1) if subject_at != -1 else -1
    if email_at == -1:
        return f"Quick note for {company}".strip(), response.strip()
    header = response[subject_at + len(subject_tag):email_at]
    subject = header.strip().split("\n")[0].strip()
    body = response[email_at + len(email_tag):].strip()
    return subject, body
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# --- config (lazy: read only when actually running) ------------------------ #
|
|
91
|
+
def _load_env():
|
|
92
|
+
try:
|
|
93
|
+
from dotenv import load_dotenv
|
|
94
|
+
load_dotenv()
|
|
95
|
+
except Exception:
|
|
96
|
+
pass
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _config() -> dict:
|
|
100
|
+
_load_env()
|
|
101
|
+
return {
|
|
102
|
+
"api_key": os.environ.get("OPENROUTER_API_KEY", ""),
|
|
103
|
+
"model": os.environ.get("OPENROUTER_MODEL", "openai/gpt-oss-120b:free"),
|
|
104
|
+
"smtp_user": os.environ.get("SMTP_USER") or os.environ.get("PRIVATEMAIL_USER", ""),
|
|
105
|
+
"smtp_pass": os.environ.get("SMTP_PASS") or os.environ.get("PRIVATEMAIL_PASS", ""),
|
|
106
|
+
"smtp_host": os.environ.get("SMTP_HOST", "mail.example.com"),
|
|
107
|
+
"smtp_port": int(os.environ.get("SMTP_PORT", "465")),
|
|
108
|
+
"sender": os.environ.get("SENDER_NAME", "Me"),
|
|
109
|
+
"org": os.environ.get("CAMPAIGN_ORG", "our team"),
|
|
110
|
+
"context": os.environ.get("CAMPAIGN_CONTEXT",
|
|
111
|
+
"We're reaching out about a potential collaboration."),
|
|
112
|
+
"max_leads": int(os.environ.get("CAMPAIGN_MAX", "60")),
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def build_prompt(lead: dict, cfg: dict) -> str:
    """Build the LLM prompt asking for one personalized cold email.

    Args:
        lead: Lead fields read with ``.get``: ``first_name``, ``last_name``,
            ``title``, ``company``, ``industry``, ``city``, ``country`` and
            ``linkedin_url``. Missing fields render as empty text.
        cfg: Must provide ``org``, ``context`` and ``sender``.

    Returns:
        The prompt text. It instructs the model to answer in a
        ``SUBJECT:`` / ``EMAIL:`` layout and never to emit placeholders.
    """
    # A missing or empty first name is addressed as "there".
    first = lead.get("first_name") or "there"
    company = lead.get("company", "")
    # "City, Country"; stripping ", " removes the separator when either
    # side (or both) is empty.
    loc = f"{lead.get('city','')}, {lead.get('country','')}".strip(", ")
    li = lead.get("linkedin_url") or "not available"
    # ``{{braces}}`` in the RULES section is an escaped literal, not a field.
    return f"""Act like a world-class cold emailer with 20+ years of experience, writing on behalf of {cfg['org']}.

LEAD (use these REAL values; never write a placeholder):
- First name: {first}
- Full name: {lead.get('first_name','')} {lead.get('last_name','')}
- Title: {lead.get('title','')}
- Company: {company}
- Industry: {lead.get('industry','')}
- Location: {loc}
- LinkedIn: {li}

ABOUT {cfg['org']}:
{cfg['context']}

OUTPUT ONLY this exact format:

SUBJECT: <one short subject line>

EMAIL:
Hey {first},

<paragraph one>

<paragraph two>

Best,
{cfg['sender']}

RULES:
- Write the ACTUAL name "{first}" and company "{company}". NEVER output [brackets] or {{braces}} placeholders. If unsure of a detail, leave it out.
- Blank line between greeting, each paragraph, and the signature.
- Under 120 words, punchy, human, specific. No em dashes (use commas)."""
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_leads(csv_path: str, max_leads: int) -> list[dict]:
    """Read leads that have an email address from a CSV file.

    Args:
        csv_path: Path to the leads CSV (with a header row).
        max_leads: The result is cut to ``leads[:max_leads]``.

    Returns:
        One dict per row with a non-empty ``Email`` column, with the keys
        ``first_name``, ``last_name``, ``email``, ``title``, ``company``,
        ``industry``, ``city``, ``country`` and ``linkedin_url``. Missing
        columns become empty strings; all values are stripped.

    Raises:
        SystemExit: if ``csv_path`` does not exist.
    """
    if not os.path.exists(csv_path):
        raise SystemExit(f"[error] {csv_path} not found. Run `openleads find` first.")

    def cell(row: dict, *columns: str) -> str:
        # First non-empty value among the candidate column names, stripped.
        for column in columns:
            value = row.get(column)
            if value:
                return value.strip()
        return ""

    leads = []
    # "utf-8-sig" reads plain UTF-8 as well, and drops a leading BOM. Without
    # it the first header of a spreadsheet export becomes "\ufeffEmail" and
    # the lookup below silently finds nothing.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            email = cell(row, "Email")
            if not email:
                continue
            leads.append({
                "first_name": cell(row, "First Name"),
                "last_name": cell(row, "Last Name"),
                "email": email,
                "title": cell(row, "Title"),
                "company": cell(row, "Organization Name", "Company"),
                "industry": cell(row, "Industry"),
                "city": cell(row, "City"),
                "country": cell(row, "Country"),
                "linkedin_url": cell(row, "LinkedIn Url"),
            })
    return leads[:max_leads]
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def call_llm(prompt: str, cfg: dict, max_tokens: int = 700) -> str:
    """Send ``prompt`` to the OpenRouter chat-completions endpoint and return the reply text.

    Args:
        prompt: Sent as a single user message.
        cfg: Must provide ``api_key`` and ``model``.
        max_tokens: Completion token limit sent with the request.

    Returns:
        The reply with any ``<think>...</think>`` section removed and
        surrounding whitespace stripped.

    Raises:
        RuntimeError: if every attempt is answered with HTTP 429, or if the
            reply contains no text.
        requests.HTTPError: for any other non-success status.
    """
    import requests

    headers = {"Authorization": f"Bearer {cfg['api_key']}", "Content-Type": "application/json"}
    body = {"model": cfg["model"], "max_tokens": max_tokens, "temperature": 0.85,
            "messages": [{"role": "user", "content": prompt}]}
    attempts = 5
    for attempt in range(attempts):
        res = requests.post(OPENROUTER_URL, headers=headers, json=body, timeout=60)
        if res.status_code != 429:
            res.raise_for_status()
            break
        # Back off 15s, 30s, ... between tries, but do not sleep after the
        # last one, since no further request follows.
        if attempt < attempts - 1:
            time.sleep(15 * (attempt + 1))
    else:
        raise RuntimeError("rate limited after 5 retries")
    content = res.json()["choices"][0]["message"]["content"]
    raw = re.sub(r"<think>.*?</think>", "", content or "", flags=re.DOTALL).strip()
    if not raw:
        # Fail here so an empty reply is not formatted into a greeting-only email.
        raise RuntimeError("model returned an empty completion")
    return raw
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def generate(lead: dict, cfg: dict) -> dict:
    """Draft a subject and body for ``lead``, retrying when placeholders slip through.

    Up to three drafts are requested. The first one whose subject and body
    contain no placeholder is returned. If all three contain one, the
    placeholders are stripped from the last draft; an emptied subject falls
    back to ``Note for <company>``.

    Returns:
        ``{"subject": ..., "body": ...}``.
    """
    prompt = build_prompt(lead, cfg)
    first_name = lead.get("first_name", "")
    draft = {"subject": "", "body": ""}
    for _ in range(3):
        reply = call_llm(prompt, cfg)
        raw_subject, raw_body = parse_response(reply, lead.get("company", ""))
        draft = {
            "subject": clean_dashes(raw_subject),
            "body": format_body(clean_dashes(raw_body), first_name),
        }
        if not (has_placeholder(draft["subject"]) or has_placeholder(draft["body"])):
            return draft
    # Every attempt contained a placeholder: salvage the last draft.
    subject = strip_placeholders(draft["subject"]) or f"Note for {lead.get('company','')}"
    body = format_body(strip_placeholders(draft["body"]), first_name)
    return {"subject": subject, "body": body}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def send_email(lead_email: str, subject: str, body: str, cfg: dict) -> None:
    """Send one plain-text email over SMTP-over-SSL.

    Args:
        lead_email: Recipient address.
        subject: Subject header.
        body: Plain-text message body.
        cfg: Must provide ``sender``, ``smtp_user``, ``smtp_pass``,
            ``smtp_host`` and ``smtp_port``.

    Raises:
        smtplib.SMTPException / OSError: on connection, login or send failure.
    """
    # Function-scope import so the module's import block stays untouched.
    from email.utils import formataddr

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    # formataddr quotes/encodes the display name, so a sender such as
    # "Doe, Jane" or a non-ASCII name still yields a valid From header.
    msg["From"] = formataddr((cfg["sender"], cfg["smtp_user"]))
    msg["To"] = lead_email
    msg["Date"] = formatdate(localtime=True)
    msg["Message-ID"] = make_msgid(domain=cfg["smtp_user"].split("@")[-1] or "localhost")
    msg.attach(MIMEText(body, "plain"))
    # A timeout keeps one unresponsive server from stalling the whole run.
    with smtplib.SMTP_SSL(cfg["smtp_host"], cfg["smtp_port"], timeout=60) as server:
        server.login(cfg["smtp_user"], cfg["smtp_pass"])
        server.sendmail(cfg["smtp_user"], lead_email, msg.as_string())
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def run_campaign(dry_run: bool = True, leads_path: str = "leads.csv") -> int:
    """Generate (and optionally send) one email per lead, then write a CSV log.

    Args:
        dry_run: When true, drafts are only printed; when false they are sent.
        leads_path: Leads CSV to read.

    Returns:
        ``0`` once the run completes.

    Raises:
        SystemExit: if ``OPENROUTER_API_KEY`` is missing, if a live run has
            no SMTP credentials, or if the leads file does not exist.
    """
    cfg = _config()
    if not cfg["api_key"]:
        raise SystemExit("[error] set OPENROUTER_API_KEY (see .env.example).")
    # Check credentials once, up front, instead of paying for an LLM draft
    # per lead and then failing at login each time.
    if not dry_run and not (cfg["smtp_user"] and cfg["smtp_pass"]):
        raise SystemExit("[error] set SMTP_USER and SMTP_PASS before using --live (see .env.example).")
    leads = get_leads(leads_path, cfg["max_leads"])
    print("=" * 60)
    print(f" OpenLeads campaign · model: {cfg['model']}")
    print(f" mode: {'DRY RUN (no send)' if dry_run else 'LIVE SEND'} · leads: {len(leads)}")
    print("=" * 60 + "\n")

    results: list[dict] = []
    try:
        for i, lead in enumerate(leads, 1):
            print(f"[{i}/{len(leads)}] {lead['first_name']} {lead['last_name']} "
                  f"| {lead['title']} @ {lead['company']} <{lead['email']}>")
            try:
                gen = generate(lead, cfg)
                print(f" subject: {gen['subject']}\n ---\n{gen['body']}\n")
                status = "preview"
                if not dry_run:
                    send_email(lead["email"], gen["subject"], gen["body"], cfg)
                    status = "sent"
                    print(" sent!\n")
                results.append({**lead, **gen, "status": status,
                                "timestamp": datetime.now().isoformat()})
            except Exception as e:
                # One failing lead is logged and must not stop the batch.
                print(f" error: {e}\n")
                results.append({**lead, "subject": "", "body": "",
                                "status": f"error: {e}", "timestamp": datetime.now().isoformat()})
            if i < len(leads):
                time.sleep(4)  # pause between leads
    finally:
        # Also runs on Ctrl-C, so emails already sent are never left unlogged.
        _write_campaign_log(results)
    return 0


def _write_campaign_log(results: list[dict]) -> str:
    """Write ``results`` to a timestamped ``campaign_*.csv`` and return its name."""
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out = f"campaign_{ts}.csv"
    fields = ["first_name", "last_name", "email", "title", "company",
              "subject", "body", "status", "timestamp"]
    with open(out, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fields)
        w.writeheader()
        for r in results:
            w.writerow({k: r.get(k, "") for k in fields})
    print(f"[log] {out} · {sum(1 for r in results if r['status'] in ('sent', 'preview'))} processed")
    return out
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def main(argv=None) -> int:
    """Parse the ``openleads campaign`` arguments and run the campaign.

    ``argv`` is the argument list without the program name; ``None`` is
    treated as no arguments, and empty-string entries are dropped. The run
    is a dry run unless ``--live`` (alias ``--send``) is given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="openleads campaign",
        description="Personalized cold-email companion (opt-in).",
    )
    parser.add_argument("--live", "--send", dest="live", action="store_true",
                        help="actually send (default is a dry-run preview)")
    parser.add_argument("--leads", default="leads.csv", help="leads CSV path")
    tokens = [] if argv is None else [tok for tok in argv if tok]
    options = parser.parse_args(tokens)
    return run_campaign(dry_run=not options.live, leads_path=options.leads)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# Script entry point: pass the command-line arguments (without the program
# name) to ``main`` and exit with its return value. ``sys`` is pulled in via
# ``__import__`` because this module has no top-level ``import sys``.
if __name__ == "__main__":
    raise SystemExit(main(__import__("sys").argv[1:]))
|