leadgen-maps 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- leadgen_maps/__init__.py +10 -0
- leadgen_maps/__main__.py +8 -0
- leadgen_maps/cli.py +226 -0
- leadgen_maps/config.py +74 -0
- leadgen_maps/connect.py +412 -0
- leadgen_maps/connectors/__init__.py +30 -0
- leadgen_maps/connectors/base.py +40 -0
- leadgen_maps/connectors/csv_out.py +58 -0
- leadgen_maps/connectors/gsheets.py +147 -0
- leadgen_maps/connectors/notion.py +214 -0
- leadgen_maps/connectors/pdf_out.py +92 -0
- leadgen_maps/connectors/xlsx_out.py +92 -0
- leadgen_maps/credstore.py +98 -0
- leadgen_maps/engine.py +188 -0
- leadgen_maps/extract.py +257 -0
- leadgen_maps/fields.py +121 -0
- leadgen_maps/logo.svg +1 -0
- leadgen_maps/mcp_server.py +311 -0
- leadgen_maps/net.py +27 -0
- leadgen_maps/notify.py +38 -0
- leadgen_maps/oauthapp.py +60 -0
- leadgen_maps/progress.py +106 -0
- leadgen_maps-1.1.0.dist-info/METADATA +116 -0
- leadgen_maps-1.1.0.dist-info/RECORD +27 -0
- leadgen_maps-1.1.0.dist-info/WHEEL +5 -0
- leadgen_maps-1.1.0.dist-info/entry_points.txt +4 -0
- leadgen_maps-1.1.0.dist-info/top_level.txt +1 -0
leadgen_maps/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""
leadgen — Apollo-class local lead generation from Google Maps, no API.

Scrapes Google Maps via real browser automation, extracts a rich Apollo-grade
field set, and streams leads directly into your connector of choice (Notion,
Google Sheets, CSV, XLSX, or PDF) with live progress and guaranteed
de-duplication.
"""

# Must match the distribution version (leadgen-maps 1.1.0): the CLI prints this
# value for `leadgen --version` and in the `leadgen doctor` report.
__version__ = "1.1.0"
__all__ = ["__version__"]
|
leadgen_maps/__main__.py
ADDED
leadgen_maps/cli.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""
|
|
2
|
+
leadgen command-line interface.
|
|
3
|
+
|
|
4
|
+
leadgen run --niche "cafe" --location "Kolkata" --limit 30 --to notion
|
|
5
|
+
leadgen run --niche "dentist" --location "Pune" --limit 50 --website any --to csv,notion
|
|
6
|
+
leadgen doctor # check internet, connectors, license
|
|
7
|
+
leadgen fields # list the Apollo-grade field set
|
|
8
|
+
leadgen connectors # list available push targets
|
|
9
|
+
|
|
10
|
+
AI agents: add --json to stream machine-readable progress + a final summary
|
|
11
|
+
object on stdout (one JSON event per line).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
|
|
18
|
+
from . import __version__, config, fields, net
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _err(msg, json_mode=False):
|
|
22
|
+
if json_mode:
|
|
23
|
+
sys.stdout.write(json.dumps({"event": "fatal", "error": msg}) + "\n")
|
|
24
|
+
else:
|
|
25
|
+
sys.stderr.write(f"\nERROR: {msg}\n")
|
|
26
|
+
return 2
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def cmd_run(args):
    """Run a lead-gen job and print (or stream) its summary.

    Reads ``to``, ``fields``, ``json``, ``niche``, ``location``, ``limit``,
    ``website``, ``cc``, ``show``, ``no_reviews`` and ``email`` from *args*.
    Returns 0 on success, or the value of ``_err`` when connecting a
    destination or the engine run fails.
    """
    to = [piece.strip() for piece in args.to.split(",") if piece.strip()]
    columns = fields.select_columns(args.fields)

    # Keyless connect: if a destination is requested but not linked yet, open
    # the browser and connect once — no .env, no keys. Only done for
    # interactive (non-JSON) runs.
    if not args.json:
        requested = {name.lower() for name in to}

        if "notion" in requested:
            from .connectors.notion import notion_connected
            if not notion_connected():
                from .connect import connect_notion
                print("[leadgen] Notion isn't connected yet — let's fix that once.")
                try:
                    connect_notion()
                except Exception as exc:
                    return _err(f"connect notion: {exc}", args.json)
                print("[leadgen] ✓ Notion connected. Continuing your run…")

        if requested & {"gsheets", "sheets", "google"}:
            from .connectors.gsheets import gsheets_connected
            if not gsheets_connected():
                from .connect import connect_google
                print("[leadgen] Google Sheets isn't connected yet — let's fix that once.")
                try:
                    connect_google()
                except Exception as exc:
                    return _err(f"connect google: {exc}", args.json)
                print("[leadgen] ✓ Google Sheets connected. Continuing your run…")

    try:
        from .engine import run
        summary = run(
            niche=args.niche,
            location=args.location,
            limit=args.limit,
            website=args.website,
            to=to,
            columns=columns,
            country_code=args.cc,
            headless=not args.show,
            want_reviews=not args.no_reviews,
            json_mode=args.json,
            notify_email=args.email,
        )
    except net.OfflineError as exc:
        # Offline errors carry their own user-facing message.
        return _err(str(exc), args.json)
    except Exception as exc:
        return _err(f"{type(exc).__name__}: {exc}", args.json)

    if args.json:
        sys.stdout.write(json.dumps({"event": "summary", **summary}) + "\n")
        return 0

    rule = "=" * 58
    print(f"\n{rule}")
    print(f" ✓ {summary['kept']} new leads (target {summary['target']})")
    counters = [
        f" scanned {summary['queued']} listings",
        f"{summary['skipped_filtered']} filtered",
        f"{summary['skipped_duplicate']} duplicate",
        f"{summary['skipped_no_phone']} no-phone",
    ]
    print(" · ".join(counters))
    for name, loc in summary["connectors"].items():
        print(f" → {name}: {loc}")
    if summary.get("emailed"):
        print(f" ✉ emailed {summary['emailed']}")
    print(rule)
    return 0
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def cmd_connect(args):
    """Link a destination (Notion or Google Sheets) via the browser flow.

    ``args.target`` selects the destination and defaults to "notion";
    ``args.paste`` (Notion only) is forwarded as ``allow_paste``.
    Returns 0 on success, or the value of ``_err`` for an unknown target or
    a failed connection.
    """
    target = (args.target or "notion").lower()

    if target not in ("notion", "google", "gsheets", "sheets"):
        return _err(f"don't know how to connect '{target}' (supported: notion, google)")

    if target == "notion":
        from .connect import connect_notion
        try:
            info = connect_notion(allow_paste=getattr(args, "paste", False))
        except Exception as exc:
            return _err(str(exc))
        workspace = info.get("workspace_name") or "your workspace"
        print(f"\n[leadgen] ✓ Connected to {workspace}. Leads database is ready.")
        print('[leadgen] Try: leadgen run --niche "cafe" --location "Kolkata" --limit 10 --to notion')
        return 0

    # Remaining aliases all mean Google Sheets.
    from .connect import connect_google
    try:
        info = connect_google()
    except Exception as exc:
        return _err(str(exc))
    print("\n[leadgen] ✓ Connected Google Sheets. Your sheet:")
    print(f" https://docs.google.com/spreadsheets/d/{info['spreadsheet_id']}")
    print('[leadgen] Try: leadgen run --niche "cafe" --location "Kolkata" --limit 10 --to gsheets')
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def cmd_disconnect(args):
    """Remove stored credentials for one destination, or for all of them.

    ``args.target`` may be "notion", a Google Sheets alias ("google",
    "gsheets", "sheets") or "all" (the default). Returns 0 on success, or
    the value of ``_err`` for an unrecognised target.
    """
    target = (args.target or "all").lower()
    from . import credstore

    if target == "notion":
        credstore.clear("notion")
        label = "Notion"
    elif target in ("google", "gsheets", "sheets"):
        credstore.clear("google")
        label = "Google Sheets"
    elif target in ("all", ""):
        credstore.clear_all()
        label = "all connections"
    else:
        return _err(f"don't know how to disconnect '{target}' (supported: notion, google, all)")

    print(f"[leadgen] ✓ Disconnected {label}. Re-link any time with `leadgen connect`.")
    return 0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cmd_doctor(args):
    """Print an environment report; return 0 when online, otherwise 1.

    Reports the package version, internet reachability, whether Notion and
    Google Sheets are linked, whether SMTP is configured, and whether the
    ``playwright`` package can be imported. Only the internet check affects
    the exit code.
    """
    print("leadgen doctor")
    print(f" version : {__version__}")
    online = net.is_online()
    print(f" internet : {'OK' if online else 'OFFLINE (required!)'}")
    from .connectors.notion import notion_connected
    from .connectors.gsheets import gsheets_connected
    print(f" Notion connected : {'yes (relink: leadgen connect notion)' if notion_connected() else 'no — run `leadgen connect notion`'}")
    print(f" Google Sheets : {'connected (relink: leadgen connect google)' if gsheets_connected() else 'not connected — run `leadgen connect google`'}")
    print(f" SMTP (email) : {'configured' if config.SMTP_HOST else 'not configured'}")
    try:
        import playwright  # noqa: F401
    except ImportError:
        # The package itself is absent here, so `python -m playwright ...`
        # alone cannot work; the hint must install the package first.
        print(" Playwright : MISSING — run `pip install playwright` then `python -m playwright install chromium`")
    else:
        print(" Playwright : installed")
    return 0 if online else 1
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def cmd_fields(args):
    """List the available lead fields, as JSON or as a grouped text table.

    With ``args.json`` set, prints a JSON array with one object per field
    (key, label, type, group, source). Otherwise prints one row per field
    with a heading each time the group changes. Always returns 0.
    """
    if args.json:
        rows = []
        for key in fields.KEYS:
            rows.append({
                "key": key,
                "label": fields.LABELS[key],
                "type": fields.TYPES[key],
                "group": fields.GROUPS[key],
                "source": fields.SOURCE[key],
            })
        print(json.dumps(rows, indent=2))
        return 0

    current_group = None
    for key in fields.KEYS:
        key_group = fields.GROUPS[key]
        if key_group != current_group:
            # Emit a heading whenever consecutive keys switch group.
            current_group = key_group
            print(f"\n{current_group}")
        print(f" {key:<16} {fields.LABELS[key]:<20} {fields.TYPES[key]:<12} [{fields.SOURCE[key]}]")

    print(f"\n{len(fields.KEYS)} fields. Use --fields default|all|apollo|<comma,list>")
    return 0
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def cmd_connectors(args):
    """Print the connectors accepted by ``--to`` and return 0.

    The listing is static help text, so it deliberately imports nothing:
    the command keeps working even when a connector's optional dependency
    is not installed. *args* is accepted for the sub-command calling
    convention and is not read.
    """
    print("Available connectors (--to):")
    print(" notion — Notion database (rich pages, photos, reviews) · `leadgen connect notion`")
    print(" gsheets — Google Sheets (keyless OAuth) · `leadgen connect google`")
    print(" csv — local CSV table view")
    print(" xlsx — local Excel workbook (.xlsx)")
    print(" pdf — local printable lead sheet (.pdf)")
    print("\nCombine with commas, e.g. --to notion,gsheets,xlsx,pdf")
    return 0
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def build_parser():
    """Build the ``leadgen`` argument parser with all of its sub-commands.

    Each sub-parser stores its handler in ``func`` via ``set_defaults``; the
    chosen sub-command name is stored in ``cmd``.
    """
    parser = argparse.ArgumentParser(
        prog="leadgen",
        description="Apollo-class local lead-gen from Google Maps (no API).",
    )
    parser.add_argument("--version", action="version", version=f"leadgen {__version__}")
    commands = parser.add_subparsers(dest="cmd")

    # run
    run_cmd = commands.add_parser("run", help="run a lead-gen job")
    run_cmd.add_argument("--niche", required=True, help='business type, e.g. "cafe"')
    run_cmd.add_argument("--location", required=True, help='city/area, e.g. "Kolkata"')
    run_cmd.add_argument(
        "--limit", type=int, default=30,
        help=f"how many NEW leads (max {config.MAX_LIMIT})",
    )
    run_cmd.add_argument(
        "--website", choices=["without", "with", "any"], default="without",
        help="keep businesses without a website (default), with one, or any",
    )
    run_cmd.add_argument(
        "--to", default="notion",
        help="connector(s), comma list: notion,gsheets,csv,xlsx,pdf",
    )
    run_cmd.add_argument(
        "--fields", default="default",
        help="default|all|apollo|<comma,list of keys>",
    )
    run_cmd.add_argument(
        "--cc", default=None,
        help="country calling code (default 91 → Region India)",
    )
    run_cmd.add_argument("--show", action="store_true", help="show the browser window")
    run_cmd.add_argument("--no-reviews", action="store_true", help="skip review snippets (faster)")
    run_cmd.add_argument("--email", default=None, help="email a summary when the job finishes")
    run_cmd.add_argument("--json", action="store_true", help="stream JSON events (for AI agents)")
    run_cmd.set_defaults(func=cmd_run)

    # connect
    connect_cmd = commands.add_parser(
        "connect", help="connect a destination (keyless, browser-based)",
    )
    connect_cmd.add_argument(
        "target", nargs="?", default="notion",
        help="what to connect: notion | google",
    )
    connect_cmd.add_argument(
        "--paste", action="store_true",
        help="(Notion) paste an integration token once instead of OAuth",
    )
    connect_cmd.set_defaults(func=cmd_connect)

    # disconnect
    disconnect_cmd = commands.add_parser(
        "disconnect",
        help="remove a stored connection (logout) — notion | google | all",
    )
    disconnect_cmd.add_argument(
        "target", nargs="?", default="all",
        help="what to disconnect: notion | google | all",
    )
    disconnect_cmd.set_defaults(func=cmd_disconnect)

    # doctor
    doctor_cmd = commands.add_parser("doctor", help="check environment, connectors, license")
    doctor_cmd.set_defaults(func=cmd_doctor)

    # fields
    fields_cmd = commands.add_parser("fields", help="list the field set")
    fields_cmd.add_argument("--json", action="store_true")
    fields_cmd.set_defaults(func=cmd_fields)

    # connectors
    connectors_cmd = commands.add_parser("connectors", help="list push targets")
    connectors_cmd.set_defaults(func=cmd_connectors)

    return parser
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def main(argv=None):
    """Parse *argv* and dispatch to the selected sub-command.

    Prints the top-level help and returns 0 when no sub-command is given;
    otherwise returns whatever the sub-command handler returns.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if getattr(args, "cmd", None):
        # Not every sub-command defines --json; give handlers a uniform attribute.
        args.json = getattr(args, "json", False)
        return args.func(args)

    parser.print_help()
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
leadgen_maps/config.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Configuration + .env loading for leadgen. Secrets never live in code."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
# Shown to users in the CLI's `--limit` help text ("max {MAX_LIMIT}").
MAX_LIMIT = 256 # hard cap on leads per run (product constraint)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_dotenv(explicit=None):
    """Load KEY=VALUE pairs from the first .env file found.

    Search order: *explicit* (if given), then ``.env`` in the current
    directory and in up to five of its ancestors, then ``~/.leadgen/.env``.
    Only the first existing file is read. Blank lines, ``#`` comments and
    lines without ``=`` are ignored; surrounding quotes are stripped from
    values. Variables already present in ``os.environ`` are never
    overwritten.

    Returns the path of the file that was loaded, or None if none was found.
    """
    candidates = []
    if explicit:
        candidates.append(explicit)
    d = os.getcwd()
    for _ in range(6):  # walk up to find a shared root .env (works from subfolders)
        candidates.append(os.path.join(d, ".env"))
        parent = os.path.dirname(d)
        if parent == d:  # reached the filesystem root
            break
        d = parent
    candidates.append(os.path.expanduser(os.path.join("~", ".leadgen", ".env")))

    for path in candidates:
        # isfile, not exists: a *directory* named `.env` (a common virtualenv
        # name) must be skipped, otherwise open() raises while importing.
        if not path or not os.path.isfile(path):
            continue
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, v = line.split("=", 1)
                # setdefault: the real environment wins over the file.
                os.environ.setdefault(k.strip(), v.strip().strip('"').strip("'"))
        return path
    return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Runs at import time so the module-level settings below, which read
# os.environ, can pick up values from a .env file.
load_dotenv()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get(name, default=""):
    """Return environment variable *name*, or *default* when it is unset."""
    return os.getenv(name, default)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_bool(name, default=False):
    """Read environment variable *name* as a boolean.

    Returns *default* when the variable is unset. Otherwise the value is
    true only if, ignoring case and surrounding whitespace, it is one of
    "1", "true", "yes" or "on".
    """
    raw = os.environ.get(name)
    if raw is not None:
        return raw.strip().lower() in {"1", "true", "yes", "on"}
    return default
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_float(name, default):
    """Read environment variable *name* as a float.

    When the variable is unset, *default* itself is converted with
    ``float``. If the conversion fails (TypeError or ValueError), *default*
    is returned unchanged.
    """
    try:
        parsed = float(os.environ.get(name, default))
    except (TypeError, ValueError):
        parsed = default
    return parsed
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ── Connectors ───────────────────────────────────────────────────────────────
NOTION_TOKEN = get("NOTION_TOKEN")
NOTION_DATABASE_ID = get("NOTION_DATABASE_ID")

GOOGLE_SERVICE_ACCOUNT_JSON = get("GOOGLE_SERVICE_ACCOUNT_JSON")  # path to creds file
GSHEET_ID = get("GSHEET_ID")
GSHEET_TAB = get("GSHEET_TAB", "Leads")

# ── Notifications (email on completion) ──────────────────────────────────────
SMTP_HOST = get("SMTP_HOST")
try:
    SMTP_PORT = int(get("SMTP_PORT", "587") or 587)
except ValueError:
    # A malformed SMTP_PORT must not break importing this module (and with it
    # every CLI command); fall back to the default port.
    SMTP_PORT = 587
SMTP_USER = get("SMTP_USER")
SMTP_PASS = get("SMTP_PASS")
NOTIFY_EMAIL = get("NOTIFY_EMAIL")

# ── Scrape defaults ──────────────────────────────────────────────────────────
DEFAULT_COUNTRY_CODE = get("DEFAULT_COUNTRY_CODE", "91")
HEADLESS = get_bool("HEADLESS", True)
MIN_DELAY = get_float("MIN_DELAY", 1.0)
MAX_DELAY = get_float("MAX_DELAY", 2.5)
|