leadgen-maps 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
"""
leadgen — Apollo-class local lead generation from Google Maps, no API.

Scrapes Google Maps via real browser automation, extracts a rich Apollo-grade
field set, and streams leads directly into your connector of choice (Notion,
Google Sheets, or CSV) with live progress and guaranteed de-duplication.
"""

# Must match the published distribution version (this artifact is the 1.1.0
# wheel); the CLI imports this value for `--version` and `doctor` output.
__version__ = "1.1.0"
__all__ = ["__version__"]
@@ -0,0 +1,8 @@
1
"""Module entry point: lets the package run as `python -m leadgen_maps ...`."""

import sys

from .cli import main

if __name__ == "__main__":
    # Hand the CLI's return value to the interpreter as the exit status.
    exit_code = main()
    sys.exit(exit_code)
leadgen_maps/cli.py ADDED
@@ -0,0 +1,226 @@
1
+ """
2
+ leadgen command-line interface.
3
+
4
+ leadgen run --niche "cafe" --location "Kolkata" --limit 30 --to notion
5
+ leadgen run --niche "dentist" --location "Pune" --limit 50 --website any --to csv,notion
6
+ leadgen doctor # check internet, connectors, license
7
+ leadgen fields # list the Apollo-grade field set
8
+ leadgen connectors # list available push targets
9
+
10
+ AI agents: add --json to stream machine-readable progress + a final summary
11
+ object on stdout (one JSON event per line).
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+
18
+ from . import __version__, config, fields, net
19
+
20
+
21
+ def _err(msg, json_mode=False):
22
+ if json_mode:
23
+ sys.stdout.write(json.dumps({"event": "fatal", "error": msg}) + "\n")
24
+ else:
25
+ sys.stderr.write(f"\nERROR: {msg}\n")
26
+ return 2
27
+
28
+
29
def cmd_run(args):
    """Run one lead-gen job and report its outcome.

    Interactive (non-JSON) runs first check that any requested Notion or
    Google Sheets destination is linked, launching the browser-based connect
    flow when it is not. The job itself is delegated to ``engine.run``; its
    summary is then emitted as a JSON ``summary`` event or printed as a
    human-readable recap.

    Returns 0 on success, or the value returned by ``_err`` on any failure.
    """
    destinations = [piece.strip() for piece in args.to.split(",")]
    destinations = [name for name in destinations if name]
    columns = fields.select_columns(args.fields)

    # Keyless connect: if a destination is requested but not linked yet, open the
    # browser and connect once — no .env, no keys.
    interactive = not args.json
    if interactive:
        wanted = {name.lower() for name in destinations}

        if "notion" in wanted:
            from .connectors.notion import notion_connected
            if not notion_connected():
                from .connect import connect_notion
                print("[leadgen] Notion isn't connected yet — let's fix that once.")
                try:
                    connect_notion()
                except Exception as exc:
                    return _err(f"connect notion: {exc}", args.json)
                print("[leadgen] ✓ Notion connected. Continuing your run…")

        if wanted & {"gsheets", "sheets", "google"}:
            from .connectors.gsheets import gsheets_connected
            if not gsheets_connected():
                from .connect import connect_google
                print("[leadgen] Google Sheets isn't connected yet — let's fix that once.")
                try:
                    connect_google()
                except Exception as exc:
                    return _err(f"connect google: {exc}", args.json)
                print("[leadgen] ✓ Google Sheets connected. Continuing your run…")

    try:
        # Imported inside the try so an engine import failure is reported
        # through the same error path as a runtime failure.
        from .engine import run
        summary = run(
            niche=args.niche,
            location=args.location,
            limit=args.limit,
            website=args.website,
            to=destinations,
            columns=columns,
            country_code=args.cc,
            headless=not args.show,
            want_reviews=not args.no_reviews,
            json_mode=args.json,
            notify_email=args.email,
        )
    except net.OfflineError as exc:
        return _err(str(exc), args.json)
    except Exception as exc:
        return _err(f"{type(exc).__name__}: {exc}", args.json)

    if args.json:
        event = {"event": "summary", **summary}
        sys.stdout.write(json.dumps(event) + "\n")
        return 0

    rule = "=" * 58
    print(f"\n{rule}")
    print(f" ✓ {summary['kept']} new leads (target {summary['target']})")
    print(f" scanned {summary['queued']} listings · "
          f"{summary['skipped_filtered']} filtered · "
          f"{summary['skipped_duplicate']} duplicate · "
          f"{summary['skipped_no_phone']} no-phone")
    for connector, where in summary["connectors"].items():
        print(f" → {connector}: {where}")
    if summary.get("emailed"):
        print(f" ✉ emailed {summary['emailed']}")
    print(f"{rule}")
    return 0
86
+
87
+
88
def cmd_connect(args):
    """Link a destination (Notion by default, or Google Sheets).

    Returns 0 once the connection succeeds, or the value returned by ``_err``
    when the target is unknown or the connect flow raises.
    """
    target = (args.target or "notion").lower()
    google_aliases = ("google", "gsheets", "sheets")

    if target != "notion" and target not in google_aliases:
        return _err(f"don't know how to connect '{target}' (supported: notion, google)")

    if target == "notion":
        from .connect import connect_notion
        try:
            info = connect_notion(allow_paste=getattr(args, "paste", False))
        except Exception as exc:
            return _err(str(exc))
        workspace = info.get("workspace_name") or "your workspace"
        print(f"\n[leadgen] ✓ Connected to {workspace}. Leads database is ready.")
        print('[leadgen] Try: leadgen run --niche "cafe" --location "Kolkata" --limit 10 --to notion')
        return 0

    from .connect import connect_google
    try:
        info = connect_google()
    except Exception as exc:
        return _err(str(exc))
    print("\n[leadgen] ✓ Connected Google Sheets. Your sheet:")
    print(f" https://docs.google.com/spreadsheets/d/{info['spreadsheet_id']}")
    print('[leadgen] Try: leadgen run --niche "cafe" --location "Kolkata" --limit 10 --to gsheets')
    return 0
111
+
112
+
113
def cmd_disconnect(args):
    """Remove a stored connection: ``notion``, ``google``, or ``all`` (default).

    Returns 0 after clearing the credentials, or the value returned by
    ``_err`` for an unrecognised target.
    """
    target = (args.target or "all").lower()
    from . import credstore

    if target == "notion":
        credstore.clear("notion")
        label = "Notion"
    elif target in ("google", "gsheets", "sheets"):
        credstore.clear("google")
        label = "Google Sheets"
    elif target in ("all", ""):
        credstore.clear_all()
        label = "all connections"
    else:
        return _err(f"don't know how to disconnect '{target}' (supported: notion, google, all)")

    print(f"[leadgen] ✓ Disconnected {label}. Re-link any time with `leadgen connect`.")
    return 0
126
+
127
+
128
def cmd_doctor(args):
    """Print an environment health report.

    Reports the package version, internet reachability, whether Notion and
    Google Sheets are connected, whether SMTP is configured, and whether the
    ``playwright`` package can be imported.

    Returns 0 when the machine is online, otherwise 1. The other checks are
    informational and do not affect the exit code.
    """
    print("leadgen doctor")
    print(f" version : {__version__}")
    online = net.is_online()
    print(f" internet : {'OK' if online else 'OFFLINE (required!)'}")
    from .connectors.notion import notion_connected
    from .connectors.gsheets import gsheets_connected
    print(f" Notion connected : {'yes (relink: leadgen connect notion)' if notion_connected() else 'no — run `leadgen connect notion`'}")
    print(f" Google Sheets : {'connected (relink: leadgen connect google)' if gsheets_connected() else 'not connected — run `leadgen connect google`'}")
    print(f" SMTP (email) : {'configured' if config.SMTP_HOST else 'not configured'}")
    try:
        import playwright  # noqa: F401
    except ImportError:
        # The Python package itself is missing here, so `python -m playwright`
        # cannot run yet: tell the user to install the package first.
        print(" Playwright : MISSING — run `pip install playwright` then `python -m playwright install chromium`")
    else:
        print(" Playwright : installed")
    return 0 if online else 1
144
+
145
+
146
def cmd_fields(args):
    """List the available lead fields, as JSON or as a grouped text table.

    With ``--json`` a JSON array of field descriptors is printed; otherwise
    fields are printed in ``fields.KEYS`` order with a heading each time the
    group changes. Always returns 0.
    """
    if args.json:
        descriptors = []
        for key in fields.KEYS:
            descriptors.append({
                "key": key,
                "label": fields.LABELS[key],
                "type": fields.TYPES[key],
                "group": fields.GROUPS[key],
                "source": fields.SOURCE[key],
            })
        print(json.dumps(descriptors, indent=2))
        return 0

    current_group = None
    for key in fields.KEYS:
        key_group = fields.GROUPS[key]
        if key_group != current_group:
            # Emit a heading only when entering a new group.
            current_group = key_group
            print(f"\n{current_group}")
        print(f" {key:<16} {fields.LABELS[key]:<20} {fields.TYPES[key]:<12} [{fields.SOURCE[key]}]")
    print(f"\n{len(fields.KEYS)} fields. Use --fields default|all|apollo|<comma,list>")
    return 0
159
+
160
+
161
def cmd_connectors(args):
    """Print the list of push targets accepted by ``--to``. Always returns 0."""
    # Kept from the original even though the name is unused here: importing
    # the connectors package may matter to callers (not visible from here).
    from .connectors import NAMES  # noqa: F401

    listing = (
        "Available connectors (--to):",
        " notion — Notion database (rich pages, photos, reviews) · `leadgen connect notion`",
        " gsheets — Google Sheets (keyless OAuth) · `leadgen connect google`",
        " csv — local CSV table view",
        " xlsx — local Excel workbook (.xlsx)",
        " pdf — local printable lead sheet (.pdf)",
        "\nCombine with commas, e.g. --to notion,gsheets,xlsx,pdf",
    )
    for line in listing:
        print(line)
    return 0
171
+
172
+
173
def build_parser():
    """Build the ``leadgen`` argument parser with all of its sub-commands.

    Each sub-parser stores its handler in ``func`` via ``set_defaults`` so the
    caller can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(
        prog="leadgen",
        description="Apollo-class local lead-gen from Google Maps (no API).",
    )
    parser.add_argument("--version", action="version", version=f"leadgen {__version__}")
    commands = parser.add_subparsers(dest="cmd")

    # run
    run_cmd = commands.add_parser("run", help="run a lead-gen job")
    run_cmd.add_argument("--niche", required=True, help='business type, e.g. "cafe"')
    run_cmd.add_argument("--location", required=True, help='city/area, e.g. "Kolkata"')
    run_cmd.add_argument(
        "--limit", type=int, default=30,
        help=f"how many NEW leads (max {config.MAX_LIMIT})",
    )
    run_cmd.add_argument(
        "--website", choices=["without", "with", "any"], default="without",
        help="keep businesses without a website (default), with one, or any",
    )
    run_cmd.add_argument(
        "--to", default="notion",
        help="connector(s), comma list: notion,gsheets,csv,xlsx,pdf",
    )
    run_cmd.add_argument(
        "--fields", default="default",
        help="default|all|apollo|<comma,list of keys>",
    )
    run_cmd.add_argument(
        "--cc", default=None,
        help="country calling code (default 91 → Region India)",
    )
    run_cmd.add_argument("--show", action="store_true", help="show the browser window")
    run_cmd.add_argument("--no-reviews", action="store_true", help="skip review snippets (faster)")
    run_cmd.add_argument("--email", default=None, help="email a summary when the job finishes")
    run_cmd.add_argument("--json", action="store_true", help="stream JSON events (for AI agents)")
    run_cmd.set_defaults(func=cmd_run)

    # connect
    connect_cmd = commands.add_parser("connect", help="connect a destination (keyless, browser-based)")
    connect_cmd.add_argument(
        "target", nargs="?", default="notion",
        help="what to connect: notion | google",
    )
    connect_cmd.add_argument(
        "--paste", action="store_true",
        help="(Notion) paste an integration token once instead of OAuth",
    )
    connect_cmd.set_defaults(func=cmd_connect)

    # disconnect
    disconnect_cmd = commands.add_parser(
        "disconnect",
        help="remove a stored connection (logout) — notion | google | all",
    )
    disconnect_cmd.add_argument(
        "target", nargs="?", default="all",
        help="what to disconnect: notion | google | all",
    )
    disconnect_cmd.set_defaults(func=cmd_disconnect)

    # doctor
    doctor_cmd = commands.add_parser("doctor", help="check environment, connectors, license")
    doctor_cmd.set_defaults(func=cmd_doctor)

    # fields
    fields_cmd = commands.add_parser("fields", help="list the field set")
    fields_cmd.add_argument("--json", action="store_true")
    fields_cmd.set_defaults(func=cmd_fields)

    # connectors
    connectors_cmd = commands.add_parser("connectors", help="list push targets")
    connectors_cmd.set_defaults(func=cmd_connectors)

    return parser
212
+
213
+
214
def main(argv=None):
    """Parse *argv* and dispatch to the selected sub-command.

    Prints the help text and returns 0 when no sub-command is given;
    otherwise returns whatever the sub-command handler returns.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if getattr(args, "cmd", None):
        # Only some sub-commands define --json; give the rest a uniform default
        # so handlers can read args.json unconditionally.
        if not hasattr(args, "json"):
            setattr(args, "json", False)
        return args.func(args)

    parser.print_help()
    return 0


if __name__ == "__main__":
    sys.exit(main())
leadgen_maps/config.py ADDED
@@ -0,0 +1,74 @@
1
+ """Configuration + .env loading for leadgen. Secrets never live in code."""
2
+
3
+ import os
4
+
5
+ MAX_LIMIT = 256 # hard cap on leads per run (product constraint)
6
+
7
+
8
def load_dotenv(explicit=None):
    """Load KEY=VALUE pairs from the first ``.env`` file found.

    Search order: *explicit* (if given), then ``.env`` in the current working
    directory and in up to five of its ancestors, then ``~/.leadgen/.env``.
    Only the first existing regular file is read. Variables already present
    in the process environment are never overwritten.

    Parsing rules: blank lines, ``#`` comment lines and lines without ``=``
    are skipped; an optional leading ``export `` is dropped from the key;
    lines with an empty key are skipped; surrounding single/double quotes are
    stripped from the value; a leading UTF-8 BOM is tolerated.

    Returns:
        The path of the file that was loaded, or ``None`` if none was found.
    """
    candidates = [explicit] if explicit else []
    directory = os.getcwd()
    for _ in range(6):  # walk up to find a shared root .env (works from subfolders)
        candidates.append(os.path.join(directory, ".env"))
        parent = os.path.dirname(directory)
        if parent == directory:  # reached the filesystem root
            break
        directory = parent
    candidates.append(os.path.expanduser(os.path.join("~", ".leadgen", ".env")))

    for path in candidates:
        # isfile (not exists): a directory named ".env" must not be opened.
        if not os.path.isfile(path):
            continue
        # utf-8-sig strips a BOM so it cannot become part of the first key.
        with open(path, encoding="utf-8-sig") as f:
            for raw in f:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                key = key.strip()
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                if not key:
                    # "=value" would make the environment write fail.
                    continue
                os.environ.setdefault(key, value.strip().strip('"').strip("'"))
        return path
    return None


# Populate os.environ at import time so the module-level settings can read it.
load_dotenv()
35
+
36
+
37
def get(name, default=""):
    """Return environment variable *name*, or *default* when it is unset."""
    try:
        return os.environ[name]
    except KeyError:
        return default
39
+
40
+
41
def get_bool(name, default=False):
    """Read environment variable *name* as a boolean.

    Returns *default* when the variable is unset. Otherwise the value is true
    only if, after trimming and lower-casing, it is one of ``1``, ``true``,
    ``yes`` or ``on``.
    """
    raw = os.environ.get(name)
    if raw is not None:
        return raw.strip().lower() in ("1", "true", "yes", "on")
    return default
46
+
47
+
48
def get_float(name, default):
    """Read environment variable *name* as a float.

    When the variable is unset, *default* itself is passed through ``float``.
    If conversion fails (``TypeError`` or ``ValueError``), *default* is
    returned unchanged.
    """
    raw = os.environ.get(name, default)
    try:
        converted = float(raw)
    except (TypeError, ValueError):
        return default
    return converted
53
+
54
+
55
def _get_int(name, default):
    """Read env var *name* as an int; return *default* if unset, empty or invalid."""
    try:
        return int(os.environ.get(name) or default)
    except (TypeError, ValueError):
        return default


# ── Connectors ───────────────────────────────────────────────────────────────
NOTION_TOKEN = get("NOTION_TOKEN")
NOTION_DATABASE_ID = get("NOTION_DATABASE_ID")

GOOGLE_SERVICE_ACCOUNT_JSON = get("GOOGLE_SERVICE_ACCOUNT_JSON")  # path to creds file
GSHEET_ID = get("GSHEET_ID")
GSHEET_TAB = get("GSHEET_TAB", "Leads")

# ── Notifications (email on completion) ──────────────────────────────────────
SMTP_HOST = get("SMTP_HOST")
# A malformed SMTP_PORT falls back to 587 instead of raising at import time,
# which would otherwise break every CLI command, not just email.
SMTP_PORT = _get_int("SMTP_PORT", 587)
SMTP_USER = get("SMTP_USER")
SMTP_PASS = get("SMTP_PASS")
NOTIFY_EMAIL = get("NOTIFY_EMAIL")

# ── Scrape defaults ──────────────────────────────────────────────────────────
DEFAULT_COUNTRY_CODE = get("DEFAULT_COUNTRY_CODE", "91")
HEADLESS = get_bool("HEADLESS", True)
MIN_DELAY = get_float("MIN_DELAY", 1.0)
MAX_DELAY = get_float("MAX_DELAY", 2.5)