delimit-cli 3.14.43 → 3.14.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1583 +0,0 @@
1
- """Social targeting engine -- discover engagement opportunities across platforms.
2
-
3
- Scans X (via RapidAPI Twttr241, falling back to the xAI Responses API), Reddit (via RapidAPI Reddit34),
4
- GitHub (via the gh CLI), Hacker News (Algolia API), and Dev.to for posts where Jamsons ventures can genuinely engage.
5
- NamePros is flagged as manual_check_needed (no API).
6
-
7
- Targets are deduplicated via fingerprint and stored in append-only JSONL.
8
- Platform configuration is user-configurable via ~/.delimit/social_target_config.json.
9
- """
10
-
11
- import copy
12
- import json
13
- import logging
14
- import os
15
- import subprocess
16
- import urllib.request
17
- import urllib.error
18
- import urllib.parse
19
- from datetime import datetime, timezone
20
- from pathlib import Path
21
- from typing import Any, Dict, List, Optional
22
-
23
logger = logging.getLogger("delimit.ai.social_target")

# All persisted state lives under ~/.delimit.
_DELIMIT_HOME = Path.home() / ".delimit"
TARGETS_FILE = _DELIMIT_HOME / "social_targets.jsonl"
SOCIAL_TARGET_CONFIG = _DELIMIT_HOME / "social_target_config.json"

# -----------------------------------------------------------------------
# User-configurable platform config
# -----------------------------------------------------------------------

# Baseline settings; a user config file is layered on top of these.
DEFAULT_CONFIG: Dict[str, Any] = {
    "platforms": {
        platform_name: {"enabled": is_enabled, "provider": provider_name}
        for platform_name, is_enabled, provider_name in (
            ("x", True, "twttr241"),
            ("reddit", True, "proxy"),
            ("github", True, "gh_cli"),
            ("hn", True, "algolia"),
            ("devto", True, "public_api"),
            ("namepros", False, "manual"),
        )
    },
    "subreddits": {},
    "github_queries": {},
    "scan_limit": 10,
    "min_engagement": {"score": 1, "comments": 2},
}
46
-
47
-
48
- def _deep_merge(base: dict, override: dict) -> dict:
49
- """Recursively merge override into base. Override values win."""
50
- result = copy.deepcopy(base)
51
- for key, value in override.items():
52
- if key in result and isinstance(result[key], dict) and isinstance(value, dict):
53
- result[key] = _deep_merge(result[key], value)
54
- else:
55
- result[key] = copy.deepcopy(value)
56
- return result
57
-
58
-
59
def _load_config() -> Dict[str, Any]:
    """Load social target config from disk, merging with defaults.

    - Starts from a deep copy of DEFAULT_CONFIG.
    - If SOCIAL_TARGET_CONFIG exists and holds a JSON object, merges it on
      top (user values win). Unreadable, invalid, or non-object content is
      logged and ignored.
    - Runs platform auto-detection and overwrites ``enabled`` for every
      platform the user did not list under ``platforms`` in their file.

    Returns:
        The effective configuration dict.
    """
    config = copy.deepcopy(DEFAULT_CONFIG)
    user_config: Dict[str, Any] = {}

    if SOCIAL_TARGET_CONFIG.exists():
        try:
            loaded = json.loads(SOCIAL_TARGET_CONFIG.read_text())
        except (json.JSONDecodeError, ValueError, OSError) as e:
            logger.warning("Failed to load social target config: %s", e)
        else:
            if isinstance(loaded, dict):
                user_config = loaded
                config = _deep_merge(config, user_config)
            else:
                # A JSON list/scalar at the root cannot be merged key by key.
                logger.warning(
                    "Ignoring social target config: expected a JSON object, got %s",
                    type(loaded).__name__,
                )

    platforms = config.get("platforms")
    if not isinstance(platforms, dict):
        # The user replaced "platforms" with a non-object; fall back to defaults
        # so the detection loop below has something sane to update.
        logger.warning("Ignoring non-object 'platforms' in social target config")
        platforms = copy.deepcopy(DEFAULT_CONFIG["platforms"])
        config["platforms"] = platforms

    # Platforms the user mentioned explicitly keep whatever the user wrote.
    # Computed once from the already-parsed file instead of re-reading it for
    # every detected platform.
    user_platforms = user_config.get("platforms")
    overrides = set(user_platforms) if isinstance(user_platforms, dict) else set()

    # Auto-detect available platforms and disable those without access
    for platform, info in _detect_available_platforms().items():
        entry = platforms.get(platform)
        if isinstance(entry, dict) and platform not in overrides:
            entry["enabled"] = info["available"]

    return config
87
-
88
-
89
def _load_user_platform_overrides() -> set:
    """Return the set of platform names explicitly set in user config.

    Returns an empty set when the config file is missing, unreadable, not
    valid JSON, not a JSON object, or when its ``platforms`` entry is not
    an object.
    """
    if not SOCIAL_TARGET_CONFIG.exists():
        return set()
    try:
        user_config = json.loads(SOCIAL_TARGET_CONFIG.read_text())
    except (json.JSONDecodeError, ValueError, OSError):
        return set()

    # Guard both levels: valid JSON is not necessarily an object.
    platforms = user_config.get("platforms") if isinstance(user_config, dict) else None
    return set(platforms) if isinstance(platforms, dict) else set()
98
-
99
-
100
def _detect_available_platforms() -> Dict[str, Dict[str, Any]]:
    """Check which platforms have the necessary credentials/access.

    Returns:
        Dict of platform -> {"available": bool, "provider": str, "reason": str}
        for x, reddit, github, hn, devto and namepros.
    """
    result: Dict[str, Dict[str, Any]] = {}

    # X: prefer the RapidAPI (Twttr241) key, otherwise fall back to an xAI key.
    rapidapi_key = _get_rapidapi_key()
    if rapidapi_key:
        result["x"] = {"available": True, "provider": "twttr241", "reason": "RapidAPI key found"}
    elif _get_xai_api_key():
        result["x"] = {"available": True, "provider": "xai", "reason": "xAI API key found (fallback)"}
    else:
        result["x"] = {"available": False, "provider": "none", "reason": "No RapidAPI or xAI API key"}

    # Reddit: explicit proxy env var, then a responding local proxy, then RapidAPI.
    proxy_url = os.environ.get("DELIMIT_REDDIT_PROXY", "")
    if proxy_url:
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "DELIMIT_REDDIT_PROXY env set"}
    elif _test_reddit_proxy():
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "Local proxy responding"}
    elif rapidapi_key:
        result["reddit"] = {"available": True, "provider": "rapidapi", "reason": "RapidAPI key found (fallback)"}
    else:
        result["reddit"] = {"available": False, "provider": "none", "reason": "No proxy or RapidAPI key"}

    # GitHub: available only when `gh auth status` exits 0.
    try:
        proc = subprocess.run(
            ["gh", "auth", "status"],
            capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        gh_available, gh_reason = False, "gh CLI not found"
    except subprocess.TimeoutExpired:
        # Previously reported as "not found", which sent users chasing the wrong problem.
        gh_available, gh_reason = False, "gh auth status timed out"
    except OSError as e:
        # e.g. PermissionError: the binary exists but cannot be executed.
        gh_available, gh_reason = False, f"gh CLI could not be run: {e}"
    else:
        gh_available = proc.returncode == 0
        gh_reason = "gh authenticated" if gh_available else "gh not authenticated"
    result["github"] = {"available": gh_available, "provider": "gh_cli", "reason": gh_reason}

    # HN: always available (public API, no auth)
    result["hn"] = {"available": True, "provider": "algolia", "reason": "Public API, no auth needed"}

    # Dev.to: always available (public API, no auth)
    result["devto"] = {"available": True, "provider": "public_api", "reason": "Public API, no auth needed"}

    # NamePros: manual only
    result["namepros"] = {"available": False, "provider": "manual", "reason": "No API, manual check only"}

    return result
153
-
154
-
155
def _save_config(config: Dict[str, Any]) -> None:
    """Serialize ``config`` as indented JSON into SOCIAL_TARGET_CONFIG.

    The parent directory is created first if it does not exist yet.
    """
    config_dir = SOCIAL_TARGET_CONFIG.parent
    config_dir.mkdir(parents=True, exist_ok=True)
    SOCIAL_TARGET_CONFIG.write_text(f"{json.dumps(config, indent=2)}\n")
159
-
160
-
161
def get_config_status() -> Dict[str, Any]:
    """Report the effective config plus platform availability (used by the MCP tool).

    The result has four keys: ``config`` (the merged configuration),
    ``platform_availability`` (a fresh detection run), ``config_file``
    (the config path as a string) and ``config_file_exists``.
    """
    status: Dict[str, Any] = {}
    status["config"] = _load_config()
    status["platform_availability"] = _detect_available_platforms()
    status["config_file"] = str(SOCIAL_TARGET_CONFIG)
    status["config_file_exists"] = SOCIAL_TARGET_CONFIG.exists()
    return status
171
-
172
-
173
def update_platform_config(
    platform: str,
    enabled: Optional[bool] = None,
    provider: Optional[str] = None,
) -> Dict[str, Any]:
    """Change one platform's settings and write the config back to disk.

    Args:
        platform: Platform key; an unknown key gets a new entry that starts
            enabled with an empty provider.
        enabled: New enabled flag, or None to leave it as is.
        provider: New provider name; None or an empty string leaves it as is.

    Returns:
        ``{"updated": True, "platform": ..., "config": <that platform's entry>}``.
    """
    # NOTE(review): the dict saved below is the merged config (defaults plus
    # auto-detected flags), not just the user's own overrides - confirm that
    # persisting all of it is intended.
    config = _load_config()
    entry = config["platforms"].setdefault(platform, {"enabled": True, "provider": ""})

    if enabled is not None:
        entry["enabled"] = enabled
    if provider:
        entry["provider"] = provider

    _save_config(config)
    return {"updated": True, "platform": platform, "config": entry}
190
-
191
-
192
def add_subreddits(venture: str, subreddits: List[str]) -> Dict[str, Any]:
    """Add subreddits to scan for a venture and save the config.

    Names already configured for the venture are skipped, and a name that
    appears more than once in ``subreddits`` is added only once. Input
    order is preserved.

    Args:
        venture: Venture key under the ``subreddits`` section.
        subreddits: Subreddit names to add.

    Returns:
        ``{"venture": ..., "added": <newly added names>, "total": <full list>}``.
    """
    config = _load_config()
    # setdefault on both levels: tolerate a config without a "subreddits"
    # section as well as a venture that has no list yet.
    current = config.setdefault("subreddits", {}).setdefault(venture, [])

    seen = set(current)
    new_subs: List[str] = []
    for sub in subreddits:
        if sub not in seen:
            seen.add(sub)
            new_subs.append(sub)

    current.extend(new_subs)
    _save_config(config)
    return {"venture": venture, "added": new_subs, "total": current}
202
-
203
- # -----------------------------------------------------------------------
204
- # Per-venture routing config (loaded from ~/.delimit/social_target_ventures.json)
205
- # -----------------------------------------------------------------------
206
-
207
- _VENTURE_CONFIG_FILE = Path.home() / ".delimit" / "social_target_ventures.json"
208
-
209
-
210
- def _load_venture_config() -> Dict[str, Any]:
211
- """Load venture config from ~/.delimit/social_target_ventures.json."""
212
- if _VENTURE_CONFIG_FILE.exists():
213
- try:
214
- return json.loads(_VENTURE_CONFIG_FILE.read_text())
215
- except (json.JSONDecodeError, OSError) as e:
216
- logger.warning("Failed to load venture config: %s", e)
217
- return {}
218
-
219
-
220
def _load_internal_users() -> set:
    """Return the ``internal_users`` entries of the venture config as a set."""
    usernames = _load_venture_config().get("internal_users", [])
    return set(usernames)
224
-
225
-
226
def _load_github_queries() -> Dict[str, List[str]]:
    """Return the ``github_queries`` section of the venture config ({} if absent)."""
    venture_settings = _load_venture_config()
    queries = venture_settings.get("github_queries", {})
    return queries
230
-
231
-
232
def _load_own_repos() -> List[str]:
    """Return the ``own_repos`` list of the venture config ([] if absent)."""
    venture_settings = _load_venture_config()
    repos = venture_settings.get("own_repos", [])
    return repos
236
-
237
-
238
# Import-time snapshot of the venture config. The file is read and parsed
# once here instead of once per constant, so a broken file is reported a
# single time rather than four times.
_VENTURE_SETTINGS = _load_venture_config()
VENTURE_CONFIG = _VENTURE_SETTINGS.get("ventures", {})
VENTURE_GITHUB_QUERIES = _VENTURE_SETTINGS.get("github_queries", {})
OWN_REPOS = _VENTURE_SETTINGS.get("own_repos", [])
INTERNAL_USERS = set(_VENTURE_SETTINGS.get("internal_users", []))
242
-
243
-
244
- # -----------------------------------------------------------------------
245
- # JSONL persistence helpers
246
- # -----------------------------------------------------------------------
247
-
248
def _load_known_fingerprints() -> set:
    """Load all fingerprints from the targets file for dedup.

    Reads TARGETS_FILE line by line. Blank lines, lines that are not valid
    JSON, and JSON values that are not objects are skipped individually, so
    one bad line no longer discards the fingerprints that follow it.

    Returns:
        The set of non-empty string ``fingerprint`` values found. Empty when
        the file does not exist; partial if reading fails midway (the
        failure is logged).
    """
    fps: set = set()
    if not TARGETS_FILE.exists():
        return fps
    try:
        with open(TARGETS_FILE) as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    entry = json.loads(line)
                except (json.JSONDecodeError, ValueError):
                    continue
                if not isinstance(entry, dict):
                    continue
                fp = entry.get("fingerprint", "")
                if isinstance(fp, str) and fp:
                    fps.add(fp)
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: dedup still works with whatever was read so far.
        logger.warning("Failed to read social targets file: %s", e)
    return fps
267
-
268
-
269
def _append_target(target: Dict[str, Any]) -> None:
    """Write ``target`` as one JSON line at the end of TARGETS_FILE.

    The parent directory is created on demand.
    """
    TARGETS_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(TARGETS_FILE, "a") as sink:
        sink.write(f"{json.dumps(target)}\n")
274
-
275
-
276
- # -----------------------------------------------------------------------
277
- # Venture routing
278
- # -----------------------------------------------------------------------
279
-
280
def _route_venture(text: str) -> tuple:
    """Match text against venture topics.

    Each venture in VENTURE_CONFIG is scored by how many of its ``topics``
    occur (case-insensitively, as substrings) in ``text``. A venture is
    skipped entirely when any of its ``exclude_terms`` occurs. The highest
    score wins; on a tie the venture seen first is kept.

    Returns:
        ``(venture, confidence, rationale)``. ``(None, 0.0, ...)`` when no
        venture matches. Confidence is ``0.5 + 0.15 * matches``, capped at
        0.95; the rationale lists up to three matched topics.
    """
    text_lower = text.lower()
    best_venture = None
    best_score = 0
    best_matches: List[str] = []

    for venture, config in VENTURE_CONFIG.items():
        # Check exclude terms first
        if any(ex.lower() in text_lower for ex in config.get("exclude_terms", [])):
            continue
        # .get: a venture without a "topics" list simply never matches,
        # mirroring how a missing "exclude_terms" list is tolerated above.
        matches = [t for t in config.get("topics", []) if t.lower() in text_lower]
        score = len(matches)
        if score > best_score:
            best_score = score
            best_venture = venture
            best_matches = matches

    if not best_venture:
        return None, 0.0, "No venture topic match"

    confidence = min(0.95, 0.5 + (best_score * 0.15))
    rationale = f"Matched topics: {', '.join(best_matches[:3])}"
    return best_venture, confidence, rationale
304
-
305
-
306
- def _classify_target(text: str, author_followers: int = 0) -> str:
307
- """Classify a target as reply, strategic, or both."""
308
- is_question = any(q in text.lower() for q in ["?", "how do", "anyone", "looking for", "recommendations"])
309
- high_reach = author_followers > 5000
310
-
311
- if is_question and high_reach:
312
- return "both"
313
- if high_reach:
314
- return "strategic"
315
- if is_question:
316
- return "reply"
317
- return "reply"
318
-
319
-
320
- # -----------------------------------------------------------------------
321
- # xAI API key resolution
322
- # -----------------------------------------------------------------------
323
-
324
- def _get_xai_api_key() -> str:
325
- """Resolve xAI API key from env or .mcp.json."""
326
- key = os.environ.get("XAI_API_KEY", "")
327
- if key:
328
- return key
329
- # Try .mcp.json
330
- mcp_path = Path.home() / ".mcp.json"
331
- if not mcp_path.exists():
332
- mcp_path = Path("/root/.mcp.json")
333
- if mcp_path.exists():
334
- try:
335
- cfg = json.loads(mcp_path.read_text())
336
- key = (cfg.get("mcpServers", {})
337
- .get("xai", {})
338
- .get("env", {})
339
- .get("XAI_API_KEY", ""))
340
- if key:
341
- return key
342
- # Also check delimit server env
343
- key = (cfg.get("mcpServers", {})
344
- .get("delimit", {})
345
- .get("env", {})
346
- .get("XAI_API_KEY", ""))
347
- except Exception:
348
- pass
349
- return key
350
-
351
-
352
- # -----------------------------------------------------------------------
353
- # Platform scanners
354
- # -----------------------------------------------------------------------
355
-
356
def _scan_x_twttr(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan X/Twitter via RapidAPI Twttr241 (free, structured data).

    The first five queries are OR-ed into one search. Each tweet that has
    an id and text, is not already in ``known_fps``, and routes to a
    venture becomes a target dict; its fingerprint is added to
    ``known_fps``.

    Args:
        queries: Search phrases.
        limit: Requested result count; collection stops once this many
            targets have been gathered.
        known_fps: Fingerprints already seen (mutated in place).

    Returns:
        The collected targets. Empty when no RapidAPI key is available or
        the request fails. If parsing fails partway, the targets gathered
        so far are returned, because their fingerprints are already in
        ``known_fps`` and discarding them would hide them from every later
        scanner in this run.
    """
    api_key = _get_rapidapi_key()
    if not api_key:
        return []

    targets: List[Dict] = []
    combined_query = " OR ".join(queries[:5])
    encoded_q = urllib.parse.quote(combined_query)
    url = f"https://twitter241.p.rapidapi.com/search-v2?query={encoded_q}&type=Latest&count={limit}"

    req = urllib.request.Request(
        url,
        headers={
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "twitter241.p.rapidapi.com",
            "User-Agent": "Delimit/3.11.0",
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            result = json.loads(resp.read())

        # Navigate: result.timeline.instructions[].entries[].content.itemContent.tweet_results.result
        instructions = (
            result.get("result", {})
            .get("timeline", {})
            .get("instructions", [])
        )

        for instruction in instructions:
            for entry in instruction.get("entries", []):
                tweet_result = (
                    entry.get("content", {})
                    .get("itemContent", {})
                    .get("tweet_results", {})
                    .get("result", {})
                )
                if not tweet_result:
                    continue

                legacy = tweet_result.get("legacy", {})
                user_legacy = (
                    tweet_result.get("core", {})
                    .get("user_results", {})
                    .get("result", {})
                    .get("legacy", {})
                )

                tweet_id = legacy.get("id_str", "")
                full_text = legacy.get("full_text", "")
                if not tweet_id or not full_text:
                    continue

                fp = f"x:{tweet_id}"
                if fp in known_fps:
                    continue

                venture, confidence, rationale = _route_venture(full_text)
                if not venture:
                    continue

                screen_name = user_legacy.get("screen_name", "")
                followers = user_legacy.get("followers_count", 0) or 0
                targets.append({
                    "fingerprint": fp,
                    "platform": "x",
                    "source_id": tweet_id,
                    "canonical_url": (
                        f"https://x.com/{screen_name}/status/{tweet_id}"
                        if screen_name
                        else f"https://x.com/i/status/{tweet_id}"
                    ),
                    "author": f"@{screen_name}" if screen_name else "",
                    "author_followers": followers,
                    "content_snippet": full_text[:300],
                    "venture": venture,
                    "classification": _classify_target(full_text, followers),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

                if len(targets) >= limit:
                    break
            if len(targets) >= limit:
                break

    except Exception as e:
        # Best-effort scanner: log and hand back whatever was collected.
        logger.warning("Twttr241 scan failed: %s", e)

    return targets
454
-
455
-
456
def _xai_response_text(result: Any) -> str:
    """Extract the model's text from an xAI response payload ("" if none)."""
    if not isinstance(result, dict):
        return ""
    text = ""
    # Responses API format: "output" is a list of items; for each message
    # item the first output_text part is taken (a later message overrides
    # an earlier one).
    for item in result.get("output", []):
        if item.get("type") != "message":
            continue
        for content in item.get("content", []):
            if content.get("type") == "output_text":
                text = content.get("text", "")
                break
    if text:
        return text
    # Fallback: chat completions format
    for choice in result.get("choices", []):
        text = choice.get("message", {}).get("content", "")
        if text:
            return text
    return ""


def _first_present(record: Dict, keys: tuple, default: Any = "") -> Any:
    """Return the first value in ``record`` under ``keys`` that is not None/""."""
    for key in keys:
        value = record.get(key)
        if value is not None and value != "":
            return value
    return default


def _coerce_follower_count(value: Any) -> int:
    """Convert a model-reported follower count to int; 0 when it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def _scan_x(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan X/Twitter -- uses config to determine provider, falls back to xAI x_search.

    When the configured provider for ``x`` is ``twttr241`` (the default) or
    empty, Twttr241 is tried first and its targets returned if it finds
    any. Otherwise, or when Twttr241 yields nothing, the xAI Responses API
    is asked to search X and return a JSON array of posts.

    Args:
        queries: Search phrases (the first five are used).
        limit: Maximum number of posts to request/parse.
        known_fps: Fingerprints already seen (mutated in place).
        config: Effective config dict; only ``platforms.x.provider`` is read.

    Returns:
        A list of target dicts. Failures are reported in-band as
        ``{"error": ..., "platform": "x"}`` entries rather than raised.
    """
    platform_config = (config or {}).get("platforms", {}).get("x", {})
    provider = platform_config.get("provider", "twttr241")

    # Try Twttr241 first if configured (or default)
    if provider in ("twttr241", ""):
        twttr_targets = _scan_x_twttr(queries, limit, known_fps)
        if twttr_targets:
            return twttr_targets

    # Fallback or explicit xAI provider: xAI Responses API with x_search
    api_key = _get_xai_api_key()
    if not api_key:
        return [{"error": "No X scanner available (Twttr241 failed, XAI_API_KEY not configured)", "platform": "x"}]

    targets: List[Dict] = []
    # Batch queries to avoid too many API calls
    combined_query = " OR ".join(f'"{q}"' for q in queries[:5])
    prompt = (
        f"Search X/Twitter for recent posts about: {combined_query}. "
        f"Find up to {limit} posts from the last 24 hours that are asking questions, "
        f"sharing problems, or discussing these topics. "
        f"For each post, return the tweet ID, author handle, author follower count, "
        f"and a snippet of the content. Format as JSON array."
    )

    # NOTE(review): this posts a chat-style "messages" payload to the
    # /v1/responses endpoint - confirm against the xAI API reference that
    # the endpoint accepts this request shape.
    data = json.dumps({
        "model": "grok-4-0709",
        "tools": [{"type": "x_search"}],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 4096,
    }).encode()

    req = urllib.request.Request(
        "https://api.x.ai/v1/responses",
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": "Delimit/3.11.0",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read())

        response_text = _xai_response_text(result)
        if not response_text:
            logger.warning("xAI returned empty response for social targeting")
            return targets

        # The array is model-generated, so every field is validated per
        # tweet: one odd entry is skipped instead of aborting the batch.
        for tweet in _extract_json_array(response_text)[:limit]:
            if not isinstance(tweet, dict):
                continue

            tweet_id = str(_first_present(tweet, ("id", "tweet_id")))
            fp = f"x:{tweet_id}"
            if not tweet_id or fp in known_fps:
                continue

            author = _first_present(tweet, ("author", "handle", "username"))
            if not isinstance(author, str):
                author = ""
            if author and not author.startswith("@"):
                author = f"@{author}"

            snippet = _first_present(tweet, ("content", "text", "snippet"))
            if not isinstance(snippet, str):
                snippet = str(snippet)

            followers = _coerce_follower_count(
                _first_present(tweet, ("followers", "author_followers", "follower_count"), 0)
            )

            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            targets.append({
                "fingerprint": fp,
                "platform": "x",
                "source_id": tweet_id,
                "canonical_url": (
                    f"https://x.com/{author.lstrip('@')}/status/{tweet_id}"
                    if author
                    else f"https://x.com/i/status/{tweet_id}"
                ),
                "author": author,
                "author_followers": followers,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, followers),
                "confidence": confidence,
                "rationale": rationale,
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            })
            known_fps.add(fp)

    except urllib.error.HTTPError as e:
        logger.error("xAI API error: %s %s", e.code, e.reason)
        # Try to read error body for details
        try:
            err_body = e.read().decode()[:200]
            logger.error("xAI error body: %s", err_body)
        except Exception:
            pass
        targets.append({"error": f"xAI API error: {e.code} {e.reason}", "platform": "x"})
    except urllib.error.URLError as e:
        logger.error("xAI connection error: %s", e.reason)
        targets.append({"error": f"xAI connection error: {e.reason}", "platform": "x"})
    except Exception as e:
        logger.error("xAI scan failed: %s", e)
        targets.append({"error": f"xAI scan error: {e}", "platform": "x"})

    return targets
582
-
583
-
584
- def _scan_hn(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
585
- """Scan Hacker News via Algolia API."""
586
- targets: List[Dict] = []
587
-
588
- for query in queries[:3]: # Limit query count
589
- encoded_q = urllib.parse.quote(query)
590
- url = f"https://hn.algolia.com/api/v1/search_by_date?tags=story&query={encoded_q}&hitsPerPage={limit}"
591
- req = urllib.request.Request(
592
- url,
593
- headers={"User-Agent": "Delimit/3.11.0"},
594
- )
595
- try:
596
- with urllib.request.urlopen(req, timeout=15) as resp:
597
- data = json.loads(resp.read())
598
-
599
- for hit in data.get("hits", [])[:limit]:
600
- story_id = str(hit.get("objectID", ""))
601
- fp = f"hn:{story_id}"
602
- if fp in known_fps or not story_id:
603
- continue
604
-
605
- title = hit.get("title", "")
606
- author = hit.get("author", "")
607
- points = hit.get("points", 0) or 0
608
- snippet = title
609
-
610
- venture, confidence, rationale = _route_venture(title)
611
- if not venture:
612
- continue
613
-
614
- target = {
615
- "fingerprint": fp,
616
- "platform": "hn",
617
- "source_id": story_id,
618
- "canonical_url": f"https://news.ycombinator.com/item?id={story_id}",
619
- "author": author,
620
- "author_followers": points, # Use points as proxy for reach
621
- "content_snippet": snippet[:300],
622
- "venture": venture,
623
- "classification": _classify_target(snippet, points),
624
- "confidence": confidence,
625
- "rationale": rationale,
626
- "manual_check_needed": False,
627
- "first_seen": datetime.now(timezone.utc).isoformat(),
628
- "status": "new",
629
- }
630
- targets.append(target)
631
- known_fps.add(fp)
632
-
633
- if len(targets) >= limit:
634
- break
635
-
636
- except Exception as e:
637
- logger.error("HN scan error for query '%s': %s", query, e)
638
- continue
639
-
640
- if len(targets) >= limit:
641
- break
642
-
643
- return targets
644
-
645
-
646
- def _scan_devto(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
647
- """Scan Dev.to for recent articles matching venture topics."""
648
- targets: List[Dict] = []
649
-
650
- for query in queries[:3]:
651
- # Dev.to API uses tag-based search
652
- tag = query.lower().replace(" ", "").replace("-", "")[:20]
653
- url = f"https://dev.to/api/articles?tag={urllib.parse.quote(tag)}&top=1&per_page={limit}"
654
- req = urllib.request.Request(
655
- url,
656
- headers={"User-Agent": "Delimit/3.11.0"},
657
- )
658
- try:
659
- with urllib.request.urlopen(req, timeout=15) as resp:
660
- articles = json.loads(resp.read())
661
-
662
- if not isinstance(articles, list):
663
- continue
664
-
665
- for article in articles[:limit]:
666
- article_id = str(article.get("id", ""))
667
- fp = f"devto:{article_id}"
668
- if fp in known_fps or not article_id:
669
- continue
670
-
671
- title = article.get("title", "")
672
- description = article.get("description", "")
673
- author = article.get("user", {}).get("username", "")
674
- reactions = article.get("positive_reactions_count", 0) or 0
675
- snippet = f"{title} - {description}"
676
-
677
- venture, confidence, rationale = _route_venture(snippet)
678
- if not venture:
679
- continue
680
-
681
- target = {
682
- "fingerprint": fp,
683
- "platform": "devto",
684
- "source_id": article_id,
685
- "canonical_url": article.get("url", f"https://dev.to/{author}/{article.get('slug', article_id)}"),
686
- "author": author,
687
- "author_followers": reactions,
688
- "content_snippet": snippet[:300],
689
- "venture": venture,
690
- "classification": _classify_target(snippet, reactions),
691
- "confidence": confidence,
692
- "rationale": rationale,
693
- "manual_check_needed": False,
694
- "first_seen": datetime.now(timezone.utc).isoformat(),
695
- "status": "new",
696
- }
697
- targets.append(target)
698
- known_fps.add(fp)
699
-
700
- if len(targets) >= limit:
701
- break
702
-
703
- except Exception as e:
704
- logger.error("Dev.to scan error for tag '%s': %s", tag, e)
705
- continue
706
-
707
- if len(targets) >= limit:
708
- break
709
-
710
- return targets
711
-
712
-
713
def _gh_api(endpoint: str) -> Any:
    """Run ``gh api <endpoint>`` and return the decoded JSON response.

    Returns None (after logging) when the command exits non-zero, exceeds
    the 30 second timeout, prints something that is not JSON, or when the
    ``gh`` executable cannot be found.
    """
    command = ["gh", "api", endpoint]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
        if completed.returncode == 0:
            return json.loads(completed.stdout)
        logger.warning("gh api %s failed: %s", endpoint, completed.stderr[:200])
    except subprocess.TimeoutExpired:
        logger.error("gh api %s timed out", endpoint)
    except (json.JSONDecodeError, FileNotFoundError) as exc:
        logger.error("gh api %s error: %s", endpoint, exc)
    return None
732
-
733
-
734
- def _scan_github(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
735
- """Scan GitHub for repos and issues matching venture topics via gh CLI."""
736
- targets: List[Dict] = []
737
-
738
- # Phase 1: Repository search
739
- for query in queries[:5]:
740
- if len(targets) >= limit:
741
- break
742
- encoded_q = urllib.parse.quote(query)
743
- endpoint = f"search/repositories?q={encoded_q}&sort=updated&per_page={min(limit, 10)}"
744
- data = _gh_api(endpoint)
745
- if not data or not isinstance(data, dict):
746
- continue
747
-
748
- for repo in data.get("items", []):
749
- full_name = repo.get("full_name", "")
750
- fp = f"github:repo:{full_name}"
751
- if fp in known_fps or not full_name:
752
- continue
753
-
754
- stars = repo.get("stargazers_count", 0) or 0
755
- description = repo.get("description", "") or ""
756
-
757
- # Skip noise: 0 stars and no description
758
- if stars == 0 and not description:
759
- continue
760
-
761
- snippet = f"{full_name}: {description}"
762
- venture, confidence, rationale = _route_venture(snippet)
763
- if not venture:
764
- continue
765
-
766
- target = {
767
- "fingerprint": fp,
768
- "platform": "github",
769
- "source_id": full_name,
770
- "canonical_url": repo.get("html_url", f"https://github.com/{full_name}"),
771
- "author": repo.get("owner", {}).get("login", ""),
772
- "author_followers": stars,
773
- "content_snippet": snippet[:300],
774
- "venture": venture,
775
- "classification": _classify_target(snippet, stars),
776
- "confidence": confidence,
777
- "rationale": f"repo search: {rationale}",
778
- "manual_check_needed": False,
779
- "first_seen": datetime.now(timezone.utc).isoformat(),
780
- "status": "new",
781
- }
782
- targets.append(target)
783
- known_fps.add(fp)
784
-
785
- if len(targets) >= limit:
786
- break
787
-
788
- # Phase 2: Issue/discussion search
789
- for query in queries[:3]:
790
- if len(targets) >= limit:
791
- break
792
- encoded_q = urllib.parse.quote(query)
793
- endpoint = f"search/issues?q={encoded_q}&sort=created&per_page={min(limit, 10)}"
794
- data = _gh_api(endpoint)
795
- if not data or not isinstance(data, dict):
796
- continue
797
-
798
- for issue in data.get("items", []):
799
- number = issue.get("number", "")
800
- html_url = issue.get("html_url", "")
801
- # Extract repo from URL: https://github.com/owner/repo/issues/123
802
- repo_name = "/".join(html_url.split("/")[3:5]) if html_url else ""
803
- fp = f"github:issue:{repo_name}:{number}"
804
- if fp in known_fps or not number:
805
- continue
806
-
807
- title = issue.get("title", "")
808
- body = (issue.get("body") or "")[:200]
809
- author = issue.get("user", {}).get("login", "")
810
- reactions = issue.get("reactions", {}).get("total_count", 0) or 0
811
- snippet = f"{title} {body}".strip()
812
-
813
- venture, confidence, rationale = _route_venture(snippet)
814
- if not venture:
815
- continue
816
-
817
- target = {
818
- "fingerprint": fp,
819
- "platform": "github",
820
- "source_id": f"{repo_name}#{number}",
821
- "canonical_url": html_url,
822
- "author": author,
823
- "author_followers": reactions,
824
- "content_snippet": snippet[:300],
825
- "venture": venture,
826
- "classification": _classify_target(snippet, reactions),
827
- "confidence": confidence,
828
- "rationale": f"issue search: {rationale}",
829
- "manual_check_needed": False,
830
- "first_seen": datetime.now(timezone.utc).isoformat(),
831
- "status": "new",
832
- }
833
- targets.append(target)
834
- known_fps.add(fp)
835
-
836
- if len(targets) >= limit:
837
- break
838
-
839
- return targets
840
-
841
-
842
def _monitor_own_repos(known_fps: set) -> List[Dict]:
    """Monitor our own repos for external engagement (forks, stars, issues, PRs).

    For every repo in OWN_REPOS the ten newest forks, one page of ten
    stargazers, and the ten most recently created issues/PRs are fetched
    through ``gh api``. Activity by users in INTERNAL_USERS and items
    already in ``known_fps`` are skipped.

    Args:
        known_fps: Fingerprints already seen (mutated in place).

    Returns:
        Target dicts for each new fork, star, issue or PR.
    """
    targets: List[Dict] = []

    for repo in OWN_REPOS:
        repo_short = repo.split("/")[-1]

        # Check forks
        forks_data = _gh_api(f"repos/{repo}/forks?sort=newest&per_page=10")
        if isinstance(forks_data, list):
            for fork in forks_data:
                # `or {}`: tolerate a null nested object from the API.
                user = (fork.get("owner") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:fork:{user}:{repo_short}"
                if fp in known_fps:
                    continue

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": fork.get("full_name", ""),
                    "canonical_url": fork.get("html_url", ""),
                    "author": user,
                    "author_followers": fork.get("stargazers_count", 0) or 0,
                    "content_snippet": f"{user} forked {repo}",
                    "venture": "delimit",
                    "classification": "strategic",
                    "confidence": 0.7,
                    "rationale": f"External fork of {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

        # Check stargazers. The endpoint used to carry a "-H='Accept: ...'"
        # suffix, but inside the URL that is only a stray query parameter:
        # request headers can only be set through gh's own -H flag, which
        # _gh_api does not expose. It has been dropped.
        stars_data = _gh_api(f"repos/{repo}/stargazers?per_page=10")
        if isinstance(stars_data, list):
            for star in stars_data:
                # Accept both shapes: an entry wrapping the account under
                # "user", or the account object itself.
                user_obj = (star.get("user") or star) if isinstance(star, dict) else {}
                user = user_obj.get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:star:{user}:{repo_short}"
                if fp in known_fps:
                    continue

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": f"{user}/star/{repo}",
                    "canonical_url": f"https://github.com/{user}",
                    "author": user,
                    "author_followers": 0,
                    "content_snippet": f"{user} starred {repo}",
                    "venture": "delimit",
                    "classification": "strategic",
                    "confidence": 0.6,
                    "rationale": f"External star on {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

        # Check issues and PRs from external users
        issues_data = _gh_api(f"repos/{repo}/issues?state=all&sort=created&direction=desc&per_page=10")
        if isinstance(issues_data, list):
            for issue in issues_data:
                user = (issue.get("user") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                number = issue.get("number", "")
                fp = f"github:issue:{repo}:{number}"
                if fp in known_fps or not number:
                    continue

                title = issue.get("title") or ""
                # An item carrying a "pull_request" key is treated as a PR.
                kind = "PR" if "pull_request" in issue else "issue"

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": f"{repo}#{number}",
                    "canonical_url": issue.get("html_url", ""),
                    "author": user,
                    "author_followers": (issue.get("reactions") or {}).get("total_count", 0) or 0,
                    "content_snippet": f"{user} opened {kind}: {title}"[:300],
                    "venture": "delimit",
                    "classification": "reply",
                    "confidence": 0.8,
                    "rationale": f"External {kind} on {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

    return targets
946
-
947
-
948
- def _get_rapidapi_key() -> str:
949
- """Load RapidAPI key from secrets broker or env."""
950
- import base64
951
- # Primary: delimit secrets broker
952
- secrets_file = Path.home() / ".delimit" / "secrets" / "rapidapi-reddit.json"
953
- if secrets_file.exists():
954
- try:
955
- data = json.loads(secrets_file.read_text())
956
- encrypted = data.get("encrypted_value", "")
957
- if encrypted:
958
- return base64.b64decode(encrypted).decode()
959
- return data.get("value", "")
960
- except Exception:
961
- pass
962
- return os.environ.get("RAPIDAPI_KEY", "")
963
-
964
-
965
# Venture name -> subreddits scanned on that venture's behalf.
# Keep total under 30 subs to stay well under rate limits (~1 req/sub/scan)
VENTURE_SUBREDDITS = {
    "delimit": [
        "ClaudeAI",
        "vibecoding",
        "devops",
        "programming",
        "AI_Agents",
        "ContextEngineering",
        "cursor",
        "LocalLLaMA",
        "SaaS",
        "opensource",
        # "ChatGPTCoding",  # requires high karma to post
    ],
    "domainvested": [
        "Domains",
        "flipping",
        "Entrepreneur",
        "SideProject",
    ],
    "wirereport": [
        "sportsbook",
        "sportsbetting",
    ],
    "livetube": [
        "Twitch",
        "livestreaming",
    ],
    "stakeone": [
        "harmony_one",
        "CryptoCurrency",
        "defi",
    ],
}
987
-
988
-
989
# Reddit fetch endpoint, overridable through the DELIMIT_REDDIT_PROXY env var.
# Internal-only Reddit proxy via SSH tunnel to residential IP.
# This is NOT shipped to external users — it only runs on the founder's gateway server.
# External users would configure their own Reddit API credentials.
REDDIT_PROXY = os.getenv(
    "DELIMIT_REDDIT_PROXY",
    "http://127.0.0.1:4819/reddit-fetch",
)
993
-
994
-
995
def _scan_reddit(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan Reddit via the residential proxy, falling back to RapidAPI.

    Provider selection (``config["platforms"]["reddit"]["provider"]``):
      - ``"rapidapi"``: use RapidAPI Reddit34 directly.
      - anything else (the default ``"proxy"``, and also ``"json_api"``, which
        has no dedicated code path here): try the proxy first and fall back to
        RapidAPI when the proxy health check fails.

    When no RapidAPI key is available on a path that needs one, manual-check
    placeholders are returned instead.

    Args:
        queries: Search queries; forwarded to the selected scanner.
        limit: Maximum number of targets to return.
        known_fps: Already-seen fingerprints; forwarded to the scanner.
        config: Optional scan config. ``config["subreddits"]`` maps venture
            names to extra subreddits that are merged into
            ``VENTURE_SUBREDDITS`` for the duration of this call.

    Returns:
        List of target dicts.
    """
    cfg = config or {}
    provider = cfg.get("platforms", {}).get("reddit", {}).get("provider", "proxy")

    # Merge subreddits from config with defaults
    config_subreddits = cfg.get("subreddits", {})
    _original_subs: Dict[str, List[str]] = {}
    if config_subreddits:
        merged = dict(VENTURE_SUBREDDITS)
        for venture, subs in config_subreddits.items():
            # Tolerate a bare string or a tuple in user-edited config.
            extra = [subs] if isinstance(subs, str) else list(subs or [])
            # dict.fromkeys de-duplicates while keeping order, so the scan order
            # (which decides what survives the limit) is deterministic.
            merged[venture] = list(dict.fromkeys(list(merged.get(venture, [])) + extra))
        # The proxy/rapidapi scanners read the module-level dict, so it is
        # overridden here and restored in the finally block below.
        # This relies on scans being single-threaded.
        _original_subs = dict(VENTURE_SUBREDDITS)
        VENTURE_SUBREDDITS.update(merged)

    try:
        if provider == "rapidapi":
            api_key = _get_rapidapi_key()
            if not api_key:
                return _manual_check_targets("reddit", queries, limit)
            return _scan_reddit_rapidapi(queries, limit, known_fps, api_key)

        # Default: try proxy first, fall back to RapidAPI
        if not _test_reddit_proxy():
            api_key = _get_rapidapi_key()
            if not api_key:
                logger.warning("No Reddit access -- proxy down, no RapidAPI key")
                return _manual_check_targets("reddit", queries, limit)
            return _scan_reddit_rapidapi(queries, limit, known_fps, api_key)

        return _scan_reddit_proxy(queries, limit, known_fps)
    finally:
        # Restore original subreddits if we merged
        if config_subreddits:
            VENTURE_SUBREDDITS.clear()
            VENTURE_SUBREDDITS.update(_original_subs)
1043
-
1044
-
1045
def _test_reddit_proxy() -> bool:
    """Probe the proxy's ``/health`` endpoint.

    Returns the ``reddit_proxy`` field of the JSON health response (False when
    the field is absent), or False if the request or parsing fails for any
    reason.
    """
    try:
        # Swap the trailing "/reddit-fetch" path for "/health" on the same host.
        base_url = REDDIT_PROXY.rsplit('/reddit-fetch', 1)[0]
        health_request = urllib.request.Request(
            f"{base_url}/health",
            headers={"User-Agent": "Delimit"},
        )
        with urllib.request.urlopen(health_request, timeout=3) as response:
            payload = json.loads(response.read())
        return payload.get("reddit_proxy", False)
    except Exception:
        return False
1054
-
1055
-
1056
def _scan_reddit_proxy(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan Reddit through the fetch proxy configured in ``REDDIT_PROXY``.

    Every subreddit in ``VENTURE_SUBREDDITS`` is fetched once, for both the
    ``new`` and ``hot`` listings. A post becomes a target when
    ``_route_venture`` matches it to a venture and it has a score of at least 1
    or at least 2 comments.

    Args:
        queries: Not used by this scanner.
        limit: Maximum number of targets to return; also caps the per-request
            page size (at most 10).
        known_fps: Already-seen fingerprints. Mutated in place with every
            fingerprint emitted here.

    Returns:
        List of target dicts with platform ``"reddit"``.
    """
    targets: List[Dict] = []
    scanned_subs: set = set()
    page_size = min(limit, 10)

    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                break
            if sub in scanned_subs:
                # A subreddit shared between ventures must only skip itself,
                # not abandon the rest of this venture's list.
                continue
            scanned_subs.add(sub)

            # Scan both /new and /hot to catch high-engagement older posts
            for sort in ("new", "hot"):
                if len(targets) >= limit:
                    break
                reddit_url = f"https://www.reddit.com/r/{sub}/{sort}.json?limit={page_size}"
                proxy_url = f"{REDDIT_PROXY}?url={urllib.parse.quote(reddit_url, safe='')}"
                req = urllib.request.Request(proxy_url, headers={"User-Agent": "Delimit/3.11.0"})
                try:
                    with urllib.request.urlopen(req, timeout=15) as resp:
                        result = json.loads(resp.read())

                    posts = result.get("data", {}).get("children", [])
                    for post_wrapper in posts:
                        post = post_wrapper.get("data", {})
                        post_id = post.get("id", "")
                        fp = f"reddit:{post_id}"
                        if not post_id or fp in known_fps:
                            continue

                        # `or ""` guards against explicit nulls, which would
                        # otherwise abort the whole listing.
                        title = post.get("title") or ""
                        selftext = (post.get("selftext") or "")[:200]
                        author = post.get("author", "")
                        score = post.get("score", 0) or 0
                        num_comments = post.get("num_comments", 0) or 0
                        permalink = post.get("permalink", "")
                        snippet = f"{title} {selftext}".strip()

                        venture_match, confidence, rationale = _route_venture(snippet)
                        if not venture_match:
                            continue
                        # Skip low-engagement posts
                        if score < 1 and num_comments < 2:
                            continue

                        targets.append({
                            "fingerprint": fp,
                            "platform": "reddit",
                            "source_id": post_id,
                            "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
                            "author": f"u/{author}",
                            # NOTE: this stores the post score, not a follower count.
                            "author_followers": score,
                            "content_snippet": snippet[:300],
                            "venture": venture_match,
                            "classification": _classify_target(snippet, num_comments),
                            "confidence": confidence,
                            "rationale": f"r/{sub}/{sort}: {rationale}",
                            "manual_check_needed": False,
                            "first_seen": datetime.now(timezone.utc).isoformat(),
                            "status": "new",
                        })
                        known_fps.add(fp)

                        if len(targets) >= limit:
                            break

                except Exception as e:
                    # Best-effort: one failing listing must not stop the scan.
                    logger.error("Reddit proxy scan error for r/%s/%s: %s", sub, sort, e)
                    continue

    return targets
1127
-
1128
-
1129
def _rapidapi_fetch_posts(url: str, api_key: str) -> Optional[List[Dict]]:
    """GET a Reddit34 endpoint; return its post list, or None on success=false."""
    req = urllib.request.Request(
        url,
        headers={
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "reddit34.p.rapidapi.com",
            "User-Agent": "Delimit/3.11.0",
        },
    )
    with urllib.request.urlopen(req, timeout=20) as resp:
        result = json.loads(resp.read())
    if not result.get("success"):
        return None
    return result.get("data", {}).get("posts", [])


def _rapidapi_post_to_target(post_wrapper: Dict, known_fps: set, origin: str) -> Optional[Dict]:
    """Turn one Reddit34 post into a target dict, or None if it is filtered out."""
    # Posts may arrive wrapped in {"data": {...}} or bare.
    post = post_wrapper.get("data", post_wrapper)
    post_id = post.get("id", "")
    fp = f"reddit:{post_id}"
    if not post_id or fp in known_fps:
        return None

    # `or ""` guards against explicit nulls in the payload.
    title = post.get("title") or ""
    selftext = (post.get("selftext") or "")[:200]
    author = post.get("author", "")
    score = post.get("score", 0) or 0
    num_comments = post.get("num_comments", 0) or 0
    permalink = post.get("permalink", "")
    snippet = f"{title} {selftext}".strip()

    venture_match, confidence, rationale = _route_venture(snippet)
    if not venture_match:
        return None
    # Skip low-engagement posts
    if score < 1 and num_comments < 2:
        return None

    return {
        "fingerprint": fp,
        "platform": "reddit",
        "source_id": post_id,
        "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
        "author": f"u/{author}",
        # NOTE: this stores the post score, not a follower count.
        "author_followers": score,
        "content_snippet": snippet[:300],
        "venture": venture_match,
        "classification": _classify_target(snippet, num_comments),
        "confidence": confidence,
        "rationale": f"{origin}: {rationale}",
        "manual_check_needed": False,
        "first_seen": datetime.now(timezone.utc).isoformat(),
        "status": "new",
    }


def _scan_reddit_rapidapi(queries: List[str], limit: int, known_fps: set, api_key: str) -> List[Dict]:
    """Fallback: scan Reddit via RapidAPI Reddit34.

    Phase 1 fetches the newest posts of every subreddit in
    ``VENTURE_SUBREDDITS`` (each subreddit once). Phase 2 runs only if the
    limit has not been reached, and keyword-searches all of Reddit with the
    first three ``queries``.

    Args:
        queries: Search queries; only the first three are used, in phase 2.
        limit: Maximum number of targets to return.
        known_fps: Already-seen fingerprints. Mutated in place with every
            fingerprint emitted here.
        api_key: RapidAPI key sent in the ``X-RapidAPI-Key`` header.

    Returns:
        List of target dicts with platform ``"reddit"``.
    """
    targets: List[Dict] = []

    def _collect(posts: List[Dict], origin: str) -> None:
        # Append accepted posts until the limit is reached.
        for post_wrapper in posts:
            target = _rapidapi_post_to_target(post_wrapper, known_fps, origin)
            if target is None:
                continue
            targets.append(target)
            known_fps.add(target["fingerprint"])
            if len(targets) >= limit:
                break

    # Phase 1: scan subreddits mapped to ventures
    scanned_subs: set = set()
    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                break
            if sub in scanned_subs:
                # A subreddit shared between ventures must only skip itself,
                # not abandon the rest of this venture's list.
                continue
            scanned_subs.add(sub)

            url = (
                "https://reddit34.p.rapidapi.com/getPostsBySubreddit"
                f"?subreddit={urllib.parse.quote(sub)}&sort=new&limit={min(limit, 10)}"
            )
            try:
                posts = _rapidapi_fetch_posts(url, api_key)
                if posts is None:
                    logger.warning("Reddit34 returned success=false for r/%s", sub)
                    continue
                _collect(posts, f"r/{sub}")
            except Exception as e:
                # Best-effort: one failing subreddit must not stop the scan.
                logger.error("Reddit scan error for r/%s: %s", sub, e)
                continue

    # Phase 2: keyword search across all of Reddit via getSearchPosts
    if len(targets) < limit:
        for query in queries[:3]:  # Top 3 venture topic queries
            if len(targets) >= limit:
                break
            search_url = (
                "https://reddit34.p.rapidapi.com/getSearchPosts"
                f"?query={urllib.parse.quote(query)}&sort=new&limit={min(limit, 5)}"
            )
            try:
                posts = _rapidapi_fetch_posts(search_url, api_key)
                if posts is None:
                    continue
                _collect(posts, f"search:{query}")
            except Exception as e:
                logger.error("Reddit search error for '%s': %s", query, e)
                continue

    return targets
1283
-
1284
-
1285
- def _manual_check_targets(platform: str, queries: List[str], limit: int) -> List[Dict]:
1286
- """Return manual_check_needed placeholders for platforms we cannot scrape."""
1287
- targets = []
1288
- for query in queries[:3]:
1289
- venture, confidence, rationale = _route_venture(query)
1290
- targets.append({
1291
- "fingerprint": f"{platform}:manual:{query[:30]}",
1292
- "platform": platform,
1293
- "source_id": "",
1294
- "canonical_url": "",
1295
- "author": "",
1296
- "author_followers": 0,
1297
- "content_snippet": f"Search '{query}' on {platform}",
1298
- "venture": venture or "unknown",
1299
- "classification": "reply",
1300
- "confidence": 0.0,
1301
- "rationale": f"Manual check needed -- {platform} cannot be scanned server-side",
1302
- "manual_check_needed": True,
1303
- "first_seen": datetime.now(timezone.utc).isoformat(),
1304
- "status": "manual_check_needed",
1305
- })
1306
- return targets[:limit]
1307
-
1308
-
1309
- # -----------------------------------------------------------------------
1310
- # JSON extraction helper
1311
- # -----------------------------------------------------------------------
1312
-
1313
- def _extract_json_array(text: str) -> list:
1314
- """Best-effort extraction of a JSON array from LLM response text."""
1315
- # Try the whole text first
1316
- try:
1317
- parsed = json.loads(text)
1318
- if isinstance(parsed, list):
1319
- return parsed
1320
- if isinstance(parsed, dict):
1321
- return [parsed]
1322
- except (json.JSONDecodeError, ValueError):
1323
- pass
1324
- # Try to find [...] in the text
1325
- start = text.find("[")
1326
- if start != -1:
1327
- depth = 0
1328
- for i in range(start, len(text)):
1329
- if text[i] == "[":
1330
- depth += 1
1331
- elif text[i] == "]":
1332
- depth -= 1
1333
- if depth == 0:
1334
- try:
1335
- return json.loads(text[start:i + 1])
1336
- except (json.JSONDecodeError, ValueError):
1337
- break
1338
- return []
1339
-
1340
-
1341
- # -----------------------------------------------------------------------
1342
- # Public API
1343
- # -----------------------------------------------------------------------
1344
-
1345
def scan_targets(
    platforms: List[str],
    ventures: Optional[List[str]] = None,
    keywords: Optional[List[str]] = None,
    limit: int = 10,
) -> List[Dict]:
    """Discover engagement opportunities across platforms.

    Args:
        platforms: Platform names to scan (x, hn, devto, reddit, github,
            namepros). Names are stripped and lower-cased; unknown names are
            logged and skipped, as are platforms disabled in the config.
        ventures: Restrict queries and results to these ventures. None = all
            ventures in ``VENTURE_CONFIG``.
        keywords: Extra search terms added after the venture topics.
        limit: Max targets per platform. A falsy value falls back to the
            config's ``scan_limit`` (10 if unset).

    Returns:
        List of target dicts. A platform whose scan raised contributes an
        ``{"error": ..., "platform": ...}`` entry instead. Targets that are
        neither errors nor manual-check placeholders are persisted via
        ``_append_target`` before returning.
    """
    scan_config = _load_config()
    known_fps = _load_known_fingerprints()

    # The config value only applies when the caller passes a falsy limit.
    effective_limit = limit if limit else scan_config.get("scan_limit", 10)

    # Gather venture topics first, then the extra keywords.
    candidate_queries: List[str] = []
    for venture_name in (ventures or list(VENTURE_CONFIG.keys())):
        venture_cfg = VENTURE_CONFIG.get(venture_name)
        if venture_cfg:
            candidate_queries.extend(venture_cfg["topics"])
    if keywords:
        candidate_queries.extend(keywords)

    # Case-insensitive de-duplication that keeps the first spelling and order.
    first_spelling: Dict[str, str] = {}
    for candidate in candidate_queries:
        first_spelling.setdefault(candidate.lower(), candidate)
    unique_queries: List[str] = list(first_spelling.values())

    def _github_scan() -> List[Dict]:
        # GitHub combines the keyword scan with monitoring of our own repos.
        found = _scan_github(unique_queries, effective_limit, known_fps, config=scan_config)
        found.extend(_monitor_own_repos(known_fps))
        return found

    scanners = {
        "x": lambda: _scan_x(unique_queries, effective_limit, known_fps, config=scan_config),
        "hn": lambda: _scan_hn(unique_queries, effective_limit, known_fps),
        "devto": lambda: _scan_devto(unique_queries, effective_limit, known_fps),
        "reddit": lambda: _scan_reddit(unique_queries, effective_limit, known_fps, config=scan_config),
        "github": _github_scan,
        "namepros": lambda: _manual_check_targets("namepros", unique_queries, effective_limit),
    }

    all_targets: List[Dict] = []
    platform_configs = scan_config.get("platforms", {})

    for raw_name in platforms:
        platform = raw_name.strip().lower()

        # Platforms absent from the config count as enabled.
        if not platform_configs.get(platform, {}).get("enabled", True):
            logger.info("Platform '%s' is disabled in config, skipping", platform)
            continue

        try:
            run_scan = scanners.get(platform)
            if run_scan is None:
                logger.warning("Unknown platform: %s", platform)
                continue
            targets = run_scan()

            # Filter by venture if specified
            if ventures:
                targets = [
                    t for t in targets
                    if t.get("venture") in ventures or t.get("error")
                ]

            all_targets.extend(targets)
        except Exception as e:
            logger.error("Platform scan error (%s): %s", platform, e)
            all_targets.append({"error": f"Scan failed for {platform}: {e}", "platform": platform})

    # Persist new non-error targets
    for found_target in all_targets:
        if found_target.get("error") or found_target.get("manual_check_needed"):
            continue
        _append_target(found_target)

    return all_targets
1432
-
1433
-
1434
def process_targets(
    targets: List[Dict],
    draft_replies: bool = False,
    create_ledger: bool = False,
) -> Dict[str, Any]:
    """Turn discovered targets into social drafts and/or ledger items.

    Args:
        targets: Target dicts as produced by scan_targets. Entries flagged
            with ``error`` or ``manual_check_needed`` are ignored.
        draft_replies: When True, save a placeholder draft for targets
            classified "reply" or "both". A failure to draft is logged and
            does not stop processing.
        create_ledger: When True, build a ledger item dict for targets
            classified "strategic" or "both".

    Returns:
        Dict with two lists: ``drafted`` (draft_id, fingerprint, venture) and
        ``ledger_items``.
    """
    drafted: List[Dict] = []
    ledger_items: List[Dict] = []

    for target in targets:
        if target.get("error") or target.get("manual_check_needed"):
            continue

        classification = target.get("classification", "reply")
        wants_draft = draft_replies and classification in ("reply", "both")
        wants_ledger = create_ledger and classification in ("strategic", "both")

        if wants_draft:
            try:
                from ai.social import save_draft
                venture = target.get("venture", "delimit")
                url = target.get("canonical_url", "")
                snippet = target.get("content_snippet", "")
                author = target.get("author", "")

                draft_text = (
                    f"[DRAFT - needs human writing] "
                    f"Engagement opportunity for {venture}: "
                    f"{author} posted about {snippet[:100]}... "
                    f"URL: {url}"
                )

                # Drafts go to Twitter by default; only X targets carry a
                # reply-to id.
                platform = target.get("platform", "x")
                social_platform = "twitter"
                reply_to = target.get("source_id", "") if platform == "x" else ""

                # Post from the venture's first owned account.
                config = VENTURE_CONFIG.get(venture, {})
                account = config.get("owned_accounts", ["delimit_ai"])[0]

                entry = save_draft(
                    draft_text,
                    platform=social_platform,
                    account=account,
                    reply_to_id=reply_to,
                    context=f"Social target: {target.get('rationale', '')}",
                )
                drafted.append({
                    "draft_id": entry.get("draft_id"),
                    "fingerprint": target.get("fingerprint"),
                    "venture": venture,
                })
            except Exception as e:
                logger.error("Failed to draft reply for %s: %s", target.get("fingerprint"), e)

        if wants_ledger:
            venture = target.get("venture", "delimit")
            description = (
                f"Source: {target.get('canonical_url', 'N/A')}\n"
                f"Author: {target.get('author', 'unknown')} ({target.get('author_followers', 0)} followers)\n"
                f"Snippet: {target.get('content_snippet', '')[:200]}\n"
                f"Rationale: {target.get('rationale', '')}"
            )
            ledger_items.append({
                "title": f"[{venture.upper()}] Engage: {target.get('author', 'unknown')} on {target.get('platform', '?')}",
                "description": description,
                "priority": VENTURE_CONFIG.get(venture, {}).get("priority", "P1"),
                "tags": [venture, "social-target", target.get("platform", "")],
            })

    return {"drafted": drafted, "ledger_items": ledger_items}
1515
-
1516
-
1517
def list_targets(limit: int = 20) -> Dict[str, Any]:
    """List the most recent targets from the JSONL store, newest first.

    Args:
        limit: Max targets to return. Zero or a negative value returns none.

    Returns:
        Dict with ``targets`` (list of entries), ``count`` (its length) and
        ``total_stored`` (number of lines in the store, blank or unparsable
        lines included). All three keys are present even when the store does
        not exist yet.
    """
    if not TARGETS_FILE.exists():
        return {"targets": [], "count": 0, "total_stored": 0}

    lines = TARGETS_FILE.read_text().splitlines()
    targets: List[Dict] = []

    # Checking the limit up front keeps limit <= 0 from returning one entry.
    if limit > 0:
        # The store is append-only, so walking backwards yields newest first.
        for line in reversed(lines):
            if not line.strip():
                continue
            try:
                targets.append(json.loads(line))
            except (json.JSONDecodeError, ValueError):
                # Skip corrupt lines rather than failing the whole listing.
                continue
            if len(targets) >= limit:
                break

    return {"targets": targets, "count": len(targets), "total_stored": len(lines)}
1543
-
1544
-
1545
def get_stats() -> Dict[str, Any]:
    """Aggregate counts over every target in the JSONL store.

    Returns:
        Dict with ``total`` plus one histogram each for platform, venture,
        classification and status. Entries missing a field are counted under
        "unknown"; blank and unparsable lines are ignored.
    """
    histograms: Dict[str, Dict[str, int]] = {
        "by_platform": {},
        "by_venture": {},
        "by_classification": {},
        "by_status": {},
    }
    if not TARGETS_FILE.exists():
        return {"total": 0, **histograms}

    # (result key, entry field) pairs, in output order.
    dimensions = (
        ("by_platform", "platform"),
        ("by_venture", "venture"),
        ("by_classification", "classification"),
        ("by_status", "status"),
    )

    total = 0
    for raw_line in TARGETS_FILE.read_text().splitlines():
        if not raw_line.strip():
            continue
        try:
            entry = json.loads(raw_line)
        except (json.JSONDecodeError, ValueError):
            continue

        total += 1
        for result_key, field in dimensions:
            bucket = histograms[result_key]
            value = entry.get(field, "unknown")
            bucket[value] = bucket.get(value, 0) + 1

    return {"total": total, **histograms}