delimit-cli 3.14.27 → 3.14.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/bin/delimit-setup.js +19 -2
  2. package/gateway/ai/backends/deploy_bridge.py +56 -2
  3. package/gateway/ai/backends/gateway_core.py +212 -1
  4. package/gateway/ai/backends/generate_bridge.py +84 -13
  5. package/gateway/ai/backends/governance_bridge.py +63 -16
  6. package/gateway/ai/backends/memory_bridge.py +77 -76
  7. package/gateway/ai/backends/ops_bridge.py +76 -6
  8. package/gateway/ai/backends/os_bridge.py +23 -3
  9. package/gateway/ai/backends/repo_bridge.py +156 -17
  10. package/gateway/ai/backends/tools_design.py +116 -9
  11. package/gateway/ai/backends/tools_infra.py +200 -72
  12. package/gateway/ai/backends/tools_real.py +8 -0
  13. package/gateway/ai/backends/ui_bridge.py +115 -5
  14. package/gateway/ai/backends/vault_bridge.py +69 -114
  15. package/gateway/ai/content_engine.py +1276 -0
  16. package/gateway/ai/context_fs.py +193 -0
  17. package/gateway/ai/daemon.py +500 -0
  18. package/gateway/ai/data_plane.py +291 -0
  19. package/gateway/ai/deliberation.py +1033 -6
  20. package/gateway/ai/events.py +39 -0
  21. package/gateway/ai/founding_users.py +162 -0
  22. package/gateway/ai/governance.py +698 -4
  23. package/gateway/ai/inbox_daemon.py +78 -17
  24. package/gateway/ai/integrations/__init__.py +1 -0
  25. package/gateway/ai/integrations/opensage_wrapper.py +288 -0
  26. package/gateway/ai/key_resolver.py +95 -0
  27. package/gateway/ai/ledger_manager.py +289 -1
  28. package/gateway/ai/license.py +62 -4
  29. package/gateway/ai/license_core.py +208 -7
  30. package/gateway/ai/local_server.py +215 -0
  31. package/gateway/ai/loop_engine.py +408 -0
  32. package/gateway/ai/mcp_bridge.py +178 -0
  33. package/gateway/ai/release_sync.py +2 -2
  34. package/gateway/ai/screen_record.py +374 -0
  35. package/gateway/ai/secrets_broker.py +235 -0
  36. package/gateway/ai/social.py +189 -27
  37. package/gateway/ai/social_target.py +1635 -0
  38. package/gateway/ai/supabase_sync.py +190 -0
  39. package/gateway/ai/tracing.py +195 -0
  40. package/gateway/core/contract_ledger.py +1 -1
  41. package/gateway/core/dependency_graph.py +1 -1
  42. package/gateway/core/dependency_manifest.py +1 -1
  43. package/gateway/core/diff_engine_v2.py +272 -78
  44. package/gateway/core/event_backbone.py +2 -2
  45. package/gateway/core/event_schema.py +1 -1
  46. package/gateway/core/impact_analyzer.py +1 -1
  47. package/gateway/core/policy_engine.py +4 -0
  48. package/package.json +1 -1
@@ -0,0 +1,1635 @@
"""Social targeting engine -- discover engagement opportunities across platforms.

Scans X (RapidAPI Twttr241, falling back to the xAI Responses API), Reddit
(local proxy or RapidAPI Reddit34), GitHub (via the gh CLI), Hacker News
(Algolia API), and Dev.to for posts where Jamsons ventures can genuinely engage.
NamePros is flagged as manual_check_needed (no API).

Targets are deduplicated via fingerprint and stored in append-only JSONL.
Platform configuration is user-configurable via ~/.delimit/social_target_config.json.
"""

import copy
import json
import logging
import os
import subprocess
import urllib.request
import urllib.error
import urllib.parse
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

# Module-scoped logger shared by every helper in this file.
logger = logging.getLogger("delimit.ai.social_target")

# JSONL store of discovered targets: one JSON object per line, appended to by
# _append_target() and read back by _load_known_fingerprints() for dedup.
TARGETS_FILE = Path.home() / ".delimit" / "social_targets.jsonl"
# Optional user overrides; _load_config() deep-merges this over DEFAULT_CONFIG.
SOCIAL_TARGET_CONFIG = Path.home() / ".delimit" / "social_target_config.json"
27
+
28
+ # -----------------------------------------------------------------------
29
+ # User-configurable platform config
30
+ # -----------------------------------------------------------------------
31
+
32
# Built-in defaults. _load_config() starts from a deep copy of this mapping and
# layers the user's config file (if any) on top of it.
DEFAULT_CONFIG: Dict[str, Any] = {
    # Per-platform switch plus the provider name used to reach that platform.
    "platforms": {
        "x": {"enabled": True, "provider": "twttr241"},
        "reddit": {"enabled": True, "provider": "proxy"},
        "github": {"enabled": True, "provider": "gh_cli"},
        "hn": {"enabled": True, "provider": "algolia"},
        "devto": {"enabled": True, "provider": "public_api"},
        "namepros": {"enabled": False, "provider": "manual"},
    },
    # venture name -> list of subreddit names (see add_subreddits()).
    "subreddits": {},
    # NOTE(review): presumably venture name -> extra GitHub search queries;
    # no reader is visible in this part of the file -- confirm.
    "github_queries": {},
    "scan_limit": 10,
    "min_engagement": {"score": 1, "comments": 2},
}
46
+
47
+
48
+ def _deep_merge(base: dict, override: dict) -> dict:
49
+ """Recursively merge override into base. Override values win."""
50
+ result = copy.deepcopy(base)
51
+ for key, value in override.items():
52
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
53
+ result[key] = _deep_merge(result[key], value)
54
+ else:
55
+ result[key] = copy.deepcopy(value)
56
+ return result
57
+
58
+
59
def _load_config() -> Dict[str, Any]:
    """Build the effective social-target config.

    Steps:
      1. Start from a deep copy of DEFAULT_CONFIG.
      2. If SOCIAL_TARGET_CONFIG exists and holds a JSON object, deep-merge it
         over the defaults (user values win). Unreadable or malformed files
         are logged and ignored.
      3. Run platform auto-detection and overwrite ``enabled`` for every
         platform the user did NOT list under ``platforms`` in their file.

    Returns:
        The merged configuration dict. The caller owns it and may mutate it.
    """
    config = copy.deepcopy(DEFAULT_CONFIG)
    # Platforms explicitly present in the user's file; these are never
    # touched by auto-detection.
    user_platforms: set = set()

    if SOCIAL_TARGET_CONFIG.exists():
        try:
            user_config = json.loads(SOCIAL_TARGET_CONFIG.read_text())
        except (json.JSONDecodeError, ValueError, OSError) as e:
            logger.warning("Failed to load social target config: %s", e)
        else:
            if isinstance(user_config, dict):
                config = _deep_merge(config, user_config)
                overrides = user_config.get("platforms")
                if isinstance(overrides, dict):
                    user_platforms = set(overrides)
            else:
                logger.warning(
                    "Failed to load social target config: %s",
                    "top-level JSON value is not an object",
                )

    # A user file that replaces "platforms" with a non-mapping would break
    # every consumer; fall back to the built-in platform table.
    if not isinstance(config.get("platforms"), dict):
        config["platforms"] = copy.deepcopy(DEFAULT_CONFIG["platforms"])

    # Auto-detect available platforms and disable those without access.
    # The user file was parsed once above, so no per-platform re-read is needed.
    detection = _detect_available_platforms()
    for platform, info in detection.items():
        entry = config["platforms"].get(platform)
        if platform in user_platforms or not isinstance(entry, dict):
            continue
        entry["enabled"] = info["available"]

    return config
87
+
88
+
89
def _load_user_platform_overrides() -> set:
    """Return the set of platform names explicitly set in the user config.

    An empty set is returned when the config file is missing, unreadable,
    not valid JSON, not a JSON object, or has a ``platforms`` value that is
    not a mapping.
    """
    if not SOCIAL_TARGET_CONFIG.exists():
        return set()
    try:
        user_config = json.loads(SOCIAL_TARGET_CONFIG.read_text())
    except (json.JSONDecodeError, ValueError, OSError):
        return set()

    # Guard the shape: a list/str root or a non-dict "platforms" would
    # otherwise raise AttributeError on .get()/.keys().
    platforms = user_config.get("platforms") if isinstance(user_config, dict) else None
    return set(platforms) if isinstance(platforms, dict) else set()
98
+
99
+
100
def _detect_available_platforms() -> Dict[str, Dict[str, Any]]:
    """Check which platforms have the necessary credentials/access.

    Returns:
        Mapping of platform name -> ``{"available": bool, "provider": str,
        "reason": str}`` for x, reddit, github, hn, devto and namepros
        (inserted in that order).

    Note:
        This spawns ``gh auth status`` (10 s timeout) and may call
        ``_test_reddit_proxy()``, so it is not free to call repeatedly.
    """
    result: Dict[str, Dict[str, Any]] = {}

    # X: prefer the RapidAPI (Twttr241) key; only look for an xAI key when
    # RapidAPI is not configured.
    rapidapi_key = _get_rapidapi_key()
    if rapidapi_key:
        result["x"] = {"available": True, "provider": "twttr241", "reason": "RapidAPI key found"}
    elif _get_xai_api_key():
        result["x"] = {"available": True, "provider": "xai", "reason": "xAI API key found (fallback)"}
    else:
        result["x"] = {"available": False, "provider": "none", "reason": "No RapidAPI or xAI API key"}

    # Reddit: explicit proxy env var, then a responding local proxy, then RapidAPI.
    if os.environ.get("DELIMIT_REDDIT_PROXY", ""):
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "DELIMIT_REDDIT_PROXY env set"}
    elif _test_reddit_proxy():
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "Local proxy responding"}
    elif rapidapi_key:
        result["reddit"] = {"available": True, "provider": "rapidapi", "reason": "RapidAPI key found (fallback)"}
    else:
        result["reddit"] = {"available": False, "provider": "none", "reason": "No proxy or RapidAPI key"}

    # GitHub: rely on the gh CLI's own auth check. Each failure mode gets an
    # accurate reason instead of lumping timeouts in with a missing binary.
    try:
        proc = subprocess.run(
            ["gh", "auth", "status"],
            capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        result["github"] = {"available": False, "provider": "gh_cli", "reason": "gh CLI not found"}
    except subprocess.TimeoutExpired:
        result["github"] = {"available": False, "provider": "gh_cli", "reason": "gh auth status timed out"}
    except OSError as e:
        # e.g. PermissionError when the binary exists but is not executable.
        result["github"] = {"available": False, "provider": "gh_cli", "reason": f"gh CLI could not be run: {e}"}
    else:
        if proc.returncode == 0:
            result["github"] = {"available": True, "provider": "gh_cli", "reason": "gh authenticated"}
        else:
            result["github"] = {"available": False, "provider": "gh_cli", "reason": "gh not authenticated"}

    # HN: always available (public API, no auth)
    result["hn"] = {"available": True, "provider": "algolia", "reason": "Public API, no auth needed"}

    # Dev.to: always available (public API, no auth)
    result["devto"] = {"available": True, "provider": "public_api", "reason": "Public API, no auth needed"}

    # NamePros: manual only
    result["namepros"] = {"available": False, "provider": "manual", "reason": "No API, manual check only"}

    return result
153
+
154
+
155
def _save_config(config: Dict[str, Any]) -> None:
    """Persist ``config`` to SOCIAL_TARGET_CONFIG as indented JSON.

    The parent directory is created when missing; the file is overwritten
    and always ends with a newline.
    """
    destination = SOCIAL_TARGET_CONFIG
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(config, indent=2)
    destination.write_text(f"{serialized}\n")
159
+
160
+
161
def get_config_status() -> Dict[str, Any]:
    """Report the effective config plus live platform availability.

    Intended for the MCP tool: bundles the merged config, the result of a
    fresh availability probe, and the location/existence of the user's
    config file.
    """
    status: Dict[str, Any] = {"config": _load_config()}
    status["platform_availability"] = _detect_available_platforms()
    status["config_file"] = str(SOCIAL_TARGET_CONFIG)
    status["config_file_exists"] = SOCIAL_TARGET_CONFIG.exists()
    return status
171
+
172
+
173
def update_platform_config(
    platform: str,
    enabled: Optional[bool] = None,
    provider: Optional[str] = None,
) -> Dict[str, Any]:
    """Update a single platform's config and save.

    Only the user's own config file is rewritten. Saving the merged
    effective config (defaults + auto-detection) would list every platform
    in the file, which _load_config() treats as explicit user overrides --
    permanently freezing whatever auto-detection happened to report at the
    time of this call.

    Args:
        platform: Platform key (e.g. "x", "reddit"). Unknown keys are created.
        enabled: New enabled flag, or None to keep the current effective value.
        provider: New provider name; falsy values leave the provider unchanged.

    Returns:
        ``{"updated": True, "platform": <platform>, "config": <entry>}`` where
        ``entry`` is the platform's settings as written to disk.
    """
    # The effective view is used only to seed enabled/provider for a platform
    # the user has not overridden before.
    effective = _load_config()
    current = effective["platforms"].get(platform)
    if not isinstance(current, dict):
        current = {"enabled": True, "provider": ""}

    # Load the raw user file (not the merged view) so untouched settings stay
    # out of it and keep following defaults / auto-detection.
    user_config: Dict[str, Any] = {}
    if SOCIAL_TARGET_CONFIG.exists():
        try:
            loaded = json.loads(SOCIAL_TARGET_CONFIG.read_text())
        except (json.JSONDecodeError, ValueError, OSError) as e:
            logger.warning("Failed to load social target config: %s", e)
        else:
            if isinstance(loaded, dict):
                user_config = loaded

    platforms = user_config.get("platforms")
    if not isinstance(platforms, dict):
        platforms = user_config["platforms"] = {}
    entry = platforms.get(platform)
    if not isinstance(entry, dict):
        entry = platforms[platform] = {}

    # Once a platform appears in the user file it is exempt from
    # auto-detection, so pin its current effective values explicitly.
    entry.setdefault("enabled", current.get("enabled", True))
    entry.setdefault("provider", current.get("provider", ""))

    if enabled is not None:
        entry["enabled"] = enabled
    if provider:
        entry["provider"] = provider

    _save_config(user_config)
    return {"updated": True, "platform": platform, "config": entry}
190
+
191
+
192
def add_subreddits(venture: str, subreddits: List[str]) -> Dict[str, Any]:
    """Add subreddits to scan for a venture.

    Only the user's own config file is rewritten. Saving the merged
    effective config would persist auto-detected platform flags as if the
    user had set them, which disables auto-detection in _load_config().

    Args:
        venture: Venture key the subreddits belong to.
        subreddits: Names to add. Names already configured, and repeats
            within this list, are ignored; order is preserved.

    Returns:
        ``{"venture": ..., "added": [newly added], "total": [all for venture]}``.
    """
    user_config: Dict[str, Any] = {}
    if SOCIAL_TARGET_CONFIG.exists():
        try:
            loaded = json.loads(SOCIAL_TARGET_CONFIG.read_text())
        except (json.JSONDecodeError, ValueError, OSError) as e:
            logger.warning("Failed to load social target config: %s", e)
        else:
            if isinstance(loaded, dict):
                user_config = loaded

    # DEFAULT_CONFIG ships no subreddits, so the user file is the only source.
    by_venture = user_config.get("subreddits")
    if not isinstance(by_venture, dict):
        by_venture = user_config["subreddits"] = {}
    existing = by_venture.get(venture)
    if not isinstance(existing, list):
        existing = by_venture[venture] = []

    new_subs: List[str] = []
    for sub in subreddits:
        # Check new_subs too so ["a", "a"] adds "a" only once.
        if sub not in existing and sub not in new_subs:
            new_subs.append(sub)
    existing.extend(new_subs)

    _save_config(user_config)
    return {"venture": venture, "added": new_subs, "total": existing}
202
+
203
+ # -----------------------------------------------------------------------
204
+ # Per-venture routing config
205
+ # -----------------------------------------------------------------------
206
+
207
# Routing table consulted by _route_venture(). For each venture:
#   topics        - phrases matched case-insensitively as substrings of a
#                   post; the venture with the most matching topics wins.
#   exclude_terms - if any of these appears in the text, the venture is
#                   skipped for that post.
#   owned_accounts / priority - not read by the routing code shown here.
#                   NOTE(review): presumably used by callers to filter our
#                   own posts and to rank ventures -- confirm.
VENTURE_CONFIG = {
    "delimit": {
        "topics": [
            "API governance", "breaking changes", "OpenAPI", "API linting",
            "MCP server", "MCP tools", "CLAUDE.md", "claude code",
            "AI coding", "vibe coding", "semver",
        ],
        "exclude_terms": ["delimit_ai"],
        "owned_accounts": ["delimit_ai", "delimitdev"],
        "priority": "P0",
    },
    "domainvested": {
        "topics": [
            "domain investing", "domain appraisal", "domain flipping",
            "expired domains", "brandable domains", "domain valuation", "NamePros",
        ],
        "exclude_terms": ["domainvested"],
        "owned_accounts": ["domainvested"],
        "priority": "P0",
    },
    "wirereport": {
        "topics": [
            "sports API", "live sports data", "sports scores API",
            "sports news automation",
        ],
        "exclude_terms": ["wire_report", "wirereport"],
        "owned_accounts": ["wirereporthq"],
        "priority": "P2",
    },
    "livetube": {
        "topics": [
            "live streaming aggregator", "multi-stream",
            "twitch alternatives", "live stream discovery",
        ],
        "exclude_terms": ["livetube"],
        "owned_accounts": ["livetube_ai"],
        "priority": "P2",
    },
    "stakeone": {
        "topics": [
            "Harmony ONE", "harmony validator", "ONE staking",
            "harmony blockchain",
        ],
        "exclude_terms": ["validatorone", "stake_one"],
        "owned_accounts": ["validatorone"],
        "priority": "P1",
    },
}
255
+
256
+
257
+ # -----------------------------------------------------------------------
258
+ # GitHub-specific config
259
+ # -----------------------------------------------------------------------
260
+
261
# Venture name -> GitHub search phrases.
# NOTE(review): no reader is visible in this part of the file; presumably
# these are passed as `queries` to _scan_github() -- confirm against caller.
VENTURE_GITHUB_QUERIES = {
    "delimit": [
        "openapi breaking changes",
        "API governance CI",
        "MCP server claude code",
        "API linting github action",
    ],
    "domainvested": [
        "domain appraisal tool",
        "domain valuation API",
    ],
    "stakeone": [
        "harmony one validator",
        "harmony staking",
    ],
}

# "owner/name" slugs iterated by _monitor_own_repos().
OWN_REPOS = [
    "delimit-ai/delimit-mcp-server",
    "delimit-ai/delimit-action",
    "delimit-ai/delimit-quickstart",
]

# GitHub logins that _monitor_own_repos() tests fork owners against.
# NOTE(review): presumably matches are skipped as internal activity -- the
# branch body is outside the visible part of the file; confirm.
INTERNAL_USERS = {"infracore", "crypttrx"}
285
+
286
+
287
+ # -----------------------------------------------------------------------
288
+ # JSONL persistence helpers
289
+ # -----------------------------------------------------------------------
290
+
291
def _load_known_fingerprints() -> set:
    """Load all fingerprints from the targets file for dedup.

    Blank lines, lines that are not valid JSON, lines whose JSON value is
    not an object, and entries without a non-empty string ``fingerprint``
    are skipped individually, so one bad line cannot hide the fingerprints
    that follow it.

    Returns:
        Set of fingerprint strings; empty when the file is missing or
        cannot be read.
    """
    fps: set = set()
    if not TARGETS_FILE.exists():
        return fps
    try:
        # Stream the file line by line; the store is append-only and grows.
        with open(TARGETS_FILE) as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    entry = json.loads(line)
                except (json.JSONDecodeError, ValueError):
                    continue
                # A valid-JSON scalar or array has no .get(); skip it rather
                # than abort the whole load.
                if not isinstance(entry, dict):
                    continue
                fp = entry.get("fingerprint", "")
                if isinstance(fp, str) and fp:
                    fps.add(fp)
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: keep what was read, but do not hide the failure.
        logger.warning("Failed to read social targets file: %s", e)
    return fps
310
+
311
+
312
def _append_target(target: Dict[str, Any]) -> None:
    """Write ``target`` as one JSON line at the end of TARGETS_FILE.

    The containing directory is created on demand.
    """
    store = TARGETS_FILE
    store.parent.mkdir(parents=True, exist_ok=True)
    with open(store, "a") as sink:
        sink.write(f"{json.dumps(target)}\n")
317
+
318
+
319
+ # -----------------------------------------------------------------------
320
+ # Venture routing
321
+ # -----------------------------------------------------------------------
322
+
323
def _route_venture(text: str) -> tuple:
    """Pick the venture whose topics best match ``text``.

    Matching is a case-insensitive substring test. A venture is skipped
    when any of its exclude terms occurs in the text. The venture with the
    most matching topics wins; on a tie the one listed first in
    VENTURE_CONFIG is kept.

    Returns:
        ``(venture, confidence, rationale)``, or
        ``(None, 0.0, "No venture topic match")`` when nothing matches.
    """
    haystack = text.lower()
    winner = None
    winner_hits: List[str] = []

    for name, spec in VENTURE_CONFIG.items():
        # Exclusion check comes first: an excluded venture never competes.
        excluded = False
        for term in spec.get("exclude_terms", []):
            if term.lower() in haystack:
                excluded = True
                break
        if excluded:
            continue

        hits = [topic for topic in spec["topics"] if topic.lower() in haystack]
        # Strictly greater: earlier ventures keep the lead on ties.
        if len(hits) > len(winner_hits):
            winner = name
            winner_hits = hits

    if not winner:
        return None, 0.0, "No venture topic match"

    hit_count = len(winner_hits)
    confidence = min(0.95, 0.5 + (hit_count * 0.15))
    rationale = f"Matched topics: {', '.join(winner_hits[:3])}"
    return winner, confidence, rationale
347
+
348
+
349
+ def _classify_target(text: str, author_followers: int = 0) -> str:
350
+ """Classify a target as reply, strategic, or both."""
351
+ is_question = any(q in text.lower() for q in ["?", "how do", "anyone", "looking for", "recommendations"])
352
+ high_reach = author_followers > 5000
353
+
354
+ if is_question and high_reach:
355
+ return "both"
356
+ if high_reach:
357
+ return "strategic"
358
+ if is_question:
359
+ return "reply"
360
+ return "reply"
361
+
362
+
363
+ # -----------------------------------------------------------------------
364
+ # xAI API key resolution
365
+ # -----------------------------------------------------------------------
366
+
367
+ def _get_xai_api_key() -> str:
368
+ """Resolve xAI API key from env or .mcp.json."""
369
+ key = os.environ.get("XAI_API_KEY", "")
370
+ if key:
371
+ return key
372
+ # Try .mcp.json
373
+ mcp_path = Path.home() / ".mcp.json"
374
+ if not mcp_path.exists():
375
+ mcp_path = Path("/root/.mcp.json")
376
+ if mcp_path.exists():
377
+ try:
378
+ cfg = json.loads(mcp_path.read_text())
379
+ key = (cfg.get("mcpServers", {})
380
+ .get("xai", {})
381
+ .get("env", {})
382
+ .get("XAI_API_KEY", ""))
383
+ if key:
384
+ return key
385
+ # Also check delimit server env
386
+ key = (cfg.get("mcpServers", {})
387
+ .get("delimit", {})
388
+ .get("env", {})
389
+ .get("XAI_API_KEY", ""))
390
+ except Exception:
391
+ pass
392
+ return key
393
+
394
+
395
+ # -----------------------------------------------------------------------
396
+ # Platform scanners
397
+ # -----------------------------------------------------------------------
398
+
399
def _scan_x_twttr(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Search X/Twitter through the RapidAPI Twttr241 endpoint.

    The first five queries are OR-ed into one "Latest" search. Each tweet
    in the response is routed to a venture; unrouted, incomplete or
    already-known tweets are skipped. New fingerprints are added to
    ``known_fps`` in place.

    Returns:
        Up to ``limit`` target dicts. An empty list is returned when no
        RapidAPI key is configured or when the request/parsing fails
        (the failure is logged as a warning).
    """
    rapid_key = _get_rapidapi_key()
    if not rapid_key:
        return []

    found: List[Dict] = []
    search_terms = urllib.parse.quote(" OR ".join(queries[:5]))
    request = urllib.request.Request(
        f"https://twitter241.p.rapidapi.com/search-v2?query={search_terms}&type=Latest&count={limit}",
        headers={
            "X-RapidAPI-Key": rapid_key,
            "X-RapidAPI-Host": "twitter241.p.rapidapi.com",
            "User-Agent": "Delimit/3.11.0",
        },
    )

    try:
        with urllib.request.urlopen(request, timeout=20) as resp:
            payload = json.loads(resp.read())

        # Tweets live at:
        # result.timeline.instructions[].entries[].content.itemContent.tweet_results.result
        timeline = payload.get("result", {}).get("timeline", {})
        for instruction in timeline.get("instructions", []):
            for entry in instruction.get("entries", []):
                item_content = entry.get("content", {}).get("itemContent", {})
                tweet = item_content.get("tweet_results", {}).get("result", {})
                if not tweet:
                    continue

                tweet_legacy = tweet.get("legacy", {})
                user_result = tweet.get("core", {}).get("user_results", {}).get("result", {})
                author_legacy = user_result.get("legacy", {})

                tweet_id = tweet_legacy.get("id_str", "")
                handle = author_legacy.get("screen_name", "")
                follower_count = author_legacy.get("followers_count", 0) or 0
                body = tweet_legacy.get("full_text", "")

                if not (tweet_id and body):
                    continue

                fingerprint = f"x:{tweet_id}"
                if fingerprint in known_fps:
                    continue

                venture, confidence, rationale = _route_venture(body)
                if not venture:
                    continue

                if handle:
                    author = f"@{handle}"
                    link = f"https://x.com/{handle}/status/{tweet_id}"
                else:
                    author = ""
                    link = f"https://x.com/i/status/{tweet_id}"

                found.append({
                    "fingerprint": fingerprint,
                    "platform": "x",
                    "source_id": tweet_id,
                    "canonical_url": link,
                    "author": author,
                    "author_followers": follower_count,
                    "content_snippet": body[:300],
                    "venture": venture,
                    "classification": _classify_target(body, follower_count),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fingerprint)

                if len(found) >= limit:
                    break
            # Propagate the inner stop to the instruction loop as well.
            if len(found) >= limit:
                break

    except Exception as e:
        logger.warning("Twttr241 scan failed: %s", e)
        return []

    return found
497
+
498
+
499
def _scan_x(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan X/Twitter using the configured provider, falling back to xAI x_search.

    When the configured provider is "twttr241" (the default) or empty, the
    Twttr241 scanner runs first and its results are returned if non-empty.
    Otherwise -- or when Twttr241 yields nothing -- the xAI Responses API is
    asked to search X and return a JSON array of posts.

    Because the xAI results are model-generated, every item is validated
    individually: a malformed item is skipped instead of aborting the batch.

    Args:
        queries: Search phrases; only the first five are used.
        limit: Maximum number of posts to request and keep.
        known_fps: Fingerprints already seen; updated in place.
        config: Effective config; ``platforms.x.provider`` selects the provider.

    Returns:
        List of target dicts. API/connection failures are reported as
        ``{"error": ..., "platform": "x"}`` items in the list.
    """
    platform_config = (config or {}).get("platforms", {}).get("x", {})
    provider = platform_config.get("provider", "twttr241")

    # Try Twttr241 first if configured (or default)
    if provider in ("twttr241", ""):
        twttr_targets = _scan_x_twttr(queries, limit, known_fps)
        if twttr_targets:
            return twttr_targets

    # Fallback or explicit xAI provider: xAI Responses API with x_search
    api_key = _get_xai_api_key()
    if not api_key:
        return [{"error": "No X scanner available (Twttr241 failed, XAI_API_KEY not configured)", "platform": "x"}]

    def _first_present(record: Dict, keys, default):
        """Return the first value in ``record`` that is neither None nor ""."""
        for key in keys:
            value = record.get(key)
            if value is not None and value != "":
                return value
        return default

    targets: List[Dict] = []
    # Batch queries to avoid too many API calls
    combined_query = " OR ".join(f'"{q}"' for q in queries[:5])
    prompt = (
        f"Search X/Twitter for recent posts about: {combined_query}. "
        f"Find up to {limit} posts from the last 24 hours that are asking questions, "
        f"sharing problems, or discussing these topics. "
        f"For each post, return the tweet ID, author handle, author follower count, "
        f"and a snippet of the content. Format as JSON array."
    )

    # NOTE(review): the Responses-style endpoint is usually documented with an
    # "input" field rather than "messages" -- confirm this payload shape
    # against the xAI API reference.
    data = json.dumps({
        "model": "grok-4-0709",
        "tools": [{"type": "x_search"}],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 4096,
    }).encode()

    req = urllib.request.Request(
        "https://api.x.ai/v1/responses",
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": "Delimit/3.11.0",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read())

        # Parse the response -- xAI Responses API returns output array
        response_text = ""
        if isinstance(result, dict):
            # Responses API format: result has "output" array
            for item in result.get("output") or []:
                if not isinstance(item, dict) or item.get("type") != "message":
                    continue
                for content in item.get("content") or []:
                    if isinstance(content, dict) and content.get("type") == "output_text":
                        response_text = content.get("text", "")
                        break
            # Fallback: chat completions format
            if not response_text:
                for choice in result.get("choices") or []:
                    msg = choice.get("message", {}) if isinstance(choice, dict) else {}
                    response_text = msg.get("content", "") if isinstance(msg, dict) else ""
                    if response_text:
                        break

        if not response_text:
            logger.warning("xAI returned empty response for social targeting")
            return targets

        # Try to extract JSON from response
        parsed_tweets = _extract_json_array(response_text)
        if not isinstance(parsed_tweets, list):
            parsed_tweets = []

        for tweet in parsed_tweets[:limit]:
            if not isinstance(tweet, dict):
                continue

            # A null id must not become the literal string "None".
            tweet_id = str(_first_present(tweet, ("id", "tweet_id"), "")).strip()
            if not tweet_id:
                continue
            fp = f"x:{tweet_id}"
            if fp in known_fps:
                continue

            author = _first_present(tweet, ("author", "handle", "username"), "")
            author = author.strip() if isinstance(author, str) else ""
            if author and not author.startswith("@"):
                author = f"@{author}"

            snippet = _first_present(tweet, ("content", "text", "snippet"), "")
            if not isinstance(snippet, str):
                snippet = str(snippet)

            # Follower counts may come back as null, "12.5K", etc.
            try:
                followers = int(_first_present(tweet, ("followers", "author_followers", "follower_count"), 0))
            except (TypeError, ValueError, OverflowError):
                followers = 0

            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            target = {
                "fingerprint": fp,
                "platform": "x",
                "source_id": tweet_id,
                "canonical_url": f"https://x.com/{author.lstrip('@')}/status/{tweet_id}" if author else f"https://x.com/i/status/{tweet_id}",
                "author": author,
                "author_followers": followers,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, followers),
                "confidence": confidence,
                "rationale": rationale,
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            }
            targets.append(target)
            known_fps.add(fp)

    except urllib.error.HTTPError as e:
        logger.error("xAI API error: %s %s", e.code, e.reason)
        # Try to read error body for details
        try:
            err_body = e.read().decode()[:200]
            logger.error("xAI error body: %s", err_body)
        except Exception:
            pass
        targets.append({"error": f"xAI API error: {e.code} {e.reason}", "platform": "x"})
    except urllib.error.URLError as e:
        logger.error("xAI connection error: %s", e.reason)
        targets.append({"error": f"xAI connection error: {e.reason}", "platform": "x"})
    except Exception as e:
        logger.error("xAI scan failed: %s", e)
        targets.append({"error": f"xAI scan error: {e}", "platform": "x"})

    return targets
625
+
626
+
627
def _scan_hn(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Search recent Hacker News stories through the Algolia API.

    At most the first three queries are run, newest stories first. A story
    becomes a target when its title routes to a venture and its fingerprint
    is new; ``known_fps`` is updated in place. A failing query is logged
    and skipped.

    Returns:
        Up to ``limit`` target dicts. Story points stand in for author reach.
    """
    found: List[Dict] = []

    for term in queries[:3]:  # cap the number of API calls
        search_url = (
            "https://hn.algolia.com/api/v1/search_by_date"
            f"?tags=story&query={urllib.parse.quote(term)}&hitsPerPage={limit}"
        )
        request = urllib.request.Request(search_url, headers={"User-Agent": "Delimit/3.11.0"})
        try:
            with urllib.request.urlopen(request, timeout=15) as resp:
                payload = json.loads(resp.read())

            for hit in payload.get("hits", [])[:limit]:
                story_id = str(hit.get("objectID", ""))
                fingerprint = f"hn:{story_id}"
                if not story_id or fingerprint in known_fps:
                    continue

                headline = hit.get("title", "")
                venture, confidence, rationale = _route_venture(headline)
                if not venture:
                    continue

                score = hit.get("points", 0) or 0
                found.append({
                    "fingerprint": fingerprint,
                    "platform": "hn",
                    "source_id": story_id,
                    "canonical_url": f"https://news.ycombinator.com/item?id={story_id}",
                    "author": hit.get("author", ""),
                    "author_followers": score,  # points double as a reach proxy
                    "content_snippet": headline[:300],
                    "venture": venture,
                    "classification": _classify_target(headline, score),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fingerprint)

                if len(found) >= limit:
                    break

        except Exception as e:
            logger.error("HN scan error for query '%s': %s", term, e)
            continue

        if len(found) >= limit:
            break

    return found
687
+
688
+
689
def _scan_devto(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Look for recent Dev.to articles that match venture topics.

    Each of the first three queries is squashed into a tag (lower-cased,
    spaces and hyphens removed, at most 20 characters) and used for a
    tag-filtered article listing. Articles whose title/description route
    to a venture and whose fingerprint is new become targets;
    ``known_fps`` is updated in place. A failing tag is logged and skipped.

    Returns:
        Up to ``limit`` target dicts. Reaction counts stand in for author reach.
    """
    found: List[Dict] = []

    for query in queries[:3]:
        # The Dev.to listing endpoint filters by tag, not free text.
        tag = query.lower().replace(" ", "").replace("-", "")[:20]
        listing_url = f"https://dev.to/api/articles?tag={urllib.parse.quote(tag)}&top=1&per_page={limit}"
        request = urllib.request.Request(listing_url, headers={"User-Agent": "Delimit/3.11.0"})
        try:
            with urllib.request.urlopen(request, timeout=15) as resp:
                articles = json.loads(resp.read())

            if not isinstance(articles, list):
                continue

            for article in articles[:limit]:
                article_id = str(article.get("id", ""))
                fingerprint = f"devto:{article_id}"
                if not article_id or fingerprint in known_fps:
                    continue

                username = article.get("user", {}).get("username", "")
                reaction_count = article.get("positive_reactions_count", 0) or 0
                summary = f"{article.get('title', '')} - {article.get('description', '')}"

                venture, confidence, rationale = _route_venture(summary)
                if not venture:
                    continue

                fallback_url = f"https://dev.to/{username}/{article.get('slug', article_id)}"
                found.append({
                    "fingerprint": fingerprint,
                    "platform": "devto",
                    "source_id": article_id,
                    "canonical_url": article.get("url", fallback_url),
                    "author": username,
                    "author_followers": reaction_count,
                    "content_snippet": summary[:300],
                    "venture": venture,
                    "classification": _classify_target(summary, reaction_count),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fingerprint)

                if len(found) >= limit:
                    break

        except Exception as e:
            logger.error("Dev.to scan error for tag '%s': %s", tag, e)
            continue

        if len(found) >= limit:
            break

    return found
754
+
755
+
756
def _gh_api(endpoint: str) -> Any:
    """Run ``gh api <endpoint>`` and return the decoded JSON response.

    Returns None -- after logging -- when the command exits non-zero,
    exceeds the 30 second timeout, prints output that is not valid JSON,
    or when the ``gh`` executable cannot be found.
    """
    command = ["gh", "api", endpoint]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
        if completed.returncode == 0:
            return json.loads(completed.stdout)
        logger.warning("gh api %s failed: %s", endpoint, completed.stderr[:200])
    except subprocess.TimeoutExpired:
        logger.error("gh api %s timed out", endpoint)
    except (json.JSONDecodeError, FileNotFoundError) as e:
        logger.error("gh api %s error: %s", endpoint, e)
    return None
775
+
776
+
777
def _scan_github(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan GitHub for repos and issues matching venture topics via the gh CLI.

    Phase 1 searches repositories with the first five queries (most recently
    updated first); phase 2 searches issues with the first three (newest
    first). Items that route to a venture and have a new fingerprint become
    targets; ``known_fps`` is updated in place.

    The GitHub REST schema marks several fields as nullable (``owner``,
    ``user``, ``description``, ``body``), so every nested lookup is
    null-safe -- a single null must not abort the whole scan.

    Args:
        queries: Search phrases.
        limit: Maximum number of targets to return overall.
        known_fps: Fingerprints already seen; updated in place.
        config: Accepted for signature parity with the other scanners;
            not used by this function.

    Returns:
        Up to ``limit`` target dicts. Stars (repos) or reaction totals
        (issues) stand in for author reach.
    """
    targets: List[Dict] = []
    per_page = min(limit, 10)

    # Phase 1: Repository search
    for query in queries[:5]:
        if len(targets) >= limit:
            break
        encoded_q = urllib.parse.quote(query)
        endpoint = f"search/repositories?q={encoded_q}&sort=updated&per_page={per_page}"
        data = _gh_api(endpoint)
        if not data or not isinstance(data, dict):
            continue

        for repo in data.get("items") or []:
            if not isinstance(repo, dict):
                continue
            full_name = repo.get("full_name") or ""
            fp = f"github:repo:{full_name}"
            if fp in known_fps or not full_name:
                continue

            stars = repo.get("stargazers_count", 0) or 0
            description = repo.get("description", "") or ""

            # Skip noise: 0 stars and no description
            if stars == 0 and not description:
                continue

            snippet = f"{full_name}: {description}"
            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            target = {
                "fingerprint": fp,
                "platform": "github",
                "source_id": full_name,
                "canonical_url": repo.get("html_url") or f"https://github.com/{full_name}",
                # owner is nullable in the search schema
                "author": (repo.get("owner") or {}).get("login", "") or "",
                "author_followers": stars,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, stars),
                "confidence": confidence,
                "rationale": f"repo search: {rationale}",
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            }
            targets.append(target)
            known_fps.add(fp)

            if len(targets) >= limit:
                break

    # Phase 2: Issue/discussion search
    for query in queries[:3]:
        if len(targets) >= limit:
            break
        encoded_q = urllib.parse.quote(query)
        endpoint = f"search/issues?q={encoded_q}&sort=created&per_page={per_page}"
        data = _gh_api(endpoint)
        if not data or not isinstance(data, dict):
            continue

        for issue in data.get("items") or []:
            if not isinstance(issue, dict):
                continue
            number = issue.get("number") or ""
            html_url = issue.get("html_url") or ""
            # Extract repo from URL: https://github.com/owner/repo/issues/123
            repo_name = "/".join(html_url.split("/")[3:5]) if html_url else ""
            fp = f"github:issue:{repo_name}:{number}"
            if fp in known_fps or not number:
                continue

            title = issue.get("title") or ""
            body = (issue.get("body") or "")[:200]
            # user is null for some issues; .get() on None would raise.
            author = (issue.get("user") or {}).get("login", "") or ""
            reactions = (issue.get("reactions") or {}).get("total_count", 0) or 0
            snippet = f"{title} {body}".strip()

            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            target = {
                "fingerprint": fp,
                "platform": "github",
                "source_id": f"{repo_name}#{number}",
                "canonical_url": html_url,
                "author": author,
                "author_followers": reactions,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, reactions),
                "confidence": confidence,
                "rationale": f"issue search: {rationale}",
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            }
            targets.append(target)
            known_fps.add(fp)

            if len(targets) >= limit:
                break

    return targets
883
+
884
+
885
def _own_repo_target(fingerprint, source_id, url, author, engagement,
                     snippet, classification, confidence, rationale) -> Dict:
    """Assemble the target record for one engagement event on an owned repo."""
    return {
        "fingerprint": fingerprint,
        "platform": "github",
        "source_id": source_id,
        "canonical_url": url,
        "author": author,
        "author_followers": engagement,
        "content_snippet": snippet,
        "venture": "delimit",
        "classification": classification,
        "confidence": confidence,
        "rationale": rationale,
        "manual_check_needed": False,
        "first_seen": datetime.now(timezone.utc).isoformat(),
        "status": "new",
    }


def _monitor_own_repos(known_fps: set) -> List[Dict]:
    """Collect external engagement (forks, stars, issues, PRs) on ``OWN_REPOS``.

    For each repository three ``_gh_api`` listings are read. Entries whose
    login is empty or listed in ``INTERNAL_USERS`` are ignored, as are entries
    whose fingerprint is already in ``known_fps``.

    Args:
        known_fps: Fingerprints already seen. Mutated: the fingerprint of
            every returned target is added.

    Returns:
        Target dicts for the newly observed engagement, all attributed to
        the ``delimit`` venture.
    """
    targets: List[Dict] = []

    for repo in OWN_REPOS:
        short_name = repo.split('/')[-1]

        # Check forks
        forks_data = _gh_api(f"repos/{repo}/forks?sort=newest&per_page=10")
        if isinstance(forks_data, list):
            for fork in forks_data:
                # "or {}" tolerates an explicit JSON null for the nested object.
                user = (fork.get("owner") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:fork:{user}:{short_name}"
                if fp in known_fps:
                    continue

                targets.append(_own_repo_target(
                    fp,
                    fork.get("full_name", ""),
                    fork.get("html_url", ""),
                    user,
                    fork.get("stargazers_count", 0) or 0,
                    f"{user} forked {repo}",
                    "strategic",
                    0.7,
                    f"External fork of {repo}",
                ))
                known_fps.add(fp)

        # Check stargazers (with timestamps)
        # NOTE(review): the "-H='Accept: ...'" fragment below is part of the URL
        # query string, not a request header. It was presumably meant to request
        # the star+json media type -- confirm how _gh_api sends headers.
        stars_data = _gh_api(
            f"repos/{repo}/stargazers?per_page=10"
            "&-H='Accept: application/vnd.github.star+json'"
        )
        # gh api may return list of user objects or star+json objects
        if isinstance(stars_data, list):
            for star in stars_data:
                # star+json format has "user" key; plain format is the user directly
                user_obj = (star.get("user") or star) if isinstance(star, dict) else {}
                user = user_obj.get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:star:{user}:{short_name}"
                if fp in known_fps:
                    continue

                targets.append(_own_repo_target(
                    fp,
                    f"{user}/star/{repo}",
                    f"https://github.com/{user}",
                    user,
                    0,
                    f"{user} starred {repo}",
                    "strategic",
                    0.6,
                    f"External star on {repo}",
                ))
                known_fps.add(fp)

        # Check issues and PRs from external users
        issues_data = _gh_api(f"repos/{repo}/issues?state=all&sort=created&direction=desc&per_page=10")
        if isinstance(issues_data, list):
            for issue in issues_data:
                user = (issue.get("user") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                number = issue.get("number", "")
                fp = f"github:issue:{repo}:{number}"
                if fp in known_fps or not number:
                    continue

                title = issue.get("title", "")
                # The listing mixes issues and PRs; PR entries carry a "pull_request" key.
                kind = "PR" if "pull_request" in issue else "issue"

                targets.append(_own_repo_target(
                    fp,
                    f"{repo}#{number}",
                    issue.get("html_url", ""),
                    user,
                    (issue.get("reactions") or {}).get("total_count", 0) or 0,
                    f"{user} opened {kind}: {title}"[:300],
                    "reply",
                    0.8,
                    f"External {kind} on {repo}",
                ))
                known_fps.add(fp)

    return targets
989
+
990
+
991
def _get_rapidapi_key() -> str:
    """Return the RapidAPI key from the first source that holds a non-empty one.

    Sources, in order:
      1. ``~/.delimit/secrets/rapidapi-reddit.json`` -- ``encrypted_value``
         (base64-decoded) if present, otherwise ``value``.
      2. A ``RAPIDAPI_KEY=`` line in the wire-report ``.wr_env`` file.
      3. The ``RAPIDAPI_KEY`` environment variable.

    An unreadable, malformed, or empty source is skipped rather than ending
    the lookup, so a stale secrets file cannot mask a valid environment key.

    Returns:
        The key, or ``""`` when no source provides one.
    """
    import base64

    # Primary: delimit secrets broker
    secrets_file = Path.home() / ".delimit" / "secrets" / "rapidapi-reddit.json"
    if secrets_file.exists():
        try:
            data = json.loads(secrets_file.read_text())
            encoded = data.get("encrypted_value", "")
            # NOTE: despite the field name, the stored value is only base64-encoded.
            key = base64.b64decode(encoded).decode() if encoded else data.get("value", "")
            if key:
                return key
        except (OSError, ValueError, AttributeError, TypeError):
            # Best effort: fall through to the next source.
            pass

    # Fallback: wire report env
    wr_env = Path("/home/jamsons/ventures/wire-report/.wr_env")
    if wr_env.exists():
        try:
            for line in wr_env.read_text().splitlines():
                if line.startswith("RAPIDAPI_KEY="):
                    key = line.split("=", 1)[1].strip()
                    if key:
                        return key
                    break
        except (OSError, ValueError):
            pass

    return os.environ.get("RAPIDAPI_KEY", "")
1015
+
1016
+
1017
# Venture name -> subreddits scanned on its behalf.
# Budget: stay below 30 subreddits in total so a scan (~1 request per
# subreddit) remains well under rate limits.
VENTURE_SUBREDDITS = {
    "delimit": [
        "ClaudeAI",
        "vibecoding",
        "devops",
        "programming",
        "AI_Agents",
        "ContextEngineering",
        "cursor",
        "LocalLLaMA",
        "SaaS",
        "opensource",
        # "ChatGPTCoding" is left out: it requires high karma to post.
    ],
    "domainvested": ["Domains", "flipping", "Entrepreneur", "SideProject"],
    "wirereport": ["sportsbook", "sportsbetting"],
    "livetube": ["Twitch", "livestreaming"],
    "stakeone": ["harmony_one", "CryptoCurrency", "defi"],
}
1039
+
1040
+
1041
# URL of the Reddit fetch proxy; override with the DELIMIT_REDDIT_PROXY
# environment variable. Per the original author's note, the default points at
# an internal-only SSH tunnel to a residential IP on the founder's gateway
# server; external users are expected to configure their own Reddit access.
REDDIT_PROXY = os.getenv("DELIMIT_REDDIT_PROXY", "http://127.0.0.1:4819/reddit-fetch")
1045
+
1046
+
1047
def _scan_reddit(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan Reddit through the proxy or RapidAPI, depending on configuration.

    The provider is read from ``config["platforms"]["reddit"]["provider"]``:
      - ``"rapidapi"``: use RapidAPI directly.
      - anything else (default ``"proxy"``): use the proxy when its health
        check passes, otherwise fall back to RapidAPI.

    When RapidAPI is needed but no key is available, manual-check
    placeholders are returned instead.

    ``config["subreddits"]`` (venture -> list of subreddit names) is merged
    into ``VENTURE_SUBREDDITS`` for the duration of this call only.

    Args:
        queries: Search phrases handed to the underlying scanner.
        limit: Maximum number of targets to return.
        known_fps: Fingerprints already seen; the scanners add to it.
        config: Optional scan configuration as described above.

    Returns:
        Target dicts from the selected scanner, or manual-check placeholders.
    """
    settings = config or {}
    provider = settings.get("platforms", {}).get("reddit", {}).get("provider", "proxy")

    # Build the merged mapping first, so a malformed config entry fails
    # before the module-level dict is touched.
    config_subreddits = settings.get("subreddits", {})
    merged: Dict = {}
    if config_subreddits:
        merged = dict(VENTURE_SUBREDDITS)
        for venture, subs in config_subreddits.items():
            extra = [subs] if isinstance(subs, str) else list(subs)
            # dict.fromkeys de-duplicates while keeping a stable order:
            # defaults first, then configured additions.
            merged[venture] = list(dict.fromkeys(merged.get(venture, []) + extra))

    def _via_rapidapi(warn: bool) -> List[Dict]:
        """Scan through RapidAPI, or return placeholders when no key exists."""
        api_key = _get_rapidapi_key()
        if not api_key:
            if warn:
                logger.warning("No Reddit access -- proxy down, no RapidAPI key")
            return _manual_check_targets("reddit", queries, limit)
        return _scan_reddit_rapidapi(queries, limit, known_fps, api_key)

    # The scanners read the module-level dict, so the merge is applied there
    # and undone in ``finally``. The original author notes this is safe because
    # scans are single-threaded.
    original_subs = dict(VENTURE_SUBREDDITS)
    try:
        if config_subreddits:
            VENTURE_SUBREDDITS.update(merged)

        if provider == "rapidapi":
            return _via_rapidapi(warn=False)

        # Default: try proxy first, fall back to RapidAPI
        if not _test_reddit_proxy():
            return _via_rapidapi(warn=True)

        return _scan_reddit_proxy(queries, limit, known_fps)
    finally:
        # Restore original subreddits if we merged
        if config_subreddits:
            VENTURE_SUBREDDITS.clear()
            VENTURE_SUBREDDITS.update(original_subs)
1095
+
1096
+
1097
def _test_reddit_proxy() -> bool:
    """Probe the proxy's ``/health`` endpoint.

    The health URL is ``REDDIT_PROXY`` with its trailing ``/reddit-fetch``
    segment replaced by ``/health``.

    Returns:
        The ``reddit_proxy`` field of the JSON health response, or ``False``
        when the request or the parsing fails for any reason.
    """
    try:
        base_url = REDDIT_PROXY.rsplit('/reddit-fetch', 1)[0]
        health_request = urllib.request.Request(
            f"{base_url}/health",
            headers={"User-Agent": "Delimit"},
        )
        with urllib.request.urlopen(health_request, timeout=3) as response:
            payload = json.loads(response.read())
        return payload.get("reddit_proxy", False)
    except Exception:
        # Any failure (connection, timeout, bad JSON) means "not available".
        return False
1106
+
1107
+
1108
def _scan_reddit_proxy(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan the subreddits in ``VENTURE_SUBREDDITS`` through ``REDDIT_PROXY``.

    Each subreddit is fetched once per sort order (``new``, then ``hot``).
    Posts are routed with ``_route_venture``. Posts that match no venture, or
    that have both a score below 1 and fewer than 2 comments, are dropped.

    Args:
        queries: Unused by this scanner (subreddit listings only).
        limit: Maximum number of targets to return; also caps the page size
            at ``min(limit, 10)``.
        known_fps: Fingerprints already seen. Mutated: the fingerprint of
            every returned target is added.

    Returns:
        The newly discovered target dicts.
    """
    targets: List[Dict] = []
    scanned_subs: set = set()
    page_size = min(limit, 10)

    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                return targets
            if sub in scanned_subs:
                # Already covered under another venture: skip this one only,
                # keep going with the rest of the list.
                continue
            scanned_subs.add(sub)

            # Scan both /new and /hot to catch high-engagement older posts
            for sort in ("new", "hot"):
                if len(targets) >= limit:
                    break
                reddit_url = f"https://www.reddit.com/r/{sub}/{sort}.json?limit={page_size}"
                proxy_url = f"{REDDIT_PROXY}?url={urllib.parse.quote(reddit_url, safe='')}"
                req = urllib.request.Request(proxy_url, headers={"User-Agent": "Delimit/3.11.0"})
                try:
                    with urllib.request.urlopen(req, timeout=15) as resp:
                        result = json.loads(resp.read())

                    posts = result.get("data", {}).get("children", [])
                    for post_wrapper in posts:
                        post = post_wrapper.get("data", {})
                        post_id = post.get("id", "")
                        fp = f"reddit:{post_id}"
                        if fp in known_fps or not post_id:
                            continue

                        title = post.get("title", "")
                        # "or" guard: a null selftext must not abort the listing.
                        selftext = (post.get("selftext") or "")[:200]
                        author = post.get("author", "")
                        score = post.get("score", 0) or 0
                        num_comments = post.get("num_comments", 0) or 0
                        permalink = post.get("permalink", "")
                        snippet = f"{title} {selftext}".strip()

                        venture_match, confidence, rationale = _route_venture(snippet)
                        if not venture_match:
                            continue
                        # Skip low-engagement posts
                        if score < 1 and num_comments < 2:
                            continue

                        targets.append({
                            "fingerprint": fp,
                            "platform": "reddit",
                            "source_id": post_id,
                            "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
                            "author": f"u/{author}",
                            "author_followers": score,
                            "content_snippet": snippet[:300],
                            "venture": venture_match,
                            "classification": _classify_target(snippet, num_comments),
                            "confidence": confidence,
                            "rationale": f"r/{sub}/{sort}: {rationale}",
                            "manual_check_needed": False,
                            "first_seen": datetime.now(timezone.utc).isoformat(),
                            "status": "new",
                        })
                        known_fps.add(fp)

                        if len(targets) >= limit:
                            break

                except Exception as e:
                    # One failing listing must not stop the remaining ones.
                    logger.error("Reddit proxy scan error for r/%s/%s: %s", sub, sort, e)
                    continue

    return targets
1179
+
1180
+
1181
def _rapidapi_fetch_posts(url: str, api_key: str) -> Optional[list]:
    """GET a Reddit34 endpoint and return its post list.

    Returns ``None`` when the response's ``success`` field is falsy. Network
    and parsing errors propagate to the caller.
    """
    request = urllib.request.Request(
        url,
        headers={
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "reddit34.p.rapidapi.com",
            "User-Agent": "Delimit/3.11.0",
        },
    )
    with urllib.request.urlopen(request, timeout=20) as resp:
        result = json.loads(resp.read())
    if not result.get("success"):
        return None
    return result.get("data", {}).get("posts", [])


def _rapidapi_collect_posts(posts: list, origin: str, targets: List[Dict], limit: int, known_fps: set) -> None:
    """Append a target to ``targets`` for each qualifying post in ``posts``.

    ``origin`` prefixes the rationale (for example ``r/<sub>`` or
    ``search:<query>``). Stops once ``targets`` holds ``limit`` entries.
    Mutates ``targets`` and ``known_fps``.
    """
    for post_wrapper in posts:
        # Posts may arrive wrapped in a {"data": {...}} envelope or bare.
        post = post_wrapper.get("data", post_wrapper)
        post_id = post.get("id", "")
        fp = f"reddit:{post_id}"
        if fp in known_fps or not post_id:
            continue

        title = post.get("title", "")
        # "or" guard: a null selftext must not abort the batch.
        selftext = (post.get("selftext") or "")[:200]
        author = post.get("author", "")
        score = post.get("score", 0) or 0
        num_comments = post.get("num_comments", 0) or 0
        permalink = post.get("permalink", "")
        snippet = f"{title} {selftext}".strip()

        venture_match, confidence, rationale = _route_venture(snippet)
        if not venture_match:
            continue

        # Skip low-engagement posts
        if score < 1 and num_comments < 2:
            continue

        targets.append({
            "fingerprint": fp,
            "platform": "reddit",
            "source_id": post_id,
            "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
            "author": f"u/{author}",
            "author_followers": score,
            "content_snippet": snippet[:300],
            "venture": venture_match,
            "classification": _classify_target(snippet, num_comments),
            "confidence": confidence,
            "rationale": f"{origin}: {rationale}",
            "manual_check_needed": False,
            "first_seen": datetime.now(timezone.utc).isoformat(),
            "status": "new",
        })
        known_fps.add(fp)

        if len(targets) >= limit:
            break


def _scan_reddit_rapidapi(queries: List[str], limit: int, known_fps: set, api_key: str) -> List[Dict]:
    """Fallback: scan Reddit via the RapidAPI Reddit34 service.

    Phase 1 lists new posts for every subreddit in ``VENTURE_SUBREDDITS``.
    Phase 2 runs a keyword search for the first three queries when the limit
    has not been reached. A failure on one subreddit or query is logged and
    the scan continues with the next.

    Args:
        queries: Search phrases; the first three are used in phase 2.
        limit: Maximum number of targets to return.
        known_fps: Fingerprints already seen. Mutated: the fingerprint of
            every returned target is added.
        api_key: RapidAPI key sent in the ``X-RapidAPI-Key`` header.

    Returns:
        The newly discovered target dicts.
    """
    targets: List[Dict] = []

    # Phase 1: scan subreddits mapped to ventures
    scanned_subs: set = set()
    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                break
            if sub in scanned_subs:
                # Already covered under another venture: skip this one only.
                continue
            scanned_subs.add(sub)

            url = f"https://reddit34.p.rapidapi.com/getPostsBySubreddit?subreddit={urllib.parse.quote(sub)}&sort=new&limit={min(limit, 10)}"
            try:
                posts = _rapidapi_fetch_posts(url, api_key)
                if posts is None:
                    logger.warning("Reddit34 returned success=false for r/%s", sub)
                    continue
                _rapidapi_collect_posts(posts, f"r/{sub}", targets, limit, known_fps)
            except Exception as e:
                logger.error("Reddit scan error for r/%s: %s", sub, e)
                continue

    # Phase 2: keyword search across all of Reddit via getSearchPosts
    for query in queries[:3]:  # Top 3 venture topic queries
        if len(targets) >= limit:
            break
        search_url = (
            f"https://reddit34.p.rapidapi.com/getSearchPosts"
            f"?query={urllib.parse.quote(query)}&sort=new&limit={min(limit, 5)}"
        )
        try:
            posts = _rapidapi_fetch_posts(search_url, api_key)
            if posts is None:
                continue
            _rapidapi_collect_posts(posts, f"search:{query}", targets, limit, known_fps)
        except Exception as e:
            logger.error("Reddit search error for '%s': %s", query, e)
            continue

    return targets
1335
+
1336
+
1337
def _manual_check_targets(platform: str, queries: List[str], limit: int) -> List[Dict]:
    """Build placeholder targets asking a human to search ``platform`` by hand.

    One placeholder is produced for each of the first three queries, then the
    list is truncated to ``limit``. The venture comes from ``_route_venture``
    and falls back to ``"unknown"``.

    Args:
        platform: Platform name embedded in the fingerprint and messages.
        queries: Search phrases; only the first three are used.
        limit: Maximum number of placeholders returned.

    Returns:
        Placeholder dicts flagged with ``manual_check_needed``.
    """
    placeholders = []
    for phrase in queries[:3]:
        # Only the routed venture is needed; the other two values are ignored.
        routed_venture, _, _ = _route_venture(phrase)
        placeholder = {
            "fingerprint": f"{platform}:manual:{phrase[:30]}",
            "platform": platform,
            "source_id": "",
            "canonical_url": "",
            "author": "",
            "author_followers": 0,
            "content_snippet": f"Search '{phrase}' on {platform}",
            "venture": routed_venture or "unknown",
            "classification": "reply",
            "confidence": 0.0,
            "rationale": f"Manual check needed -- {platform} cannot be scanned server-side",
            "manual_check_needed": True,
            "first_seen": datetime.now(timezone.utc).isoformat(),
            "status": "manual_check_needed",
        }
        placeholders.append(placeholder)
    return placeholders[:limit]
1359
+
1360
+
1361
+ # -----------------------------------------------------------------------
1362
+ # JSON extraction helper
1363
+ # -----------------------------------------------------------------------
1364
+
1365
def _extract_json_array(text: str) -> list:
    """Best-effort extraction of a JSON array from LLM response text.

    Resolution order:
      1. If the whole text is JSON, return it when it is a list, or wrap it
         in a one-element list when it is an object.
      2. Otherwise try to decode a JSON array at each ``[`` in turn and
         return the first one that parses. Brackets inside JSON strings are
         handled by the decoder, and a non-JSON bracket such as ``[note]``
         does not end the search.

    Args:
        text: Raw response text. Non-string input yields ``[]``.

    Returns:
        The extracted list, or ``[]`` when nothing parses.
    """
    if not isinstance(text, str):
        return []

    # Try the whole text first
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, ValueError):
        parsed = None
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        return [parsed]

    # Then try each "[" as the start of an embedded array.
    decoder = json.JSONDecoder()
    start = text.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value at ``start`` and ignores trailing text.
            candidate, _ = decoder.raw_decode(text, start)
        except (json.JSONDecodeError, ValueError):
            start = text.find("[", start + 1)
            continue
        return candidate
    return []
1391
+
1392
+
1393
+ # -----------------------------------------------------------------------
1394
+ # Public API
1395
+ # -----------------------------------------------------------------------
1396
+
1397
def scan_targets(
    platforms: List[str],
    ventures: Optional[List[str]] = None,
    keywords: Optional[List[str]] = None,
    limit: int = 10,
) -> List[Dict]:
    """Discover engagement opportunities across platforms.

    Args:
        platforms: Platform names to scan (x, hn, devto, reddit, github,
            namepros). Names are stripped and lower-cased; unknown names are
            logged and skipped.
        ventures: Restrict queries and results to these ventures. None = all
            ventures in ``VENTURE_CONFIG``.
        keywords: Extra search phrases appended to the venture topics.
        limit: Max targets per platform. The config ``scan_limit`` is only
            consulted when this is falsy.

    Returns:
        Target dicts from every scanned platform. A platform whose scan
        raises contributes a single ``{"error": ..., "platform": ...}`` entry.
        Targets that are neither errors nor manual-check placeholders are
        also persisted through ``_append_target``.
    """
    scan_config = _load_config()
    known_fps = _load_known_fingerprints()

    effective_limit = limit or scan_config.get("scan_limit", 10)

    # Gather venture topics, then any extra keywords.
    raw_queries: List[str] = []
    for venture_name in (ventures or list(VENTURE_CONFIG.keys())):
        venture_cfg = VENTURE_CONFIG.get(venture_name)
        if venture_cfg:
            raw_queries.extend(venture_cfg["topics"])
    if keywords:
        raw_queries.extend(keywords)

    # Case-insensitive de-duplication that keeps the first spelling seen.
    first_spelling: Dict[str, str] = {}
    for phrase in raw_queries:
        first_spelling.setdefault(phrase.lower(), phrase)
    unique_queries: List[str] = list(first_spelling.values())

    def _github_scan() -> List[Dict]:
        """Search results followed by engagement on our own repositories."""
        found = _scan_github(unique_queries, effective_limit, known_fps, config=scan_config)
        found.extend(_monitor_own_repos(known_fps))
        return found

    scanners = {
        "x": lambda: _scan_x(unique_queries, effective_limit, known_fps, config=scan_config),
        "hn": lambda: _scan_hn(unique_queries, effective_limit, known_fps),
        "devto": lambda: _scan_devto(unique_queries, effective_limit, known_fps),
        "reddit": lambda: _scan_reddit(unique_queries, effective_limit, known_fps, config=scan_config),
        "github": _github_scan,
        "namepros": lambda: _manual_check_targets("namepros", unique_queries, effective_limit),
    }

    platform_configs = scan_config.get("platforms", {})
    all_targets: List[Dict] = []

    for requested in platforms:
        platform = requested.strip().lower()

        # Platforms are enabled unless the config explicitly disables them.
        if not platform_configs.get(platform, {}).get("enabled", True):
            logger.info("Platform '%s' is disabled in config, skipping", platform)
            continue

        try:
            scanner = scanners.get(platform)
            if scanner is None:
                logger.warning("Unknown platform: %s", platform)
                continue

            found = scanner()

            # Filter by venture if specified
            if ventures:
                found = [t for t in found if t.get("venture") in ventures or t.get("error")]

            all_targets.extend(found)
        except Exception as e:
            logger.error("Platform scan error (%s): %s", platform, e)
            all_targets.append({"error": f"Scan failed for {platform}: {e}", "platform": platform})

    # Persist new non-error targets
    for entry in all_targets:
        if entry.get("error") or entry.get("manual_check_needed"):
            continue
        _append_target(entry)

    return all_targets
1484
+
1485
+
1486
def process_targets(
    targets: List[Dict],
    draft_replies: bool = False,
    create_ledger: bool = False,
) -> Dict[str, Any]:
    """Turn discovered targets into reply drafts and/or ledger item dicts.

    Error entries and manual-check placeholders are skipped.

    Args:
        targets: Target dicts as produced by scan_targets.
        draft_replies: When True, save a placeholder draft through
            ``ai.social.save_draft`` for targets classified "reply" or "both".
        create_ledger: When True, build a ledger item dict for targets
            classified "strategic" or "both".

    Returns:
        Dict with ``drafted`` (draft_id / fingerprint / venture per saved
        draft) and ``ledger_items`` lists.
    """
    drafted: List[Dict] = []
    ledger_items: List[Dict] = []
    outcome: Dict[str, Any] = {"drafted": drafted, "ledger_items": ledger_items}

    for target in targets:
        if target.get("error") or target.get("manual_check_needed"):
            continue

        kind = target.get("classification", "reply")

        if draft_replies and kind in ("reply", "both"):
            try:
                from ai.social import save_draft

                venture = target.get("venture", "delimit")
                link = target.get("canonical_url", "")
                excerpt = target.get("content_snippet", "")
                poster = target.get("author", "")

                draft_text = (
                    f"[DRAFT - needs human writing] "
                    f"Engagement opportunity for {venture}: "
                    f"{poster} posted about {excerpt[:100]}... "
                    f"URL: {link}"
                )

                # Drafts always go to Twitter; only X targets carry a reply-to id.
                social_platform = "twitter"
                reply_to = target.get("source_id", "") if target.get("platform", "x") == "x" else ""

                venture_cfg = VENTURE_CONFIG.get(venture, {})
                account = venture_cfg.get("owned_accounts", ["delimit_ai"])[0]

                saved = save_draft(
                    draft_text,
                    platform=social_platform,
                    account=account,
                    reply_to_id=reply_to,
                    context=f"Social target: {target.get('rationale', '')}",
                )
                drafted.append({
                    "draft_id": saved.get("draft_id"),
                    "fingerprint": target.get("fingerprint"),
                    "venture": venture,
                })
            except Exception as e:
                # A failed draft is logged; processing continues with the rest.
                logger.error("Failed to draft reply for %s: %s", target.get("fingerprint"), e)

        if create_ledger and kind in ("strategic", "both"):
            venture = target.get("venture", "delimit")
            title = f"[{venture.upper()}] Engage: {target.get('author', 'unknown')} on {target.get('platform', '?')}"
            description = (
                f"Source: {target.get('canonical_url', 'N/A')}\n"
                f"Author: {target.get('author', 'unknown')} ({target.get('author_followers', 0)} followers)\n"
                f"Snippet: {target.get('content_snippet', '')[:200]}\n"
                f"Rationale: {target.get('rationale', '')}"
            )
            ledger_items.append({
                "title": title,
                "description": description,
                "priority": VENTURE_CONFIG.get(venture, {}).get("priority", "P1"),
                "tags": [venture, "social-target", target.get("platform", "")],
            })

    return outcome
1567
+
1568
+
1569
def list_targets(limit: int = 20) -> Dict[str, Any]:
    """List the most recent targets from the JSONL store, newest first.

    Args:
        limit: Max targets to return. Zero or a negative value returns none.

    Returns:
        Dict with ``targets`` (parsed entries), ``count`` (how many were
        returned) and ``total_stored`` (number of lines in the store, 0 when
        the store does not exist). Blank and unparseable lines are skipped
        when building ``targets``.
    """
    if not TARGETS_FILE.exists():
        return {"targets": [], "count": 0, "total_stored": 0}

    lines = TARGETS_FILE.read_text().splitlines()
    targets: List[Dict] = []

    # The limit is checked before reading so that limit <= 0 yields nothing.
    if limit > 0:
        # Entries are appended over time, so reading backwards gives newest first.
        for line in reversed(lines):
            if not line.strip():
                continue
            try:
                targets.append(json.loads(line))
            except (json.JSONDecodeError, ValueError):
                continue
            if len(targets) >= limit:
                break

    return {"targets": targets, "count": len(targets), "total_stored": len(lines)}
1595
+
1596
+
1597
def get_stats() -> Dict[str, Any]:
    """Aggregate counts over every target in the JSONL store.

    Returns:
        Dict with ``total`` and four histograms: ``by_platform``,
        ``by_venture``, ``by_classification`` and ``by_status``. A field
        missing from an entry is counted under ``"unknown"``. Blank and
        unparseable lines are ignored. All counts are zero/empty when the
        store does not exist.
    """
    histograms: Dict[str, Dict[str, int]] = {
        "by_platform": {},
        "by_venture": {},
        "by_classification": {},
        "by_status": {},
    }
    if not TARGETS_FILE.exists():
        return {"total": 0, **histograms}

    # (entry field, histogram it feeds)
    dimensions = (
        ("platform", "by_platform"),
        ("venture", "by_venture"),
        ("classification", "by_classification"),
        ("status", "by_status"),
    )

    total = 0
    for raw_line in TARGETS_FILE.read_text().splitlines():
        if not raw_line.strip():
            continue
        try:
            entry = json.loads(raw_line)
        except (json.JSONDecodeError, ValueError):
            continue

        total += 1
        # Read all four fields before updating any histogram.
        values = [entry.get(field, "unknown") for field, _ in dimensions]
        for value, (_, bucket_name) in zip(values, dimensions):
            bucket = histograms[bucket_name]
            bucket[value] = bucket.get(value, 0) + 1

    return {"total": total, **histograms}