delimit-cli 3.14.43 → 3.14.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -1
- package/gateway/ai/founding_users.py +0 -163
- package/gateway/ai/inbox_daemon.py +0 -684
- package/gateway/ai/social.py +0 -666
- package/gateway/ai/social_target.py +0 -1583
|
@@ -1,1583 +0,0 @@
|
|
|
1
|
-
"""Social targeting engine -- discover engagement opportunities across platforms.
|
|
2
|
-
|
|
3
|
-
Scans X (via RapidAPI Twttr241, falling back to the xAI Responses API), Reddit (via proxy or RapidAPI Reddit34), GitHub (via the gh CLI), Hacker News
|
|
4
|
-
(Algolia API), and Dev.to for posts where Jamsons ventures can genuinely engage.
|
|
5
|
-
NamePros is flagged as manual_check_needed (no API).
|
|
6
|
-
|
|
7
|
-
Targets are deduplicated via fingerprint and stored in append-only JSONL.
|
|
8
|
-
Platform configuration is user-configurable via ~/.delimit/social_target_config.json.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
import copy
|
|
12
|
-
import json
|
|
13
|
-
import logging
|
|
14
|
-
import os
|
|
15
|
-
import subprocess
|
|
16
|
-
import urllib.request
|
|
17
|
-
import urllib.error
|
|
18
|
-
import urllib.parse
|
|
19
|
-
from datetime import datetime, timezone
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Any, Dict, List, Optional
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger("delimit.ai.social_target")
|
|
24
|
-
|
|
25
|
-
TARGETS_FILE = Path.home() / ".delimit" / "social_targets.jsonl"
|
|
26
|
-
SOCIAL_TARGET_CONFIG = Path.home() / ".delimit" / "social_target_config.json"
|
|
27
|
-
|
|
28
|
-
# -----------------------------------------------------------------------
|
|
29
|
-
# User-configurable platform config
|
|
30
|
-
# -----------------------------------------------------------------------
|
|
31
|
-
|
|
32
|
-
# Built-in defaults. Values from the user's config file are merged over
# these by _load_config(), so every key listed here is always present.
DEFAULT_CONFIG: Dict[str, Any] = {
    "platforms": {
        platform_name: {"enabled": is_enabled, "provider": provider_id}
        for platform_name, is_enabled, provider_id in (
            ("x", True, "twttr241"),
            ("reddit", True, "proxy"),
            ("github", True, "gh_cli"),
            ("hn", True, "algolia"),
            ("devto", True, "public_api"),
            ("namepros", False, "manual"),
        )
    },
    # venture name -> list of subreddit names to scan
    "subreddits": {},
    "github_queries": {},
    "scan_limit": 10,
    "min_engagement": {"score": 1, "comments": 2},
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def _deep_merge(base: dict, override: dict) -> dict:
|
|
49
|
-
"""Recursively merge override into base. Override values win."""
|
|
50
|
-
result = copy.deepcopy(base)
|
|
51
|
-
for key, value in override.items():
|
|
52
|
-
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
|
53
|
-
result[key] = _deep_merge(result[key], value)
|
|
54
|
-
else:
|
|
55
|
-
result[key] = copy.deepcopy(value)
|
|
56
|
-
return result
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _load_config() -> Dict[str, Any]:
    """Load the social target config from disk, merged over the defaults.

    - Starts from a deep copy of DEFAULT_CONFIG.
    - If SOCIAL_TARGET_CONFIG exists and holds a JSON object, merges it on
      top (user values win). An unreadable, malformed or non-object file is
      logged and ignored.
    - Runs platform auto-detection and writes the detected availability
      into ``enabled`` for every platform the user file does not mention
      under its own ``platforms`` key.

    Returns:
        The effective configuration dict.
    """
    config = copy.deepcopy(DEFAULT_CONFIG)
    # Platform names the user spelled out; auto-detection never touches them.
    user_platforms: set = set()

    if SOCIAL_TARGET_CONFIG.exists():
        try:
            user_config = json.loads(SOCIAL_TARGET_CONFIG.read_text())
        except (json.JSONDecodeError, ValueError, OSError) as e:
            logger.warning("Failed to load social target config: %s", e)
        else:
            if isinstance(user_config, dict):
                config = _deep_merge(config, user_config)
                declared = user_config.get("platforms")
                if isinstance(declared, dict):
                    user_platforms = set(declared)
            else:
                # A JSON list/string/number cannot be merged key by key.
                logger.warning(
                    "Failed to load social target config: %s",
                    "top-level JSON value is not an object",
                )

    platforms = config.get("platforms")
    if not isinstance(platforms, dict):
        # e.g. the user wrote "platforms": null -- fall back to the defaults
        # so the detection loop below has something to update.
        logger.warning("Ignoring non-object 'platforms' in social target config")
        platforms = copy.deepcopy(DEFAULT_CONFIG["platforms"])
        config["platforms"] = platforms

    # Auto-detect available platforms and disable those without access.
    # The user's overrides were captured from the single read above, so the
    # file is not re-read once per platform.
    for platform, info in _detect_available_platforms().items():
        entry = platforms.get(platform)
        if isinstance(entry, dict) and platform not in user_platforms:
            entry["enabled"] = info["available"]

    return config
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def _load_user_platform_overrides() -> set:
    """Return the set of platform names explicitly set in the user config.

    Returns an empty set when SOCIAL_TARGET_CONFIG does not exist, cannot be
    read or parsed, is not a JSON object, or has no object under
    ``platforms``.
    """
    if not SOCIAL_TARGET_CONFIG.exists():
        return set()
    try:
        user_config = json.loads(SOCIAL_TARGET_CONFIG.read_text())
    except (json.JSONDecodeError, ValueError, OSError):
        return set()

    # Valid JSON is not necessarily an object, and "platforms" may be null;
    # calling .get()/.keys() on those would raise AttributeError.
    platforms = user_config.get("platforms") if isinstance(user_config, dict) else None
    if not isinstance(platforms, dict):
        return set()
    return set(platforms)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def _detect_available_platforms() -> Dict[str, Dict[str, Any]]:
    """Check which platforms have the necessary credentials/access.

    Returns:
        Dict of platform -> {available: bool, provider: str, reason: str}
        with entries for x, reddit, github, hn, devto and namepros.
    """
    result: Dict[str, Dict[str, Any]] = {}

    # X/Twttr241: RapidAPI key exists?
    rapidapi_key = _get_rapidapi_key()
    if rapidapi_key:
        result["x"] = {"available": True, "provider": "twttr241", "reason": "RapidAPI key found"}
    elif _get_xai_api_key():
        # Fallback: xAI API key
        result["x"] = {"available": True, "provider": "xai", "reason": "xAI API key found (fallback)"}
    else:
        result["x"] = {"available": False, "provider": "none", "reason": "No RapidAPI or xAI API key"}

    # Reddit: proxy or RapidAPI. The local proxy is only probed when the
    # environment variable is not set.
    proxy_url = os.environ.get("DELIMIT_REDDIT_PROXY", "")
    if proxy_url:
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "DELIMIT_REDDIT_PROXY env set"}
    elif _test_reddit_proxy():
        result["reddit"] = {"available": True, "provider": "proxy", "reason": "Local proxy responding"}
    elif rapidapi_key:
        result["reddit"] = {"available": True, "provider": "rapidapi", "reason": "RapidAPI key found (fallback)"}
    else:
        result["reddit"] = {"available": False, "provider": "none", "reason": "No proxy or RapidAPI key"}

    # GitHub: gh auth status. Each failure mode gets its own reason so a
    # slow `gh` is not misreported as a missing binary.
    try:
        proc = subprocess.run(
            ["gh", "auth", "status"],
            capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        gh_available, gh_reason = False, "gh CLI not found"
    except subprocess.TimeoutExpired:
        gh_available, gh_reason = False, "gh auth status timed out"
    except OSError as e:
        # e.g. the binary exists but is not executable
        gh_available, gh_reason = False, f"gh CLI could not be run: {e}"
    else:
        if proc.returncode == 0:
            gh_available, gh_reason = True, "gh authenticated"
        else:
            gh_available, gh_reason = False, "gh not authenticated"
    result["github"] = {"available": gh_available, "provider": "gh_cli", "reason": gh_reason}

    # HN: always available (public API, no auth)
    result["hn"] = {"available": True, "provider": "algolia", "reason": "Public API, no auth needed"}

    # Dev.to: always available (public API, no auth)
    result["devto"] = {"available": True, "provider": "public_api", "reason": "Public API, no auth needed"}

    # NamePros: manual only
    result["namepros"] = {"available": False, "provider": "manual", "reason": "No API, manual check only"}

    return result
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def _save_config(config: Dict[str, Any]) -> None:
    """Persist ``config`` to SOCIAL_TARGET_CONFIG as 2-space-indented JSON.

    The parent directory is created first if it does not exist; the file
    is overwritten and ends with a trailing newline.
    """
    config_dir = SOCIAL_TARGET_CONFIG.parent
    config_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(config, indent=2)
    SOCIAL_TARGET_CONFIG.write_text(f"{serialized}\n")
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
def get_config_status() -> Dict[str, Any]:
    """Report the effective config together with platform availability.

    Returns:
        A dict with the merged config (``config``), the raw detection
        result (``platform_availability``), the config file path as a
        string (``config_file``) and whether that file currently exists
        (``config_file_exists``).
    """
    status: Dict[str, Any] = {"config": _load_config()}
    status["platform_availability"] = _detect_available_platforms()
    status["config_file"] = str(SOCIAL_TARGET_CONFIG)
    status["config_file_exists"] = SOCIAL_TARGET_CONFIG.exists()
    return status
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def update_platform_config(
    platform: str,
    enabled: Optional[bool] = None,
    provider: Optional[str] = None,
) -> Dict[str, Any]:
    """Change one platform's settings and write the config back to disk.

    Args:
        platform: Platform key; an unknown key gets a fresh entry that is
            enabled and has an empty provider.
        enabled: New enabled flag, or None to leave it unchanged.
        provider: New provider name; None or an empty string leaves it
            unchanged.

    Returns:
        ``{"updated": True, "platform": ..., "config": <platform entry>}``.
    """
    # NOTE(review): the whole effective config returned by _load_config()
    # is what gets saved here, not only the changed platform -- confirm
    # that persisting the merged view is intended.
    config = _load_config()
    entry = config["platforms"].setdefault(platform, {"enabled": True, "provider": ""})

    if enabled is not None:
        entry["enabled"] = enabled
    if provider:
        entry["provider"] = provider

    _save_config(config)
    return {"updated": True, "platform": platform, "config": entry}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
def add_subreddits(venture: str, subreddits: List[str]) -> Dict[str, Any]:
    """Add subreddits to scan for a venture and save the config.

    Names already configured for the venture are skipped, and a name that
    appears more than once in ``subreddits`` is added only once. Input
    order is preserved.

    Args:
        venture: Venture key under the config's ``subreddits`` mapping.
        subreddits: Subreddit names to add.

    Returns:
        ``{"venture": ..., "added": <newly added names>, "total": <full list>}``.
    """
    config = _load_config()

    by_venture = config.get("subreddits")
    if not isinstance(by_venture, dict):
        # A user file may carry "subreddits": null; start a fresh mapping
        # instead of failing on item assignment.
        by_venture = {}
        config["subreddits"] = by_venture

    current = by_venture.setdefault(venture, [])
    seen = set(current)
    new_subs: List[str] = []
    for name in subreddits:
        if name in seen:
            continue
        # Tracking `seen` also dedupes repeats within the input itself.
        seen.add(name)
        new_subs.append(name)

    current.extend(new_subs)
    _save_config(config)
    return {"venture": venture, "added": new_subs, "total": current}
|
|
202
|
-
|
|
203
|
-
# -----------------------------------------------------------------------
|
|
204
|
-
# Per-venture routing config (loaded from ~/.delimit/social_target_ventures.json)
|
|
205
|
-
# -----------------------------------------------------------------------
|
|
206
|
-
|
|
207
|
-
_VENTURE_CONFIG_FILE = Path.home() / ".delimit" / "social_target_ventures.json"
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def _load_venture_config() -> Dict[str, Any]:
    """Load venture config from ~/.delimit/social_target_ventures.json.

    Returns:
        The parsed JSON object, or ``{}`` when the file is missing,
        unreadable, not valid JSON/text, or not a JSON object. Callers use
        ``.get()`` on the result (some at import time), so a dict is
        always returned.
    """
    if not _VENTURE_CONFIG_FILE.exists():
        return {}
    try:
        loaded = json.loads(_VENTURE_CONFIG_FILE.read_text())
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text() raises on undecodable bytes.
        logger.warning("Failed to load venture config: %s", e)
        return {}
    if not isinstance(loaded, dict):
        logger.warning("Failed to load venture config: %s", "top-level JSON value is not an object")
        return {}
    return loaded
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def _load_internal_users() -> set:
    """Return the venture config's ``internal_users`` entries as a set.

    The set is empty when the key is absent.
    """
    usernames = _load_venture_config().get("internal_users", [])
    return set(usernames)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def _load_github_queries() -> Dict[str, List[str]]:
    """Return the ``github_queries`` mapping from the venture config.

    Falls back to an empty dict when the key is absent.
    """
    venture_settings = _load_venture_config()
    queries = venture_settings.get("github_queries", {})
    return queries
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def _load_own_repos() -> List[str]:
    """Return the ``own_repos`` list from the venture config.

    Falls back to an empty list when the key is absent.
    """
    venture_settings = _load_venture_config()
    repos = venture_settings.get("own_repos", [])
    return repos
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
VENTURE_CONFIG = _load_venture_config().get("ventures", {})
|
|
239
|
-
VENTURE_GITHUB_QUERIES = _load_github_queries()
|
|
240
|
-
OWN_REPOS = _load_own_repos()
|
|
241
|
-
INTERNAL_USERS = _load_internal_users()
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
# -----------------------------------------------------------------------
|
|
245
|
-
# JSONL persistence helpers
|
|
246
|
-
# -----------------------------------------------------------------------
|
|
247
|
-
|
|
248
|
-
def _load_known_fingerprints() -> set:
    """Load all fingerprints from the targets file for dedup.

    Blank lines, lines that are not valid JSON, and JSON values that are
    not objects are skipped individually, so one bad record cannot hide
    the fingerprints that follow it.

    Returns:
        The set of non-empty string ``fingerprint`` values found in
        TARGETS_FILE; empty when the file is missing or unreadable.
    """
    fps: set = set()
    if not TARGETS_FILE.exists():
        return fps

    try:
        lines = TARGETS_FILE.read_text().splitlines()
    except (OSError, UnicodeDecodeError) as e:
        # Dedup is best-effort: an unreadable file means "nothing known".
        logger.warning("Failed to read social targets file: %s", e)
        return fps

    for line in lines:
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except ValueError:  # includes json.JSONDecodeError
            continue
        if not isinstance(entry, dict):
            # Valid JSON but not a record (e.g. a bare list or number).
            continue
        fp = entry.get("fingerprint", "")
        if fp and isinstance(fp, str):
            fps.add(fp)
    return fps
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def _append_target(target: Dict[str, Any]) -> None:
    """Append ``target`` to TARGETS_FILE as one JSON line.

    The parent directory is created if needed; the file is opened in
    append mode so existing records are left untouched.
    """
    storage_dir = TARGETS_FILE.parent
    storage_dir.mkdir(parents=True, exist_ok=True)
    with open(TARGETS_FILE, "a") as sink:
        record = json.dumps(target)
        sink.write(f"{record}\n")
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
# -----------------------------------------------------------------------
|
|
277
|
-
# Venture routing
|
|
278
|
-
# -----------------------------------------------------------------------
|
|
279
|
-
|
|
280
|
-
def _route_venture(text: str) -> tuple:
    """Match text against venture topics.

    A venture is skipped when any of its ``exclude_terms`` occurs in the
    text. Otherwise its score is the number of its ``topics`` found in the
    text (case-insensitive substring match). The highest score wins, and
    on a tie the venture listed first in VENTURE_CONFIG is kept.

    Args:
        text: Content to route. ``None`` or an empty string matches nothing.

    Returns:
        ``(venture, confidence, rationale)``. ``venture`` is None and
        ``confidence`` is 0.0 when nothing matched; otherwise confidence is
        ``0.5 + 0.15 * matches`` capped at 0.95, and the rationale lists up
        to three matched topics.
    """
    # Scanners may hand over a null field from an API payload.
    text_lower = (text or "").lower()
    best_venture = None
    best_score = 0
    best_matches: List[str] = []

    for venture, config in VENTURE_CONFIG.items():
        # Check exclude terms first
        excluded = config.get("exclude_terms") or []
        if any(term.lower() in text_lower for term in excluded):
            continue
        # "topics" is read as leniently as "exclude_terms": a venture entry
        # without topics simply never matches instead of raising KeyError.
        topics = config.get("topics") or []
        matches = [topic for topic in topics if topic.lower() in text_lower]
        score = len(matches)
        if score > best_score:
            best_score = score
            best_venture = venture
            best_matches = matches

    if not best_venture:
        return None, 0.0, "No venture topic match"

    confidence = min(0.95, 0.5 + (best_score * 0.15))
    rationale = f"Matched topics: {', '.join(best_matches[:3])}"
    return best_venture, confidence, rationale
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
def _classify_target(text: str, author_followers: int = 0) -> str:
|
|
307
|
-
"""Classify a target as reply, strategic, or both."""
|
|
308
|
-
is_question = any(q in text.lower() for q in ["?", "how do", "anyone", "looking for", "recommendations"])
|
|
309
|
-
high_reach = author_followers > 5000
|
|
310
|
-
|
|
311
|
-
if is_question and high_reach:
|
|
312
|
-
return "both"
|
|
313
|
-
if high_reach:
|
|
314
|
-
return "strategic"
|
|
315
|
-
if is_question:
|
|
316
|
-
return "reply"
|
|
317
|
-
return "reply"
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
# -----------------------------------------------------------------------
|
|
321
|
-
# xAI API key resolution
|
|
322
|
-
# -----------------------------------------------------------------------
|
|
323
|
-
|
|
324
|
-
def _get_xai_api_key() -> str:
|
|
325
|
-
"""Resolve xAI API key from env or .mcp.json."""
|
|
326
|
-
key = os.environ.get("XAI_API_KEY", "")
|
|
327
|
-
if key:
|
|
328
|
-
return key
|
|
329
|
-
# Try .mcp.json
|
|
330
|
-
mcp_path = Path.home() / ".mcp.json"
|
|
331
|
-
if not mcp_path.exists():
|
|
332
|
-
mcp_path = Path("/root/.mcp.json")
|
|
333
|
-
if mcp_path.exists():
|
|
334
|
-
try:
|
|
335
|
-
cfg = json.loads(mcp_path.read_text())
|
|
336
|
-
key = (cfg.get("mcpServers", {})
|
|
337
|
-
.get("xai", {})
|
|
338
|
-
.get("env", {})
|
|
339
|
-
.get("XAI_API_KEY", ""))
|
|
340
|
-
if key:
|
|
341
|
-
return key
|
|
342
|
-
# Also check delimit server env
|
|
343
|
-
key = (cfg.get("mcpServers", {})
|
|
344
|
-
.get("delimit", {})
|
|
345
|
-
.get("env", {})
|
|
346
|
-
.get("XAI_API_KEY", ""))
|
|
347
|
-
except Exception:
|
|
348
|
-
pass
|
|
349
|
-
return key
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
# -----------------------------------------------------------------------
|
|
353
|
-
# Platform scanners
|
|
354
|
-
# -----------------------------------------------------------------------
|
|
355
|
-
|
|
356
|
-
def _scan_x_twttr(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan X/Twitter via RapidAPI Twttr241 (free, structured data).

    Args:
        queries: Search phrases; the first five are OR-ed into one query.
        limit: Requested result count and cap on returned targets.
        known_fps: Fingerprints already seen. Fingerprints of returned
            targets are added to it in place.

    Returns:
        New target dicts, or ``[]`` when no RapidAPI key is configured or
        the request or parsing fails. On failure ``known_fps`` is restored,
        so tweets that were discarded are not treated as already seen by a
        later scanner.
    """
    api_key = _get_rapidapi_key()
    if not api_key:
        return []

    targets: List[Dict] = []
    # Fingerprints this call added, so they can be rolled back on failure.
    added_fps: List[str] = []

    combined_query = " OR ".join(queries[:5])
    encoded_q = urllib.parse.quote(combined_query)
    url = f"https://twitter241.p.rapidapi.com/search-v2?query={encoded_q}&type=Latest&count={limit}"

    req = urllib.request.Request(
        url,
        headers={
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "twitter241.p.rapidapi.com",
            "User-Agent": "Delimit/3.11.0",
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            result = json.loads(resp.read())

        # Navigate: result.timeline.instructions[].entries[].content.itemContent.tweet_results.result
        instructions = (
            result.get("result", {})
            .get("timeline", {})
            .get("instructions", [])
        )

        for instruction in instructions:
            for entry in instruction.get("entries", []):
                tweet_result = (
                    entry.get("content", {})
                    .get("itemContent", {})
                    .get("tweet_results", {})
                    .get("result", {})
                )
                if not tweet_result:
                    continue

                legacy = tweet_result.get("legacy", {})
                user_legacy = (
                    tweet_result.get("core", {})
                    .get("user_results", {})
                    .get("result", {})
                    .get("legacy", {})
                )

                tweet_id = legacy.get("id_str", "")
                screen_name = user_legacy.get("screen_name", "")
                followers = user_legacy.get("followers_count", 0) or 0
                full_text = legacy.get("full_text", "")

                if not tweet_id or not full_text:
                    continue

                fp = f"x:{tweet_id}"
                if fp in known_fps:
                    continue

                venture, confidence, rationale = _route_venture(full_text)
                if not venture:
                    continue

                author = f"@{screen_name}" if screen_name else ""
                target = {
                    "fingerprint": fp,
                    "platform": "x",
                    "source_id": tweet_id,
                    "canonical_url": f"https://x.com/{screen_name}/status/{tweet_id}" if screen_name else f"https://x.com/i/status/{tweet_id}",
                    "author": author,
                    "author_followers": followers,
                    "content_snippet": full_text[:300],
                    "venture": venture,
                    "classification": _classify_target(full_text, followers),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                }
                targets.append(target)
                known_fps.add(fp)
                added_fps.append(fp)

                if len(targets) >= limit:
                    break
            if len(targets) >= limit:
                break

    except Exception as e:
        # Scanner boundary: network, HTTP and payload-shape errors all end
        # up here. The partial results are dropped, so their fingerprints
        # must be dropped too.
        logger.warning("Twttr241 scan failed: %s", e)
        known_fps.difference_update(added_fps)
        return []

    return targets
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
def _scan_x(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan X/Twitter -- uses config to determine provider, falls back to xAI x_search.

    Args:
        queries: Search phrases; at most the first five are used.
        limit: Maximum number of posts to request and process.
        known_fps: Fingerprints already seen; updated in place.
        config: Effective config; ``platforms.x.provider`` selects the
            scanner ("twttr241" or "" tries Twttr241 first).

    Returns:
        A list of target dicts. Failures are reported in-band as
        ``{"error": ..., "platform": "x"}`` items rather than raised.
    """
    platform_config = (config or {}).get("platforms", {}).get("x", {})
    provider = platform_config.get("provider", "twttr241")

    # Try Twttr241 first if configured (or default)
    if provider in ("twttr241", ""):
        twttr_targets = _scan_x_twttr(queries, limit, known_fps)
        if twttr_targets:
            return twttr_targets

    # Fallback or explicit xAI provider: xAI Responses API with x_search
    api_key = _get_xai_api_key()
    if not api_key:
        return [{"error": "No X scanner available (Twttr241 failed, XAI_API_KEY not configured)", "platform": "x"}]

    targets: List[Dict] = []
    # Batch queries to avoid too many API calls
    combined_query = " OR ".join(f'"{q}"' for q in queries[:5])
    prompt = (
        f"Search X/Twitter for recent posts about: {combined_query}. "
        f"Find up to {limit} posts from the last 24 hours that are asking questions, "
        "sharing problems, or discussing these topics. "
        "For each post, return the tweet ID, author handle, author follower count, "
        "and a snippet of the content. Format as JSON array."
    )

    # NOTE(review): this posts a chat-style "messages"/"max_tokens" body to
    # the /v1/responses endpoint -- confirm against the xAI API reference
    # that the endpoint accepts this shape.
    data = json.dumps({
        "model": "grok-4-0709",
        "tools": [{"type": "x_search"}],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 4096,
    }).encode()

    req = urllib.request.Request(
        "https://api.x.ai/v1/responses",
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": "Delimit/3.11.0",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read())

        # Parse the response -- xAI Responses API returns output array
        response_text = ""
        if isinstance(result, dict):
            # Responses API format: result has "output" array
            for item in result.get("output", []):
                if item.get("type") == "message":
                    for content in item.get("content", []):
                        if content.get("type") == "output_text":
                            response_text = content.get("text", "")
                            break
            # Fallback: chat completions format
            if not response_text:
                for choice in result.get("choices", []):
                    msg = choice.get("message", {})
                    response_text = msg.get("content", "")
                    if response_text:
                        break

        if not response_text:
            logger.warning("xAI returned empty response for social targeting")
            return targets

        # Try to extract JSON from response
        parsed_tweets = _extract_json_array(response_text)
        for tweet in parsed_tweets[:limit]:
            # The array is model-generated, so every field is normalised
            # per item; one malformed entry must not abort the others.
            if not isinstance(tweet, dict):
                continue

            raw_id = tweet.get("id", tweet.get("tweet_id", ""))
            # A JSON null would otherwise stringify to the bogus id "None".
            tweet_id = "" if raw_id is None else str(raw_id)

            raw_author = tweet.get("author", tweet.get("handle", tweet.get("username", "")))
            author = str(raw_author) if raw_author else ""
            if author and not author.startswith("@"):
                author = f"@{author}"

            raw_snippet = tweet.get("content", tweet.get("text", tweet.get("snippet", "")))
            snippet = str(raw_snippet) if raw_snippet else ""

            raw_followers = tweet.get("followers", tweet.get("author_followers", tweet.get("follower_count", 0)))
            try:
                followers = int(raw_followers or 0)
            except (TypeError, ValueError):
                # e.g. "12.5K" or a nested object -- treat reach as unknown.
                followers = 0

            fp = f"x:{tweet_id}"
            if fp in known_fps or not tweet_id:
                continue

            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            target = {
                "fingerprint": fp,
                "platform": "x",
                "source_id": tweet_id,
                "canonical_url": f"https://x.com/{author.lstrip('@')}/status/{tweet_id}" if author else f"https://x.com/i/status/{tweet_id}",
                "author": author,
                "author_followers": followers,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, followers),
                "confidence": confidence,
                "rationale": rationale,
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            }
            targets.append(target)
            known_fps.add(fp)

    except urllib.error.HTTPError as e:
        logger.error("xAI API error: %s %s", e.code, e.reason)
        # Try to read error body for details
        try:
            err_body = e.read().decode()[:200]
            logger.error("xAI error body: %s", err_body)
        except Exception:
            # The body is diagnostic only; failing to read it is not an error.
            pass
        targets.append({"error": f"xAI API error: {e.code} {e.reason}", "platform": "x"})
    except urllib.error.URLError as e:
        logger.error("xAI connection error: %s", e.reason)
        targets.append({"error": f"xAI connection error: {e.reason}", "platform": "x"})
    except Exception as e:
        logger.error("xAI scan failed: %s", e)
        targets.append({"error": f"xAI scan error: {e}", "platform": "x"})

    return targets
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
def _scan_hn(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan Hacker News via Algolia API.

    Args:
        queries: Search phrases; only the first three are queried.
        limit: Hits requested per query and cap on returned targets.
        known_fps: Fingerprints already seen; updated in place.

    Returns:
        New target dicts. A query that fails is logged and skipped.
    """
    targets: List[Dict] = []

    for query in queries[:3]:  # Limit query count
        encoded_q = urllib.parse.quote(query)
        url = f"https://hn.algolia.com/api/v1/search_by_date?tags=story&query={encoded_q}&hitsPerPage={limit}"
        req = urllib.request.Request(
            url,
            headers={"User-Agent": "Delimit/3.11.0"},
        )
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                data = json.loads(resp.read())

            for hit in data.get("hits", [])[:limit]:
                # `or ""` also covers explicit JSON nulls, matching how
                # `points` is read below; str(None) would give "None".
                story_id = str(hit.get("objectID") or "")
                fp = f"hn:{story_id}"
                if fp in known_fps or not story_id:
                    continue

                title = hit.get("title") or ""
                author = hit.get("author") or ""
                points = hit.get("points", 0) or 0
                snippet = title

                venture, confidence, rationale = _route_venture(title)
                if not venture:
                    continue

                target = {
                    "fingerprint": fp,
                    "platform": "hn",
                    "source_id": story_id,
                    "canonical_url": f"https://news.ycombinator.com/item?id={story_id}",
                    "author": author,
                    "author_followers": points,  # Use points as proxy for reach
                    "content_snippet": snippet[:300],
                    "venture": venture,
                    "classification": _classify_target(snippet, points),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                }
                targets.append(target)
                known_fps.add(fp)

                if len(targets) >= limit:
                    break

        except Exception as e:
            # Per-query boundary: one failing query must not stop the rest.
            logger.error("HN scan error for query '%s': %s", query, e)
            continue

        if len(targets) >= limit:
            break

    return targets
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
def _scan_devto(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan Dev.to for recent articles matching venture topics.

    Each of the first three queries is turned into a tag (lower-cased,
    spaces and hyphens removed, truncated to 20 characters) and looked up
    through the public articles endpoint.

    Args:
        queries: Search phrases to convert into tags.
        limit: Articles requested per tag and cap on returned targets.
        known_fps: Fingerprints already seen; updated in place.

    Returns:
        New target dicts. A tag whose request fails is logged and skipped.
    """
    targets: List[Dict] = []

    for query in queries[:3]:
        # Dev.to API uses tag-based search
        tag = query.lower().replace(" ", "").replace("-", "")[:20]
        url = f"https://dev.to/api/articles?tag={urllib.parse.quote(tag)}&top=1&per_page={limit}"
        req = urllib.request.Request(
            url,
            headers={"User-Agent": "Delimit/3.11.0"},
        )
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                articles = json.loads(resp.read())

            if not isinstance(articles, list):
                continue

            for article in articles[:limit]:
                # `or ""` normalises explicit JSON nulls; str(None) would
                # otherwise produce the bogus id "None".
                article_id = str(article.get("id") or "")
                fp = f"devto:{article_id}"
                if fp in known_fps or not article_id:
                    continue

                title = article.get("title") or ""
                # A null description would otherwise be rendered as the
                # literal text "None" inside the snippet.
                description = article.get("description") or ""
                author = (article.get("user") or {}).get("username") or ""
                reactions = article.get("positive_reactions_count", 0) or 0
                snippet = f"{title} - {description}"

                venture, confidence, rationale = _route_venture(snippet)
                if not venture:
                    continue

                target = {
                    "fingerprint": fp,
                    "platform": "devto",
                    "source_id": article_id,
                    "canonical_url": article.get("url") or f"https://dev.to/{author}/{article.get('slug', article_id)}",
                    "author": author,
                    "author_followers": reactions,
                    "content_snippet": snippet[:300],
                    "venture": venture,
                    "classification": _classify_target(snippet, reactions),
                    "confidence": confidence,
                    "rationale": rationale,
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                }
                targets.append(target)
                known_fps.add(fp)

                if len(targets) >= limit:
                    break

        except Exception as e:
            # Per-tag boundary: one failing tag must not stop the rest.
            logger.error("Dev.to scan error for tag '%s': %s", tag, e)
            continue

        if len(targets) >= limit:
            break

    return targets
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
def _gh_api(endpoint: str) -> Any:
    """Run ``gh api <endpoint>`` and return its decoded JSON output.

    Returns None, after logging, when the command exits non-zero, exceeds
    the 30 second timeout, prints something that is not JSON, or the
    ``gh`` executable cannot be found.
    """
    command = ["gh", "api", endpoint]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
        if completed.returncode == 0:
            return json.loads(completed.stdout)
        logger.warning("gh api %s failed: %s", endpoint, completed.stderr[:200])
    except subprocess.TimeoutExpired:
        logger.error("gh api %s timed out", endpoint)
    except (json.JSONDecodeError, FileNotFoundError) as e:
        logger.error("gh api %s error: %s", endpoint, e)
    return None
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
def _scan_github(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Scan GitHub for repos and issues matching venture topics via gh CLI.

    Args:
        queries: Search phrases. The first five drive the repository search,
            the first three drive the issue search.
        limit: Maximum number of targets to return; also caps ``per_page``
            (never more than 10 per request).
        known_fps: Fingerprints already seen. Fingerprints of new targets are
            added to this set in place.
        config: Accepted because ``scan_targets`` passes it; not read here.

    Returns:
        Target dicts for repositories and issues that ``_route_venture``
        assigns to a venture, at most ``limit`` of them.
    """
    targets: List[Dict] = []
    per_page = min(limit, 10)

    # Phase 1: Repository search
    for query in queries[:5]:
        if len(targets) >= limit:
            break
        encoded_q = urllib.parse.quote(query)
        data = _gh_api(f"search/repositories?q={encoded_q}&sort=updated&per_page={per_page}")
        if not data or not isinstance(data, dict):
            continue

        # `or []` / `or {}` below: the API may return JSON null for these
        # fields, and a single null must not abort the whole scan.
        for repo in data.get("items") or []:
            full_name = repo.get("full_name", "")
            fp = f"github:repo:{full_name}"
            if fp in known_fps or not full_name:
                continue

            stars = repo.get("stargazers_count", 0) or 0
            description = repo.get("description", "") or ""

            # Skip noise: 0 stars and no description
            if stars == 0 and not description:
                continue

            snippet = f"{full_name}: {description}"
            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            targets.append({
                "fingerprint": fp,
                "platform": "github",
                "source_id": full_name,
                "canonical_url": repo.get("html_url", f"https://github.com/{full_name}"),
                "author": (repo.get("owner") or {}).get("login", ""),
                # Stars stand in for follower count on repository targets.
                "author_followers": stars,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, stars),
                "confidence": confidence,
                "rationale": f"repo search: {rationale}",
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            })
            known_fps.add(fp)

            if len(targets) >= limit:
                break

    # Phase 2: Issue/discussion search
    for query in queries[:3]:
        if len(targets) >= limit:
            break
        encoded_q = urllib.parse.quote(query)
        data = _gh_api(f"search/issues?q={encoded_q}&sort=created&per_page={per_page}")
        if not data or not isinstance(data, dict):
            continue

        for issue in data.get("items") or []:
            number = issue.get("number", "")
            html_url = issue.get("html_url", "") or ""
            # Extract repo from URL: https://github.com/owner/repo/issues/123
            repo_name = "/".join(html_url.split("/")[3:5]) if html_url else ""
            fp = f"github:issue:{repo_name}:{number}"
            if fp in known_fps or not number:
                continue

            title = issue.get("title", "")
            body = (issue.get("body") or "")[:200]
            author = (issue.get("user") or {}).get("login", "")
            reactions = (issue.get("reactions") or {}).get("total_count", 0) or 0
            snippet = f"{title} {body}".strip()

            venture, confidence, rationale = _route_venture(snippet)
            if not venture:
                continue

            targets.append({
                "fingerprint": fp,
                "platform": "github",
                "source_id": f"{repo_name}#{number}",
                "canonical_url": html_url,
                "author": author,
                # Reaction count stands in for follower count on issue targets.
                "author_followers": reactions,
                "content_snippet": snippet[:300],
                "venture": venture,
                "classification": _classify_target(snippet, reactions),
                "confidence": confidence,
                "rationale": f"issue search: {rationale}",
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            })
            known_fps.add(fp)

            if len(targets) >= limit:
                break

    return targets
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
def _monitor_own_repos(known_fps: set) -> List[Dict]:
    """Monitor our own repos for external engagement (forks, stars, issues, PRs).

    For every repository in ``OWN_REPOS`` this fetches the newest forks, a page
    of stargazers, and the most recently created issues/PRs, and emits one
    target per event whose author is not in ``INTERNAL_USERS``.

    Args:
        known_fps: Fingerprints already seen. Fingerprints of new targets are
            added to this set in place.

    Returns:
        Target dicts, all routed to the "delimit" venture.
    """
    targets: List[Dict] = []

    for repo in OWN_REPOS:
        repo_short = repo.split("/")[-1]

        # Check forks
        forks_data = _gh_api(f"repos/{repo}/forks?sort=newest&per_page=10")
        if isinstance(forks_data, list):
            for fork in forks_data:
                # `or {}`: tolerate a JSON null owner instead of raising.
                user = (fork.get("owner") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:fork:{user}:{repo_short}"
                if fp in known_fps:
                    continue

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": fork.get("full_name", ""),
                    "canonical_url": fork.get("html_url", ""),
                    "author": user,
                    "author_followers": fork.get("stargazers_count", 0) or 0,
                    "content_snippet": f"{user} forked {repo}",
                    "venture": "delimit",
                    "classification": "strategic",
                    "confidence": 0.7,
                    "rationale": f"External fork of {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

        # Check stargazers.
        # The previous endpoint had a curl-style "&-H='Accept: ...'" appended to
        # the query string. That is URL text, not a request header, so it never
        # selected the star+json media type. It is dropped here; star
        # timestamps were not used anyway.
        stars_data = _gh_api(f"repos/{repo}/stargazers?per_page=10")
        if isinstance(stars_data, list):
            for star in stars_data:
                # star+json format has "user" key; plain format is the user directly
                user_obj = star.get("user", star) if isinstance(star, dict) else {}
                user = (user_obj or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                fp = f"github:star:{user}:{repo_short}"
                if fp in known_fps:
                    continue

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": f"{user}/star/{repo}",
                    "canonical_url": f"https://github.com/{user}",
                    "author": user,
                    "author_followers": 0,
                    "content_snippet": f"{user} starred {repo}",
                    "venture": "delimit",
                    "classification": "strategic",
                    "confidence": 0.6,
                    "rationale": f"External star on {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

        # Check issues and PRs from external users
        issues_data = _gh_api(f"repos/{repo}/issues?state=all&sort=created&direction=desc&per_page=10")
        if isinstance(issues_data, list):
            for issue in issues_data:
                user = (issue.get("user") or {}).get("login", "")
                if user in INTERNAL_USERS or not user:
                    continue
                number = issue.get("number", "")
                fp = f"github:issue:{repo}:{number}"
                if fp in known_fps or not number:
                    continue

                title = issue.get("title", "")
                # The issues endpoint also lists pull requests; those carry a
                # "pull_request" key.
                kind = "PR" if "pull_request" in issue else "issue"

                targets.append({
                    "fingerprint": fp,
                    "platform": "github",
                    "source_id": f"{repo}#{number}",
                    "canonical_url": issue.get("html_url", ""),
                    "author": user,
                    "author_followers": (issue.get("reactions") or {}).get("total_count", 0) or 0,
                    "content_snippet": f"{user} opened {kind}: {title}"[:300],
                    "venture": "delimit",
                    "classification": "reply",
                    "confidence": 0.8,
                    "rationale": f"External {kind} on {repo}",
                    "manual_check_needed": False,
                    "first_seen": datetime.now(timezone.utc).isoformat(),
                    "status": "new",
                })
                known_fps.add(fp)

    return targets
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
def _get_rapidapi_key() -> str:
    """Load RapidAPI key from secrets broker or env.

    Lookup order:
      1. ``~/.delimit/secrets/rapidapi-reddit.json``: the ``encrypted_value``
         field (base64-decoded) if present, otherwise the ``value`` field.
      2. The ``RAPIDAPI_KEY`` environment variable.

    Returns:
        The key, or an empty string when no source provides one.
    """
    import base64

    # Primary: delimit secrets broker
    secrets_file = Path.home() / ".delimit" / "secrets" / "rapidapi-reddit.json"
    if secrets_file.exists():
        try:
            data = json.loads(secrets_file.read_text())
            encoded = data.get("encrypted_value", "")
            if encoded:
                # NOTE(review): despite the field name, this is only base64
                # decoding; no decryption happens in this function.
                return base64.b64decode(encoded).decode()
            value = data.get("value", "")
            if value:
                return value
            # File exists but holds no key: fall through to the environment
            # instead of returning an empty string.
        except Exception as e:
            # Best-effort read: report the failure (type only, never file
            # contents) and fall back to the environment variable.
            logger.warning(
                "Could not read RapidAPI key from %s (%s); falling back to RAPIDAPI_KEY",
                secrets_file,
                type(e).__name__,
            )
    return os.environ.get("RAPIDAPI_KEY", "")
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
# Subreddits to scan per venture
# Keep total under 30 subs to stay well under rate limits (~1 req/sub/scan)
#
# Maps venture name -> list of subreddit names (no "r/" prefix).
# _scan_reddit_proxy and _scan_reddit_rapidapi iterate this dict, and
# _scan_reddit temporarily merges user-configured subreddits into it for the
# duration of a single scan before restoring it.
VENTURE_SUBREDDITS = {
    "delimit": [
        "ClaudeAI", "vibecoding", "devops", "programming",
        "AI_Agents", "ContextEngineering", "cursor",
        "LocalLLaMA", "SaaS", "opensource",
        # "ChatGPTCoding",  # requires high karma to post
    ],
    "domainvested": [
        "Domains", "flipping", "Entrepreneur", "SideProject",
    ],
    "wirereport": [
        "sportsbook", "sportsbetting",
    ],
    "livetube": [
        "Twitch", "livestreaming",
    ],
    "stakeone": [
        "harmony_one", "CryptoCurrency", "defi",
    ],
}
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
# Internal-only Reddit proxy via SSH tunnel to residential IP.
# This is NOT shipped to external users — it only runs on the founder's gateway server.
# External users would configure their own Reddit API credentials.
#
# The endpoint can be overridden with the DELIMIT_REDDIT_PROXY environment
# variable. _test_reddit_proxy derives its health-check URL by replacing the
# trailing "/reddit-fetch" segment of this value with "/health".
REDDIT_PROXY = os.environ.get("DELIMIT_REDDIT_PROXY", "http://127.0.0.1:4819/reddit-fetch")
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
def _scan_reddit(queries: List[str], limit: int, known_fps: set, config: Optional[Dict] = None) -> List[Dict]:
    """Pick a Reddit backend and run the scan through it.

    Backend choice comes from ``config["platforms"]["reddit"]["provider"]``:
      - "rapidapi": go straight to RapidAPI Reddit34.
      - any other value (default "proxy"): use the residential proxy when its
        health check passes, otherwise fall back to RapidAPI.
    When RapidAPI is needed but no key is available, manual-check placeholders
    are returned instead.

    Subreddits listed under ``config["subreddits"]`` are merged into the
    module-level ``VENTURE_SUBREDDITS`` for the duration of the scan and the
    original mapping is restored afterwards.

    Args:
        queries: Search phrases forwarded to the chosen backend.
        limit: Maximum number of targets to return.
        known_fps: Already-seen fingerprints, forwarded to the backend.
        config: Loaded scan configuration, or None.

    Returns:
        Target dicts from the chosen backend.
    """
    settings = config or {}
    provider = settings.get("platforms", {}).get("reddit", {}).get("provider", "proxy")

    # Merge subreddits from config with defaults
    extra_subs = settings.get("subreddits", {})
    snapshot = None
    if extra_subs:
        combined = dict(VENTURE_SUBREDDITS)
        for name, sub_list in extra_subs.items():
            if name in combined:
                combined[name] = list(set(combined[name] + sub_list))
            else:
                combined[name] = sub_list
        # The backends read the module-level dict, so the merged view is
        # swapped in there (scans are single-threaded) and undone in `finally`.
        snapshot = dict(VENTURE_SUBREDDITS)
        VENTURE_SUBREDDITS.update(combined)

    try:
        forced_rapidapi = provider == "rapidapi"

        # Default path: the proxy wins whenever it answers its health check.
        if not forced_rapidapi and _test_reddit_proxy():
            return _scan_reddit_proxy(queries, limit, known_fps)

        api_key = _get_rapidapi_key()
        if api_key:
            return _scan_reddit_rapidapi(queries, limit, known_fps, api_key)

        if not forced_rapidapi:
            logger.warning("No Reddit access -- proxy down, no RapidAPI key")
        return _manual_check_targets("reddit", queries, limit)
    finally:
        # Restore original subreddits if we merged
        if snapshot is not None:
            VENTURE_SUBREDDITS.clear()
            VENTURE_SUBREDDITS.update(snapshot)
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
def _test_reddit_proxy() -> bool:
    """Probe the Reddit proxy's ``/health`` endpoint.

    The health URL is ``REDDIT_PROXY`` with its trailing ``/reddit-fetch``
    segment replaced by ``/health``.

    Returns:
        The ``reddit_proxy`` field of the JSON health response, or False when
        the field is absent or the request fails for any reason (3-second
        timeout).
    """
    try:
        base_url = REDDIT_PROXY.rsplit('/reddit-fetch', 1)[0]
        request = urllib.request.Request(
            f"{base_url}/health",
            headers={"User-Agent": "Delimit"},
        )
        with urllib.request.urlopen(request, timeout=3) as response:
            payload = json.loads(response.read())
        return payload.get("reddit_proxy", False)
    except Exception:
        # Any failure (connection, timeout, bad JSON) means "not available".
        return False
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
def _scan_reddit_proxy(queries: List[str], limit: int, known_fps: set) -> List[Dict]:
    """Scan Reddit via residential IP proxy (free, unlimited).

    Walks every subreddit in ``VENTURE_SUBREDDITS`` once, fetching both its
    ``/new`` and ``/hot`` listings through ``REDDIT_PROXY``, and keeps posts
    that ``_route_venture`` assigns to a venture and that have some
    engagement (score >= 1 or at least 2 comments).

    Args:
        queries: Unused by this backend; kept for a uniform scanner signature.
        limit: Maximum number of targets to return; also caps the per-listing
            page size (never more than 10).
        known_fps: Fingerprints already seen. Fingerprints of new targets are
            added to this set in place.

    Returns:
        Target dicts, at most ``limit`` of them.
    """
    targets: List[Dict] = []
    per_page = min(limit, 10)

    scanned_subs: set = set()
    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                return targets
            # A subreddit listed under several ventures is scanned once.
            # `continue` (not `break`) so the venture's remaining subreddits
            # are still visited.
            if sub in scanned_subs:
                continue
            scanned_subs.add(sub)

            # Scan both /new and /hot to catch high-engagement older posts
            for sort in ("new", "hot"):
                if len(targets) >= limit:
                    break
                reddit_url = f"https://www.reddit.com/r/{sub}/{sort}.json?limit={per_page}"
                proxy_url = f"{REDDIT_PROXY}?url={urllib.parse.quote(reddit_url, safe='')}"
                req = urllib.request.Request(proxy_url, headers={"User-Agent": "Delimit/3.11.0"})
                try:
                    with urllib.request.urlopen(req, timeout=15) as resp:
                        result = json.loads(resp.read())

                    posts = result.get("data", {}).get("children", [])
                    for post_wrapper in posts:
                        post = post_wrapper.get("data", {})
                        post_id = post.get("id", "")
                        fp = f"reddit:{post_id}"
                        if fp in known_fps or not post_id:
                            continue

                        title = post.get("title", "")
                        # `or ""`: a null selftext must not raise and drop the
                        # rest of this listing.
                        selftext = (post.get("selftext") or "")[:200]
                        author = post.get("author", "")
                        score = post.get("score", 0) or 0
                        num_comments = post.get("num_comments", 0) or 0
                        permalink = post.get("permalink", "")
                        snippet = f"{title} {selftext}".strip()

                        venture_match, confidence, rationale = _route_venture(snippet)
                        if not venture_match:
                            continue
                        # Skip low-engagement posts
                        if score < 1 and num_comments < 2:
                            continue

                        targets.append({
                            "fingerprint": fp,
                            "platform": "reddit",
                            "source_id": post_id,
                            "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
                            "author": f"u/{author}",
                            # Post score stands in for follower count.
                            "author_followers": score,
                            "content_snippet": snippet[:300],
                            "venture": venture_match,
                            "classification": _classify_target(snippet, num_comments),
                            "confidence": confidence,
                            "rationale": f"r/{sub}/{sort}: {rationale}",
                            "manual_check_needed": False,
                            "first_seen": datetime.now(timezone.utc).isoformat(),
                            "status": "new",
                        })
                        known_fps.add(fp)

                        if len(targets) >= limit:
                            break

                except Exception as e:
                    # One failing listing must not stop the rest of the scan.
                    logger.error("Reddit proxy scan error for r/%s/%s: %s", sub, sort, e)
                    continue

    return targets
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
def _scan_reddit_rapidapi(queries: List[str], limit: int, known_fps: set, api_key: str) -> List[Dict]:
    """Fallback: Scan Reddit via RapidAPI Reddit34.

    Phase 1 pulls the newest posts of every subreddit in
    ``VENTURE_SUBREDDITS`` (each subreddit once). Phase 2 runs a keyword
    search with the first three queries. Posts are kept when
    ``_route_venture`` assigns them to a venture and they have some
    engagement (score >= 1 or at least 2 comments).

    Args:
        queries: Search phrases; only the first three are used (phase 2).
        limit: Maximum number of targets to return; also caps the page size
            (10 for subreddit listings, 5 for searches).
        known_fps: Fingerprints already seen. Fingerprints of new targets are
            added to this set in place.
        api_key: RapidAPI key sent in the ``X-RapidAPI-Key`` header.

    Returns:
        Target dicts, at most ``limit`` of them.
    """
    targets: List[Dict] = []
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "reddit34.p.rapidapi.com",
        "User-Agent": "Delimit/3.11.0",
    }

    def _fetch(url: str) -> Any:
        """GET *url* with the RapidAPI headers and decode the JSON body."""
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=20) as resp:
            return json.loads(resp.read())

    def _collect(posts: List[Any], label: str) -> None:
        """Append qualifying posts to ``targets``; *label* prefixes the rationale."""
        for post_wrapper in posts:
            # Posts may arrive wrapped as {"data": {...}} or as the bare post.
            post = post_wrapper.get("data", post_wrapper)
            post_id = post.get("id", "")
            fp = f"reddit:{post_id}"
            if fp in known_fps or not post_id:
                continue

            title = post.get("title", "")
            # `or ""`: a null selftext must not raise and drop the whole page.
            selftext = (post.get("selftext") or "")[:200]
            author = post.get("author", "")
            score = post.get("score", 0) or 0
            num_comments = post.get("num_comments", 0) or 0
            permalink = post.get("permalink", "")
            snippet = f"{title} {selftext}".strip()

            venture_match, confidence, rationale = _route_venture(snippet)
            if not venture_match:
                continue

            # Skip low-engagement posts
            if score < 1 and num_comments < 2:
                continue

            targets.append({
                "fingerprint": fp,
                "platform": "reddit",
                "source_id": post_id,
                "canonical_url": f"https://reddit.com{permalink}" if permalink else "",
                "author": f"u/{author}",
                # Post score stands in for follower count.
                "author_followers": score,
                "content_snippet": snippet[:300],
                "venture": venture_match,
                "classification": _classify_target(snippet, num_comments),
                "confidence": confidence,
                "rationale": f"{label}: {rationale}",
                "manual_check_needed": False,
                "first_seen": datetime.now(timezone.utc).isoformat(),
                "status": "new",
            })
            known_fps.add(fp)

            if len(targets) >= limit:
                break

    # Phase 1: scan subreddits mapped to ventures
    scanned_subs: set = set()
    for subs in VENTURE_SUBREDDITS.values():
        for sub in subs:
            if len(targets) >= limit:
                break
            # A subreddit listed under several ventures is scanned once.
            # `continue` (not `break`) so the venture's remaining subreddits
            # are still visited.
            if sub in scanned_subs:
                continue
            scanned_subs.add(sub)

            url = (
                "https://reddit34.p.rapidapi.com/getPostsBySubreddit"
                f"?subreddit={urllib.parse.quote(sub)}&sort=new&limit={min(limit, 10)}"
            )
            try:
                result = _fetch(url)
                if not result.get("success"):
                    logger.warning("Reddit34 returned success=false for r/%s", sub)
                    continue
                _collect(result.get("data", {}).get("posts", []), f"r/{sub}")
            except Exception as e:
                # One failing subreddit must not stop the rest of the scan.
                logger.error("Reddit scan error for r/%s: %s", sub, e)
                continue

    # Phase 2: keyword search across all of Reddit via getSearchPosts
    for query in queries[:3]:  # Top 3 venture topic queries
        if len(targets) >= limit:
            break
        search_url = (
            "https://reddit34.p.rapidapi.com/getSearchPosts"
            f"?query={urllib.parse.quote(query)}&sort=new&limit={min(limit, 5)}"
        )
        try:
            result = _fetch(search_url)
            if not result.get("success"):
                continue
            _collect(result.get("data", {}).get("posts", []), f"search:{query}")
        except Exception as e:
            logger.error("Reddit search error for '%s': %s", query, e)
            continue

    return targets
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
def _manual_check_targets(platform: str, queries: List[str], limit: int) -> List[Dict]:
    """Build "go look by hand" placeholder targets for a platform.

    One placeholder is produced for each of the first three queries; the
    result is then truncated to ``limit`` entries.

    Args:
        platform: Platform name recorded on each placeholder.
        queries: Search phrases; only the first three are used.
        limit: Maximum number of placeholders to return.

    Returns:
        Placeholder dicts with ``manual_check_needed`` set to True and
        ``status`` set to "manual_check_needed".
    """
    placeholders = []
    for phrase in queries[:3]:
        # Only the routed venture is used; confidence is fixed at 0.0 below.
        routed_venture, _confidence, _rationale = _route_venture(phrase)
        placeholder = {
            "fingerprint": f"{platform}:manual:{phrase[:30]}",
            "platform": platform,
            "source_id": "",
            "canonical_url": "",
            "author": "",
            "author_followers": 0,
            "content_snippet": f"Search '{phrase}' on {platform}",
            "venture": routed_venture or "unknown",
            "classification": "reply",
            "confidence": 0.0,
            "rationale": f"Manual check needed -- {platform} cannot be scanned server-side",
            "manual_check_needed": True,
            "first_seen": datetime.now(timezone.utc).isoformat(),
            "status": "manual_check_needed",
        }
        placeholders.append(placeholder)
    return placeholders[:limit]
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
# -----------------------------------------------------------------------
|
|
1310
|
-
# JSON extraction helper
|
|
1311
|
-
# -----------------------------------------------------------------------
|
|
1312
|
-
|
|
1313
|
-
def _extract_json_array(text: str) -> list:
    """Best-effort extraction of a JSON array from LLM response text.

    Strategy:
      1. Parse the whole text. A list is returned as is; a dict is wrapped in
         a single-element list.
      2. Otherwise try a real JSON decode at each ``[`` in the text, left to
         right, and return the first one that yields a list. Using the JSON
         decoder (instead of counting brackets) handles ``]`` inside string
         values, and a non-JSON bracket such as ``[draft]`` no longer hides a
         valid array that follows it.

    Args:
        text: Raw response text, possibly with prose around the JSON.

    Returns:
        The extracted list, or an empty list when nothing parses.
    """
    # Try the whole text first
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, ValueError):
        pass
    else:
        if isinstance(parsed, list):
            return parsed
        if isinstance(parsed, dict):
            return [parsed]

    # Try each "[" as the start of an embedded array
    decoder = json.JSONDecoder()
    start = text.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and ignores
            # whatever trails it.
            candidate, _end = decoder.raw_decode(text, start)
        except (json.JSONDecodeError, ValueError):
            pass
        else:
            if isinstance(candidate, list):
                return candidate
        start = text.find("[", start + 1)
    return []
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
# -----------------------------------------------------------------------
|
|
1342
|
-
# Public API
|
|
1343
|
-
# -----------------------------------------------------------------------
|
|
1344
|
-
|
|
1345
|
-
def scan_targets(
    platforms: List[str],
    ventures: Optional[List[str]] = None,
    keywords: Optional[List[str]] = None,
    limit: int = 10,
) -> List[Dict]:
    """Run the per-platform scanners and collect engagement targets.

    Args:
        platforms: Platform names to scan (x, hn, devto, reddit, github,
            namepros). Names are stripped and lower-cased; unknown names are
            logged and skipped, as are platforms disabled in the config.
        ventures: Restrict queries and results to these ventures. None = all.
        keywords: Extra search phrases appended to the venture topics.
        limit: Max targets per platform. A falsy value falls back to the
            config's ``scan_limit`` (default 10).

    Returns:
        Target dicts from all platforms. A platform whose scan raises
        contributes a single ``{"error": ..., "platform": ...}`` entry.
        Targets that are neither errors nor manual-check placeholders are
        also appended to the target store.
    """
    scan_config = _load_config()
    known_fps = _load_known_fingerprints()
    effective_limit = limit or scan_config.get("scan_limit", 10)

    # Build query list from venture topics + extra keywords
    queries: List[str] = []
    for venture_name in ventures or list(VENTURE_CONFIG.keys()):
        venture_cfg = VENTURE_CONFIG.get(venture_name)
        if venture_cfg:
            queries.extend(venture_cfg["topics"])
    if keywords:
        queries.extend(keywords)

    # Case-insensitive de-duplication; the first spelling seen is kept.
    first_spelling: Dict[str, str] = {}
    for phrase in queries:
        first_spelling.setdefault(phrase.lower(), phrase)
    unique_queries: List[str] = list(first_spelling.values())

    def _github_scan() -> List[Dict]:
        # GitHub combines the topic search with monitoring of our own repos.
        found = _scan_github(unique_queries, effective_limit, known_fps, config=scan_config)
        found.extend(_monitor_own_repos(known_fps))
        return found

    scanners = {
        "x": lambda: _scan_x(unique_queries, effective_limit, known_fps, config=scan_config),
        "hn": lambda: _scan_hn(unique_queries, effective_limit, known_fps),
        "devto": lambda: _scan_devto(unique_queries, effective_limit, known_fps),
        "reddit": lambda: _scan_reddit(unique_queries, effective_limit, known_fps, config=scan_config),
        "github": _github_scan,
        "namepros": lambda: _manual_check_targets("namepros", unique_queries, effective_limit),
    }

    all_targets: List[Dict] = []
    platform_configs = scan_config.get("platforms", {})

    for raw_name in platforms:
        platform = raw_name.strip().lower()

        # Platforms are enabled unless the config says otherwise.
        if not platform_configs.get(platform, {}).get("enabled", True):
            logger.info("Platform '%s' is disabled in config, skipping", platform)
            continue

        try:
            run_scan = scanners.get(platform)
            if run_scan is None:
                logger.warning("Unknown platform: %s", platform)
                continue
            targets = run_scan()

            # Filter by venture if specified (error entries always pass)
            if ventures:
                targets = [t for t in targets if t.get("venture") in ventures or t.get("error")]

            all_targets.extend(targets)
        except Exception as e:
            logger.error("Platform scan error (%s): %s", platform, e)
            all_targets.append({"error": f"Scan failed for {platform}: {e}", "platform": platform})

    # Persist new non-error targets
    for entry in all_targets:
        if entry.get("error") or entry.get("manual_check_needed"):
            continue
        _append_target(entry)

    return all_targets
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
def process_targets(
    targets: List[Dict],
    draft_replies: bool = False,
    create_ledger: bool = False,
) -> Dict[str, Any]:
    """Turn scanned targets into reply drafts and/or ledger items.

    Error entries and manual-check placeholders are ignored. A target
    classified "both" is eligible for a draft and a ledger item.

    Args:
        targets: Target dicts as produced by ``scan_targets``.
        draft_replies: When True, save a placeholder draft (via
            ``ai.social.save_draft``) for targets classified "reply"/"both".
            A failure while drafting is logged and does not stop processing.
        create_ledger: When True, build a ledger item dict for targets
            classified "strategic"/"both".

    Returns:
        ``{"drafted": [...], "ledger_items": [...]}``.
    """
    drafted: List[Dict] = []
    ledger_items: List[Dict] = []
    result: Dict[str, Any] = {"drafted": drafted, "ledger_items": ledger_items}

    for item in targets:
        if item.get("error") or item.get("manual_check_needed"):
            continue

        kind = item.get("classification", "reply")

        if draft_replies and kind in ("reply", "both"):
            try:
                # Imported lazily so an import failure is handled per target.
                from ai.social import save_draft

                venture = item.get("venture", "delimit")
                url = item.get("canonical_url", "")
                snippet = item.get("content_snippet", "")
                author = item.get("author", "")

                draft_text = (
                    f"[DRAFT - needs human writing] "
                    f"Engagement opportunity for {venture}: "
                    f"{author} posted about {snippet[:100]}... "
                    f"URL: {url}"
                )

                # Every draft goes to Twitter; only X targets carry a reply id.
                social_platform = "twitter"
                if item.get("platform", "x") == "x":
                    reply_to = item.get("source_id", "")
                else:
                    reply_to = ""

                venture_cfg = VENTURE_CONFIG.get(venture, {})
                account = venture_cfg.get("owned_accounts", ["delimit_ai"])[0]

                saved = save_draft(
                    draft_text,
                    platform=social_platform,
                    account=account,
                    reply_to_id=reply_to,
                    context=f"Social target: {item.get('rationale', '')}",
                )
                drafted.append({
                    "draft_id": saved.get("draft_id"),
                    "fingerprint": item.get("fingerprint"),
                    "venture": venture,
                })
            except Exception as e:
                logger.error("Failed to draft reply for %s: %s", item.get("fingerprint"), e)

        if create_ledger and kind in ("strategic", "both"):
            venture = item.get("venture", "delimit")
            title = f"[{venture.upper()}] Engage: {item.get('author', 'unknown')} on {item.get('platform', '?')}"
            description = (
                f"Source: {item.get('canonical_url', 'N/A')}\n"
                f"Author: {item.get('author', 'unknown')} ({item.get('author_followers', 0)} followers)\n"
                f"Snippet: {item.get('content_snippet', '')[:200]}\n"
                f"Rationale: {item.get('rationale', '')}"
            )
            ledger_items.append({
                "title": title,
                "description": description,
                "priority": VENTURE_CONFIG.get(venture, {}).get("priority", "P1"),
                "tags": [venture, "social-target", item.get("platform", "")],
            })

    return result
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
def list_targets(limit: int = 20) -> Dict[str, Any]:
    """List the most recently stored targets from the JSONL store.

    The store is read bottom-up, so the last-appended entries come first.
    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped.

    Args:
        limit: Max targets to return. Zero or a negative value returns no
            targets.

    Returns:
        Dict with ``targets`` (last-appended first), ``count`` (number of
        targets returned) and ``total_stored`` (number of lines in the store
        file, including blank or unparseable ones).
    """
    if not TARGETS_FILE.exists():
        # Same shape as the populated case so callers can always read
        # ``total_stored`` without a key check.
        return {"targets": [], "count": 0, "total_stored": 0}

    lines = TARGETS_FILE.read_text().splitlines()
    targets: List[Dict] = []

    # Guard up front: checking the limit only after an append would let
    # limit=0 (or a negative limit) still return one entry.
    if limit > 0:
        for line in reversed(lines):
            if not line.strip():
                continue
            try:
                entry = json.loads(line)
            except (json.JSONDecodeError, ValueError):
                continue
            # A bare list/string/number is valid JSON but not a target record.
            if not isinstance(entry, dict):
                continue
            targets.append(entry)
            if len(targets) >= limit:
                break

    return {"targets": targets, "count": len(targets), "total_stored": len(lines)}
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
def get_stats() -> Dict[str, Any]:
    """Get aggregate stats on discovered targets.

    Every parseable JSON object in the store is counted once in ``total`` and
    once in each breakdown. A record missing a field is counted under
    ``"unknown"`` for that breakdown. Blank lines, lines that are not valid
    JSON, and JSON values that are not objects are ignored.

    Returns:
        Dict with ``total`` plus ``by_platform``, ``by_venture``,
        ``by_classification`` and ``by_status`` count mappings. All counts
        are zero/empty when the store file does not exist.
    """
    stats: Dict[str, Any] = {
        "total": 0,
        "by_platform": {},
        "by_venture": {},
        "by_classification": {},
        "by_status": {},
    }
    if not TARGETS_FILE.exists():
        return stats

    # (record field, name of the breakdown it feeds)
    breakdowns = (
        ("platform", "by_platform"),
        ("venture", "by_venture"),
        ("classification", "by_classification"),
        ("status", "by_status"),
    )

    for line in TARGETS_FILE.read_text().splitlines():
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except (json.JSONDecodeError, ValueError):
            continue
        # Valid JSON that is not an object (list, string, number) has no
        # fields to tally; skip it instead of failing on ``.get``.
        if not isinstance(entry, dict):
            continue

        stats["total"] += 1
        for field, bucket_name in breakdowns:
            bucket = stats[bucket_name]
            key = entry.get(field, "unknown")
            bucket[key] = bucket.get(key, 0) + 1

    return stats
|