delimit-cli 4.0.1 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -1
- package/README.md +0 -1
- package/gateway/ai/cross_model_audit.py +600 -0
- package/gateway/ai/github_scanner.py +622 -0
- package/gateway/ai/handoff_receipts.py +409 -0
- package/gateway/ai/license_core.py +1 -2
- package/gateway/ai/notify.py +12 -12
- package/gateway/ai/reddit_scanner.py +562 -0
- package/gateway/ai/server.py +341 -51
- package/gateway/ai/session_phoenix.py +371 -0
- package/gateway/ai/swarm.py +2 -2
- package/gateway/ai/toolcard_cache.py +327 -0
- package/gateway/core/contract_ledger.py +1 -1
- package/gateway/core/dependency_graph.py +1 -1
- package/gateway/core/dependency_manifest.py +1 -1
- package/gateway/core/event_backbone.py +2 -2
- package/gateway/core/event_schema.py +1 -1
- package/gateway/core/impact_analyzer.py +1 -1
- package/package.json +1 -10
- package/scripts/crosspost_devto.py +304 -0
- package/scripts/security-check.sh +66 -0
- package/scripts/weekly-tweet.py +191 -0
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
"""Reddit bulk scanner -- fetch, categorize, and rank posts for outreach.
|
|
2
|
+
|
|
3
|
+
Scans 25+ subreddits via the residential proxy, scores each post on
|
|
4
|
+
engagement, freshness, comment opportunity, and venture relevance,
|
|
5
|
+
then returns a ranked list of outreach targets.
|
|
6
|
+
|
|
7
|
+
Rate limited by default to one request every 4-6 seconds (4 s base delay plus up to 2 s of random jitter) to stay well under Reddit limits.
|
|
8
|
+
Each scan's results are persisted to ~/.delimit/reddit_scans/{timestamp}.json (e.g. 2025-01-31T142500.json) for dedup.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import time
|
|
16
|
+
import urllib.error
|
|
17
|
+
import urllib.request
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger("delimit.ai.reddit_scanner")
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Subreddit groups
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
# Subreddits to scan, keyed by the outreach group each one belongs to.
SCAN_GROUPS: Dict[str, List[str]] = {
    "delimit_core": [
        "ClaudeAI",
        "vibecoding",
        "cursor",
        "AI_Agents",
    ],
    "delimit_adjacent": [
        "devops",
        "programming",
        "ContextEngineering",
        "LocalLLaMA",
        "MachineLearning",
    ],
    "domainvested": [
        "Domains",
        "Entrepreneur",
        "SideProject",
        "flipping",
    ],
    "wirereport": [
        "sportsbook",
        "sportsbetting",
    ],
    "stakeone": [
        "harmony_one",
        "CryptoCurrency",
        "defi",
    ],
    "karma_building": [
        "SaaS",
        "opensource",
        "webdev",
        "startups",
        "ExperiencedDevs",
        "selfhosted",
        "IndieHackers",
    ],
}

# Two derived views of SCAN_GROUPS, filled in one pass over the groups:
#   ALL_SUBREDDITS -- flat list of every subreddit, in declaration order
#   _SUB_TO_GROUP  -- reverse lookup: lowercased subreddit name -> group
ALL_SUBREDDITS: List[str] = []
_SUB_TO_GROUP: Dict[str, str] = {}
for _group, _subs in SCAN_GROUPS.items():
    ALL_SUBREDDITS.extend(_subs)
    for _sub in _subs:
        _SUB_TO_GROUP[_sub.lower()] = _group
|
|
44
|
+
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Venture keywords (loaded from disk or defined inline as fallback)
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
_VENTURE_KEYWORDS_FALLBACK: Dict[str, List[str]] = {
|
|
50
|
+
"delimit": [
|
|
51
|
+
"api governance", "breaking changes", "openapi", "api linting",
|
|
52
|
+
"mcp server", "mcp tools", "claude.md", "claude code",
|
|
53
|
+
"ai coding", "vibe coding", "semver", "api compatibility",
|
|
54
|
+
"schema migration", "api versioning", "contract testing",
|
|
55
|
+
"session handoff", "agent state", "context engineering",
|
|
56
|
+
],
|
|
57
|
+
"domainvested": [
|
|
58
|
+
"domain investing", "domain appraisal", "domain flipping",
|
|
59
|
+
"expired domains", "brandable domains", "domain valuation",
|
|
60
|
+
"namepros", "domain name",
|
|
61
|
+
],
|
|
62
|
+
"wirereport": [
|
|
63
|
+
"sports api", "live sports data", "sports scores",
|
|
64
|
+
"sports news automation", "sports betting api",
|
|
65
|
+
],
|
|
66
|
+
"stakeone": [
|
|
67
|
+
"harmony one", "harmony validator", "one staking",
|
|
68
|
+
"harmony blockchain", "harmony network",
|
|
69
|
+
],
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Pain point categories for product intelligence
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
# Phrases that signal each pain category.  extract_pain_points() searches
# for them in the lowercased title + selftext, so every phrase here must be
# lowercase -- a phrase containing capitals can never match.
PAIN_CATEGORIES: Dict[str, List[str]] = {
    "context_loss": ["lost context", "re-explain", "starting from zero", "forgot", "doesn't remember"],
    "rate_limits": ["rate limit", "session limit", "throttled", "burned through", "ran out"],
    "multi_model": ["switching between", "codex and claude", "multiple models", "different tool"],
    "code_quality": ["broke my", "deleted", "undid", "regression", "broke production"],
    "session_management": ["session died", "context window", "compact", "handoff"],
    # "api broke" was previously spelled "API broke", which could not match
    # lowercased post text.
    "governance": ["breaking change", "api broke", "schema", "backward compat"],
    "onboarding": ["how to start", "getting started", "setup", "configure"],
    "cost": ["expensive", "pricing", "cost", "$200", "billing"],
}

# Which pain categories map to Delimit features
_PAIN_TO_RELEVANCE: Dict[str, str] = {
    "context_loss": "existing_feature",        # persistent context / session handoff
    "session_management": "existing_feature",  # session handoff, compact
    "governance": "existing_feature",          # API governance, breaking change detection
    "multi_model": "existing_feature",         # cross-model continuity
    "code_quality": "planned_feature",         # test verification, guardrails
    "onboarding": "planned_feature",           # delimit init, doctor, setup
    "rate_limits": "new_opportunity",          # not directly addressed yet
    "cost": "new_opportunity",                 # pricing transparency / cost tracking
}
|
|
98
|
+
|
|
99
|
+
# Local proxy endpoint that performs the actual Reddit fetch.
PROXY_URL = "http://127.0.0.1:4819/reddit-fetch"

# On-disk locations under the user's ~/.delimit directory.
SCANS_DIR = Path.home().joinpath(".delimit", "reddit_scans")
VENTURES_CONFIG_PATH = Path.home().joinpath(".delimit", "social_target_ventures.json")

# Posts by these authors are always skipped
SKIP_AUTHORS = {
    "delimitdev",
    "delimit_ai",
    "AutoModerator",
    "[deleted]",
}
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
# Keyword loading
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _load_venture_keywords() -> Dict[str, List[str]]:
    """Load venture keywords from disk config, falling back to the built-in list.

    Reads ``VENTURES_CONFIG_PATH`` as JSON and looks for a mapping of the form
    ``{"ventures": {<name>: {"topics": [<str>, ...]}}}``.  Each venture that
    has at least one usable topic contributes its lowercased topics.

    The built-in ``_VENTURE_KEYWORDS_FALLBACK`` (lowercased) is returned when
    the file is missing, unreadable, not valid JSON, has an unexpected shape,
    or yields no topics.  A malformed config never raises.

    Returns:
        Mapping of venture name to a list of lowercase keyword strings.
    """
    data: Any = None
    try:
        data = json.loads(VENTURES_CONFIG_PATH.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No config on disk is the normal case; use the fallback silently.
        pass
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Failed to load venture keywords: %s", exc)

    ventures = data.get("ventures") if isinstance(data, dict) else None
    if isinstance(ventures, dict):
        result: Dict[str, List[str]] = {}
        for name, cfg in ventures.items():
            topics = cfg.get("topics") if isinstance(cfg, dict) else None
            if not isinstance(topics, list):
                continue
            # Skip non-strings and blank topics: an empty keyword is a
            # substring of every post and would tag everything.
            cleaned = [t.lower() for t in topics if isinstance(t, str) and t.strip()]
            if cleaned:
                result[name] = cleaned
        if result:
            return result

    return {k: [t.lower() for t in v] for k, v in _VENTURE_KEYWORDS_FALLBACK.items()}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Fetching
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _fetch_subreddit(
    subreddit: str,
    sort: str = "hot",
    limit: int = 10,
    *,
    proxy_url: str = PROXY_URL,
) -> List[Dict[str, Any]]:
    """Fetch posts from a single subreddit via the proxy endpoint.

    The proxy endpoint expects a query parameter ``url`` containing the
    actual (percent-encoded) Reddit JSON URL.

    Args:
        subreddit: Subreddit name, without the ``r/`` prefix.
        sort: Listing to request (used as the ``{sort}.json`` path segment).
        limit: Value of the ``limit`` query parameter sent to Reddit.
        proxy_url: Base URL of the proxy endpoint.

    Returns:
        A list of post dicts with the keys ``id``, ``title``, ``author``,
        ``score``, ``num_comments``, ``subreddit``, ``permalink``,
        ``selftext`` (truncated to 200 characters) and ``created_utc``.
        Stickied posts and posts by ``SKIP_AUTHORS`` are omitted.  Any
        fetch/parse failure or unexpected payload shape is logged and
        yields an empty list; this function does not raise for those.
    """
    # urllib.parse.quote is the documented home of quote(); the re-export
    # through urllib.request is an implementation detail.
    from urllib.parse import quote

    reddit_url = f"https://www.reddit.com/r/{subreddit}/{sort}.json?limit={limit}&raw_json=1"
    fetch_url = f"{proxy_url}?url={quote(reddit_url, safe='')}"

    req = urllib.request.Request(
        fetch_url,
        headers={"User-Agent": "delimit-scanner/1.0", "Accept": "application/json"},
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            body = json.loads(resp.read().decode())
    except Exception as exc:
        # Deliberately broad: one failing subreddit must not abort a scan.
        logger.warning("Failed to fetch r/%s: %s", subreddit, exc)
        return []

    # Reddit returns {"data": {"children": [...]}}.  Validate each level so
    # a payload such as {"data": null} is treated as "no posts" rather than
    # raising AttributeError.
    listing = body.get("data") if isinstance(body, dict) else None
    children = listing.get("children") if isinstance(listing, dict) else None
    if not isinstance(children, list):
        return []

    posts: List[Dict[str, Any]] = []
    for child in children:
        d = child.get("data") if isinstance(child, dict) else None
        if not isinstance(d, dict) or not d:
            continue
        # Skip stickied
        if d.get("stickied"):
            continue
        # Skip our own posts
        author = d.get("author", "")
        if author in SKIP_AUTHORS:
            continue

        posts.append({
            "id": d.get("id", ""),
            # ``or ""`` guards against explicit JSON nulls, which would
            # break later string concatenation during scoring.
            "title": d.get("title") or "",
            "author": author,
            "score": d.get("score", 0),
            "num_comments": d.get("num_comments", 0),
            "subreddit": d.get("subreddit", subreddit),
            "permalink": d.get("permalink", ""),
            "selftext": (d.get("selftext") or "")[:200],
            "created_utc": d.get("created_utc", 0),
        })

    return posts
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def fetch_all(
    limit_per_sub: int = 10,
    sort: str = "hot",
    *,
    rate_limit: float = 4.0,
    proxy_url: str = PROXY_URL,
    subreddits: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """Collect posts from every target subreddit into one flat list.

    Args:
        limit_per_sub: Number of posts requested from each subreddit.
        sort: Listing passed through to ``_fetch_subreddit``.
        rate_limit: Base delay in seconds between consecutive requests; a
            random 0-2 s jitter is added on top.  A value <= 0 disables the
            delay entirely.
        proxy_url: Proxy endpoint passed through to ``_fetch_subreddit``.
        subreddits: Subreddits to scan.  ``None`` or an empty list means
            ``ALL_SUBREDDITS``.

    Returns:
        All fetched post dicts, each tagged in place with a ``group`` key
        naming the scan group of the subreddit it was requested from
        (``"unknown"`` when the subreddit is in no group).
    """
    import random

    collected: List[Dict[str, Any]] = []
    is_first_request = True

    for name in subreddits or ALL_SUBREDDITS:
        # No delay before the very first request.
        if not is_first_request and rate_limit > 0:
            # Add jitter to avoid bot-pattern detection
            time.sleep(rate_limit + random.uniform(0, 2.0))
        is_first_request = False

        batch = _fetch_subreddit(name, sort=sort, limit=limit_per_sub, proxy_url=proxy_url)
        group_name = _SUB_TO_GROUP.get(name.lower(), "unknown")
        for entry in batch:
            entry["group"] = group_name
        collected += batch
        logger.info("Fetched %d posts from r/%s (%s)", len(batch), name, group_name)

    return collected
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# Scoring & classification
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _age_hours(created_utc: float, now: Optional[float] = None) -> float:
|
|
231
|
+
"""Return how many hours old a post is."""
|
|
232
|
+
now_ts = now or time.time()
|
|
233
|
+
return max(0.0, (now_ts - created_utc) / 3600.0)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _freshness_multiplier(age_h: float) -> float:
|
|
237
|
+
"""Return freshness multiplier: <6h = 2x, <12h = 1.5x, else 1x."""
|
|
238
|
+
if age_h < 6:
|
|
239
|
+
return 2.0
|
|
240
|
+
if age_h < 12:
|
|
241
|
+
return 1.5
|
|
242
|
+
return 1.0
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _relevance_tags(title: str, selftext: str, venture_keywords: Dict[str, List[str]]) -> List[str]:
|
|
246
|
+
"""Return list of matching keyword tags from the post text."""
|
|
247
|
+
combined = (title + " " + selftext).lower()
|
|
248
|
+
tags: List[str] = []
|
|
249
|
+
for _venture, keywords in venture_keywords.items():
|
|
250
|
+
for kw in keywords:
|
|
251
|
+
if kw in combined and kw not in tags:
|
|
252
|
+
tags.append(kw)
|
|
253
|
+
return tags
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _suggest_angle(relevance_tags: List[str], group: str) -> str:
|
|
257
|
+
"""Generate a brief suggested engagement angle."""
|
|
258
|
+
if not relevance_tags:
|
|
259
|
+
if group == "karma_building":
|
|
260
|
+
return "general expertise comment for karma building"
|
|
261
|
+
return "tangentially relevant -- low priority"
|
|
262
|
+
|
|
263
|
+
tag_str = ", ".join(relevance_tags[:3])
|
|
264
|
+
|
|
265
|
+
angle_map = {
|
|
266
|
+
"delimit_core": f"expert comment on {tag_str}",
|
|
267
|
+
"delimit_adjacent": f"helpful technical reply mentioning {tag_str}",
|
|
268
|
+
"domainvested": f"domain industry insight on {tag_str}",
|
|
269
|
+
"wirereport": f"sports data perspective on {tag_str}",
|
|
270
|
+
"stakeone": f"validator/staking expertise on {tag_str}",
|
|
271
|
+
"karma_building": f"genuine helpful comment touching on {tag_str}",
|
|
272
|
+
}
|
|
273
|
+
return angle_map.get(group, f"engage on {tag_str}")
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def extract_pain_points(title: str, selftext: str) -> Dict[str, Any]:
    """Extract actionable product insights from a post's text.

    The title and selftext are joined and lowercased, then searched for the
    phrases in ``PAIN_CATEGORIES``.  Matching is case-insensitive: phrases
    are lowercased before the substring test, so a phrase written with
    capitals in the table can still match.

    Args:
        title: Post title.
        selftext: Post body text (may be empty).

    Returns:
        A dict with:
            pain_point: the stripped title, truncated to 120 characters
                (empty string when nothing matched)
            delimit_relevance: existing_feature | planned_feature |
                new_opportunity | not_relevant -- the highest-priority
                relevance among all matched categories
            suggested_ledger_item: "<category label>: <first 80 chars of
                title>" (empty string if not relevant)
            product_insight: canned one-sentence takeaway for the first
                matched category
            matched_categories: list of PAIN_CATEGORIES keys that matched,
                in table order
    """
    combined = (title + " " + selftext).lower()

    # ``combined`` is lowercased, so compare lowercased phrases; otherwise a
    # phrase containing capitals could never be found.
    matched_cats: List[str] = [
        category
        for category, phrases in PAIN_CATEGORIES.items()
        if any(phrase.lower() in combined for phrase in phrases)
    ]

    if not matched_cats:
        return {
            "pain_point": "",
            "delimit_relevance": "not_relevant",
            "suggested_ledger_item": "",
            "product_insight": "",
            "matched_categories": [],
        }

    # Overall relevance is the best (earliest in this tuple) relevance that
    # any matched category maps to.
    relevance_priority = ("existing_feature", "planned_feature", "new_opportunity")
    found_relevances = {_PAIN_TO_RELEVANCE.get(cat, "not_relevant") for cat in matched_cats}
    best_relevance = next(
        (level for level in relevance_priority if level in found_relevances),
        "not_relevant",
    )

    # Build pain_point: summarize from title (truncated, cleaned)
    clean_title = title.strip()
    pain_point = clean_title
    if len(pain_point) > 120:
        pain_point = pain_point[:117] + "..."

    # Labels and insights are keyed by the first matched category.
    cat_labels = {
        "context_loss": "Context persistence",
        "rate_limits": "Rate limit mitigation",
        "multi_model": "Multi-model workflow",
        "code_quality": "Code safety guardrail",
        "session_management": "Session management",
        "governance": "API governance",
        "onboarding": "Onboarding flow",
        "cost": "Cost management",
    }
    primary_cat = matched_cats[0]
    ledger_prefix = cat_labels.get(primary_cat, primary_cat.replace("_", " ").title())

    ledger_item = ""
    if best_relevance != "not_relevant":
        # Use the first 80 chars of the title as the action item basis
        short_title = clean_title[:80].rstrip(".")
        ledger_item = f"{ledger_prefix}: {short_title}"

    cat_insights = {
        "context_loss": "Users lose productivity when context does not persist across sessions",
        "rate_limits": "Rate limits and session caps are a recurring friction point for power users",
        "multi_model": "Users want to move between AI tools without rebuilding context",
        "code_quality": "Users fear AI making destructive changes without guardrails",
        "session_management": "Session lifecycle management is a top concern for daily AI users",
        "governance": "Teams need automated detection of breaking changes in APIs",
        "onboarding": "New users struggle with initial setup and configuration",
        "cost": "Cost predictability and transparency matter to individual developers",
    }
    insight = cat_insights.get(primary_cat, f"Users express frustration with {primary_cat.replace('_', ' ')}")

    return {
        "pain_point": pain_point,
        "delimit_relevance": best_relevance,
        "suggested_ledger_item": ledger_item,
        "product_insight": insight,
        "matched_categories": matched_cats,
    }
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _build_product_insights(scored_posts: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
366
|
+
"""Aggregate pain_points across all scored posts into a product insights summary.
|
|
367
|
+
|
|
368
|
+
Returns:
|
|
369
|
+
top_pain_points: most mentioned pain categories with counts
|
|
370
|
+
new_opportunities: suggested ledger items for unaddressed pain
|
|
371
|
+
existing_feature_validation: posts that validate features we already ship
|
|
372
|
+
"""
|
|
373
|
+
from collections import Counter
|
|
374
|
+
|
|
375
|
+
cat_counter: Counter = Counter()
|
|
376
|
+
new_opps: List[Dict[str, str]] = []
|
|
377
|
+
existing_validations: List[Dict[str, str]] = []
|
|
378
|
+
|
|
379
|
+
for post in scored_posts:
|
|
380
|
+
pp = post.get("pain_points")
|
|
381
|
+
if not pp or not pp.get("matched_categories"):
|
|
382
|
+
continue
|
|
383
|
+
|
|
384
|
+
for cat in pp["matched_categories"]:
|
|
385
|
+
cat_counter[cat] += 1
|
|
386
|
+
|
|
387
|
+
relevance = pp.get("delimit_relevance", "not_relevant")
|
|
388
|
+
entry = {
|
|
389
|
+
"title": post.get("title", ""),
|
|
390
|
+
"subreddit": post.get("subreddit", ""),
|
|
391
|
+
"url": post.get("url", ""),
|
|
392
|
+
"pain_point": pp.get("pain_point", ""),
|
|
393
|
+
"suggested_ledger_item": pp.get("suggested_ledger_item", ""),
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
if relevance == "new_opportunity" and pp.get("suggested_ledger_item"):
|
|
397
|
+
new_opps.append(entry)
|
|
398
|
+
elif relevance == "existing_feature":
|
|
399
|
+
existing_validations.append(entry)
|
|
400
|
+
|
|
401
|
+
# Sort pain points by frequency
|
|
402
|
+
top_pains = [
|
|
403
|
+
{"category": cat, "count": count}
|
|
404
|
+
for cat, count in cat_counter.most_common(10)
|
|
405
|
+
]
|
|
406
|
+
|
|
407
|
+
return {
|
|
408
|
+
"top_pain_points": top_pains,
|
|
409
|
+
"new_opportunities": new_opps[:20],
|
|
410
|
+
"existing_feature_validation": existing_validations[:20],
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def score_and_classify(
    posts: List[Dict[str, Any]],
    *,
    now: Optional[float] = None,
    venture_keywords: Optional[Dict[str, List[str]]] = None,
) -> List[Dict[str, Any]]:
    """Score every post, assign it a priority, and return them best-first.

    Args:
        posts: Post dicts as produced by ``fetch_all``.
        now: Reference Unix timestamp for age calculation; a falsy value
            means the current time is used.
        venture_keywords: Keyword lists per venture; a falsy value means
            they are loaded via ``_load_venture_keywords``.

    Returns:
        New dicts (the inputs are not modified) carrying all original keys
        plus: engagement_score, age_hours, freshness_mult, relevance_tags,
        karma_building, suggested_angle, priority, final_score, url,
        pain_points and rank (1 = highest final_score).
    """
    keywords = venture_keywords or _load_venture_keywords()
    reference_ts = now or time.time()
    ranked: List[Dict[str, Any]] = []

    for post in posts:
        upvotes = post.get("score", 0)
        n_comments = post.get("num_comments", 0)
        group = post.get("group", "unknown")
        title = post.get("title", "")
        body_text = post.get("selftext", "")

        age_h = _age_hours(post.get("created_utc", 0), reference_ts)
        engagement = upvotes * 0.4 + n_comments * 0.6
        fresh_mult = _freshness_multiplier(age_h)

        # Comment opportunity bonus: high engagement but room to comment
        comment_opp = 1.3 if (engagement > 5 and n_comments < 30) else 1.0

        tags = _relevance_tags(title, body_text, keywords)
        relevance_mult = 1.0 + 0.2 * min(len(tags), 5)  # up to 2.0x

        final_score = engagement * fresh_mult * comment_opp * relevance_mult

        # Classification: hard exclusions first, then descending tiers.
        excluded = post.get("stickied") or age_h > 48 or n_comments > 100
        if excluded:
            priority = "skip"
        elif final_score >= 30 and age_h < 12 and n_comments < 50:
            priority = "high_priority"
        elif final_score >= 10 or (len(tags) >= 2 and age_h < 24):
            priority = "medium_priority"
        elif final_score >= 3:
            priority = "low_priority"
        else:
            priority = "skip"

        entry = dict(post)
        entry.update({
            "engagement_score": round(engagement, 1),
            "age_hours": round(age_h, 1),
            "freshness_mult": fresh_mult,
            "relevance_tags": tags,
            "karma_building": group == "karma_building",
            "suggested_angle": _suggest_angle(tags, group),
            "priority": priority,
            "final_score": round(final_score, 2),
            "url": f"https://reddit.com{post.get('permalink', '')}",
            "pain_points": extract_pain_points(title, body_text),
        })
        ranked.append(entry)

    # Highest (rounded) final_score first; ties keep their input order.
    ranked = sorted(ranked, key=lambda entry: entry["final_score"], reverse=True)

    for position, entry in enumerate(ranked, start=1):
        entry["rank"] = position

    return ranked
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# ---------------------------------------------------------------------------
|
|
495
|
+
# Main scan orchestrator
|
|
496
|
+
# ---------------------------------------------------------------------------
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def scan_all(
    limit_per_sub: int = 10,
    sort: str = "hot",
    *,
    rate_limit: float = 4.0,
    proxy_url: str = PROXY_URL,
) -> Dict[str, Any]:
    """Run a complete scan: fetch, score, classify, summarize, persist.

    Args:
        limit_per_sub: Posts requested per subreddit.
        sort: Listing passed to ``fetch_all``.
        rate_limit: Base delay between requests, passed to ``fetch_all``.
        proxy_url: Proxy endpoint, passed to ``fetch_all``.

    Returns:
        A dict with ``scanned_at`` (ISO timestamp taken before fetching),
        ``total_posts``, ``subreddits_scanned`` (distinct subreddits among
        the fetched posts), ``targets`` (scored posts whose priority is not
        ``skip``), ``by_group`` (all scored posts keyed by scan group),
        ``stats`` (post count per priority) and ``product_insights``.  The
        same dict is written to disk via ``_save_scan`` before returning.
    """
    started_at = datetime.now(timezone.utc)

    raw_posts = fetch_all(
        limit_per_sub=limit_per_sub,
        sort=sort,
        rate_limit=rate_limit,
        proxy_url=proxy_url,
    )
    scored = score_and_classify(raw_posts)

    # One pass builds the per-group buckets, the priority counts, and the
    # list of actionable (non-skip) targets.
    by_group: Dict[str, List[Dict[str, Any]]] = {}
    stats: Dict[str, int] = {"high_priority": 0, "medium_priority": 0, "low_priority": 0, "skip": 0}
    targets: List[Dict[str, Any]] = []
    for entry in scored:
        by_group.setdefault(entry.get("group", "unknown"), []).append(entry)

        level = entry.get("priority", "skip")
        stats[level] = stats.get(level, 0) + 1

        if entry["priority"] != "skip":
            targets.append(entry)

    # Product intelligence summary
    product_insights = _build_product_insights(scored)

    distinct_subs = {post.get("subreddit", "") for post in raw_posts}

    result: Dict[str, Any] = {
        "scanned_at": started_at.isoformat(),
        "total_posts": len(raw_posts),
        "subreddits_scanned": len(distinct_subs),
        "targets": targets,
        "by_group": by_group,
        "stats": stats,
        "product_insights": product_insights,
    }

    # Persist to disk
    _save_scan(result, started_at)

    return result
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _save_scan(result: Dict[str, Any], scan_time: datetime) -> Path:
    """Write a scan result to ``SCANS_DIR`` as indented JSON.

    The directory is created if needed.  The file is named after
    ``scan_time`` formatted as ``%Y-%m-%dT%H%M%S`` with a ``.json`` suffix.
    Values that JSON cannot encode natively are stringified.

    Returns:
        The path of the file that was written.
    """
    SCANS_DIR.mkdir(parents=True, exist_ok=True)

    stamp = scan_time.strftime("%Y-%m-%dT%H%M%S")
    destination = SCANS_DIR.joinpath(stamp + ".json")

    payload = json.dumps(result, indent=2, default=str)
    destination.write_text(payload)

    logger.info("Scan saved to %s", destination)
    return destination
|