delimit-cli 4.1.42 → 4.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,7 @@ logger = logging.getLogger("delimit.ai.reddit_scanner")
26
26
  # ---------------------------------------------------------------------------
27
27
 
28
28
  SCAN_GROUPS: Dict[str, List[str]] = {
29
- "delimit_core": ["ClaudeAI", "vibecoding", "cursor", "AI_Agents"],
29
+ "delimit_core": ["ClaudeAI", "vibecoding", "cursor", "AI_Agents", "ObsidianMD"],
30
30
  "delimit_adjacent": ["devops", "programming", "ContextEngineering", "LocalLLaMA", "MachineLearning"],
31
31
  "domainvested": ["Domains", "Entrepreneur", "SideProject", "flipping"],
32
32
  "wirereport": ["sportsbook", "sportsbetting"],
@@ -560,3 +560,47 @@ def _save_scan(result: Dict[str, Any], scan_time: datetime) -> Path:
560
560
  path.write_text(json.dumps(result, indent=2, default=str))
561
561
  logger.info("Scan saved to %s", path)
562
562
  return path
563
+
564
+
565
def fetch_thread(thread_id: str, *, proxy_url: str = PROXY_URL) -> Optional[Dict[str, Any]]:
    """Fetch a single Reddit thread by ID via the residential proxy.

    Args:
        thread_id: Reddit post ID (base-36, e.g. ``"1abc2d"``).
        proxy_url: Base URL of the fetch proxy; the Reddit JSON URL is
            passed URL-encoded in the ``url`` query parameter.

    Returns:
        A dict of post fields (id, title, author, score, num_comments,
        subreddit, permalink, selftext, created_utc), or ``None`` when
        the fetch fails or the response carries no post data.
    """
    import urllib.parse
    import urllib.request

    reddit_url = f"https://www.reddit.com/comments/{thread_id}.json?raw_json=1"
    fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"

    req = urllib.request.Request(
        fetch_url,
        headers={"User-Agent": "delimit-scanner/1.0", "Accept": "application/json"},
    )

    # Keep the try narrow: only the network round-trip and JSON decode can
    # legitimately fail here.  Best-effort by design — errors are logged,
    # never raised to the caller.
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read().decode())
    except Exception as exc:
        logger.warning("Failed to fetch thread %s: %s", thread_id, exc)
        return None

    # Reddit returns [post_listing, comment_listing].  Guard every level:
    # the original indexed children[0] and raised IndexError when the
    # "children" list was present but empty.
    if not (isinstance(data, list) and data):
        return None
    children = data[0].get("data", {}).get("children", [])
    if not children:
        return None
    post_data = children[0].get("data", {})
    if not post_data:
        return None

    return {
        "id": post_data.get("id", ""),
        "title": post_data.get("title", ""),
        "author": post_data.get("author", ""),
        "score": post_data.get("score", 0),
        "num_comments": post_data.get("num_comments", 0),
        "subreddit": post_data.get("subreddit", ""),
        "permalink": post_data.get("permalink", ""),
        "selftext": post_data.get("selftext", ""),
        "created_utc": post_data.get("created_utc", 0),
    }
597
+
598
+
599
def monitor_user_engagement(username: str = "delimitdev") -> list:
    """Monitor engagement on posts by a Reddit user (LED-300).

    Scans the user's recent posts/comments for new replies, upvotes, and
    other engagement signals, returning one alert dict per finding.

    Args:
        username: Reddit account to monitor.

    Returns:
        A list of alert dicts; currently always empty.
    """
    # Stub — full implementation requires residential proxy + Playwright (LED-248)
    alerts: list = []
    return alerts
@@ -22,7 +22,7 @@ logger = logging.getLogger("delimit.ai.screen_record")
22
22
 
23
23
  # ── Constants ────────────────────────────────────────────────────────────
24
24
 
25
- CHROMIUM_PATH = os.environ.get("CHROMIUM_PATH", "chromium")
25
+ CHROMIUM_PATH = "/root/.cache/puppeteer/chrome/linux-146.0.7680.153/chrome-linux64/chrome"
26
26
  CONTENT_BASE = Path.home() / ".delimit" / "content"
27
27
  VIDEOS_DIR = CONTENT_BASE / "videos"
28
28
  GIFS_DIR = CONTENT_BASE / "gifs"
@@ -124,10 +124,14 @@ def list_secrets() -> List[Dict]:
124
124
  return []
125
125
  secrets = []
126
126
  for f in sorted(SECRETS_DIR.glob("*.json")):
127
+ if f.name.startswith("_"):
128
+ continue # skip internal files like _access_log.json
127
129
  try:
128
130
  s = json.loads(f.read_text())
131
+ if not isinstance(s, dict):
132
+ continue
129
133
  secrets.append({
130
- "name": s["name"],
134
+ "name": s.get("name", f.stem),
131
135
  "scope": s.get("scope", "all"),
132
136
  "description": s.get("description", ""),
133
137
  "created_by": s.get("created_by", ""),
@@ -0,0 +1,341 @@
1
+ """SQLite-based caching and dedup layer for social sensing.
2
+
3
+ Provides:
4
+ - seen_posts table: dedup + relevance scoring for Reddit (and future platforms)
5
+ - scan_meta table: per-subreddit scan timestamps and high-water marks
6
+ - Relevance scoring with keyword/subreddit boosting
7
+ - Lazy DB creation on first use (thread-safe)
8
+
9
+ Cache location: ~/.delimit/social_cache.db
10
+ """
11
+
12
+ import logging
13
+ import os
14
+ import re
15
+ import sqlite3
16
+ import threading
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Tuple
20
+
21
+ logger = logging.getLogger("delimit.ai.social_cache")
22
+
23
+ CACHE_DB_PATH = Path.home() / ".delimit" / "social_cache.db"
24
+
25
+ # Thread-local storage for SQLite connections (sqlite3 objects are not
26
+ # safe to share across threads).
27
+ _local = threading.local()
28
+ _init_lock = threading.Lock()
29
+ _db_initialized = False
30
+
31
+
32
# ── Relevance keywords and weights ─────────────────────────────────────

# High-value keywords strongly associated with Delimit's core domain.
RELEVANCE_KEYWORDS_HIGH: Dict[str, float] = {
    "openapi": 0.35,
    "swagger": 0.30,
    "breaking change": 0.40,
    "breaking changes": 0.40,
    "api governance": 0.45,
    "api contract": 0.40,
    "api contracts": 0.40,
    "api versioning": 0.35,
    "semver": 0.35,
    "mcp server": 0.30,
    "mcp tool": 0.30,
    "model context protocol": 0.30,
}

# Medium-value keywords: AI coding tools, adjacent territory.
RELEVANCE_KEYWORDS_MED: Dict[str, float] = {
    "claude code": 0.25,
    "codex": 0.20,
    "gemini cli": 0.25,
    "cursor": 0.15,
    "api diff": 0.30,
    "api lint": 0.30,
    "api migration": 0.25,
    "schema validation": 0.20,
    "backward compatible": 0.25,
    "backwards compatible": 0.25,
    "backward compatibility": 0.25,
    "backwards compatibility": 0.25,
}

# Subreddit relevance boosts (keys are lowercase bare names, no "r/" prefix).
SUBREDDIT_BOOSTS: Dict[str, float] = {
    "claudeai": 0.20,
    "chatgptcoding": 0.20,
    "devops": 0.15,
    "webdev": 0.10,
    "experienceddevs": 0.15,
    "programming": 0.05,
    "vibecoding": 0.15,
    "ai_agents": 0.15,
    "contextengineering": 0.20,
}

# Subreddits that get penalized unless they mention dev tools.
GENERIC_SUBREDDITS: set = {
    "entrepreneur", "startups", "sideproject", "saas",
}

# Terms that exempt a post in a generic subreddit from the penalty.
DEV_TOOL_TERMS: set = {
    "api", "developer", "dev tool", "devtool", "sdk", "cli",
    "cicd", "ci/cd", "pipeline", "openapi", "swagger", "github action",
}


def _get_conn() -> sqlite3.Connection:
    """Get a thread-local SQLite connection, creating the DB lazily.

    sqlite3 connections must not be shared across threads, so each thread
    gets its own connection cached on ``_local``.  Schema creation is
    idempotent; the ``_db_initialized`` flag only avoids re-running the
    CREATE script on every new per-thread connection.
    """
    conn = getattr(_local, "conn", None)
    if conn is not None:
        return conn

    global _db_initialized
    CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(CACHE_DB_PATH), timeout=10)
    conn.row_factory = sqlite3.Row
    # WAL + NORMAL: concurrent readers during writes, fewer fsyncs.
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA synchronous=NORMAL")

    # Lazy schema creation (idempotent); lock guards the process-wide flag.
    with _init_lock:
        if not _db_initialized:
            _create_schema(conn)
            _db_initialized = True

    _local.conn = conn
    return conn


def _create_schema(conn: sqlite3.Connection) -> None:
    """Create the seen_posts and scan_meta tables if they don't exist."""
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS seen_posts (
            post_id TEXT PRIMARY KEY,
            subreddit TEXT NOT NULL DEFAULT '',
            title TEXT NOT NULL DEFAULT '',
            score INTEGER NOT NULL DEFAULT 0,
            num_comments INTEGER NOT NULL DEFAULT 0,
            first_seen TEXT NOT NULL,
            last_seen TEXT NOT NULL,
            relevance_score REAL NOT NULL DEFAULT 0.0,
            actioned INTEGER NOT NULL DEFAULT 0,
            venture TEXT NOT NULL DEFAULT '',
            fingerprint TEXT NOT NULL DEFAULT '',
            canonical_url TEXT NOT NULL DEFAULT ''
        );

        CREATE INDEX IF NOT EXISTS idx_seen_posts_subreddit
            ON seen_posts(subreddit);
        CREATE INDEX IF NOT EXISTS idx_seen_posts_relevance
            ON seen_posts(relevance_score);
        CREATE INDEX IF NOT EXISTS idx_seen_posts_first_seen
            ON seen_posts(first_seen);

        CREATE TABLE IF NOT EXISTS scan_meta (
            subreddit TEXT PRIMARY KEY,
            last_scan TEXT NOT NULL,
            high_water_mark TEXT NOT NULL DEFAULT '',
            posts_seen INTEGER NOT NULL DEFAULT 0,
            posts_new INTEGER NOT NULL DEFAULT 0
        );
    """)
    conn.commit()


def compute_relevance_score(
    title: str,
    body: str,
    subreddit: str,
    score: int = 0,
    num_comments: int = 0,
) -> float:
    """Compute a 0.0-1.0 relevance score for a Reddit post.

    Scoring layers:
        1. Keyword matching (high + medium value terms, substring match)
        2. Subreddit boost/penalty
        3. Engagement signal (mild boost for proven discussion)

    Args:
        title: Post title.
        body: Post selftext (may be empty).
        subreddit: Subreddit name, with or without an ``r/`` prefix.
        score: Reddit upvote score.
        num_comments: Comment count.

    Returns:
        Relevance clamped to [0.0, 1.0].
    """
    text_lower = f"{title} {body}".lower()

    # Strip an "r/" *prefix* as a unit.  The previous lstrip("r/") removed
    # the character set {'r', '/'}, mangling names like "rust" -> "ust".
    sub_lower = subreddit.lower()
    if sub_lower.startswith("r/"):
        sub_lower = sub_lower[2:]

    relevance = 0.0

    # Layer 1: keyword matching
    for keyword, weight in RELEVANCE_KEYWORDS_HIGH.items():
        if keyword in text_lower:
            relevance += weight

    for keyword, weight in RELEVANCE_KEYWORDS_MED.items():
        if keyword in text_lower:
            relevance += weight

    # Layer 2: subreddit boost
    relevance += SUBREDDIT_BOOSTS.get(sub_lower, 0.0)

    # Penalty for generic subreddits without dev tool mentions
    if sub_lower in GENERIC_SUBREDDITS:
        has_dev_term = any(term in text_lower for term in DEV_TOOL_TERMS)
        if not has_dev_term:
            relevance -= 0.20

    # Layer 3: engagement signal (mild, caps at +0.10)
    if score > 10 or num_comments > 5:
        relevance += 0.05
    if score > 50 or num_comments > 20:
        relevance += 0.05

    # Clamp to [0.0, 1.0]
    return max(0.0, min(1.0, relevance))
196
+
197
+
198
def is_post_seen(post_id: str) -> bool:
    """Return True when *post_id* is already present in the seen_posts cache."""
    hit = _get_conn().execute(
        "SELECT 1 FROM seen_posts WHERE post_id = ?", (post_id,)
    ).fetchone()
    return hit is not None
205
+
206
+
207
def cache_post(
    post_id: str,
    subreddit: str,
    title: str,
    score: int,
    num_comments: int,
    relevance_score: float,
    venture: str = "",
    fingerprint: str = "",
    canonical_url: str = "",
) -> bool:
    """Insert a new post into the cache. Returns True if inserted (new), False if already exists.

    On a duplicate post_id the row is refreshed instead: last_seen, score,
    and num_comments are updated (relevance and first_seen are kept).
    """
    db = _get_conn()
    stamp = datetime.now(timezone.utc).isoformat()

    # EAFP: attempt the insert and let the PRIMARY KEY constraint tell us
    # whether the post was already cached.
    try:
        db.execute(
            """INSERT INTO seen_posts
               (post_id, subreddit, title, score, num_comments,
                first_seen, last_seen, relevance_score, venture,
                fingerprint, canonical_url)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (post_id, subreddit, title, score, num_comments,
             stamp, stamp, relevance_score, venture, fingerprint, canonical_url),
        )
    except sqlite3.IntegrityError:
        # Already exists -- update last_seen and score
        db.execute(
            """UPDATE seen_posts
               SET last_seen = ?, score = ?, num_comments = ?
               WHERE post_id = ?""",
            (stamp, score, num_comments, post_id),
        )
        db.commit()
        return False

    db.commit()
    return True
243
+
244
+
245
def mark_actioned(post_id: str) -> None:
    """Mark a post as actioned (won't be returned in future scans)."""
    db = _get_conn()
    # Flipping the flag is idempotent; unknown post_ids are a no-op.
    db.execute(
        "UPDATE seen_posts SET actioned = 1 WHERE post_id = ?", (post_id,)
    )
    db.commit()
252
+
253
+
254
def update_scan_meta(subreddit: str, posts_seen: int, posts_new: int, high_water_mark: str = "") -> None:
    """Record scan metadata for a subreddit.

    Upserts one row per subreddit: last_scan is always refreshed, while
    the high-water mark is only overwritten when a non-empty value is
    supplied (an empty string preserves the previous mark).
    """
    db = _get_conn()
    scanned_at = datetime.now(timezone.utc).isoformat()
    db.execute(
        """INSERT INTO scan_meta (subreddit, last_scan, high_water_mark, posts_seen, posts_new)
           VALUES (?, ?, ?, ?, ?)
           ON CONFLICT(subreddit) DO UPDATE SET
               last_scan = excluded.last_scan,
               high_water_mark = CASE
                   WHEN excluded.high_water_mark != '' THEN excluded.high_water_mark
                   ELSE scan_meta.high_water_mark
               END,
               posts_seen = excluded.posts_seen,
               posts_new = excluded.posts_new""",
        (subreddit, scanned_at, high_water_mark, posts_seen, posts_new),
    )
    db.commit()
272
+
273
+
274
def get_scan_stats() -> Dict[str, Any]:
    """Get aggregate cache statistics.

    Returns counts of cached/actioned posts, relevance-band tallies, and
    the ten most-seen subreddits.
    """
    db = _get_conn()

    def _count(sql: str) -> int:
        # Single-value aggregate helper for the COUNT(*) queries below.
        return db.execute(sql).fetchone()[0]

    by_subreddit = {
        row["subreddit"]: row["cnt"]
        for row in db.execute(
            "SELECT subreddit, COUNT(*) as cnt FROM seen_posts GROUP BY subreddit ORDER BY cnt DESC LIMIT 10"
        )
    }

    return {
        "total_cached": _count("SELECT COUNT(*) FROM seen_posts"),
        "actioned": _count("SELECT COUNT(*) FROM seen_posts WHERE actioned = 1"),
        "high_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score > 0.8"
        ),
        "medium_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score > 0.3 AND relevance_score <= 0.8"
        ),
        "low_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score <= 0.3"
        ),
        "top_subreddits": by_subreddit,
    }
303
+
304
+
305
def get_high_priority_posts(min_score: float = 0.8, limit: int = 20) -> List[Dict]:
    """Get high-priority posts that haven't been actioned yet.

    Args:
        min_score: Minimum relevance_score (inclusive).
        limit: Maximum number of rows returned.

    Returns:
        Post dicts ordered by relevance (then Reddit score), best first.
    """
    query = """SELECT post_id, subreddit, title, score, num_comments,
                      relevance_score, venture, fingerprint, canonical_url, first_seen
               FROM seen_posts
               WHERE relevance_score >= ? AND actioned = 0
               ORDER BY relevance_score DESC, score DESC
               LIMIT ?"""
    cursor = _get_conn().execute(query, (min_score, limit))
    return [dict(record) for record in cursor.fetchall()]
318
+
319
+
320
def prune_old_posts(days: int = 30) -> int:
    """Remove posts older than N days that were never actioned. Returns count removed.

    Actioned posts are kept indefinitely as an audit trail.
    """
    from datetime import timedelta

    cutoff_iso = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
    db = _get_conn()
    deleted = db.execute(
        "DELETE FROM seen_posts WHERE actioned = 0 AND first_seen < ?",
        (cutoff_iso,),
    )
    db.commit()
    return deleted.rowcount
331
+
332
+
333
def close_connection() -> None:
    """Close the thread-local connection if open."""
    existing = getattr(_local, "conn", None)
    if existing is None:
        _local.conn = None
        return
    try:
        existing.close()
    except Exception:
        # Best effort -- a failed close still clears the cached slot.
        pass
    _local.conn = None