delimit-cli 4.1.43 → 4.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,7 @@ logger = logging.getLogger("delimit.ai.reddit_scanner")
26
26
  # ---------------------------------------------------------------------------
27
27
 
28
28
  SCAN_GROUPS: Dict[str, List[str]] = {
29
- "delimit_core": ["ClaudeAI", "vibecoding", "cursor", "AI_Agents"],
29
+ "delimit_core": ["ClaudeAI", "vibecoding", "cursor", "AI_Agents", "ObsidianMD"],
30
30
  "delimit_adjacent": ["devops", "programming", "ContextEngineering", "LocalLLaMA", "MachineLearning"],
31
31
  "domainvested": ["Domains", "Entrepreneur", "SideProject", "flipping"],
32
32
  "wirereport": ["sportsbook", "sportsbetting"],
@@ -560,3 +560,47 @@ def _save_scan(result: Dict[str, Any], scan_time: datetime) -> Path:
560
560
  path.write_text(json.dumps(result, indent=2, default=str))
561
561
  logger.info("Scan saved to %s", path)
562
562
  return path
563
+
564
+
565
def fetch_thread(thread_id: str, *, proxy_url: str = PROXY_URL) -> Optional[Dict[str, Any]]:
    """Fetch a single Reddit thread by ID via the residential proxy.

    Args:
        thread_id: Reddit base-36 post ID (e.g. ``"1abc2d"``).
        proxy_url: Base URL of the fetch proxy; the real Reddit URL is passed
            URL-encoded in the ``url`` query parameter.

    Returns:
        A dict of core post fields, or ``None`` when the post is missing or
        any fetch/parse step fails (failures are logged, never raised).
    """
    import urllib.parse
    import urllib.request

    reddit_url = f"https://www.reddit.com/comments/{thread_id}.json?raw_json=1"
    fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"

    req = urllib.request.Request(
        fetch_url,
        headers={"User-Agent": "delimit-scanner/1.0", "Accept": "application/json"},
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read().decode())
        # The comments endpoint returns [post_listing, comment_listing].
        if isinstance(data, list) and data:
            children = data[0].get("data", {}).get("children", [])
            # Guard the empty-children case explicitly: previously an
            # IndexError here was swallowed by the broad except and logged
            # as a spurious "Failed to fetch" warning.
            post_data = children[0].get("data", {}) if children else {}
            if post_data:
                return {
                    "id": post_data.get("id", ""),
                    "title": post_data.get("title", ""),
                    "author": post_data.get("author", ""),
                    "score": post_data.get("score", 0),
                    "num_comments": post_data.get("num_comments", 0),
                    "subreddit": post_data.get("subreddit", ""),
                    "permalink": post_data.get("permalink", ""),
                    "selftext": post_data.get("selftext", ""),
                    "created_utc": post_data.get("created_utc", 0),
                }
    except Exception as exc:
        logger.warning("Failed to fetch thread %s: %s", thread_id, exc)
    return None
597
+
598
+
599
def monitor_user_engagement(username: str = "delimitdev") -> list:
    """Monitor engagement on posts by a Reddit user (LED-300).

    Checks recent posts/comments by the user for new replies, upvotes,
    and engagement signals. Returns a list of alert dicts.
    """
    # Stub — full implementation requires residential proxy + Playwright (LED-248)
    alerts: list = []
    return alerts
@@ -22,7 +22,7 @@ logger = logging.getLogger("delimit.ai.screen_record")
22
22
 
23
23
# ── Constants ────────────────────────────────────────────────────────────

# Prefer the CHROMIUM_PATH env var so deployments stay configurable; fall
# back to the pinned Puppeteer Chrome build used on the production host.
CHROMIUM_PATH = os.environ.get(
    "CHROMIUM_PATH",
    "/root/.cache/puppeteer/chrome/linux-146.0.7680.153/chrome-linux64/chrome",
)
CONTENT_BASE = Path.home() / ".delimit" / "content"
VIDEOS_DIR = CONTENT_BASE / "videos"
GIFS_DIR = CONTENT_BASE / "gifs"
@@ -124,10 +124,14 @@ def list_secrets() -> List[Dict]:
124
124
  return []
125
125
  secrets = []
126
126
  for f in sorted(SECRETS_DIR.glob("*.json")):
127
+ if f.name.startswith("_"):
128
+ continue # skip internal files like _access_log.json
127
129
  try:
128
130
  s = json.loads(f.read_text())
131
+ if not isinstance(s, dict):
132
+ continue
129
133
  secrets.append({
130
- "name": s["name"],
134
+ "name": s.get("name", f.stem),
131
135
  "scope": s.get("scope", "all"),
132
136
  "description": s.get("description", ""),
133
137
  "created_by": s.get("created_by", ""),
@@ -0,0 +1,341 @@
1
+ """SQLite-based caching and dedup layer for social sensing.
2
+
3
+ Provides:
4
+ - seen_posts table: dedup + relevance scoring for Reddit (and future platforms)
5
+ - scan_meta table: per-subreddit scan timestamps and high-water marks
6
+ - Relevance scoring with keyword/subreddit boosting
7
+ - Lazy DB creation on first use (thread-safe)
8
+
9
+ Cache location: ~/.delimit/social_cache.db
10
+ """
11
+
12
+ import logging
13
+ import os
14
+ import re
15
+ import sqlite3
16
+ import threading
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Tuple
20
+
21
+ logger = logging.getLogger("delimit.ai.social_cache")
22
+
23
+ CACHE_DB_PATH = Path.home() / ".delimit" / "social_cache.db"
24
+
25
+ # Thread-local storage for SQLite connections (sqlite3 objects are not
26
+ # safe to share across threads).
27
+ _local = threading.local()
28
+ _init_lock = threading.Lock()
29
+ _db_initialized = False
30
+
31
+
32
# ── Relevance keywords and weights ─────────────────────────────────────

# High-value keywords strongly associated with Delimit's core domain.
# Weights are additive; compute_relevance_score clamps the total to [0, 1].
RELEVANCE_KEYWORDS_HIGH: Dict[str, float] = {
    "openapi": 0.35,
    "swagger": 0.30,
    "breaking change": 0.40,
    "breaking changes": 0.40,
    "api governance": 0.45,
    "api contract": 0.40,
    "api contracts": 0.40,
    "api versioning": 0.35,
    "semver": 0.35,
    "mcp server": 0.30,
    "mcp tool": 0.30,
    "model context protocol": 0.30,
}

# Medium-value keywords: AI coding tools, adjacent territory
RELEVANCE_KEYWORDS_MED: Dict[str, float] = {
    "claude code": 0.25,
    "codex": 0.20,
    "gemini cli": 0.25,
    "cursor": 0.15,
    "api diff": 0.30,
    "api lint": 0.30,
    "api migration": 0.25,
    "schema validation": 0.20,
    "backward compatible": 0.25,
    "backwards compatible": 0.25,
    "backward compatibility": 0.25,
    "backwards compatibility": 0.25,
}

# Subreddit relevance boosts.  Keys must be lowercase and carry no "r/"
# prefix — compute_relevance_score normalizes names before lookup.
SUBREDDIT_BOOSTS: Dict[str, float] = {
    "claudeai": 0.20,
    "chatgptcoding": 0.20,
    "devops": 0.15,
    "webdev": 0.10,
    "experienceddevs": 0.15,
    "programming": 0.05,
    "vibecoding": 0.15,
    "ai_agents": 0.15,
    "contextengineering": 0.20,
}

# Subreddits that get penalized unless they mention dev tools
GENERIC_SUBREDDITS: set = {
    "entrepreneur", "startups", "sideproject", "saas",
}

# Substrings whose presence exempts a generic-subreddit post from the penalty
DEV_TOOL_TERMS: set = {
    "api", "developer", "dev tool", "devtool", "sdk", "cli",
    "cicd", "ci/cd", "pipeline", "openapi", "swagger", "github action",
}
88
+
89
+
90
def _get_conn() -> sqlite3.Connection:
    """Get a thread-local SQLite connection, creating the DB lazily.

    sqlite3 connection objects must not be shared across threads, so each
    thread caches its own connection on ``_local``.  Schema creation runs at
    most once per process, guarded by ``_init_lock``.
    """
    conn = getattr(_local, "conn", None)
    if conn is not None:
        return conn  # fast path: this thread already opened a connection

    global _db_initialized
    CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(CACHE_DB_PATH), timeout=10)
    conn.row_factory = sqlite3.Row  # rows support dict-style access
    # WAL + synchronous=NORMAL: concurrent readers don't block the writer,
    # and durability is relaxed — acceptable for a rebuildable cache.
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA synchronous=NORMAL")

    # Lazy schema creation (idempotent; only the first thread does the work)
    with _init_lock:
        if not _db_initialized:
            _create_schema(conn)
            _db_initialized = True

    _local.conn = conn
    return conn
112
+
113
+
114
def _create_schema(conn: sqlite3.Connection) -> None:
    """Create cache tables and indexes if they don't exist.

    Idempotent (every statement uses ``IF NOT EXISTS``), so it is safe to
    run on each process start.

    Args:
        conn: Open SQLite connection; the schema is committed before return.
    """
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS seen_posts (
            post_id TEXT PRIMARY KEY,
            subreddit TEXT NOT NULL DEFAULT '',
            title TEXT NOT NULL DEFAULT '',
            score INTEGER NOT NULL DEFAULT 0,
            num_comments INTEGER NOT NULL DEFAULT 0,
            first_seen TEXT NOT NULL,
            last_seen TEXT NOT NULL,
            relevance_score REAL NOT NULL DEFAULT 0.0,
            actioned INTEGER NOT NULL DEFAULT 0,
            venture TEXT NOT NULL DEFAULT '',
            fingerprint TEXT NOT NULL DEFAULT '',
            canonical_url TEXT NOT NULL DEFAULT ''
        );

        CREATE INDEX IF NOT EXISTS idx_seen_posts_subreddit
            ON seen_posts(subreddit);
        CREATE INDEX IF NOT EXISTS idx_seen_posts_relevance
            ON seen_posts(relevance_score);
        CREATE INDEX IF NOT EXISTS idx_seen_posts_first_seen
            ON seen_posts(first_seen);
        -- get_high_priority_posts and prune_old_posts both filter on
        -- actioned; give that predicate an index too.
        CREATE INDEX IF NOT EXISTS idx_seen_posts_actioned
            ON seen_posts(actioned);

        CREATE TABLE IF NOT EXISTS scan_meta (
            subreddit TEXT PRIMARY KEY,
            last_scan TEXT NOT NULL,
            high_water_mark TEXT NOT NULL DEFAULT '',
            posts_seen INTEGER NOT NULL DEFAULT 0,
            posts_new INTEGER NOT NULL DEFAULT 0
        );
    """)
    conn.commit()
148
+
149
+
150
def compute_relevance_score(
    title: str,
    body: str,
    subreddit: str,
    score: int = 0,
    num_comments: int = 0,
) -> float:
    """Compute a 0.0-1.0 relevance score for a Reddit post.

    Scoring layers:
        1. Keyword matching (high + medium value terms, additive weights)
        2. Subreddit boost, or a -0.20 penalty for generic business
           subreddits that mention no dev-tool term
        3. Engagement signal (mild boost for proven discussion, max +0.10)

    Args:
        title: Post title.
        body: Post selftext (may be empty).
        subreddit: Subreddit name, with or without a leading ``"r/"``.
        score: Reddit upvote score.
        num_comments: Comment count.

    Returns:
        Relevance clamped to [0.0, 1.0].
    """
    text_lower = f"{title} {body}".lower()
    # BUGFIX: the previous .lstrip("r/") strips the *characters* 'r' and '/',
    # mangling names like "rust" -> "ust"; remove the prefix explicitly.
    sub_lower = subreddit.lower()
    if sub_lower.startswith("r/"):
        sub_lower = sub_lower[2:]

    relevance = 0.0

    # Layer 1: keyword matching
    for keyword, weight in RELEVANCE_KEYWORDS_HIGH.items():
        if keyword in text_lower:
            relevance += weight

    for keyword, weight in RELEVANCE_KEYWORDS_MED.items():
        if keyword in text_lower:
            relevance += weight

    # Layer 2: subreddit boost
    relevance += SUBREDDIT_BOOSTS.get(sub_lower, 0.0)

    # Penalty for generic subreddits without dev tool mentions
    if sub_lower in GENERIC_SUBREDDITS:
        if not any(term in text_lower for term in DEV_TOOL_TERMS):
            relevance -= 0.20

    # Layer 3: engagement signal (mild, caps at +0.10)
    if score > 10 or num_comments > 5:
        relevance += 0.05
    if score > 50 or num_comments > 20:
        relevance += 0.05

    # Clamp to [0.0, 1.0]
    return max(0.0, min(1.0, relevance))
196
+
197
+
198
def is_post_seen(post_id: str) -> bool:
    """Return True if *post_id* already exists in the seen_posts cache."""
    hit = _get_conn().execute(
        "SELECT 1 FROM seen_posts WHERE post_id = ?", (post_id,)
    ).fetchone()
    return hit is not None
205
+
206
+
207
def cache_post(
    post_id: str,
    subreddit: str,
    title: str,
    score: int,
    num_comments: int,
    relevance_score: float,
    venture: str = "",
    fingerprint: str = "",
    canonical_url: str = "",
) -> bool:
    """Insert a new post into the cache.

    Returns True when the post was newly inserted; False when it already
    existed, in which case last_seen and the engagement counters are
    refreshed instead.
    """
    conn = _get_conn()
    ts = datetime.now(timezone.utc).isoformat()
    try:
        conn.execute(
            """INSERT INTO seen_posts
               (post_id, subreddit, title, score, num_comments,
                first_seen, last_seen, relevance_score, venture,
                fingerprint, canonical_url)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (post_id, subreddit, title, score, num_comments,
             ts, ts, relevance_score, venture, fingerprint, canonical_url),
        )
        conn.commit()
        return True
    except sqlite3.IntegrityError:
        # Duplicate primary key -- post already cached; refresh its counters.
        conn.execute(
            """UPDATE seen_posts
               SET last_seen = ?, score = ?, num_comments = ?
               WHERE post_id = ?""",
            (ts, score, num_comments, post_id),
        )
        conn.commit()
        return False
243
+
244
+
245
def mark_actioned(post_id: str) -> None:
    """Flag a post as actioned so future scans will not return it."""
    conn = _get_conn()
    conn.execute("UPDATE seen_posts SET actioned = 1 WHERE post_id = ?", (post_id,))
    conn.commit()
252
+
253
+
254
def update_scan_meta(subreddit: str, posts_seen: int, posts_new: int, high_water_mark: str = "") -> None:
    """Record scan metadata for a subreddit (upsert keyed on subreddit).

    Passing an empty high_water_mark keeps the previously stored mark.
    """
    ts = datetime.now(timezone.utc).isoformat()
    conn = _get_conn()
    conn.execute(
        """INSERT INTO scan_meta (subreddit, last_scan, high_water_mark, posts_seen, posts_new)
           VALUES (?, ?, ?, ?, ?)
           ON CONFLICT(subreddit) DO UPDATE SET
               last_scan = excluded.last_scan,
               high_water_mark = CASE
                   WHEN excluded.high_water_mark != '' THEN excluded.high_water_mark
                   ELSE scan_meta.high_water_mark
               END,
               posts_seen = excluded.posts_seen,
               posts_new = excluded.posts_new""",
        (subreddit, ts, high_water_mark, posts_seen, posts_new),
    )
    conn.commit()
272
+
273
+
274
def get_scan_stats() -> Dict[str, Any]:
    """Get aggregate cache statistics."""
    conn = _get_conn()

    def _count(sql: str) -> int:
        # One-row COUNT(*) helper; every stats query below fits this shape.
        return conn.execute(sql).fetchone()[0]

    top_subreddits = {
        row["subreddit"]: row["cnt"]
        for row in conn.execute(
            "SELECT subreddit, COUNT(*) as cnt FROM seen_posts GROUP BY subreddit ORDER BY cnt DESC LIMIT 10"
        )
    }

    return {
        "total_cached": _count("SELECT COUNT(*) FROM seen_posts"),
        "actioned": _count("SELECT COUNT(*) FROM seen_posts WHERE actioned = 1"),
        "high_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score > 0.8"
        ),
        "medium_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score > 0.3 AND relevance_score <= 0.8"
        ),
        "low_relevance": _count(
            "SELECT COUNT(*) FROM seen_posts WHERE relevance_score <= 0.3"
        ),
        "top_subreddits": top_subreddits,
    }
303
+
304
+
305
def get_high_priority_posts(min_score: float = 0.8, limit: int = 20) -> List[Dict]:
    """Get high-priority posts that haven't been actioned yet."""
    query = """SELECT post_id, subreddit, title, score, num_comments,
           relevance_score, venture, fingerprint, canonical_url, first_seen
           FROM seen_posts
           WHERE relevance_score >= ? AND actioned = 0
           ORDER BY relevance_score DESC, score DESC
           LIMIT ?"""
    conn = _get_conn()
    # sqlite3.Row supports dict() conversion directly.
    return [dict(row) for row in conn.execute(query, (min_score, limit))]
318
+
319
+
320
def prune_old_posts(days: int = 30) -> int:
    """Remove posts older than N days that were never actioned.

    Returns the number of rows removed.
    """
    from datetime import timedelta

    cutoff_iso = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
    conn = _get_conn()
    deleted = conn.execute(
        "DELETE FROM seen_posts WHERE actioned = 0 AND first_seen < ?",
        (cutoff_iso,),
    )
    conn.commit()
    return deleted.rowcount
331
+
332
+
333
def close_connection() -> None:
    """Close the thread-local connection if open."""
    existing = getattr(_local, "conn", None)
    if existing is not None:
        try:
            existing.close()
        except Exception:
            pass  # best-effort: a failed close still clears the slot
    _local.conn = None
@@ -19,6 +19,13 @@ from typing import Any, Dict, List, Optional
19
19
 
20
20
  logger = logging.getLogger("delimit.ai.social_daemon")
21
21
 
22
# ── Vertex AI credentials (prefer ADC from gcloud auth) ─────────────
_adc_path = str(Path.home() / ".config" / "gcloud" / "application_default_credentials.json")
if os.path.exists(_adc_path) and not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _adc_path
# Note: os.environ.get treats an empty value as unset, so a blank
# GOOGLE_CLOUD_PROJECT is overwritten with the default project.
if not os.environ.get("GOOGLE_CLOUD_PROJECT"):
    os.environ["GOOGLE_CLOUD_PROJECT"] = "jamsons"
28
+
22
29
  # ── Configuration ────────────────────────────────────────────────────
23
30
  # Default to 15 minutes (900 seconds)
24
31
  SCAN_INTERVAL = int(os.environ.get("DELIMIT_SOCIAL_SCAN_INTERVAL", "900"))
@@ -205,26 +212,50 @@ def _build_compact_summary(targets: List[Dict], processed: Dict) -> Dict[str, An
205
212
  }
206
213
 
207
214
 
215
# Process-local daily rate-limit state for _send_scan_digest; the counter
# resets when the UTC date changes (and implicitly on daemon restart).
_scan_digest_count_today: int = 0  # digests already sent today
_scan_digest_last_date: str = ""  # "YYYY-MM-DD" of the last counter reset
_SCAN_DIGEST_MAX_PER_DAY = 4  # Max scan digest emails per day
218
+
219
+
208
220
  def _send_scan_digest(compact: Dict, processed: Dict) -> None:
209
221
  """Send a digest email summarizing the scan results.
210
222
 
211
- Only sends if there are new high-priority targets, new drafts, or new ledger items.
223
+ Only sends if there are REAL actionable items (ready drafts, not placeholders).
212
224
  Suppresses digest if nothing actionable to avoid email fatigue.
225
+ Capped at 4 per day to prevent inbox flooding.
213
226
  """
227
+ global _scan_digest_count_today, _scan_digest_last_date
214
228
  try:
215
229
  from ai.notify import send_email
216
230
 
217
231
  s = compact.get("summary", {})
218
232
  high = s.get("high_priority", 0)
219
- drafted = s.get("drafted", 0)
220
233
  ledger_items = s.get("ledger_items", 0)
221
234
  total = s.get("total_new_targets", 0)
222
235
  platforms = s.get("platform_breakdown", {})
223
- owner_actions = len(processed.get("owner_actions", []))
224
236
 
225
- # Only send if there's something actionable
226
- if high == 0 and drafted == 0 and ledger_items == 0 and owner_actions == 0:
237
+ # Count only REAL owner actions (not placeholder drafts)
238
+ owner_actions = [a for a in processed.get("owner_actions", []) if a.get("draft_id")]
239
+ real_owner_actions = len(owner_actions)
240
+
241
+ # Count ready drafts only (not placeholders that failed quality check)
242
+ real_drafted = len([d for d in processed.get("drafted", [])
243
+ if not d.get("suppressed_reason") and not d.get("deduped")])
244
+
245
+ # Only send if there's something genuinely actionable
246
+ if high == 0 and real_drafted == 0 and ledger_items == 0 and real_owner_actions == 0:
247
+ return
248
+
249
+ # Daily cap — reset counter at midnight
250
+ today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
251
+ if today != _scan_digest_last_date:
252
+ _scan_digest_count_today = 0
253
+ _scan_digest_last_date = today
254
+ if _scan_digest_count_today >= _SCAN_DIGEST_MAX_PER_DAY:
255
+ logger.info("Scan digest daily cap reached (%d/%d). Suppressing.",
256
+ _scan_digest_count_today, _SCAN_DIGEST_MAX_PER_DAY)
227
257
  return
258
+ _scan_digest_count_today += 1
228
259
 
229
260
  lines = []
230
261
  lines.append(f"Social scan found {total} new targets across {platforms}.")
@@ -241,8 +272,8 @@ def _send_scan_digest(compact: Dict, processed: Dict) -> None:
241
272
  lines.append(f" {url}")
242
273
  lines.append("")
243
274
 
244
- if drafted > 0:
245
- lines.append(f"DRAFTS: {drafted} reply drafts created")
275
+ if real_drafted > 0:
276
+ lines.append(f"DRAFTS: {real_drafted} ready drafts (quality-checked)")
246
277
  lines.append("")
247
278
  # Include actual draft text for ready drafts
248
279
  for action in processed.get("owner_actions", []):
@@ -273,8 +304,8 @@ def _send_scan_digest(compact: Dict, processed: Dict) -> None:
273
304
  lines.append(f"LEDGER: {ledger_items} items added to project ledger")
274
305
  lines.append("")
275
306
 
276
- if owner_actions > 0:
277
- lines.append(f"ACTIONS: {owner_actions} items need your review")
307
+ if real_owner_actions > 0:
308
+ lines.append(f"ACTIONS: {real_owner_actions} items need your review")
278
309
  lines.append("")
279
310
 
280
311
  cache = compact.get("cache_stats", {})
@@ -283,7 +314,7 @@ def _send_scan_digest(compact: Dict, processed: Dict) -> None:
283
314
 
284
315
  send_email(
285
316
  message="\n".join(lines),
286
- subject=f"[SOCIAL] {high} high-pri, {drafted} drafts, {total} targets",
317
+ subject=f"[SOCIAL] {high} high-pri, {real_drafted} ready drafts, {real_owner_actions} actions",
287
318
  event_type="social_digest",
288
319
  )
289
320
  except Exception as e: