delimit-cli 4.1.52 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,483 +0,0 @@
1
- """
2
- Social sensing daemon for Delimit.
3
-
4
- Runs social discovery scans (X, Reddit, GitHub, Hacker News, Dev.to) on a regular interval.
5
- Deduplicates findings via SQLite cache and emits HTML draft emails for human approval.
6
- Also monitors for direct replies to owned posts (LED-300).
7
-
8
- Consensus 123: Part of the continuous sensing loop.
9
- """
10
-
11
- import json
12
- import logging
13
- import os
14
- import threading
15
- import time
16
- from datetime import datetime, timezone
17
- from pathlib import Path
18
- from typing import Any, Dict, List, Optional
19
-
20
- logger = logging.getLogger("delimit.ai.social_daemon")
21
-
22
- # ── Vertex AI credentials (prefer ADC from gcloud auth) ─────────────
23
- _adc_path = str(Path.home() / ".config" / "gcloud" / "application_default_credentials.json")
24
- if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") and os.path.exists(_adc_path):
25
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _adc_path
26
- if not os.environ.get("GOOGLE_CLOUD_PROJECT"):
27
- os.environ["GOOGLE_CLOUD_PROJECT"] = "jamsons"
28
-
29
- # ── Configuration ────────────────────────────────────────────────────
30
- # Default to 15 minutes (900 seconds)
31
- SCAN_INTERVAL = int(os.environ.get("DELIMIT_SOCIAL_SCAN_INTERVAL", "900"))
32
- MAX_CONSECUTIVE_FAILURES = 3
33
-
34
- # Retry config: exponential backoff (5s, 15s, 45s)
35
- RETRY_DELAYS = [5, 15, 45]
36
- MAX_RETRIES = len(RETRY_DELAYS)
37
-
38
- ALERTS_DIR = Path.home() / ".delimit" / "alerts"
39
- ALERT_FILE = ALERTS_DIR / "social_daemon.json"
40
- DAEMON_STATE = Path.home() / ".delimit" / "social_daemon_state.json"
41
- OWNER_ACTION_SUMMARY = Path.home() / ".delimit" / "owner_action_summary.json"
42
-
43
class SocialDaemonState:
    """Runtime state shared between the daemon thread and its callers.

    The scan counters are updated and snapshotted under ``_lock`` through
    ``record_success``, ``record_failure`` and ``to_dict``. The stats dict is
    copied on the way in and on the way out so that no caller holds a
    reference to state that is meant to be guarded by the lock.
    """

    def __init__(self) -> None:
        self.running = False
        self.last_scan: Optional[str] = None  # ISO-8601 UTC timestamp of the last scan attempt
        self.targets_found: int = 0  # cumulative across successful scans
        self.consecutive_failures: int = 0
        self.total_scans: int = 0  # successes and failures
        self.stopped_reason: Optional[str] = None
        self._lock = threading.Lock()
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        # Scan stats for compact output
        self.last_scan_stats: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a snapshot of the state as a plain dict.

        ``last_scan_stats`` is included only when non-empty, and as a shallow
        copy so mutating the snapshot cannot alter the daemon's state.
        """
        with self._lock:
            snapshot: Dict[str, Any] = {
                "running": self.running,
                "last_scan": self.last_scan,
                "targets_found": self.targets_found,
                "consecutive_failures": self.consecutive_failures,
                "total_scans": self.total_scans,
                "stopped_reason": self.stopped_reason,
                "scan_interval_seconds": SCAN_INTERVAL,
            }
            if self.last_scan_stats:
                snapshot["last_scan_stats"] = dict(self.last_scan_stats)
        return snapshot

    def record_success(self, found: int, stats: Optional[Dict[str, Any]] = None):
        """Record a successful scan that discovered *found* targets.

        Resets the consecutive-failure counter. A falsy *stats* leaves the
        previously stored stats untouched.
        """
        with self._lock:
            self.consecutive_failures = 0
            self.targets_found += found
            self.total_scans += 1
            self.last_scan = datetime.now(timezone.utc).isoformat()
            if stats:
                # Store a copy: the caller keeps using (and returning) its dict.
                self.last_scan_stats = dict(stats)

    def record_failure(self) -> int:
        """Record a failed scan and return the new consecutive-failure count."""
        with self._lock:
            self.consecutive_failures += 1
            self.total_scans += 1
            self.last_scan = datetime.now(timezone.utc).isoformat()
            return self.consecutive_failures


# Module-wide singleton used by the scan and daemon control functions.
_daemon_state = SocialDaemonState()
91
-
92
-
93
def _prepare_reddit_targets(
    reddit_targets: List[Dict[str, Any]], max_drafts: int = 5
) -> List[Dict[str, Any]]:
    """Fill in default routing fields on raw Reddit scanner results, in place.

    The *max_drafts* most engaging high/medium-priority posts (score plus
    twice the comment count) default to classification "reply"; every other
    post defaults to "skip". Fields already present on a target are kept.
    """
    eligible = [rt for rt in reddit_targets if rt.get("priority") in ("high", "medium")]
    eligible.sort(
        key=lambda t: (t.get("score", 0) or 0) + (t.get("num_comments", 0) or 0) * 2,
        reverse=True,
    )
    # Track winners by identity: dicts are unhashable and two distinct posts
    # could compare equal.
    top_ids = {id(rt) for rt in eligible[:max_drafts]}
    for rt in reddit_targets:
        rt.setdefault("classification", "reply" if id(rt) in top_ids else "skip")
        rt.setdefault("platform", "reddit")
        rt.setdefault("venture", "delimit")
        rt.setdefault("fingerprint", f"reddit:{rt.get('id', '')}")
    return reddit_targets


def _scan_with_retry() -> Dict[str, Any]:
    """Scan all sources, retrying with exponential backoff on total failure.

    Two sources are queried per attempt: the broad Reddit scanner and the
    venture-based scan for X, GitHub, HN and Dev.to. A failure of one source
    is logged and the other source's results are still returned. Only when
    BOTH sources fail is the attempt treated as failed and retried, up to
    MAX_RETRIES times with the delays in RETRY_DELAYS (5s, 15s, 45s).

    Returns:
        ``{"targets": [...], "attempt": <1-based attempt number>}``.

    Raises:
        Exception: the last scan error, once every attempt has failed or the
            daemon's stop event is set while waiting to retry.
    """
    from ai.social_target import scan_targets

    for attempt in range(MAX_RETRIES + 1):
        try:
            targets: List[Dict[str, Any]] = []
            source_errors: List[str] = []

            # Use broad reddit_scanner (scans 25+ subreddits with relevance scoring)
            try:
                from ai.reddit_scanner import scan_all

                reddit_result = scan_all(sort="hot", limit_per_sub=10)
                reddit_targets = _prepare_reddit_targets(reddit_result.get("targets", []))
                targets.extend(reddit_targets)
                logger.info("Reddit broad scan: %d targets", len(reddit_targets))
            except Exception as reddit_err:
                logger.warning("Reddit broad scan failed: %s", reddit_err)
                source_errors.append(f"reddit: {reddit_err}")

            # Also run venture-based scan for non-Reddit platforms
            try:
                other_targets = scan_targets(platforms=["x", "github", "hn", "devto"])
                targets.extend(other_targets)
            except Exception as other_err:
                logger.warning("Venture scan failed: %s", other_err)
                source_errors.append(f"venture: {other_err}")

            # Without this check the per-source handlers above swallow every
            # error, the retry logic below is never reached, and a complete
            # outage is reported as a successful scan with zero targets.
            if len(source_errors) == 2:
                raise RuntimeError(
                    "All social scan sources failed (" + "; ".join(source_errors) + ")"
                )

            return {"targets": targets, "attempt": attempt + 1}
        except Exception as e:
            if attempt < MAX_RETRIES:
                delay = RETRY_DELAYS[attempt]
                logger.warning(
                    "Scan attempt %d/%d failed: %s. Retrying in %ds...",
                    attempt + 1, MAX_RETRIES + 1, e, delay,
                )
                # Use stop_event.wait so we can be interrupted during retry sleep
                if _daemon_state._stop_event.wait(timeout=delay):
                    # Daemon was stopped during retry
                    raise
            else:
                logger.error(
                    "All %d scan attempts failed. Last error: %s",
                    MAX_RETRIES + 1, e,
                )
                raise
153
-
154
-
155
- def _build_compact_summary(targets: List[Dict], processed: Dict) -> Dict[str, Any]:
156
- """Build a compact scan summary instead of returning all 322 posts.
157
-
158
- Returns summary counts + only new high-priority posts.
159
- """
160
- # Separate high-priority from regular targets
161
- high_priority = [
162
- t for t in targets
163
- if not t.get("error")
164
- and t.get("relevance_score", 0) > 0.8
165
- ]
166
- medium_priority = [
167
- t for t in targets
168
- if not t.get("error")
169
- and 0.3 < t.get("relevance_score", 0) <= 0.8
170
- ]
171
- auto_ledger = [t for t in targets if t.get("auto_ledger")]
172
-
173
- # Platform breakdown
174
- platform_counts: Dict[str, int] = {}
175
- for t in targets:
176
- if not t.get("error"):
177
- p = t.get("platform", "unknown")
178
- platform_counts[p] = platform_counts.get(p, 0) + 1
179
-
180
- # Get cache stats if available
181
- cache_stats = {}
182
- try:
183
- from ai.social_cache import get_scan_stats
184
- cache_stats = get_scan_stats()
185
- except Exception:
186
- pass
187
-
188
- return {
189
- "summary": {
190
- "total_new_targets": len([t for t in targets if not t.get("error")]),
191
- "high_priority": len(high_priority),
192
- "medium_priority": len(medium_priority),
193
- "auto_ledger_flagged": len(auto_ledger),
194
- "platform_breakdown": platform_counts,
195
- "drafted": len(processed.get("drafted", [])),
196
- "ledger_items": len(processed.get("ledger_items", [])),
197
- "owner_actions": len(processed.get("owner_actions", [])),
198
- },
199
- "high_priority_targets": [
200
- {
201
- "fingerprint": t.get("fingerprint"),
202
- "subreddit": t.get("subreddit"),
203
- "post_title": t.get("post_title"),
204
- "relevance_score": t.get("relevance_score"),
205
- "canonical_url": t.get("canonical_url"),
206
- "venture": t.get("venture"),
207
- "auto_ledger": t.get("auto_ledger", False),
208
- }
209
- for t in high_priority[:10] # Cap at 10 for compact output
210
- ],
211
- "cache_stats": cache_stats,
212
- }
213
-
214
-
215
_scan_digest_count_today: int = 0  # digests sent on _scan_digest_last_date
_scan_digest_last_date: str = ""  # UTC date (YYYY-MM-DD) the counter applies to
_SCAN_DIGEST_MAX_PER_DAY = 4  # Max scan digest emails per day


def _load_ready_drafts() -> Dict[Any, Dict[str, Any]]:
    """Return pending drafts whose quality is "ready", keyed by draft_id.

    Best-effort: a failure importing or querying the draft store is logged and
    yields an empty mapping, so the digest is still sent without draft bodies.
    """
    ready: Dict[Any, Dict[str, Any]] = {}
    try:
        from ai.social import list_drafts

        for draft in list_drafts(status="pending"):
            if draft.get("quality") == "ready":
                # First ready draft wins when an id appears more than once.
                ready.setdefault(draft.get("draft_id"), draft)
    except Exception as drafts_err:
        logger.warning("Could not load ready drafts for scan digest: %s", drafts_err)
    return ready


def _send_scan_digest(compact: Dict, processed: Dict) -> None:
    """Send a digest email summarizing the scan results.

    Only sends if there are REAL actionable items (ready drafts, not
    placeholders); otherwise the digest is suppressed to avoid email fatigue.
    Capped at _SCAN_DIGEST_MAX_PER_DAY emails per UTC day. A digest counts
    against the cap only once it has actually been sent. Never raises: any
    failure is logged as a warning.

    Args:
        compact: Compact scan summary; reads ``"summary"``,
            ``"high_priority_targets"`` and ``"cache_stats"``.
        processed: Target processing result; reads ``"owner_actions"`` and
            ``"drafted"``.
    """
    global _scan_digest_count_today, _scan_digest_last_date
    try:
        from ai.notify import send_email

        s = compact.get("summary", {})
        high = s.get("high_priority", 0)
        ledger_items = s.get("ledger_items", 0)
        total = s.get("total_new_targets", 0)
        platforms = s.get("platform_breakdown", {})

        # Count only REAL owner actions (not placeholder drafts)
        owner_actions = [a for a in processed.get("owner_actions", []) if a.get("draft_id")]
        real_owner_actions = len(owner_actions)

        # Count ready drafts only (not placeholders that failed quality check)
        real_drafted = len([
            d for d in processed.get("drafted", [])
            if not d.get("suppressed_reason") and not d.get("deduped")
        ])

        # Only send if there's something genuinely actionable
        if high == 0 and real_drafted == 0 and ledger_items == 0 and real_owner_actions == 0:
            return

        # Daily cap — the counter resets when the UTC date changes
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        if today != _scan_digest_last_date:
            _scan_digest_count_today = 0
            _scan_digest_last_date = today
        if _scan_digest_count_today >= _SCAN_DIGEST_MAX_PER_DAY:
            logger.info("Scan digest daily cap reached (%d/%d). Suppressing.",
                        _scan_digest_count_today, _SCAN_DIGEST_MAX_PER_DAY)
            return

        lines = []
        lines.append(f"Social scan found {total} new targets across {platforms}.")
        lines.append("")

        if high > 0:
            lines.append(f"HIGH PRIORITY: {high} targets need attention")
            for t in compact.get("high_priority_targets", [])[:5]:
                # Compact entries carry "post_title"/"canonical_url" and always
                # have a "subreddit" key (None off Reddit), so fall through on
                # falsy values rather than on missing keys.
                sub = t.get("subreddit") or t.get("platform") or ""
                title = str(t.get("post_title") or t.get("title") or t.get("text") or "")[:80]
                url = t.get("url") or t.get("canonical_url") or ""
                lines.append(f"  [{sub}] {title}")
                if url:
                    lines.append(f"    {url}")
            lines.append("")

        if real_drafted > 0:
            lines.append(f"DRAFTS: {real_drafted} ready drafts (quality-checked)")
            lines.append("")
            # Include actual draft text for ready drafts. The pending drafts
            # are fetched once, not once per owner action.
            ready_drafts = _load_ready_drafts()
            for action in owner_actions:
                draft = ready_drafts.get(action.get("draft_id"))
                if draft is None:
                    continue
                link = action.get("link", "")
                platform = action.get("platform", "X")
                lines.append(f"--- {platform} DRAFT ---")
                if link:
                    lines.append(f"REPLY TO: {link}")
                lines.append(f"WHY: {str(action.get('summary') or '')[:100]}")
                lines.append("")
                lines.append("COPY THIS:")
                lines.append(str(draft.get("text") or ""))
                lines.append("--- END ---")
                lines.append("")

        if ledger_items > 0:
            lines.append(f"LEDGER: {ledger_items} items added to project ledger")
            lines.append("")

        if real_owner_actions > 0:
            lines.append(f"ACTIONS: {real_owner_actions} items need your review")
            lines.append("")

        cache = compact.get("cache_stats", {})
        lines.append(f"Cache: {cache.get('total_cached', 0)} posts tracked, "
                     f"{cache.get('high_relevance', 0)} high relevance")

        send_email(
            message="\n".join(lines),
            subject=f"[SOCIAL] {high} high-pri, {real_drafted} ready drafts, {real_owner_actions} actions",
            event_type="social_digest",
        )
        # Count the digest only after send_email returned, so a failed send
        # does not use up one of the day's slots.
        _scan_digest_count_today += 1
    except Exception as e:
        logger.warning("Failed to send scan digest email: %s", e)
322
-
323
-
324
# Held for the duration of a scan cycle so that only one runs at a time.
_scan_lock = threading.Lock()


def _write_owner_action_summary(
    found: int, attempt: int, processed: Dict[str, Any], summary: Dict[str, Any]
) -> None:
    """Persist the latest scan's counts to OWNER_ACTION_SUMMARY as JSON.

    The file is a convenience artifact: an OSError while writing it is logged
    and ignored, so it cannot turn a successful scan into a recorded failure.
    """
    payload = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "targets_found": found,
        "scan_attempt": attempt,
        "owner_actions": len(processed.get("owner_actions", [])),
        "drafted": len(processed.get("drafted", [])),
        "ledger_items": len(processed.get("ledger_items", [])),
        "strategy_items": len(processed.get("strategy_items", [])),
        "compact_summary": summary,
    }
    try:
        OWNER_ACTION_SUMMARY.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        OWNER_ACTION_SUMMARY.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    except OSError as write_err:
        logger.warning(
            "Could not write owner action summary %s: %s", OWNER_ACTION_SUMMARY, write_err
        )


def scan_once() -> Dict[str, Any]:
    """Execute a single social scan cycle and process results (LED-238).

    Uses retry with exponential backoff and returns a compact summary.
    Thread-safe: only one scan runs at a time; a concurrent call returns
    immediately with ``{"error": ..., "skipped": True}``.

    Returns:
        On success ``{"targets_found", "scan_attempt", "compact_summary"}``.
        On failure ``{"error", "consecutive_failures"}``. After
        MAX_CONSECUTIVE_FAILURES failures in a row the daemon is also marked
        stopped and its stop event is set. Never raises for scan errors.
    """
    if not _scan_lock.acquire(blocking=False):
        return {"error": "Scan already in progress", "skipped": True}
    try:
        from ai.social_target import process_targets

        # 1. DISCOVER: Scan all platforms (with retry)
        scan_result = _scan_with_retry()
        targets = scan_result["targets"]
        attempt = scan_result["attempt"]
        found = len(targets)

        # 2. ORCHESTRATE: Process discovered targets (LED-238)
        processed = process_targets(targets, draft_replies=True, create_ledger=True)

        # 3. Build compact summary
        compact = _build_compact_summary(targets, processed)
        s = compact["summary"]

        # 4. Write owner action summary
        _write_owner_action_summary(found, attempt, processed, s)

        # 5. Log scan stats
        logger.info(
            "Scan complete: %d new targets (%d high-pri, %d med-pri, %d auto-ledger) "
            "in %d attempt(s). Platforms: %s",
            s["total_new_targets"], s["high_priority"], s["medium_priority"],
            s["auto_ledger_flagged"], attempt, s["platform_breakdown"],
        )

        _daemon_state.record_success(found, stats=s)

        # 6. Send digest email if there are actionable items
        _send_scan_digest(compact, processed)

        # Return compact output rather than the full target list
        return {
            "targets_found": found,
            "scan_attempt": attempt,
            "compact_summary": compact,
        }
    except Exception as e:
        failures = _daemon_state.record_failure()
        logger.error("Social scan failed after retries: %s", e)
        if failures >= MAX_CONSECUTIVE_FAILURES:
            # Derive the number from the constant so the message cannot
            # drift from the configured threshold.
            reason = f"{MAX_CONSECUTIVE_FAILURES} consecutive social scan failures. Last: {e}"
            _daemon_state.stopped_reason = reason
            _daemon_state.running = False
            _daemon_state._stop_event.set()
        return {"error": str(e), "consecutive_failures": failures}
    finally:
        _scan_lock.release()
393
-
394
def _daemon_loop() -> None:
    """Run scan cycles until the daemon's stop event is set.

    Each iteration calls ``scan_once()``, logs the outcome, then waits up to
    SCAN_INTERVAL seconds on the stop event, so a stop request interrupts the
    sleep. If an exception escapes ``scan_once()`` MAX_CONSECUTIVE_FAILURES
    times in a row, the loop records why and stops itself. On exit the daemon
    is marked as not running.
    """
    logger.info("Social daemon started. Scanning every %d seconds.", SCAN_INTERVAL)

    while not _daemon_state._stop_event.is_set():
        try:
            result = scan_once()
            if "error" in result:
                logger.warning("Scan cycle error: %s", result["error"])
            else:
                compact = result.get("compact_summary", {})
                logger.info(
                    "Scan cycle done: %d targets, %d high-pri, cache=%s",
                    result.get("targets_found", 0),
                    compact.get("summary", {}).get("high_priority", 0),
                    compact.get("cache_stats", {}).get("total_cached", "?"),
                )
        except Exception as e:
            logger.error("Unexpected error in social daemon loop: %s", e)
            failures = _daemon_state.record_failure()
            if failures >= MAX_CONSECUTIVE_FAILURES:
                # Leave the same trail as the scan-failure shutdown path:
                # a reason for status callers and a set stop event.
                _daemon_state.stopped_reason = (
                    f"{failures} consecutive unexpected daemon loop errors. Last: {e}"
                )
                _daemon_state._stop_event.set()
                break

        _daemon_state._stop_event.wait(timeout=SCAN_INTERVAL)

    _daemon_state.running = False
    logger.info("Social daemon stopped.")
421
-
422
def start_daemon() -> Dict[str, Any]:
    """Start the social daemon in a background thread.

    Returns:
        The daemon state dict plus a ``"status"`` of:
        ``"started"`` when a new daemon thread was launched;
        ``"already_running"`` when the daemon is marked as running;
        ``"stopping"`` when a previous daemon thread has been asked to stop
        but has not exited yet — call again once it has finished.
    """
    if _daemon_state.running:
        return {"status": "already_running", **_daemon_state.to_dict()}

    # stop_daemon() only waits a few seconds for the thread, so a thread in
    # the middle of a long scan can outlive it. Clearing the stop event below
    # would revive that thread next to the new one, giving two scan loops.
    previous = _daemon_state._thread
    if previous is not None and previous.is_alive():
        return {"status": "stopping", **_daemon_state.to_dict()}

    _daemon_state.running = True
    _daemon_state.stopped_reason = None
    _daemon_state.consecutive_failures = 0
    _daemon_state._stop_event.clear()

    # daemon=True: the thread never blocks interpreter shutdown.
    thread = threading.Thread(target=_daemon_loop, name="social-daemon", daemon=True)
    _daemon_state._thread = thread
    thread.start()

    return {"status": "started", **_daemon_state.to_dict()}
437
-
438
def stop_daemon() -> Dict[str, Any]:
    """Request a daemon shutdown and report the resulting state.

    When the daemon is running, sets the stop event, records
    ``"manual_stop"`` as the reason, waits up to 5 seconds for the daemon
    thread (if any), and marks the daemon as not running. The returned dict
    is the state snapshot plus ``"status"``: ``"stopped"`` or, when nothing
    was running, ``"not_running"``.
    """
    state = _daemon_state
    if state.running:
        state._stop_event.set()
        state.stopped_reason = "manual_stop"
        worker = state._thread
        if worker:
            worker.join(timeout=5)
        state.running = False
        outcome = "stopped"
    else:
        outcome = "not_running"

    return {"status": outcome, **state.to_dict()}
450
-
451
def get_daemon_status() -> Dict[str, Any]:
    """Return the daemon state snapshot, plus cache stats when obtainable.

    ``"cache_stats"`` is added only if ``ai.social_cache.get_scan_stats`` can
    be imported and called without error; any failure is ignored and the
    plain snapshot is returned.
    """
    snapshot = _daemon_state.to_dict()
    try:
        from ai.social_cache import get_scan_stats
    except Exception:
        return snapshot
    try:
        snapshot["cache_stats"] = get_scan_stats()
    except Exception:
        pass
    return snapshot
460
-
461
def main():
    """Run as standalone process.

    Command-line options:
        --interval SECONDS  Override SCAN_INTERVAL; must be a positive integer.
        --once              Run a single scan cycle and exit.

    Without ``--once`` the scan loop runs in the foreground until the daemon
    stops itself or the process receives Ctrl-C.
    """
    import argparse

    global SCAN_INTERVAL

    parser = argparse.ArgumentParser(description="Delimit social sensing daemon")
    parser.add_argument("--interval", type=int, help="Scan interval in seconds")
    parser.add_argument("--once", action="store_true", help="Run once and exit")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s")

    if args.interval is not None:
        # A non-positive interval makes the loop's Event.wait() return at
        # once, which would hammer the scanned services back to back.
        if args.interval <= 0:
            parser.error("--interval must be a positive number of seconds")
        SCAN_INTERVAL = args.interval

    if args.once:
        scan_once()
        return

    _daemon_state.running = True
    try:
        _daemon_loop()
    except KeyboardInterrupt:
        # Ctrl-C lands in the main thread, usually during the interval wait;
        # exit cleanly rather than with a traceback.
        _daemon_state._stop_event.set()
        _daemon_state.stopped_reason = "manual_stop"
        _daemon_state.running = False
        logger.info("Social daemon interrupted; shutting down.")


if __name__ == "__main__":
    main()
@@ -1,76 +0,0 @@
1
-- Tweet corpus + cache + budget schema
-- See DECISION_TWTTR241_CORPUS.md
-- Invariants:
--   tweets = append-only moat, never purged
--   cache  = disposable, TTL-gated
--   budget = single gate for all Twttr241 HTTP calls
--
-- Every CREATE statement uses IF NOT EXISTS, so the script can be re-run
-- against an existing database.

-- Write-ahead logging, with fsync relaxed from the default FULL to NORMAL.
PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;

-- Corpus (moat, never purged)
-- One row per tweet, keyed by the tweet id stored as text.
-- NOTE(review): created_at / fetched_at are presumably Unix epoch timestamps
-- and has_media a 0/1 flag — confirm against the writer.
CREATE TABLE IF NOT EXISTS tweets (
    tweet_id        TEXT PRIMARY KEY,
    author_handle   TEXT NOT NULL,
    author_id       TEXT,
    text            TEXT NOT NULL,
    created_at      INTEGER NOT NULL,
    fetched_at      INTEGER NOT NULL,
    lang            TEXT,
    reply_to_id     TEXT,
    quote_of_id     TEXT,
    like_count      INTEGER,
    retweet_count   INTEGER,
    reply_count     INTEGER,
    view_count      INTEGER,
    has_media       INTEGER,
    urls_json       TEXT,
    hashtags_json   TEXT,
    mentions_json   TEXT,
    venture_tags    TEXT,  -- comma-joined, e.g. 'delimit,wirereport'
    raw_json        TEXT
);
CREATE INDEX IF NOT EXISTS idx_tweets_author_time ON tweets(author_handle, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_tweets_created ON tweets(created_at DESC);
-- NOTE(review): venture_tags is a comma-joined list, so this index can only
-- serve exact or prefix matches on the whole string, not "contains tag X".
CREATE INDEX IF NOT EXISTS idx_tweets_venture ON tweets(venture_tags);

-- Full-text search over the corpus (FTS5 external-content table: the index
-- refers to rows of `tweets` by rowid instead of storing its own copy of the
-- text).
-- NOTE(review): FTS5 does not keep an external-content index in sync by
-- itself, and no triggers are defined in this script — presumably the writer
-- updates tweets_fts explicitly; confirm.
-- NOTE(review): tweets has a TEXT primary key, so 'rowid' here is the
-- implicit rowid, which SQLite may renumber on VACUUM — confirm the database
-- is never vacuumed, or that the index is rebuilt afterwards.
CREATE VIRTUAL TABLE IF NOT EXISTS tweets_fts USING fts5(
    text, author_handle,
    content='tweets', content_rowid='rowid'
);

-- Users (opportunistic)
-- One row per user id; the unique index below also makes handles unique.
CREATE TABLE IF NOT EXISTS users (
    user_id             TEXT PRIMARY KEY,
    handle              TEXT NOT NULL,
    display_name        TEXT,
    bio                 TEXT,
    followers_count     INTEGER,
    following_count     INTEGER,
    first_seen_at       INTEGER NOT NULL,
    last_refreshed_at   INTEGER NOT NULL,
    raw_json            TEXT
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_users_handle ON users(handle);

-- Cache (disposable, TTL-gated)
-- One stored response per cache_key; expires_at is indexed so expired rows
-- can be located without a full table scan.
CREATE TABLE IF NOT EXISTS cache (
    cache_key       TEXT PRIMARY KEY,
    endpoint        TEXT NOT NULL,
    response_json   TEXT NOT NULL,
    fetched_at      INTEGER NOT NULL,
    expires_at      INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cache_expires ON cache(expires_at);

-- Budget tracker (one row per hour bucket)
-- day_bucket and month_bucket are stored on each hourly row and indexed,
-- presumably so daily and monthly totals can be summed — confirm the bucket
-- encodings against the writer.
CREATE TABLE IF NOT EXISTS budget (
    hour_bucket     INTEGER PRIMARY KEY,
    day_bucket      INTEGER NOT NULL,
    month_bucket    TEXT NOT NULL,
    requests        INTEGER NOT NULL DEFAULT 0,
    hit_429         INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_budget_day ON budget(day_bucket);
CREATE INDEX IF NOT EXISTS idx_budget_month ON budget(month_bucket);