delimit-cli 4.1.53 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/README.md +34 -3
  3. package/bin/delimit-cli.js +150 -2
  4. package/bin/delimit-setup.js +22 -7
  5. package/gateway/ai/agent_dispatch.py +79 -0
  6. package/gateway/ai/daily_digest.py +386 -0
  7. package/gateway/ai/ledger_manager.py +32 -0
  8. package/gateway/ai/license_core.py +2 -0
  9. package/gateway/ai/notify.py +17 -11
  10. package/gateway/ai/reddit_proxy.py +28 -9
  11. package/gateway/ai/sensing/__init__.py +35 -0
  12. package/gateway/ai/sensing/schema.py +107 -0
  13. package/gateway/ai/sensing/signal_store.py +348 -0
  14. package/gateway/ai/server.py +419 -6
  15. package/gateway/ai/supabase_sync.py +308 -0
  16. package/gateway/ai/work_order.py +216 -0
  17. package/gateway/ai/workers/__init__.py +32 -0
  18. package/gateway/ai/workers/base.py +154 -0
  19. package/gateway/ai/workers/executor.py +861 -0
  20. package/gateway/ai/workers/outreach_drafter.py +161 -0
  21. package/gateway/ai/workers/pr_drafter.py +148 -0
  22. package/lib/ai-sbom-engine.js +154 -0
  23. package/lib/trust-page-engine.js +179 -0
  24. package/lib/wrap-engine.js +431 -0
  25. package/package.json +14 -1
  26. package/adapters/codex-security.js +0 -64
  27. package/adapters/codex-skill.js +0 -78
  28. package/adapters/cursor-rules.js +0 -73
  29. package/gateway/ai/continuity.py +0 -462
  30. package/gateway/ai/inbox_daemon_runner.py +0 -217
  31. package/gateway/ai/loop_engine.py +0 -1303
  32. package/gateway/ai/social_cache.py +0 -341
  33. package/gateway/ai/social_daemon.py +0 -483
  34. package/gateway/ai/tweet_corpus_schema.sql +0 -76
  35. package/scripts/crosspost_devto.py +0 -304
  36. package/scripts/demo-v420-clean.sh +0 -267
  37. package/scripts/demo-v420-deliberation.sh +0 -217
  38. package/scripts/demo-v420.sh +0 -55
  39. package/scripts/sync-gateway.sh +0 -112
@@ -0,0 +1,386 @@
1
+ """Daily digest for the Delimit autonomous loop (LED-966).
2
+
3
+ Produces a structured summary of the last 24h:
4
+ - Cycle count (sense-only daemon ticks)
5
+ - Signals ingested (count by platform)
6
+ - Deliberations held (count + transcript refs)
7
+ - Ledger deltas (items opened, in_progress, done)
8
+ - Agent dispatches (by assignee, status)
9
+ - Pending approvals (drafts awaiting founder)
10
+ - Critical events (errors, timeouts, guard trips)
11
+
12
+ Writes:
13
+ - ~/.delimit/digest/digest-YYYY-MM-DD.md (file artifact, always)
14
+ - ~/.delimit/digest/digest-YYYY-MM-DD.json (machine-readable)
15
+ - Email to founder (if DELIMIT_DIGEST_EMAIL=true AND email pipeline healthy)
16
+
17
+ Call via MCP: delimit_digest(action="run") or scheduled cron.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import time
24
+ from collections import Counter
25
+ from datetime import datetime, timedelta, timezone
26
+ from pathlib import Path
27
+ from typing import Any, Dict, List, Optional
28
+
29
+ DIGEST_DIR = Path.home() / ".delimit" / "digest"
30
+ LEDGER_DIR = Path.home() / ".delimit" / "ledger"
31
+ DELIB_DIR = Path.home() / ".delimit" / "deliberations"
32
+ SIGNALS_DIR = Path.home() / ".delimit" / "intel" / "signals"
33
+ AGENTS_FILE = Path.home() / ".delimit" / "agents" / "tasks.json"
34
+
35
+
36
def _now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    utc = timezone.utc
    return datetime.now(tz=utc)
38
+
39
+
40
def _ensure_dir() -> None:
    """Create the digest output directory, including missing parents.

    Does nothing when the directory already exists.
    """
    DIGEST_DIR.mkdir(exist_ok=True, parents=True)
42
+
43
+
44
def _count_signals(since: datetime) -> Dict[str, Any]:
    """Count signals ingested at or after ``since``, grouped by platform.

    Scans every ``*.jsonl`` shard in ``SIGNALS_DIR`` whose file stem parses as
    an ISO date on or after ``since``'s date. Shards whose name starts with
    ``_`` and shards with a non-date stem are ignored. Within a shard, blank
    lines, invalid JSON, non-object rows and rows without a parseable
    ``ingested_at`` string are skipped.

    Args:
        since: Start of the reporting window. A naive value is treated as UTC.

    Returns:
        ``{"total": int, "by_platform": {platform: count}}`` with platforms
        ordered most frequent first. Rows without a platform count under "?".
    """
    empty: Dict[str, Any] = {"total": 0, "by_platform": {}}
    if not SIGNALS_DIR.exists():
        return empty

    # Comparing naive and aware datetimes raises TypeError, so normalise both
    # sides to aware UTC before any comparison happens.
    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)

    counts: Counter = Counter()
    for shard in SIGNALS_DIR.glob("*.jsonl"):
        if shard.name.startswith("_"):
            continue
        try:
            shard_date = datetime.fromisoformat(shard.stem).date()
        except ValueError:
            continue
        if shard_date < since.date():
            continue
        try:
            # Explicit UTF-8 with replacement: a stray byte must not abort the
            # digest, and decoding must not depend on the host locale.
            text = shard.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        for line in text.splitlines():
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(row, dict):
                continue
            stamp = row.get("ingested_at")
            if not isinstance(stamp, str):
                continue
            try:
                ts = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
            except ValueError:
                continue
            if ts.tzinfo is None:
                # Timestamps written without an offset are assumed to be UTC.
                ts = ts.replace(tzinfo=timezone.utc)
            if ts < since:
                continue
            counts[str(row.get("platform") or "?")] += 1

    return {"total": sum(counts.values()), "by_platform": dict(counts.most_common())}
78
+
79
+
80
def _count_deliberations(since: datetime) -> Dict[str, Any]:
    """Summarise deliberation transcripts modified at or after ``since``.

    Only the first 50 ``deliberation_*.json`` files, taken in reverse name
    order, are inspected. A transcript is selected by file modification time.
    Any failure while handling one file skips the rest of that file.

    Returns:
        A dict with ``total``, ``unanimous``, ``no_consensus`` and ``recent``
        (at most 10 entries with file, verdict, status and rounds).
    """
    summary: Dict[str, Any] = {
        "total": 0,
        "unanimous": 0,
        "no_consensus": 0,
        "recent": [],
    }
    if not DELIB_DIR.exists():
        return summary

    entries: List[Dict[str, Any]] = []
    newest_first = sorted(DELIB_DIR.glob("deliberation_*.json"), reverse=True)
    for path in newest_first[:50]:
        try:
            modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
            if modified < since:
                continue
            payload = json.loads(path.read_text())
            # Counted as soon as the file parses; a malformed payload below
            # still leaves this increment in place.
            summary["total"] += 1
            raw_verdict = payload.get("final_verdict")
            shouted = (raw_verdict or "").upper()
            if "UNANIMOUS" in shouted:
                summary["unanimous"] += 1
            elif "NO CONSENSUS" in shouted or "MAX ROUNDS" in shouted:
                summary["no_consensus"] += 1
            # "rounds" may be stored as a list of rounds or as a plain count.
            rounds = payload.get("rounds", 0)
            if isinstance(rounds, list):
                rounds = len(rounds)
            entries.append(
                {
                    "file": path.name,
                    "verdict": (raw_verdict or "?")[:60],
                    "status": payload.get("status", "?"),
                    "rounds": rounds,
                }
            )
        except Exception:
            continue

    summary["recent"] = entries[:10]
    return summary
116
+
117
+
118
def _count_ledger_deltas(since: datetime) -> Dict[str, Any]:
    """Count ledger items opened and completed at or after ``since``.

    Reads every ``*.jsonl`` file in ``LEDGER_DIR``. A row counts as opened when
    its ``type`` is not "update" and ``created_at`` is at or after the window
    start. A row counts as completed when its ``type`` is "update", its
    ``status`` is "done" and ``updated_at`` (falling back to ``created_at``) is
    at or after the window start.

    Timestamps are compared as strings against ``since`` rendered in ISO form
    with a ``Z`` suffix. Blank lines, invalid JSON and non-object rows are
    skipped, and missing or non-string timestamps never match the window, so a
    single malformed row cannot abort the digest.

    Returns:
        ``{"opened": int, "done": int, "new": [...], "completed": [...]}`` where
        the two lists hold at most the last 10 matching rows.
    """
    if not LEDGER_DIR.exists():
        return {"opened": 0, "done": 0, "new": [], "completed": []}

    since_iso = since.isoformat().replace("+00:00", "Z")
    new_items: List[Dict[str, Any]] = []
    done_items: List[Dict[str, Any]] = []

    for ledger_file in LEDGER_DIR.glob("*.jsonl"):
        try:
            # Decode explicitly so the result does not depend on the locale and
            # a stray byte cannot raise out of the function.
            text = ledger_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        for line in text.splitlines():
            if not line.strip():
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(item, dict):
                continue

            created = item.get("created_at")
            if not isinstance(created, str):
                # A JSON null or number here would make the ">=" below raise.
                created = ""
            updated = item.get("updated_at", created)
            if not isinstance(updated, str):
                updated = created

            is_update = item.get("type") == "update"
            if not is_update and created >= since_iso:
                new_items.append({
                    "id": item.get("id"),
                    "title": str(item.get("title") or "")[:80],
                    "priority": item.get("priority", "?"),
                })
            if is_update and item.get("status") == "done" and updated >= since_iso:
                done_items.append({
                    "id": item.get("id"),
                    "note": str(item.get("note") or "")[:120],
                })

    return {
        "opened": len(new_items),
        "done": len(done_items),
        "new": new_items[-10:],
        "completed": done_items[-10:],
    }
159
+
160
+
161
def _count_dispatches(since: datetime) -> Dict[str, Any]:
    """Count swarm dispatches recorded in ``AGENTS_FILE`` and their status.

    The file is expected to hold a JSON object mapping task id to a task
    object. A missing, unreadable or malformed file, or a root that is not an
    object, yields an all-zero result with the same keys as a normal result,
    so consumers can index it unconditionally. Entries that are not objects
    still count towards ``total_tasks`` but are otherwise ignored.

    Args:
        since: Start of the reporting window, used for ``dispatched_last_24h``.

    Returns:
        A dict with ``total_tasks``, ``dispatched_last_24h`` (tasks whose
        ``created_at`` string is at or after the window start), ``by_status``,
        ``dispatched_by_assignee`` (tasks currently in status "dispatched") and
        ``stuck_over_24h`` (dispatched tasks created more than 24 hours ago).
    """
    empty: Dict[str, Any] = {
        "total_tasks": 0,
        "dispatched_last_24h": 0,
        "by_status": {},
        "dispatched_by_assignee": {},
        "stuck_over_24h": 0,
    }
    if not AGENTS_FILE.exists():
        return empty
    try:
        tasks = json.loads(AGENTS_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return empty
    if not isinstance(tasks, dict):
        return empty

    status_counts: Counter = Counter()
    assignee_counts: Counter = Counter()
    stuck = 0
    dispatched_recent = 0
    since_iso = since.isoformat().replace("+00:00", "Z")
    now = _now()
    stuck_after = timedelta(hours=24)

    for task in tasks.values():
        if not isinstance(task, dict):
            continue
        status = task.get("status", "?")
        status_counts[status] += 1

        created_raw = task.get("created_at")
        if not isinstance(created_raw, str):
            # A null or non-string timestamp can neither be compared nor parsed.
            created_raw = ""
        if created_raw >= since_iso:
            dispatched_recent += 1

        if status != "dispatched":
            continue
        assignee_counts[task.get("assignee", "?")] += 1
        try:
            created = datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
        except ValueError:
            continue
        if created.tzinfo is None:
            # Offset-less timestamps are assumed to be UTC; subtracting a naive
            # value from an aware one would raise instead.
            created = created.replace(tzinfo=timezone.utc)
        if now - created > stuck_after:
            stuck += 1

    return {
        "total_tasks": len(tasks),
        "dispatched_last_24h": dispatched_recent,
        "by_status": dict(status_counts),
        "dispatched_by_assignee": dict(assignee_counts),
        "stuck_over_24h": stuck,
    }
194
+
195
+
196
def _check_health(since: datetime) -> Dict[str, Any]:
    """Collect health indicators for the digest window.

    Reports whether ``~/.delimit/pause_dispatch`` exists and how many rows in
    ``~/.delimit/logs/signal_guard_shadow.jsonl`` carry a ``ts`` string at or
    after ``since``. Timestamps are compared as strings against ``since``
    rendered in ISO form with a ``Z`` suffix. Blank lines, invalid JSON,
    non-object rows and rows without a string ``ts`` are ignored.

    Returns:
        ``{"pause_file_exists": bool, "signal_guard_shadow_hits": int,
        "daemon_stopped": bool}``. ``daemon_stopped`` is always ``False``;
        nothing in this function updates it.
    """
    delimit_home = Path.home() / ".delimit"
    health: Dict[str, Any] = {
        "pause_file_exists": (delimit_home / "pause_dispatch").exists(),
        "signal_guard_shadow_hits": 0,
        "daemon_stopped": False,
    }

    shadow = delimit_home / "logs" / "signal_guard_shadow.jsonl"
    if not shadow.exists():
        return health

    since_iso = since.isoformat().replace("+00:00", "Z")
    try:
        # Explicit UTF-8 with replacement keeps decoding locale-independent and
        # prevents a stray byte from raising out of the health check.
        text = shadow.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return health

    hits = 0
    for line in text.splitlines():
        if not line.strip():
            continue
        try:
            row = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(row, dict):
            continue
        stamp = row.get("ts")
        # A null or numeric ts cannot be compared with a string.
        if isinstance(stamp, str) and stamp >= since_iso:
            hits += 1
    health["signal_guard_shadow_hits"] = hits
    return health
220
+
221
+
222
def build_digest(window_hours: int = 24) -> Dict[str, Any]:
    """Assemble the digest for the trailing ``window_hours`` hours.

    Returns a dict holding the generation time, the window size and start, and
    one section per collector: signals, deliberations, ledger, dispatches and
    health. Every collector receives the same window start.
    """
    window_start = _now() - timedelta(hours=window_hours)

    report: Dict[str, Any] = {}
    report["generated_at"] = _now().isoformat()
    report["window_hours"] = window_hours
    report["window_start"] = window_start.isoformat()

    collectors = (
        ("signals", _count_signals),
        ("deliberations", _count_deliberations),
        ("ledger", _count_ledger_deltas),
        ("dispatches", _count_dispatches),
        ("health", _check_health),
    )
    for section, collect in collectors:
        report[section] = collect(window_start)
    return report
235
+
236
+
237
def render_markdown(digest: Dict[str, Any]) -> str:
    """Render a digest dict as a founder-readable markdown document.

    Args:
        digest: A dict with ``generated_at``, ``window_hours``,
            ``window_start`` and the ``signals``, ``deliberations``,
            ``ledger``, ``dispatches`` and ``health`` sections.

    Returns:
        The markdown text, lines joined with ``\\n`` and no trailing newline.

    The dispatch section is read defensively. Missing counters render as 0,
    and the alternative key names ``total`` and ``by_assignee`` are accepted,
    so a dispatch summary produced without a tasks file renders instead of
    raising ``KeyError``.
    """
    signals = digest["signals"]
    delibs = digest["deliberations"]
    ledger = digest["ledger"]
    dispatches = digest["dispatches"]
    health = digest["health"]
    window_hours = digest["window_hours"]

    total_tasks = dispatches.get("total_tasks", dispatches.get("total", 0))
    recent_dispatches = dispatches.get("dispatched_last_24h", 0)
    stuck = dispatches.get("stuck_over_24h", 0)
    by_assignee = (
        dispatches.get("dispatched_by_assignee")
        or dispatches.get("by_assignee")
        or {}
    )
    pause_active = health["pause_file_exists"]
    guard_hits = health["signal_guard_shadow_hits"]

    lines = [
        f"# Delimit Daily Digest — {digest['generated_at'][:10]}",
        "",
        f"Window: last {window_hours}h (since {digest['window_start'][:16]}Z)",
        "",
        "## Health",
        "",
        f"- Pause file: {'🔴 ACTIVE' if pause_active else '🟢 clear'}",
        f"- Signal guard shadow hits: {guard_hits}",
        "",
        "## Signals ingested",
        "",
        f"Total: **{signals['total']}** signals",
    ]
    lines.extend(
        f"- {platform}: {count}"
        for platform, count in signals.get("by_platform", {}).items()
    )

    lines.extend([
        "",
        "## Deliberations",
        "",
        f"- Total: **{delibs['total']}**",
        f"- Unanimous: {delibs['unanimous']}",
        f"- No consensus / max rounds: {delibs['no_consensus']}",
    ])
    if delibs.get("recent"):
        lines.append("")
        lines.append("Recent transcripts:")
        for entry in delibs["recent"]:
            lines.append(
                f" - `{entry['file']}` — {entry['verdict']} ({entry.get('rounds', '?')} rounds)"
            )

    lines.extend([
        "",
        "## Ledger deltas",
        "",
        f"- Items opened: **{ledger['opened']}**",
        f"- Items completed: **{ledger['done']}**",
    ])
    if ledger.get("new"):
        lines.append("")
        lines.append("New items:")
        for item in ledger["new"]:
            lines.append(f" - {item['id']} [{item['priority']}] {item['title']}")
    if ledger.get("completed"):
        lines.append("")
        lines.append("Completed:")
        for item in ledger["completed"]:
            lines.append(f" - {item['id']} — {item['note']}")

    lines.extend([
        "",
        "## Swarm dispatches",
        "",
        f"- Total tasks ever: {total_tasks}",
        # The label follows the digest window; the count is window-based too.
        f"- New dispatches last {window_hours}h: **{recent_dispatches}**",
        f"- Stuck (dispatched >24h): {stuck}",
    ])
    if dispatches.get("by_status"):
        lines.append("")
        lines.append("By status:")
        for status, count in dispatches["by_status"].items():
            lines.append(f" - {status}: {count}")
    if by_assignee:
        lines.append("")
        lines.append("Currently dispatched by assignee:")
        for who, count in by_assignee.items():
            lines.append(f" - {who}: {count}")

    lines.extend([
        "",
        "## Pending founder actions",
        "",
        f"- Stuck dispatches (need worker): {stuck}",
        f"- Pause file present: {'yes' if pause_active else 'no'}",
        f"- Guard shadow hits (investigate if >0): {guard_hits}",
        "",
        "---",
        f"Digest generated at {digest['generated_at']}",
    ])
    return "\n".join(lines)
322
+
323
+
324
def write_digest(window_hours: int = 24) -> Dict[str, str]:
    """Generate the digest and write its markdown and JSON artifacts.

    Both files are named after the UTC date of generation, so a second run on
    the same date overwrites the earlier files.

    Args:
        window_hours: Size of the reporting window passed to ``build_digest``.

    Returns:
        ``markdown_path`` and ``json_path`` of the written files plus a
        one-line ``summary`` of the headline counts.
    """
    _ensure_dir()
    digest = build_digest(window_hours=window_hours)
    date_slug = digest["generated_at"][:10]
    md_path = DIGEST_DIR / f"digest-{date_slug}.md"
    json_path = DIGEST_DIR / f"digest-{date_slug}.json"

    # The markdown contains emoji and an em dash. Writing with the locale's
    # default encoding raises UnicodeEncodeError on non-UTF-8 systems, so the
    # encoding is pinned.
    md_path.write_text(render_markdown(digest), encoding="utf-8")
    json_path.write_text(json.dumps(digest, indent=2), encoding="utf-8")

    summary = (
        f"{digest['signals']['total']} signals, "
        f"{digest['deliberations']['total']} deliberations, "
        f"{digest['ledger']['opened']} new ledger items, "
        f"{digest['dispatches']['stuck_over_24h']} stuck dispatches"
    )
    return {
        "markdown_path": str(md_path),
        "json_path": str(json_path),
        "summary": summary,
    }
342
+
343
+
344
def send_digest_email(to: str = "", from_account: str = "pro@delimit.ai") -> Dict[str, Any]:
    """Write a fresh 24h digest and, when enabled, email it via the notify pipeline.

    The digest files are always written first. Email is attempted only when the
    environment variable DELIMIT_DIGEST_EMAIL is "true", "1" or "yes"
    (case-insensitive); otherwise a "skipped_email" result is returned.

    Args:
        to: Recipient address; when empty, DELIMIT_SMTP_TO is used instead.
        from_account: Sender account handed to the notify pipeline.

    Returns:
        A dict that always contains ``files`` (the ``write_digest`` result)
        and either ``status`` ("skipped_email", "sent" or "send_failed", the
        last with ``error``) or ``error`` when the markdown file is absent.
    """
    import os

    files = write_digest(window_hours=24)
    digest_md = Path(files["markdown_path"])
    if not digest_md.exists():
        return {"error": "digest not written", "files": files}

    flag = os.environ.get("DELIMIT_DIGEST_EMAIL", "").lower()
    if flag not in ("true", "1", "yes"):
        return {
            "status": "skipped_email",
            "reason": "DELIMIT_DIGEST_EMAIL not set to true; digest written to disk only",
            "files": files,
        }

    try:
        # Imported lazily so an unavailable notify module is reported as a
        # send failure rather than breaking the import of this module.
        from ai.notify import send_notification

        send_notification(
            channel="email",
            message=digest_md.read_text(),
            subject=f"[DIGEST] Delimit — {files['summary']}",
            to=to or os.environ.get("DELIMIT_SMTP_TO", ""),
            from_account=from_account,
            event_type="daily_digest",
        )
    except Exception as exc:
        return {
            "status": "send_failed",
            "error": str(exc),
            "files": files,
        }
    return {
        "status": "sent",
        "files": files,
    }
@@ -202,7 +202,39 @@ def add_item(
202
202
  LED-189: Items can have acceptance_criteria (testable "done when" conditions).
203
203
  LED-190: Items can have context, tools_needed, and estimated_complexity
204
204
  for agent-executable task format.
205
+ LED-877: Signal guard — rejects source='social_scan' writes so sensed
206
+ observations cannot land in the ledger. Observations belong in the intel
207
+ signal store (ai/sensing/signal_store.py). Bypass via env var for the
208
+ promote_to_ledger path: _DELIMIT_SIGNAL_PROMOTED_BY=<who>.
205
209
  """
210
+ _src_norm = (source or "").strip().lower()
211
+ _promoted_by = os.environ.get("_DELIMIT_SIGNAL_PROMOTED_BY", "")
212
+ _guard_mode = os.environ.get("DELIMIT_SIGNAL_GUARD", "enforce").lower()
213
+ if _src_norm.startswith("social_scan") or _src_norm.startswith("social_strategy"):
214
+ if not _promoted_by:
215
+ msg = (
216
+ f"LED-877 guard: source={source!r} is a sensed observation, not "
217
+ f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
218
+ f"Promote explicitly via promote_to_ledger(signal_id=...)."
219
+ )
220
+ if _guard_mode == "shadow":
221
+ try:
222
+ _shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
223
+ _shadow_log.parent.mkdir(parents=True, exist_ok=True)
224
+ with _shadow_log.open("a") as _f:
225
+ _f.write(json.dumps({
226
+ "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
227
+ "title": title,
228
+ "source": source,
229
+ "ledger": ledger,
230
+ "msg": msg,
231
+ }) + "\n")
232
+ except Exception:
233
+ pass
234
+ # fall through
235
+ else:
236
+ raise ValueError(msg)
237
+
206
238
  _ensure(project_path)
207
239
  venture = _detect_venture(project_path)
208
240
  ledger_dir = _project_ledger_dir(project_path)
@@ -37,6 +37,8 @@ PRO_TOOLS = frozenset({
37
37
  # Agent orchestration
38
38
  "delimit_agent_dispatch", "delimit_agent_status",
39
39
  "delimit_agent_complete", "delimit_agent_handoff",
40
+ # Worker Pool v2 executor (LED-981)
41
+ "delimit_executor",
40
42
  })
41
43
 
42
44
  # Free trial limits
@@ -1044,17 +1044,23 @@ def _enforce_email_protocol(subject: str, message: str, event_type: str) -> tupl
1044
1044
  # 1. Subject must have a valid prefix bracket
1045
1045
  if not any(subject.startswith(p) for p in _VALID_SUBJECT_PREFIXES):
1046
1046
  # Try to infer from event_type
1047
- prefix_map = {
1048
- "social_draft": "[APPROVE]",
1049
- "outreach": "[OUTREACH]",
1050
- "deploy": "[DEPLOY]",
1051
- "gate_failure": "[ALERT]",
1052
- "digest": "[DIGEST]",
1053
- "info": "[INFO]",
1054
- }
1055
- prefix = prefix_map.get(event_type, "[INFO]")
1056
- subject = f"{prefix} {subject}"
1057
- warnings.append(f"Subject prefix added: {prefix}")
1047
+ # LED-969: customer-facing emails should not get bracket prefixes.
1048
+ # Any event_type starting with "customer_" is external-facing and
1049
+ # the subject should be sent as-is (clean, professional).
1050
+ if event_type and event_type.startswith("customer_"):
1051
+ pass # no prefix for customer emails
1052
+ else:
1053
+ prefix_map = {
1054
+ "social_draft": "[APPROVE]",
1055
+ "outreach": "[OUTREACH]",
1056
+ "deploy": "[DEPLOY]",
1057
+ "gate_failure": "[ALERT]",
1058
+ "digest": "[DIGEST]",
1059
+ "info": "[INFO]",
1060
+ }
1061
+ prefix = prefix_map.get(event_type, "[INFO]")
1062
+ subject = f"{prefix} {subject}"
1063
+ warnings.append(f"Subject prefix added: {prefix}")
1058
1064
 
1059
1065
  # 2. Check required sections for this event_type
1060
1066
  required = _EMAIL_PROTOCOL.get(event_type, [])
@@ -9,24 +9,35 @@ from typing import Any, Dict, List, Optional
9
9
  logger = logging.getLogger("delimit.ai.reddit_proxy")
10
10
 
11
11
  def _get_proxy_config() -> Dict[str, str]:
12
- """Load proxy config from private secrets or environment."""
13
- config = {"proxy_url": ""}
14
-
15
- # 1. Check environment variable
12
+ """Load proxy config from private secrets or environment.
13
+
14
+ Returns {proxy_url, token}. The server-side proxy requires a bearer
15
+ token (LED-988 follow-up) clients without a token still populate
16
+ proxy_url but will fail auth at the server unless the server is run
17
+ without a token (not recommended).
18
+ """
19
+ config = {"proxy_url": "", "token": ""}
20
+
21
+ # 1. Environment variables
16
22
  env_url = os.environ.get("DELIMIT_REDDIT_PROXY")
23
+ env_token = os.environ.get("DELIMIT_REDDIT_PROXY_TOKEN")
17
24
  if env_url:
18
25
  config["proxy_url"] = env_url
26
+ if env_token:
27
+ config["token"] = env_token
28
+ if config["proxy_url"]:
19
29
  return config
20
30
 
21
- # 2. Check private secrets file
31
+ # 2. Secrets file
22
32
  secrets_path = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
23
33
  if secrets_path.exists():
24
34
  try:
25
35
  secrets = json.loads(secrets_path.read_text())
26
- config["proxy_url"] = secrets.get("proxy_url", "")
36
+ config["proxy_url"] = secrets.get("proxy_url", "") or config["proxy_url"]
37
+ config["token"] = secrets.get("token", "") or config["token"]
27
38
  except Exception as e:
28
39
  logger.debug(f"Failed to load reddit-proxy secrets: {e}")
29
-
40
+
30
41
  return config
31
42
 
32
43
  def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
@@ -42,7 +53,11 @@ def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[
42
53
  if proxy_url:
43
54
  try:
44
55
  fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
45
- req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
56
+ headers = {"User-Agent": "Delimit/1.0"}
57
+ token = proxy_cfg.get("token", "")
58
+ if token:
59
+ headers["Authorization"] = f"Bearer {token}"
60
+ req = urllib.request.Request(fetch_url, headers=headers)
46
61
  with urllib.request.urlopen(req, timeout=10) as resp:
47
62
  body = json.loads(resp.read().decode())
48
63
  children = body.get("data", {}).get("children", [])
@@ -84,7 +99,11 @@ def fetch_thread(thread_id: str) -> Optional[Dict[str, Any]]:
84
99
  if proxy_url:
85
100
  try:
86
101
  fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
87
- req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
102
+ headers = {"User-Agent": "Delimit/1.0"}
103
+ token = proxy_cfg.get("token", "")
104
+ if token:
105
+ headers["Authorization"] = f"Bearer {token}"
106
+ req = urllib.request.Request(fetch_url, headers=headers)
88
107
  with urllib.request.urlopen(req, timeout=10) as resp:
89
108
  data = json.loads(resp.read().decode())
90
109
  if isinstance(data, list) and len(data) > 0:
@@ -0,0 +1,35 @@
1
+ """Signal sensing layer (LED-877).
2
+
3
+ Physically separates observational signals from the ledger. Signals are a
4
+ deliberation corpus, not a task queue — they must never be pulled by
5
+ build_loop as work. Import from ai.sensing.signal_store for ingest/query.
6
+ """
7
+
8
+ from ai.sensing.schema import Signal, ValidationError, normalize_url, fingerprint_of
9
+ from ai.sensing.signal_store import (
10
+ ingest,
11
+ query,
12
+ dedup_check,
13
+ age_out_to_warm,
14
+ freeze_cold,
15
+ promote_to_ledger,
16
+ SIGNALS_DIR,
17
+ HOT_WINDOW_DAYS,
18
+ WARM_WINDOW_DAYS,
19
+ )
20
+
21
+ __all__ = [
22
+ "Signal",
23
+ "ValidationError",
24
+ "normalize_url",
25
+ "fingerprint_of",
26
+ "ingest",
27
+ "query",
28
+ "dedup_check",
29
+ "age_out_to_warm",
30
+ "freeze_cold",
31
+ "promote_to_ledger",
32
+ "SIGNALS_DIR",
33
+ "HOT_WINDOW_DAYS",
34
+ "WARM_WINDOW_DAYS",
35
+ ]
@@ -0,0 +1,107 @@
1
+ """Signal schema + validation (LED-877).
2
+
3
+ A signal is an observation, not a commitment. Schema enforces enough metadata
4
+ for deliberation to work with, rejects empty-identity rows at ingest (killing
5
+ the LED-876 ghost-engage-task class of bug at its source).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import re
12
+ from dataclasses import dataclass, field, asdict
13
+ from typing import Any, Dict, List, Optional
14
+ from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
15
+
16
+
17
class ValidationError(ValueError):
    """Signal rejected at ingest because it failed schema validation.

    Subclasses ``ValueError``, so handlers for ``ValueError`` also catch it.
    """

    pass
19
+
20
+
21
_UTM_RE = re.compile(r"^utm_")


def normalize_url(url: str) -> str:
    """Return a canonical form of ``url`` for comparison and dedup.

    Lower-cases scheme and host, drops the fragment, path parameters and every
    query parameter whose name starts with ``utm_``, and strips trailing
    slashes from the path (an empty path becomes ``/``). An empty value yields
    an empty string. Input that cannot be parsed or has no scheme is returned
    with only surrounding whitespace removed.
    """
    if not url:
        return ""

    trimmed = url.strip()
    try:
        parts = urlparse(trimmed)
    except Exception:
        return trimmed
    if not parts.scheme:
        return trimmed

    kept_params = []
    for key, value in parse_qsl(parts.query):
        if _UTM_RE.match(key) is None:
            kept_params.append((key, value))

    canonical_path = parts.path.rstrip("/")
    if not canonical_path:
        canonical_path = "/"

    components = (
        parts.scheme.lower(),
        parts.netloc.lower(),
        canonical_path,
        "",
        urlencode(kept_params),
        "",
    )
    return urlunparse(components)
40
+
41
+
42
def fingerprint_of(platform: str, canonical_url: str, author: str) -> str:
    """Derive the dedup key for a signal.

    The key is the first 16 hex characters of the SHA-256 digest of the
    lower-cased platform, the normalized URL and the lower-cased author,
    joined with ``|``. Falsy platform or author values count as empty strings.
    """
    fields = [
        (platform or "").lower(),
        normalize_url(canonical_url),
        (author or "").lower(),
    ]
    digest = hashlib.sha256("|".join(fields).encode("utf-8"))
    return digest.hexdigest()[:16]
46
+
47
+
48
@dataclass
class Signal:
    """One observation sensed on an external platform.

    A useful signal carries a canonical_url plus an author or a
    content_snippet. The dataclass itself performs no validation; only the
    first three fields are required by the constructor.
    """

    # Required by the constructor.
    fingerprint: str
    platform: str
    canonical_url: str

    # Who posted it and what it said.
    author: str = ""
    author_handle: str = ""
    content_snippet: str = ""

    # Timestamps, kept as strings.
    posted_at: str = ""
    ingested_at: str = ""

    # Triage metadata.
    classification: str = "signal"
    relevance_score: float = 0.0
    themes: List[str] = field(default_factory=list)  # fresh list per instance

    # References.
    raw_ref: str = ""
    id: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, in declaration order."""
        snapshot = asdict(self)
        return snapshot
73
+
74
+
75
def validate_and_normalize(raw: Dict[str, Any]) -> Signal:
    """Convert a raw target dict into a validated ``Signal``.

    Field lookup accepts alternative keys: ``canonical_url`` or ``url``;
    ``content_snippet`` or ``title`` (cut to 500 characters); ``raw_ref`` or
    ``source_url``, falling back to the canonical URL. Non-string values are
    converted to text, and whitespace-only values count as missing.
    ``ingested_at`` is left empty for the ingest step to fill in.

    Args:
        raw: Mapping of field names to values.

    Returns:
        The normalized ``Signal``; its fingerprint is derived from platform,
        canonical URL and author.

    Raises:
        ValidationError: If ``canonical_url`` is missing, if both ``author``
            and ``content_snippet`` are missing, if ``platform`` is missing,
            if ``relevance_score`` is not numeric, or if ``themes`` is not
            iterable.
    """

    def _text(*keys: str) -> str:
        # First non-blank value among the keys, stripped. str() keeps a
        # non-string value (e.g. a numeric author id) from raising.
        for key in keys:
            value = raw.get(key)
            if value:
                cleaned = str(value).strip()
                if cleaned:
                    return cleaned
        return ""

    platform = _text("platform")
    canonical_url = normalize_url(_text("canonical_url", "url"))
    author = _text("author")
    content_snippet = _text("content_snippet", "title")[:500]

    # Check order is part of the contract: it decides which message a row
    # with several problems reports.
    if not canonical_url:
        raise ValidationError("canonical_url is required")
    if not author and not content_snippet:
        raise ValidationError("at least one of author or content_snippet is required")
    if not platform:
        raise ValidationError("platform is required")

    score_raw = raw.get("relevance_score") or 0.0
    try:
        relevance_score = float(score_raw)
    except (TypeError, ValueError) as exc:
        # Report bad scores as schema failures instead of a bare ValueError.
        raise ValidationError(
            f"relevance_score must be numeric, got {score_raw!r}"
        ) from exc

    themes_raw = raw.get("themes") or []
    if isinstance(themes_raw, str):
        # list("abc") would explode the string into single characters.
        themes = [themes_raw]
    else:
        try:
            themes = list(themes_raw)
        except TypeError as exc:
            raise ValidationError("themes must be a list of strings") from exc

    return Signal(
        fingerprint=fingerprint_of(platform, canonical_url, author),
        platform=platform,
        canonical_url=canonical_url,
        author=author,
        author_handle=_text("author_handle"),
        content_snippet=content_snippet,
        posted_at=_text("posted_at"),
        ingested_at="",  # filled by signal_store.ingest
        classification=_text("classification") or "signal",
        relevance_score=relevance_score,
        themes=themes,
        raw_ref=_text("raw_ref", "source_url") or canonical_url,
    )