delimit-cli 4.5.5 → 4.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,10 +26,14 @@ try:
26
26
  # Autonomous build loop
27
27
  "delimit_next_task", "delimit_task_complete",
28
28
  "delimit_loop_status", "delimit_loop_config",
29
+ # LED-1253: vendor-news riff MCP wrappers
30
+ "delimit_vendor_news_scan", "delimit_vendor_news_health",
31
+ "delimit_vendor_news_draft",
29
32
  })
30
33
  except ImportError:
31
34
  # license_core not available (development mode or missing binary)
32
35
  import json
36
+ import os
33
37
  import time
34
38
  from pathlib import Path
35
39
 
@@ -78,6 +82,9 @@ except ImportError:
78
82
  # Autonomous build loop
79
83
  "delimit_next_task", "delimit_task_complete",
80
84
  "delimit_loop_status", "delimit_loop_config",
85
+ # LED-1253: vendor-news riff MCP wrappers
86
+ "delimit_vendor_news_scan", "delimit_vendor_news_health",
87
+ "delimit_vendor_news_draft",
81
88
  })
82
89
  FREE_TRIAL_LIMITS = {"delimit_deliberate": 3}
83
90
 
@@ -0,0 +1,39 @@
1
+ """LED-1264: scan → strategy-ledger auto-promote bridge.
2
+
3
+ Pure consumer of ``~/.delimit/social_targets.jsonl`` (the existing
4
+ ``delimit_social_target`` output). Promotes a tightly-gated subset of
5
+ strategic signals into the strategy ledger so the founder reviews them
6
+ via a daily digest instead of inbox-spam pings.
7
+
8
+ Panel decision (UNANIMOUS R3, 2026-05-07): tight guards
9
+ (strategic + confidence ≥ 0.85 + dedup against open / 60-day-closed),
10
+ P2 priority (review, not auto-action), one daily digest email.
11
+
12
+ Public entry points:
13
+
14
+ - :func:`bridge.promote_recent_signals` — main work function
15
+ - :func:`digest.build_daily_digest` — assemble last-24h digest text
16
+ - :func:`bridge.backfill_from` — one-time idempotent backfill walker
17
+
18
+ The bridge is invoked by ``scripts/scan_bridge_cron.py`` on a 6-hour
19
+ crontab cadence (founder applies manually). Direct in-process calls to
20
+ ``ai.ledger_manager.add_item`` — no MCP subprocess.
21
+ """
22
+
23
+ from ai.scan_bridge.bridge import (
24
+ backfill_from,
25
+ promote_recent_signals,
26
+ )
27
+ from ai.scan_bridge.dedup import (
28
+ extract_topic_fingerprint,
29
+ is_duplicate,
30
+ )
31
+ from ai.scan_bridge.digest import build_daily_digest
32
+
33
+ __all__ = [
34
+ "backfill_from",
35
+ "build_daily_digest",
36
+ "extract_topic_fingerprint",
37
+ "is_duplicate",
38
+ "promote_recent_signals",
39
+ ]
@@ -0,0 +1,473 @@
1
+ """LED-1264 scan-bridge — promotion engine.
2
+
3
+ Reads ``~/.delimit/social_targets.jsonl`` (the existing
4
+ ``delimit_social_target`` output), filters to the tight panel-locked
5
+ gate, runs dedup against the strategy ledger, and promotes survivors
6
+ via direct in-process ``ledger_manager.add_item`` calls.
7
+
8
+ State / cursor:
9
+ ``~/.delimit/scan_bridge_cursor.json`` records the most-recent
10
+ ``first_seen`` value we've already processed. Subsequent runs only
11
+ consider lines newer than that. Idempotent — re-running the cron
12
+ on the same JSONL is a no-op.
13
+
14
+ Promotions log:
15
+ ``~/.delimit/scan_bridge_promotions.jsonl`` records every successful
16
+ promotion (item_id, signal_fingerprint, ts) so the daily digest can
17
+ assemble the last-24h batch without re-walking the ledger.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import logging
24
+ import os
25
+ from contextlib import contextmanager
26
+ from dataclasses import dataclass
27
+ from datetime import datetime, date, timedelta, timezone
28
+ from pathlib import Path
29
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
30
+
31
+ from ai.scan_bridge.dedup import (
32
+ _candidate_strategy_items,
33
+ extract_topic_fingerprint,
34
+ is_duplicate,
35
+ )
36
+
37
# Module logger; the name is spelled out explicitly rather than taken
# from ``__name__``.
logger = logging.getLogger("delimit.ai.scan_bridge.bridge")

# Input: JSONL file of scanned signals, read line by line by _iter_signals.
TARGETS_FILE = Path.home() / ".delimit" / "social_targets.jsonl"
# State: JSON object holding the ``last_seen_at`` watermark.
CURSOR_FILE = Path.home() / ".delimit" / "scan_bridge_cursor.json"
# Output: append-only JSONL record of successful promotions.
PROMOTIONS_LOG = Path.home() / ".delimit" / "scan_bridge_promotions.jsonl"
42
+
43
+
44
def _confidence_floor() -> float:
    """Return the minimum confidence a signal needs to be promoted.

    The value comes from the ``DELIMIT_SCAN_PROMO_CONFIDENCE`` environment
    variable when it parses as a float within ``[0.0, 1.0]``; in every
    other case (unset, empty, unparsable, out of range, NaN) the built-in
    default of 0.85 is used.
    """
    default_floor = 0.85
    override = os.environ.get("DELIMIT_SCAN_PROMO_CONFIDENCE", "")
    if override:
        try:
            parsed = float(override)
        except (TypeError, ValueError):
            return default_floor
        # NaN fails this chained comparison, so it also falls through.
        if 0.0 <= parsed <= 1.0:
            return parsed
    return default_floor
56
+
57
+
58
+ # ── Cursor I/O ────────────────────────────────────────────────────────
59
+
60
+
61
def _load_cursor() -> Optional[str]:
    """Return the most-recent ``first_seen`` we've already processed.

    Reads ``CURSOR_FILE`` and returns its ``last_seen_at`` value as a
    string. Returns ``None`` when the file is missing or unreadable, is
    not valid JSON, is valid JSON but not an object, or has no truthy
    ``last_seen_at`` entry.
    """
    try:
        # A missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check is needed. JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses.
        payload = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return None
    # A corrupted cursor such as ``[]`` or ``"abc"`` parses fine but has
    # no ``.get``; treat it the same as "no cursor".
    if not isinstance(payload, dict):
        return None
    value = payload.get("last_seen_at")
    return str(value) if value else None
71
+
72
+
73
def _save_cursor(last_seen_at: str) -> None:
    """Persist ``last_seen_at`` as the new cursor in ``CURSOR_FILE``.

    The payload is written to a sibling ``.tmp`` file and then moved into
    place with ``os.replace`` so a crash mid-write cannot leave a
    truncated cursor behind (an unreadable cursor makes ``_load_cursor``
    report "no cursor"). Best-effort: any ``OSError`` is logged and
    swallowed.
    """
    staging = CURSOR_FILE.with_name(CURSOR_FILE.name + ".tmp")
    try:
        CURSOR_FILE.parent.mkdir(parents=True, exist_ok=True)
        staging.write_text(
            json.dumps({"last_seen_at": last_seen_at}), encoding="utf-8"
        )
        os.replace(staging, CURSOR_FILE)
    except OSError:  # pragma: no cover — best-effort
        logger.warning("scan_bridge: failed to persist cursor", exc_info=True)
79
+
80
+
81
def _log_promotion(record: Dict[str, Any]) -> None:
    """Append ``record`` as one JSON line to ``PROMOTIONS_LOG``.

    Best-effort: an ``OSError`` while creating the directory or writing
    is logged as a warning and swallowed, matching ``_save_cursor``,
    instead of being dropped silently.
    """
    line = json.dumps(record) + "\n"
    try:
        PROMOTIONS_LOG.parent.mkdir(parents=True, exist_ok=True)
        with PROMOTIONS_LOG.open("a", encoding="utf-8") as fh:
            fh.write(line)
    except OSError as exc:  # pragma: no cover — best-effort
        logger.warning("scan_bridge: failed to append promotion record: %s", exc)
88
+
89
+
90
+ # ── Filtering ─────────────────────────────────────────────────────────
91
+
92
+
93
@dataclass
class _FilterStats:
    """Mutable per-run counters reported under ``stats`` in the result."""

    # Signals newer than the cutoff that were run through the gate.
    considered: int = 0
    # Rejected because ``classification`` was not "strategic".
    rejected_classification: int = 0
    # Rejected because ``confidence`` was below the active floor.
    rejected_confidence: int = 0
    # Rejected because ``is_duplicate`` found a matching ledger item.
    rejected_dedup: int = 0
    # Promoted to the ledger (dry-run would-be promotions are counted too).
    promoted: int = 0
100
+
101
+
102
def _passes_strict_gate(
    signal: Dict[str, Any],
    *,
    confidence_floor: float,
    stats: "_FilterStats",
) -> Tuple[bool, str]:
    """Return ``(passes, reason)``. ``reason`` is "" on pass.

    A signal passes only when its ``classification`` is "strategic"
    (case- and whitespace-insensitive) and its ``confidence`` is a real
    number at or above ``confidence_floor``. On rejection the matching
    counter on ``stats`` is incremented as a side effect.
    """
    # str() guards against a non-string classification (e.g. a number in
    # the JSONL row), which would otherwise raise on .strip().
    classification = str(signal.get("classification") or "").strip().lower()
    if classification != "strategic":
        stats.rejected_classification += 1
        return False, f"classification={classification or 'missing'}"
    try:
        confidence = float(signal.get("confidence") or 0.0)
    except (TypeError, ValueError):
        confidence = 0.0
    # Written as "not >=" rather than "<" on purpose: every comparison
    # with NaN is False, so ``nan < floor`` would let a NaN confidence
    # (JSON ``NaN`` or the string "nan") slip through the gate.
    if not confidence >= confidence_floor:
        stats.rejected_confidence += 1
        return False, f"confidence={confidence:.2f}<{confidence_floor:.2f}"
    return True, ""
121
+
122
+
123
+ # ── Promotion path ────────────────────────────────────────────────────
124
+
125
+
126
def _build_title(signal: Dict[str, Any]) -> str:
    """Derive a ledger title of the form ``STRATEGIC: <text>`` from a signal.

    The text comes from the stripped ``content_snippet``:

    - snippet starting with ``[`` (a "[TAG] head" prefix): the first line,
      truncated to 120 characters;
    - any other non-empty snippet: the whole snippet, truncated to 100
      characters;
    - empty or missing snippet: the placeholder ``(no snippet)``.

    Truncated text ends in ``...`` and stays within the limit.
    """
    text = (signal.get("content_snippet") or "").strip()
    if not text:
        return "STRATEGIC: (no snippet)"
    if text.startswith("["):
        # Tagged snippets read best as just their first line.
        body, limit = text.partition("\n")[0], 120
    else:
        body, limit = text, 100
    if len(body) > limit:
        body = body[: limit - 3] + "..."
    return f"STRATEGIC: {body}"
139
+
140
+
141
def _build_item(signal: Dict[str, Any]) -> Dict[str, Any]:
    """Translate a scanned signal into keyword arguments for a ledger item.

    The returned dict carries the fixed strategy-ledger fields (``ledger``
    and ``type`` "strategy", ``priority`` "P2", ``source``
    "scan_bridge_auto"), a human-readable description and context, tags,
    and a ``metadata_signal_ref`` entry that ``_add_to_strategy_ledger``
    pops off before calling the ledger.

    A missing or unparsable ``confidence`` is recorded as 0.0 instead of
    raising, mirroring the tolerant parsing in ``_passes_strict_gate``
    (with a floor of 0.0 such a signal can legitimately reach this point).
    """
    platform = signal.get("platform") or ""
    canonical_url = signal.get("canonical_url") or ""
    snippet = (signal.get("content_snippet") or "")[:280]
    try:
        confidence = float(signal.get("confidence") or 0.0)
    except (TypeError, ValueError):
        confidence = 0.0
    first_seen = signal.get("first_seen") or ""
    source_id = signal.get("source_id") or signal.get("fingerprint") or ""

    # Sorted so the stored token list is deterministic.
    fingerprint_set = sorted(extract_topic_fingerprint(signal))

    description = (
        f"Auto-promoted from {platform} signal at {confidence:.2f}: "
        f"{snippet}\n\nURL: {canonical_url or '(none)'}"
    )
    context_text = (
        f"Captured by delimit_social_target on {first_seen}. "
        "Panel-approved auto-promote (LED-1264) per deliberation 2026-05-07. "
        "Founder reviews via daily digest."
    )

    tags = ["auto_promoted", "scan_bridge"]
    if platform:
        tags.append(platform)

    return {
        "title": _build_title(signal),
        "ledger": "strategy",
        "type": "strategy",
        "priority": "P2",
        "description": description,
        "context": context_text,
        "tags": tags,
        "source": "scan_bridge_auto",
        "metadata_signal_ref": {
            "platform": platform,
            "source_id": source_id,
            "fingerprint": fingerprint_set,
            "first_seen": first_seen,
            "confidence": confidence,
            "canonical_url": canonical_url,
        },
    }
179
+
180
+
181
@contextmanager
def _signal_promote_bypass():
    """Temporarily set ``_DELIMIT_SIGNAL_PROMOTED_BY`` in the environment.

    Inside the ``with`` body the variable holds ``scan_bridge:LED-1264``;
    on exit (normal or via exception) the previous value is restored, or
    the variable is removed if it was not set before.

    Per the original author's note this marks the call as the explicit
    promote path for the LED-877 guard and is a defensive measure against
    future source-name changes. NOTE(review): the guard itself is not
    visible in this module — confirm it reads this variable.
    """
    env_key = "_DELIMIT_SIGNAL_PROMOTED_BY"
    saved = os.environ.get(env_key)
    os.environ[env_key] = "scan_bridge:LED-1264"
    try:
        yield
    finally:
        if saved is not None:
            os.environ[env_key] = saved
        else:
            os.environ.pop(env_key, None)
198
+
199
+
200
def _add_to_strategy_ledger(item: Dict[str, Any]) -> Dict[str, Any]:
    """Add ``item`` to the ledger via an in-process ``add_item`` call.

    ``item`` is modified in place before the call, and callers may rely
    on that:

    - ``metadata_signal_ref`` is removed (the original note says the
      ledger does not accept a ``metadata`` kwarg);
    - the signal ref is appended to ``description`` as a fenced JSON
      block so it can be recovered from the raw text;
    - up to eight ``fp:<token>`` tags, one per fingerprint token, are
      appended to ``tags``.

    The call runs inside ``_signal_promote_bypass()``. Returns whatever
    ``add_item`` returns.
    """
    from ai.ledger_manager import add_item

    ref = item.pop("metadata_signal_ref", {})
    tokens = ref.get("fingerprint") or []
    # Cap the fingerprint tags so the tag list stays a sane size.
    fp_tags = [f"fp:{token}" for token in tokens][:8]

    encoded_ref = json.dumps(ref, ensure_ascii=False, sort_keys=True)
    item["description"] = (
        item.get("description", "")
        + "\n\nsignal_ref:\n```json\n"
        + encoded_ref
        + "\n```"
    )
    item["tags"] = [*(item.get("tags") or []), *fp_tags]

    with _signal_promote_bypass():
        return add_item(**item)
225
+
226
+
227
+ # ── Public API ────────────────────────────────────────────────────────
228
+
229
+
230
def _iter_signals(targets_file: Path = TARGETS_FILE) -> Iterable[Dict[str, Any]]:
    """Yield one dict per valid JSON-object line of ``targets_file``.

    Blank lines, lines that are not valid JSON, and lines whose JSON value
    is not an object (a bare list, number, string, ...) are skipped —
    callers use ``.get`` on every row, so anything but a dict would crash
    them. A missing file yields nothing; an ``OSError`` while reading is
    logged as a warning and ends the iteration.
    """
    if not targets_file.exists():
        return
    try:
        with targets_file.open("r", encoding="utf-8") as fh:
            for raw_line in fh:
                text = raw_line.strip()
                if not text:
                    continue
                try:
                    row = json.loads(text)
                except ValueError:  # JSONDecodeError is a ValueError subclass
                    continue
                if isinstance(row, dict):
                    yield row
    except OSError as exc:  # pragma: no cover
        logger.warning("scan_bridge: failed to read %s: %s", targets_file, exc)
245
+
246
+
247
def _normalize_first_seen(value: Any) -> str:
    """Coerce a ``first_seen`` value to a string usable in comparisons.

    Any falsy value (``None``, ``""``, ``0``) becomes the empty string,
    which compares lower than every non-empty string.
    """
    return str(value) if value else ""
252
+
253
+
254
def _promotion_summary(
    signal: Dict[str, Any],
    *,
    item_id: str,
    title: str,
    first_seen: str,
) -> Dict[str, Any]:
    """Build the per-promotion entry returned to the caller (real or dry-run)."""
    return {
        "item_id": item_id,
        "signal_fingerprint": signal.get("fingerprint"),
        "title": title,
        "snippet": (signal.get("content_snippet") or "")[:200],
        "confidence": signal.get("confidence"),
        "platform": signal.get("platform"),
        "canonical_url": signal.get("canonical_url"),
        "first_seen": first_seen,
    }


def promote_recent_signals(
    since: Optional[datetime] = None,
    *,
    dry_run: bool = False,
    targets_file: Optional[Path] = None,
    confidence_floor: Optional[float] = None,
    candidates: Optional[Iterable[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Process scanned signals from ``targets_file`` and promote
    survivors of the strict gate to the strategy ledger.

    Parameters
    ----------
    since:
        Optional cutoff. The later of ``since`` and the persisted cursor
        is used; with neither, the cutoff is 24h ago. Only rows whose
        ``first_seen`` is strictly greater than the cutoff are considered.
    dry_run:
        When True no ledger writes happen and the cursor is not moved;
        the response still contains the would-be promotions for audit /
        preview.
    targets_file:
        Override the default ``social_targets.jsonl`` path (test hook).
    confidence_floor:
        Override the env-resolved floor (test hook).
    candidates:
        Override the strategy-ledger candidate list for dedup (test
        hook). When omitted the live list is fetched once per run.

    Returns
    -------
    dict with keys: ``stats``, ``promoted`` (list of {item_id,
    signal_fingerprint, title, snippet, confidence, platform,
    canonical_url, first_seen}), ``cursor_advanced_to``, ``since``,
    ``dry_run``, ``confidence_floor``.
    """
    targets_file = targets_file or TARGETS_FILE
    floor = confidence_floor if confidence_floor is not None else _confidence_floor()

    cursor_value = _load_cursor()
    if since is not None:
        # Caller-supplied since: take the LATER of since vs cursor so we
        # never reprocess a row we've already promoted.
        # NOTE(review): cutoffs are compared as plain strings, which
        # assumes ``first_seen`` values are ISO-8601 timestamps in a
        # format that sorts lexicographically — confirm against the
        # producer of the targets file.
        since_iso = since.astimezone(timezone.utc).isoformat()
        if cursor_value and cursor_value > since_iso:
            since_iso = cursor_value
    elif cursor_value:
        since_iso = cursor_value
    else:
        since_iso = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()

    stats = _FilterStats()
    promoted: List[Dict[str, Any]] = []
    max_seen = since_iso

    # Resolve dedup candidates ONCE per run so 1000 signals don't trigger
    # 1000 ledger walks. The list is a private copy that grows during the
    # run, so an early-batch promotion blocks a later-batch duplicate
    # within the same invocation.
    if candidates is None:
        live_snapshot: List[Dict[str, Any]] = list(
            _candidate_strategy_items(window_days=60)
        )
    else:
        live_snapshot = list(candidates)

    # Process newest-first within the batch so when two signals about
    # the same topic appear (e.g. oasdiff v1.15.0-beta + v1.15.2), the
    # MOST RECENT version wins and the earlier ones dedup against it.
    queued: List[Tuple[str, Dict[str, Any]]] = []
    for signal in _iter_signals(targets_file):
        if not isinstance(signal, dict):
            # Rows are accessed with .get everywhere below.
            continue
        first_seen = _normalize_first_seen(signal.get("first_seen"))
        if first_seen <= since_iso:
            continue
        queued.append((first_seen, signal))
    queued.sort(key=lambda pair: pair[0], reverse=True)

    for first_seen, signal in queued:
        stats.considered += 1
        # The cursor tracks the max first_seen across the whole run,
        # including rejected rows, so the next call only sees new rows.
        # NOTE(review): it also covers rows whose ledger add failed
        # below, so those are not retried on the next run.
        if first_seen > max_seen:
            max_seen = first_seen

        passes, _ = _passes_strict_gate(
            signal, confidence_floor=floor, stats=stats
        )
        if not passes:
            continue

        if is_duplicate(signal, window_days=60, candidates=live_snapshot) is not None:
            stats.rejected_dedup += 1
            continue

        if dry_run:
            title = _build_title(signal)
            stats.promoted += 1
            promoted.append(
                _promotion_summary(
                    signal, item_id="DRY-RUN", title=title, first_seen=first_seen
                )
            )
            # Mirror within-batch dedup behaviour even in dry-run so the
            # preview count matches what a real run would write. Build a
            # synthetic ledger-shaped item carrying the signal's
            # fingerprint tokens.
            tokens = sorted(extract_topic_fingerprint(signal))
            now_iso = datetime.now(timezone.utc).isoformat()
            live_snapshot.append({
                "id": "DRY-RUN",
                "status": "open",
                "title": title,
                "description": (signal.get("content_snippet") or ""),
                "context": "",
                "tags": [],
                "created_at": now_iso,
                "updated_at": now_iso,
                "metadata": {"signal_ref": {"fingerprint": tokens}},
            })
            continue

        try:
            # Building the item is inside the try as well: one malformed
            # signal must not abort the rest of the batch.
            item = _build_item(signal)
            # Capture the signal_ref before _add_to_strategy_ledger pops
            # it off the item dict — it is needed for the within-batch
            # snapshot entry appended below.
            captured_signal_ref = item.get("metadata_signal_ref") or {}
            result = _add_to_strategy_ledger(item)
        except Exception:
            logger.exception(
                "scan_bridge: ledger add failed for %s", signal.get("fingerprint")
            )
            continue

        # NOTE(review): a result without an "added" entry is still
        # counted as promoted (with an empty id) — confirm add_item's
        # return contract.
        added = result.get("added") or {}
        item_id = added.get("id") or ""
        stats.promoted += 1
        _log_promotion({
            "ts": datetime.now(timezone.utc).isoformat(),
            "item_id": item_id,
            "signal_fingerprint": signal.get("fingerprint"),
            "title": item["title"],
            "platform": signal.get("platform"),
            "confidence": signal.get("confidence"),
            "canonical_url": signal.get("canonical_url"),
            "first_seen": first_seen,
        })
        promoted.append(
            _promotion_summary(
                signal, item_id=item_id, title=item["title"], first_seen=first_seen
            )
        )
        # Add the freshly-promoted item to the in-memory snapshot so any
        # later-but-similar signal in the same batch is correctly
        # de-duplicated. ``item`` was mutated by _add_to_strategy_ledger,
        # so description/tags here include the signal_ref block and the
        # fp: tags.
        now_iso = datetime.now(timezone.utc).isoformat()
        live_snapshot.append({
            "id": item_id,
            "status": "open",
            "title": item["title"],
            "description": item["description"],
            "context": item.get("context", ""),
            "tags": item.get("tags") or [],
            "created_at": now_iso,
            "updated_at": now_iso,
            "metadata": {"signal_ref": captured_signal_ref},
        })

    # Advance the cursor only for real runs that actually saw newer rows.
    cursor_moved = not dry_run and bool(max_seen) and max_seen != since_iso
    if cursor_moved:
        _save_cursor(max_seen)

    return {
        "stats": {
            "considered": stats.considered,
            "rejected_classification": stats.rejected_classification,
            "rejected_confidence": stats.rejected_confidence,
            "rejected_dedup": stats.rejected_dedup,
            "promoted": stats.promoted,
        },
        "promoted": promoted,
        "cursor_advanced_to": max_seen if (not dry_run and max_seen != since_iso) else None,
        "since": since_iso,
        "dry_run": dry_run,
        "confidence_floor": floor,
    }
447
+
448
+
449
def backfill_from(
    start_date: date,
    *,
    dry_run: bool = False,
    targets_file: Optional[Path] = None,
    candidates: Optional[Iterable[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Run the promotion pass with a cutoff of midnight UTC on ``start_date``.

    Thin wrapper around ``promote_recent_signals``: ``dry_run``,
    ``targets_file`` and ``candidates`` are forwarded unchanged and its
    result dict (stats, promoted list, cursor info) is returned as-is, so
    the caller sees how many captured signals qualified.

    Shares the cursor with ``promote_recent_signals``, which makes a
    repeat run over the same range a no-op (or a delta-only run if the
    file has grown). Because that function uses the later of ``since``
    and the persisted cursor, rows at or before an existing cursor are
    not revisited even when ``start_date`` is earlier.
    """
    midnight_utc = datetime(
        start_date.year,
        start_date.month,
        start_date.day,
        tzinfo=timezone.utc,
    )
    return promote_recent_signals(
        since=midnight_utc,
        dry_run=dry_run,
        targets_file=targets_file,
        candidates=candidates,
    )