delimit-cli 4.5.5 → 4.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/gateway/ai/license.py
CHANGED
|
@@ -26,10 +26,14 @@ try:
|
|
|
26
26
|
# Autonomous build loop
|
|
27
27
|
"delimit_next_task", "delimit_task_complete",
|
|
28
28
|
"delimit_loop_status", "delimit_loop_config",
|
|
29
|
+
# LED-1253: vendor-news riff MCP wrappers
|
|
30
|
+
"delimit_vendor_news_scan", "delimit_vendor_news_health",
|
|
31
|
+
"delimit_vendor_news_draft",
|
|
29
32
|
})
|
|
30
33
|
except ImportError:
|
|
31
34
|
# license_core not available (development mode or missing binary)
|
|
32
35
|
import json
|
|
36
|
+
import os
|
|
33
37
|
import time
|
|
34
38
|
from pathlib import Path
|
|
35
39
|
|
|
@@ -78,6 +82,9 @@ except ImportError:
|
|
|
78
82
|
# Autonomous build loop
|
|
79
83
|
"delimit_next_task", "delimit_task_complete",
|
|
80
84
|
"delimit_loop_status", "delimit_loop_config",
|
|
85
|
+
# LED-1253: vendor-news riff MCP wrappers
|
|
86
|
+
"delimit_vendor_news_scan", "delimit_vendor_news_health",
|
|
87
|
+
"delimit_vendor_news_draft",
|
|
81
88
|
})
|
|
82
89
|
FREE_TRIAL_LIMITS = {"delimit_deliberate": 3}
|
|
83
90
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""LED-1264: scan → strategy-ledger auto-promote bridge.
|
|
2
|
+
|
|
3
|
+
Pure consumer of ``~/.delimit/social_targets.jsonl`` (the existing
|
|
4
|
+
``delimit_social_target`` output). Promotes a tightly-gated subset of
|
|
5
|
+
strategic signals into the strategy ledger so the founder reviews them
|
|
6
|
+
via a daily digest instead of inbox-spam pings.
|
|
7
|
+
|
|
8
|
+
Panel decision (UNANIMOUS R3, 2026-05-07): tight guards
|
|
9
|
+
(strategic + confidence ≥ 0.85 + dedup against open / 60-day-closed),
|
|
10
|
+
P2 priority (review, not auto-action), one daily digest email.
|
|
11
|
+
|
|
12
|
+
Public entry points:
|
|
13
|
+
|
|
14
|
+
- :func:`bridge.promote_recent_signals` — main work function
|
|
15
|
+
- :func:`digest.build_daily_digest` — assemble last-24h digest text
|
|
16
|
+
- :func:`bridge.backfill_from` — one-time idempotent backfill walker
|
|
17
|
+
|
|
18
|
+
The bridge is invoked by ``scripts/scan_bridge_cron.py`` on a 6-hour
|
|
19
|
+
crontab cadence (founder applies manually). Direct in-process calls to
|
|
20
|
+
``ai.ledger_manager.add_item`` — no MCP subprocess.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from ai.scan_bridge.bridge import (
|
|
24
|
+
backfill_from,
|
|
25
|
+
promote_recent_signals,
|
|
26
|
+
)
|
|
27
|
+
from ai.scan_bridge.dedup import (
|
|
28
|
+
extract_topic_fingerprint,
|
|
29
|
+
is_duplicate,
|
|
30
|
+
)
|
|
31
|
+
from ai.scan_bridge.digest import build_daily_digest
|
|
32
|
+
|
|
33
|
+
# Public names re-exported from the bridge, dedup and digest submodules
# imported above; this list controls what ``from ai.scan_bridge import *``
# exposes.
__all__ = [
    "backfill_from",
    "build_daily_digest",
    "extract_topic_fingerprint",
    "is_duplicate",
    "promote_recent_signals",
]
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
"""LED-1264 scan-bridge — promotion engine.
|
|
2
|
+
|
|
3
|
+
Reads ``~/.delimit/social_targets.jsonl`` (the existing
|
|
4
|
+
``delimit_social_target`` output), filters to the tight panel-locked
|
|
5
|
+
gate, runs dedup against the strategy ledger, and promotes survivors
|
|
6
|
+
via direct in-process ``ledger_manager.add_item`` calls.
|
|
7
|
+
|
|
8
|
+
State / cursor:
|
|
9
|
+
``~/.delimit/scan_bridge_cursor.json`` records the most-recent
|
|
10
|
+
``first_seen`` value we've already processed. Subsequent runs only
|
|
11
|
+
consider lines newer than that. Idempotent — re-running the cron
|
|
12
|
+
on the same JSONL is a no-op.
|
|
13
|
+
|
|
14
|
+
Promotions log:
|
|
15
|
+
``~/.delimit/scan_bridge_promotions.jsonl`` records every successful
|
|
16
|
+
promotion (item_id, signal_fingerprint, ts) so the daily digest can
|
|
17
|
+
assemble the last-24h batch without re-walking the ledger.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
from contextlib import contextmanager
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from datetime import datetime, date, timedelta, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
30
|
+
|
|
31
|
+
from ai.scan_bridge.dedup import (
|
|
32
|
+
_candidate_strategy_items,
|
|
33
|
+
extract_topic_fingerprint,
|
|
34
|
+
is_duplicate,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Module-level logger used for the warnings / exceptions emitted below.
logger = logging.getLogger("delimit.ai.scan_bridge.bridge")

# Input: JSONL file of scanned signals, read line by line by _iter_signals.
TARGETS_FILE = Path.home() / ".delimit" / "social_targets.jsonl"
# State: JSON object {"last_seen_at": ...} holding the processing cursor.
CURSOR_FILE = Path.home() / ".delimit" / "scan_bridge_cursor.json"
# Output: append-only JSONL log with one record per successful promotion.
PROMOTIONS_LOG = Path.home() / ".delimit" / "scan_bridge_promotions.jsonl"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _confidence_floor() -> float:
    """Return the confidence floor used by the promotion gate.

    The ``DELIMIT_SCAN_PROMO_CONFIDENCE`` environment variable overrides
    the default of 0.85 when it parses as a float within ``[0.0, 1.0]``.
    An unset, empty, unparsable or out-of-range value yields the default.
    """
    default_floor = 0.85
    override = os.environ.get("DELIMIT_SCAN_PROMO_CONFIDENCE", "")
    if not override:
        return default_floor
    try:
        parsed = float(override)
    except (TypeError, ValueError):
        return default_floor
    # NaN fails both comparisons, so it also falls through to the default.
    return parsed if 0.0 <= parsed <= 1.0 else default_floor
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ── Cursor I/O ────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _load_cursor() -> Optional[str]:
    """Return the most-recent ``first_seen`` we've already processed.

    Reads ``CURSOR_FILE`` as JSON and returns its ``"last_seen_at"`` value
    converted to ``str``.

    Returns
    -------
    ``None`` when the file is missing or unreadable, is not valid JSON,
    does not contain a JSON object, or holds a falsy ``last_seen_at``.
    """
    try:
        # A missing file raises FileNotFoundError (an OSError); malformed
        # JSON and undecodable bytes both raise ValueError subclasses.
        data = json.loads(CURSOR_FILE.read_text())
    except (OSError, ValueError):
        return None
    # Valid JSON that is not an object (e.g. ``[]`` or ``"x"``) has no
    # ``.get``; treat it as "no cursor" rather than raising.
    if not isinstance(data, dict):
        return None
    v = data.get("last_seen_at")
    return str(v) if v else None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _save_cursor(last_seen_at: str) -> None:
    """Persist ``last_seen_at`` to ``CURSOR_FILE`` as a one-key JSON object.

    Best-effort: the parent directory is created when needed, and an
    ``OSError`` while creating it or writing the file is logged as a
    warning instead of being raised.
    """
    payload = json.dumps({"last_seen_at": last_seen_at})
    try:
        CURSOR_FILE.parent.mkdir(parents=True, exist_ok=True)
        CURSOR_FILE.write_text(payload)
    except OSError:  # pragma: no cover — best-effort
        logger.warning("scan_bridge: failed to persist cursor")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _log_promotion(record: Dict[str, Any]) -> None:
    """Append ``record`` as one JSON line to ``PROMOTIONS_LOG``.

    Best-effort: the parent directory is created when needed. An
    ``OSError`` while creating it or appending is not raised, but it is
    logged as a warning so a lost promotion record is visible instead of
    silently missing from the log.
    """
    try:
        PROMOTIONS_LOG.parent.mkdir(parents=True, exist_ok=True)
        with PROMOTIONS_LOG.open("a", encoding="utf-8") as fh:
            fh.write(json.dumps(record) + "\n")
    except OSError as exc:  # pragma: no cover — best-effort
        logger.warning(
            "scan_bridge: failed to append promotion record to %s: %s",
            PROMOTIONS_LOG,
            exc,
        )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ── Filtering ─────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass
class _FilterStats:
    """Mutable per-run counters reported under ``stats`` by the bridge."""

    # Signals newer than the cutoff that entered the gate / dedup loop.
    considered: int = 0
    # Rejected by the gate because classification was not "strategic".
    rejected_classification: int = 0
    # Rejected by the gate because confidence was below the floor.
    rejected_confidence: int = 0
    # Passed the gate but matched an existing strategy-ledger candidate.
    rejected_dedup: int = 0
    # Promoted to the ledger (or counted as would-be promoted in a dry run).
    promoted: int = 0
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _passes_strict_gate(
    signal: Dict[str, Any],
    *,
    confidence_floor: float,
    stats: _FilterStats,
) -> Tuple[bool, str]:
    """Return ``(passes, reason)``. ``reason`` is "" on pass.

    A signal passes when its ``classification`` (trimmed, case-insensitive)
    equals ``"strategic"`` and its ``confidence`` is at least
    ``confidence_floor``.

    Parameters
    ----------
    signal:
        Decoded signal row; ``classification`` and ``confidence`` are read.
    confidence_floor:
        Minimum accepted confidence.
    stats:
        Counter object; ``rejected_classification`` or
        ``rejected_confidence`` is incremented on the matching rejection.
    """
    # str() keeps a non-string classification (e.g. a number) from raising
    # on .strip(); such a value simply fails the comparison below.
    classification = str(signal.get("classification") or "").strip().lower()
    if classification != "strategic":
        stats.rejected_classification += 1
        return False, f"classification={classification or 'missing'}"
    try:
        confidence = float(signal.get("confidence") or 0.0)
    except (TypeError, ValueError):
        confidence = 0.0
    # Written as "not >=" rather than "<" so that NaN — which json.loads
    # accepts and which compares False against everything — is rejected
    # instead of slipping through the gate.
    if not confidence >= confidence_floor:
        stats.rejected_confidence += 1
        return False, f"confidence={confidence:.2f}<{confidence_floor:.2f}"
    return True, ""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ── Promotion path ────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _build_title(signal: Dict[str, Any]) -> str:
    """Derive a ``STRATEGIC: ...`` ledger title from ``content_snippet``.

    * Empty / missing snippet: a fixed placeholder title.
    * Snippet starting with ``[`` (a "[TAG] head" prefix): its first line,
      cut to 117 characters plus "..." when longer than 120.
    * Anything else: the whole snippet, cut to 97 characters plus "..."
      when longer than 100.
    """
    text = (signal.get("content_snippet") or "").strip()
    if not text:
        return "STRATEGIC: (no snippet)"

    if text.startswith("["):
        # The tag + head on the first line makes the most readable title.
        first_line, _, _ = text.partition("\n")
        if len(first_line) > 120:
            first_line = first_line[:117] + "..."
        return f"STRATEGIC: {first_line}"

    body = text if len(text) <= 100 else text[:97] + "..."
    return f"STRATEGIC: {body}"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _build_item(signal: Dict[str, Any]) -> Dict[str, Any]:
    """Translate a scanned signal into the dict handed to the ledger add.

    The result carries the ledger fields (title, ledger/type "strategy",
    priority "P2", description, context, tags, source) plus a
    ``metadata_signal_ref`` sub-dict holding the signal's provenance and
    its sorted topic-fingerprint tokens.
    """
    source_platform = signal.get("platform") or ""
    url = signal.get("canonical_url") or ""
    excerpt = (signal.get("content_snippet") or "")[:280]
    score = float(signal.get("confidence") or 0.0)
    seen_at = signal.get("first_seen") or ""
    # Prefer the explicit source id; fall back to the signal fingerprint.
    origin_id = signal.get("source_id") or signal.get("fingerprint") or ""

    topic_tokens = sorted(extract_topic_fingerprint(signal))

    description = (
        f"Auto-promoted from {source_platform} signal at {score:.2f}: "
        f"{excerpt}\n\nURL: {url or '(none)'}"
    )
    context_text = (
        f"Captured by delimit_social_target on {seen_at}. "
        "Panel-approved auto-promote (LED-1264) per deliberation 2026-05-07. "
        "Founder reviews via daily digest."
    )

    # The platform tag is only added when a platform is known.
    tags = ["auto_promoted", "scan_bridge"]
    if source_platform:
        tags.append(source_platform)

    signal_ref = {
        "platform": source_platform,
        "source_id": origin_id,
        "fingerprint": topic_tokens,
        "first_seen": seen_at,
        "confidence": score,
        "canonical_url": url,
    }
    return {
        "title": _build_title(signal),
        "ledger": "strategy",
        "type": "strategy",
        "priority": "P2",
        "description": description,
        "context": context_text,
        "tags": tags,
        "source": "scan_bridge_auto",
        "metadata_signal_ref": signal_ref,
    }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@contextmanager
def _signal_promote_bypass():
    """Temporarily set ``_DELIMIT_SIGNAL_PROMOTED_BY`` in the environment.

    While the ``with`` body runs the variable is ``"scan_bridge:LED-1264"``;
    on exit (normal or via exception) the previous value is restored, or
    the variable is removed when it was not set before.

    Per the original author's note this marks the call as the explicit
    promote path for the LED-877 guard (not visible in this module); that
    guard reportedly already allows the ``scan_bridge_auto`` source, so
    this is a defensive measure against future source-name changes.
    """
    env_name = "_DELIMIT_SIGNAL_PROMOTED_BY"
    was_set = env_name in os.environ
    saved = os.environ.get(env_name)
    os.environ[env_name] = "scan_bridge:LED-1264"
    try:
        yield
    finally:
        if was_set:
            os.environ[env_name] = saved
        else:
            os.environ.pop(env_name, None)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _add_to_strategy_ledger(item: Dict[str, Any]) -> Dict[str, Any]:
    """Add ``item`` to the ledger via an in-process ``add_item`` call.

    ``item`` is modified in place before the call:

    * ``metadata_signal_ref`` is popped off — per the original note the
      ledger module does not accept a ``metadata`` kwarg;
    * the popped signal_ref is appended to ``description`` as a fenced
      JSON block so it can be recovered from the raw text;
    * up to eight ``fp:<token>`` tags built from the signal_ref's
      fingerprint tokens are appended to ``tags``.

    The call runs inside ``_signal_promote_bypass()``. Returns whatever
    ``add_item`` returns.
    """
    from ai.ledger_manager import add_item

    ref = item.pop("metadata_signal_ref", {})
    tokens = ref.get("fingerprint") or []
    # Cap at eight fingerprint tags to keep the tag list sane.
    fp_tags = [f"fp:{tok}" for tok in tokens][:8]

    encoded_ref = json.dumps(ref, ensure_ascii=False, sort_keys=True)
    fenced_ref = "\n\nsignal_ref:\n```json\n" + encoded_ref + "\n```"

    item["description"] = item.get("description", "") + fenced_ref
    item["tags"] = [*(item.get("tags") or []), *fp_tags]

    with _signal_promote_bypass():
        return add_item(**item)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ── Public API ────────────────────────────────────────────────────────
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _iter_signals(targets_file: Path = TARGETS_FILE) -> Iterable[Dict[str, Any]]:
    """Yield each signal object stored in the JSONL file ``targets_file``.

    Blank lines, lines that are not valid JSON, and lines whose JSON value
    is not an object are skipped, so every yielded value is a ``dict``.
    Yields nothing when the file does not exist. An ``OSError`` while
    opening or reading is logged as a warning and ends the iteration.
    """
    if not targets_file.exists():
        return
    try:
        # errors="replace" keeps a single undecodable byte from aborting
        # the whole scan with UnicodeDecodeError.
        with targets_file.open("r", encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    row = json.loads(line)
                except ValueError:  # includes json.JSONDecodeError
                    continue
                # Callers use ``signal.get(...)``; a bare list / number /
                # string row would crash them, so only objects pass.
                if isinstance(row, dict):
                    yield row
    except OSError as exc:  # pragma: no cover
        logger.warning("scan_bridge: failed to read %s: %s", targets_file, exc)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _normalize_first_seen(value: Any) -> str:
    """Coerce a ``first_seen`` value to a string for comparison.

    Any falsy value (``None``, ``""``, ``0``) becomes the empty string,
    which sorts before every non-empty string; anything else is passed
    through ``str``.
    """
    return str(value) if value else ""
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _promotion_summary(
    signal: Dict[str, Any],
    *,
    item_id: str,
    title: str,
    first_seen: str,
) -> Dict[str, Any]:
    """Build one entry of the ``promoted`` list returned to the caller."""
    return {
        "item_id": item_id,
        "signal_fingerprint": signal.get("fingerprint"),
        "title": title,
        "snippet": (signal.get("content_snippet") or "")[:200],
        "confidence": signal.get("confidence"),
        "platform": signal.get("platform"),
        "canonical_url": signal.get("canonical_url"),
        "first_seen": first_seen,
    }


def _snapshot_entry(
    *,
    item_id: str,
    title: str,
    description: str,
    context: str,
    tags: List[str],
    signal_ref: Dict[str, Any],
) -> Dict[str, Any]:
    """Build a ledger-shaped open item for the in-memory dedup snapshot."""
    now_iso = datetime.now(timezone.utc).isoformat()
    return {
        "id": item_id,
        "status": "open",
        "title": title,
        "description": description,
        "context": context,
        "tags": tags,
        "created_at": now_iso,
        "updated_at": now_iso,
        "metadata": {"signal_ref": signal_ref},
    }


def promote_recent_signals(
    since: Optional[datetime] = None,
    *,
    dry_run: bool = False,
    targets_file: Optional[Path] = None,
    confidence_floor: Optional[float] = None,
    candidates: Optional[Iterable[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Process scanned signals from ``targets_file`` and promote
    survivors of the strict gate to the strategy ledger.

    Signals whose ``first_seen`` string is greater than the cutoff are
    processed newest-first: strict gate, then dedup, then a ledger add
    (skipped in dry-run). Timestamps are compared as strings.

    Parameters
    ----------
    since:
        Optional cutoff. When given, the LATER of ``since`` (converted to
        UTC) and the persisted cursor is used. When omitted, the cursor
        is used, falling back to 24h ago when no cursor exists.
    dry_run:
        When True no ledger writes happen and the cursor is not advanced;
        the response still contains the would-be promotions for audit /
        preview.
    targets_file:
        Override the default ``social_targets.jsonl`` path (test hook).
    confidence_floor:
        Override the env-resolved floor (test hook).
    candidates:
        Override the strategy-ledger candidate list for dedup (test
        hook). When omitted the live 60-day candidate list is fetched
        once per run.

    Returns
    -------
    dict with keys: ``stats``, ``promoted`` (list of {item_id,
    signal_fingerprint, title, snippet, confidence, platform,
    canonical_url, first_seen}), ``cursor_advanced_to``, ``since``,
    ``dry_run``, ``confidence_floor``.

    Notes
    -----
    When a ledger add raises, the error is logged and the cursor is held
    strictly before the oldest failed signal so the next run retries it.
    Signals that did succeed are then re-read on that run and rejected
    by dedup against the ledger.
    """
    targets_file = targets_file or TARGETS_FILE
    floor = confidence_floor if confidence_floor is not None else _confidence_floor()

    cursor_value = _load_cursor()
    if since is not None:
        # Caller-supplied since: take the LATER of since vs cursor so we
        # never reprocess a row we've already promoted.
        since_iso = since.astimezone(timezone.utc).isoformat()
        if cursor_value and cursor_value > since_iso:
            since_iso = cursor_value
    elif cursor_value:
        since_iso = cursor_value
    else:
        since_iso = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()

    stats = _FilterStats()
    promoted: List[Dict[str, Any]] = []
    max_seen = since_iso
    # first_seen of every signal whose ledger add raised during this run.
    failed_first_seen: List[str] = []

    # Resolve candidates ONCE per run for performance so 1000 signals
    # don't trigger 1000 ledger walks. The list is a private copy that is
    # extended during the run, so an early-batch promotion blocks a
    # later-batch duplicate within the same invocation.
    if candidates is None:
        live_snapshot: List[Dict[str, Any]] = list(
            _candidate_strategy_items(window_days=60)
        )
    else:
        live_snapshot = list(candidates)

    # Process newest-first within the batch so when two signals about
    # the same topic appear (e.g. oasdiff v1.15.0-beta + v1.15.2), the
    # MOST RECENT version wins. The earlier versions then dedup against
    # the newer item — which is what the founder wants in the digest.
    queued: List[Tuple[str, Dict[str, Any]]] = []
    for signal in _iter_signals(targets_file):
        if not isinstance(signal, dict):
            # A JSON row that is not an object has no fields to inspect.
            continue
        first_seen = _normalize_first_seen(signal.get("first_seen"))
        if first_seen <= since_iso:
            continue
        queued.append((first_seen, signal))
    queued.sort(key=lambda pair: pair[0], reverse=True)

    for first_seen, signal in queued:
        stats.considered += 1
        if first_seen > max_seen:
            max_seen = first_seen

        passes, _reason = _passes_strict_gate(
            signal, confidence_floor=floor, stats=stats
        )
        if not passes:
            continue

        match = is_duplicate(signal, window_days=60, candidates=live_snapshot)
        if match is not None:
            stats.rejected_dedup += 1
            continue

        if dry_run:
            stats.promoted += 1
            title = _build_title(signal)
            promoted.append(
                _promotion_summary(
                    signal, item_id="DRY-RUN", title=title, first_seen=first_seen
                )
            )
            # Mirror within-batch dedup behaviour even in dry-run so the
            # preview count matches what a real run would write: add a
            # synthetic ledger-shaped item carrying the signal's
            # fingerprint tokens.
            tokens = sorted(extract_topic_fingerprint(signal))
            live_snapshot.append(
                _snapshot_entry(
                    item_id="DRY-RUN",
                    title=title,
                    description=(signal.get("content_snippet") or ""),
                    context="",
                    tags=[],
                    signal_ref={"fingerprint": tokens},
                )
            )
            continue

        item = _build_item(signal)
        # Capture the signal_ref before _add_to_strategy_ledger pops it
        # off the item dict — we need it for the within-batch snapshot
        # append below so subsequent signals can dedup against this one.
        captured_signal_ref = item.get("metadata_signal_ref") or {}
        try:
            result = _add_to_strategy_ledger(item)
        except Exception:
            logger.exception(
                "scan_bridge: ledger add failed for %s", signal.get("fingerprint")
            )
            failed_first_seen.append(first_seen)
            continue
        added = result.get("added") or {}
        item_id = added.get("id") or ""
        stats.promoted += 1
        _log_promotion({
            "ts": datetime.now(timezone.utc).isoformat(),
            "item_id": item_id,
            "signal_fingerprint": signal.get("fingerprint"),
            "title": item["title"],
            "platform": signal.get("platform"),
            "confidence": signal.get("confidence"),
            "canonical_url": signal.get("canonical_url"),
            "first_seen": first_seen,
        })
        promoted.append(
            _promotion_summary(
                signal, item_id=item_id, title=item["title"], first_seen=first_seen
            )
        )
        # Add the freshly-promoted item to the in-memory snapshot so any
        # later-but-similar signal in the same batch is correctly
        # de-duplicated. ``item`` now carries the description / tags as
        # rewritten by _add_to_strategy_ledger.
        live_snapshot.append(
            _snapshot_entry(
                item_id=item_id,
                title=item["title"],
                description=item["description"],
                context=item.get("context", ""),
                tags=item.get("tags") or [],
                signal_ref=captured_signal_ref,
            )
        )

    if failed_first_seen:
        # Never move the cursor onto or past a signal whose ledger add
        # failed, otherwise it would be skipped forever. Fall back to the
        # newest processed row that is strictly older than the oldest
        # failure (or do not advance at all when there is none).
        oldest_failure = min(failed_first_seen)
        max_seen = max(
            (seen for seen, _ in queued if seen < oldest_failure),
            default=since_iso,
        )

    # Advance cursor on success — only when not a dry-run.
    cursor_advanced = (not dry_run) and bool(max_seen) and max_seen != since_iso
    if cursor_advanced:
        _save_cursor(max_seen)

    return {
        "stats": {
            "considered": stats.considered,
            "rejected_classification": stats.rejected_classification,
            "rejected_confidence": stats.rejected_confidence,
            "rejected_dedup": stats.rejected_dedup,
            "promoted": stats.promoted,
        },
        "promoted": promoted,
        "cursor_advanced_to": max_seen if cursor_advanced else None,
        "since": since_iso,
        "dry_run": dry_run,
        "confidence_floor": floor,
    }
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def backfill_from(
    start_date: date,
    *,
    dry_run: bool = False,
    targets_file: Optional[Path] = None,
    candidates: Optional[Iterable[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Promote gate-passing signals seen since ``start_date`` (UTC).

    Thin wrapper: converts ``start_date`` to midnight UTC of that day and
    delegates to ``promote_recent_signals`` with it as ``since``; the
    remaining arguments are forwarded unchanged and the delegate's result
    dict (stats, promoted entries, cursor info) is returned as-is.

    Because the delegate shares the persisted cursor, re-running over the
    same range is a no-op (or a delta-only run if the file has grown).
    The delegate uses the later of ``since`` and that cursor, so rows at
    or before an existing cursor are not revisited.

    Per the directive, the returned counts let the founder see how much
    real signal was captured but never promoted before this bridge
    existed.
    """
    midnight = datetime.min.time()
    since_dt = datetime.combine(start_date, midnight).replace(tzinfo=timezone.utc)
    return promote_recent_signals(
        since_dt,
        dry_run=dry_run,
        targets_file=targets_file,
        candidates=candidates,
    )
|