delimit-cli 4.5.1 → 4.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/README.md +15 -5
- package/bin/delimit-cli.js +109 -24
- package/gateway/ai/content_engine.py +3 -4
- package/gateway/ai/inbox_classifier.py +215 -0
- package/gateway/ai/integrations/opensage_wrapper.py +4 -1
- package/gateway/ai/ledger_manager.py +218 -38
- package/gateway/ai/license.py +26 -0
- package/gateway/ai/notify.py +68 -3
- package/gateway/ai/reddit_proxy.py +93 -15
- package/gateway/ai/reddit_scanner.py +36 -18
- package/gateway/ai/remote_resolve.py +422 -0
- package/gateway/ai/server.py +301 -117
- package/gateway/ai/social_capability/__init__.py +6 -0
- package/gateway/ai/social_capability/capability_validator.py +367 -0
- package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
- package/gateway/ai/social_capability/fit_floor.py +360 -0
- package/gateway/ai/social_queue.py +307 -0
- package/gateway/ai/supabase_sync.py +14 -2
- package/gateway/ai/swarm.py +29 -11
- package/gateway/ai/tui.py +6 -2
- package/gateway/ai/vendor_news/__init__.py +14 -0
- package/gateway/ai/vendor_news/drafter.py +562 -0
- package/gateway/ai/vendor_news/sensor.py +509 -0
- package/gateway/ai/vendor_news/watchlist.yaml +71 -0
- package/gateway/ai/x_ranker.py +417 -0
- package/lib/attest-mcp.js +487 -0
- package/lib/attest-telemetry.js +48 -0
- package/lib/delimit-home.js +35 -0
- package/lib/delimit-template.js +14 -0
- package/package.json +25 -3
- package/scripts/postinstall.js +89 -40
- package/adapters/codex-security.js +0 -64
- package/adapters/codex-skill.js +0 -78
- package/gateway/ai/content_grounding/__init__.py +0 -98
- package/gateway/ai/content_grounding/build.py +0 -350
- package/gateway/ai/content_grounding/consume.py +0 -280
- package/gateway/ai/content_grounding/features.py +0 -218
- package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
- package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
- package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
- package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
- package/gateway/ai/content_grounding/schemas.py +0 -276
- package/gateway/ai/content_grounding/telemetry.py +0 -221
- package/gateway/ai/inbox_drafts/__init__.py +0 -61
- package/gateway/ai/inbox_drafts/registry.py +0 -412
- package/gateway/ai/inbox_drafts/schema.py +0 -374
- package/gateway/ai/inbox_executor.py +0 -565
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""Delimit-fit floor (LED-1240 part B — selectivity bar, 2026-05-05).
|
|
2
|
+
|
|
3
|
+
The validator (``capability_validator``) hard-fails generic-claim drafts
|
|
4
|
+
AFTER the LLM has generated them. That catches drift but burns tokens and
|
|
5
|
+
surfaces weak drafts to founder review. This module pushes the bar
|
|
6
|
+
upstream: a thread/target must pass the Delimit-fit floor BEFORE we even
|
|
7
|
+
draft for it. If the floor rejects, we abstain. Abstain > weak draft.
|
|
8
|
+
|
|
9
|
+
The fit floor accepts a thread when ANY of these holds:
|
|
10
|
+
|
|
11
|
+
1. The text matches a **Delimit-domain signal** (openapi, breaking change,
|
|
12
|
+
semver, schema diff, merge gate, mcp server, ci governance, soc 2, …).
|
|
13
|
+
2. The text matches an **AI-coding-orbit signal** (claude code, codex cli,
|
|
14
|
+
cursor, gemini cli, mcp tool, …) AND a **technical-context word**
|
|
15
|
+
(ship, merge, deploy, release, PR, review, audit, test, …). Orbit-only
|
|
16
|
+
mentions without technical context (e.g. "I love Claude Code!") are
|
|
17
|
+
not enough — that's the signal that produces noise drafts.
|
|
18
|
+
3. The engagement score is high enough that abstaining has clear
|
|
19
|
+
opportunity cost (default ≥ 50). High-engagement-but-off-topic threads
|
|
20
|
+
pass through with ``human_only=True`` so the orchestrator can choose
|
|
21
|
+
to surface them for human review without auto-drafting.
|
|
22
|
+
|
|
23
|
+
A failing thread returns a dict with ``reason="no_delimit_fit"`` and is
|
|
24
|
+
NOT drafted. Logging is the caller's job — this module is pure logic.
|
|
25
|
+
|
|
26
|
+
Topic-coverage cooldown (``_recent_topic_fingerprints``) is a separate function
|
|
27
|
+
that consults ``social_log.jsonl`` (the same source ``x_ranker`` uses for
|
|
28
|
+
author dedupe). If we drafted on the same topic within the cooldown
|
|
29
|
+
window (default 7 days), the next thread on that topic is rejected with
|
|
30
|
+
``reason="topic_cooldown"``.
|
|
31
|
+
|
|
32
|
+
Centralizing this logic here means ``x_ranker`` (X candidate ordering) and
|
|
33
|
+
``social_target`` (cross-platform target processing) both consume the same
|
|
34
|
+
gate — no drift between surfaces.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import json
|
|
40
|
+
import logging
|
|
41
|
+
import re
|
|
42
|
+
from datetime import datetime, timedelta, timezone
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from typing import Any, Dict, Iterable, Optional, Set, Tuple
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
# Default cooldown window for topic coverage. Founder directive (2026-05-05):
|
|
49
|
+
# "if we've already drafted on a similar topic in the last 7 days, abstain on
|
|
50
|
+
# the next one to avoid spam-pattern detection."
|
|
51
|
+
DEFAULT_COOLDOWN_DAYS = 7
|
|
52
|
+
|
|
53
|
+
# Default high-engagement opportunity-cost threshold. Threads above this score
|
|
54
|
+
# pass the fit floor even without keyword match, but with ``human_only=True``
|
|
55
|
+
# so they don't auto-draft.
|
|
56
|
+
DEFAULT_HIGH_ENGAGEMENT_FLOOR = 50.0
|
|
57
|
+
|
|
58
|
+
# Same SOCIAL_LOG path as ai.x_ranker — single source of truth for "what we
|
|
59
|
+
# recently posted to". Indirection via a getter so tests can monkeypatch.
|
|
60
|
+
SOCIAL_LOG = Path.home() / ".delimit" / "social_log.jsonl"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _social_log_path() -> Path:
    """Return the social-log file location.

    The module-level ``SOCIAL_LOG`` is looked up on every call, so a test
    that rebinds that constant is honoured without touching callers.
    """
    log_file = SOCIAL_LOG
    return log_file
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ── Signal vocabularies ──────────────────────────────────────────────
|
|
69
|
+
|
|
70
|
+
# Direct Delimit-domain signals. These are the canonical surfaces the
|
|
71
|
+
# product addresses. Any one of these is sufficient by itself.
|
|
72
|
+
#
|
|
73
|
+
# Keep this list narrow and concrete — it's the difference between
|
|
74
|
+
# "drafts that have something to say" and "drafts that name-drop a
|
|
75
|
+
# topic". Per the LED-1240 directive: abstain > weak draft.
|
|
76
|
+
DELIMIT_DOMAIN_SIGNALS = (
|
|
77
|
+
# API governance
|
|
78
|
+
r"\bopenapi\b",
|
|
79
|
+
r"\bopen[\s-]?api\b",
|
|
80
|
+
r"\bapi[\s-]?versioning\b",
|
|
81
|
+
r"\bbreaking[\s-]?change(?:s)?\b",
|
|
82
|
+
r"\bsemver\b",
|
|
83
|
+
r"\bsemantic[\s-]?versioning\b",
|
|
84
|
+
r"\bschema[\s-]?diff\b",
|
|
85
|
+
r"\bspec[\s-]?validation\b",
|
|
86
|
+
r"\bapi[\s-]?spec(?:s)?\b",
|
|
87
|
+
r"\bapi[\s-]?contract(?:s)?\b",
|
|
88
|
+
r"\bapi[\s-]?migration(?:s)?\b",
|
|
89
|
+
r"\brelease[\s-]?notes\b",
|
|
90
|
+
# Merge / governance
|
|
91
|
+
r"\bmerge[\s-]?gate\b",
|
|
92
|
+
r"\bci[\s-]?governance\b",
|
|
93
|
+
r"\bapi[\s-]?governance\b",
|
|
94
|
+
# MCP
|
|
95
|
+
r"\bmcp[\s-]?server(?:s)?\b",
|
|
96
|
+
r"\bmodel[\s-]?context[\s-]?protocol\b",
|
|
97
|
+
# Compliance / audit
|
|
98
|
+
r"\bsoc[\s-]?2\b",
|
|
99
|
+
r"\bcompliance\b",
|
|
100
|
+
r"\bauditor(?:s)?\b",
|
|
101
|
+
r"\battestation(?:s)?\b",
|
|
102
|
+
r"\baudit[\s-]?trail\b",
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Adjacent / orbit signals — these are common in the AI-coding-assistant
|
|
106
|
+
# ecosystem we live in. By themselves they're noise; combined with a
|
|
107
|
+
# technical-context word they're a real signal.
|
|
108
|
+
ORBIT_SIGNALS = (
|
|
109
|
+
r"\bclaude[\s-]?code\b",
|
|
110
|
+
r"\bcodex(?:[\s-]?cli)?\b",
|
|
111
|
+
r"\bcursor(?:[\s-]?ai)?\b",
|
|
112
|
+
r"\bgemini[\s-]?cli\b",
|
|
113
|
+
r"\bmcp[\s-]?tool(?:s)?\b",
|
|
114
|
+
r"\bmcp\b",
|
|
115
|
+
r"\bai[\s-]?coding[\s-]?assistant(?:s)?\b",
|
|
116
|
+
r"\bai[\s-]?agent(?:s)?\b",
|
|
117
|
+
r"\bai[\s-]?coder(?:s)?\b",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Technical-context words — these qualify an orbit mention as real signal.
|
|
121
|
+
# The list is what an engineer would actually say when describing an
|
|
122
|
+
# integration / shipping / review / audit moment.
|
|
123
|
+
TECH_CONTEXT_SIGNALS = (
|
|
124
|
+
r"\bship(?:s|ped|ping)?\b",
|
|
125
|
+
r"\bmerg(?:e|ed|es|ing)\b",
|
|
126
|
+
r"\bdeploy(?:s|ed|ing|ment)?\b",
|
|
127
|
+
r"\brelease(?:s|d|ing)?\b",
|
|
128
|
+
r"\bpr(?:s)?\b",
|
|
129
|
+
r"\bpull[\s-]?request(?:s)?\b",
|
|
130
|
+
r"\breview(?:s|ed|ing)?\b",
|
|
131
|
+
r"\baudit(?:s|ed|ing)?\b",
|
|
132
|
+
r"\btest(?:s|ed|ing)?\b",
|
|
133
|
+
r"\bci(?:/cd)?\b",
|
|
134
|
+
r"\bgithub[\s-]?action(?:s)?\b",
|
|
135
|
+
r"\bspec(?:s)?\b",
|
|
136
|
+
r"\bschema\b",
|
|
137
|
+
r"\bbreaking\b",
|
|
138
|
+
r"\bregression(?:s)?\b",
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Compiled at module load — case-insensitive, matched against full text.
|
|
143
|
+
_DELIMIT_DOMAIN_RE = re.compile("|".join(DELIMIT_DOMAIN_SIGNALS), re.IGNORECASE)
|
|
144
|
+
_ORBIT_RE = re.compile("|".join(ORBIT_SIGNALS), re.IGNORECASE)
|
|
145
|
+
_TECH_CONTEXT_RE = re.compile("|".join(TECH_CONTEXT_SIGNALS), re.IGNORECASE)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ── Topic extraction (for cooldown) ─────────────────────────────────
|
|
149
|
+
|
|
150
|
+
# Lightweight topic fingerprint: the union of matched Delimit-domain signals
|
|
151
|
+
# + matched orbit signals (lowercased, deduped). If two threads share a
|
|
152
|
+
# matched signal AND we drafted on one inside the cooldown window, the
|
|
153
|
+
# second one is rejected. This avoids 5-tweets-on-MCP-in-3-days drafts.
|
|
154
|
+
def _extract_topic_fingerprint(text: str) -> Set[str]:
    """Collect the lowercased signal matches found in ``text``.

    The result is the key used by the topic-coverage cooldown: every
    substring matched by the Delimit-domain or orbit patterns, lowercased
    and deduplicated. Falsy input (empty string, ``None``) yields an empty
    set, so such threads can never collide with anything.
    """
    if not text:
        return set()
    domain_hits = {hit.group(0).lower() for hit in _DELIMIT_DOMAIN_RE.finditer(text)}
    orbit_hits = {hit.group(0).lower() for hit in _ORBIT_RE.finditer(text)}
    return domain_hits | orbit_hits
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
|
|
171
|
+
if not value:
|
|
172
|
+
return None
|
|
173
|
+
try:
|
|
174
|
+
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
|
175
|
+
except (TypeError, ValueError):
|
|
176
|
+
return None
|
|
177
|
+
if dt.tzinfo is None:
|
|
178
|
+
dt = dt.replace(tzinfo=timezone.utc)
|
|
179
|
+
return dt
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _recent_topic_fingerprints(
    cooldown_days: int = DEFAULT_COOLDOWN_DAYS,
    log_path: Optional[Path] = None,
) -> Set[str]:
    """Return the union of topic fingerprints logged inside the cooldown window.

    Args:
        cooldown_days: size of the look-back window in days, counted back
            from the current UTC time.
        log_path: JSONL file to read; defaults to ``_social_log_path()``.

    Returns:
        The set of fingerprint terms extracted from every log entry whose
        ``ts`` falls inside the window. A missing or unreadable file yields
        whatever was collected so far (possibly an empty set). Lines that
        are blank, not valid JSON, not JSON objects, or that lack a usable
        ``ts`` are skipped.
    """
    p = log_path or _social_log_path()
    if not p.exists():
        return set()
    cutoff = datetime.now(timezone.utc) - timedelta(days=cooldown_days)
    seen: Set[str] = set()
    try:
        # errors="replace" keeps one undecodable byte from aborting the
        # whole scan (UnicodeDecodeError is not an OSError).
        with open(p, "r", encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except (json.JSONDecodeError, ValueError):
                    continue
                # Valid JSON that is not an object (list, string, number)
                # has no ``.get``; treat it as malformed.
                if not isinstance(entry, dict):
                    continue
                raw_ts = entry.get("ts")
                ts = _parse_iso(raw_ts) if isinstance(raw_ts, str) else None
                if ts is None or ts < cutoff:
                    continue
                # Use the post body + thread title (when present) as the
                # topic surface. Reddit entries log the thread title
                # separately; X entries don't have one but the post body
                # carries the @-prefixed reply context.
                parts = (entry.get("text"), entry.get("thread_title"))
                blob = " ".join(
                    part if isinstance(part, str) else "" for part in parts
                )
                seen.update(_extract_topic_fingerprint(blob))
    except OSError as exc:
        logger.warning("fit_floor: failed to read %s: %s", p, exc)
    return seen
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ── Fit-floor decision ──────────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def evaluate_fit(
    text: str,
    engagement_score: float = 0.0,
    high_engagement_floor: float = DEFAULT_HIGH_ENGAGEMENT_FLOOR,
    recent_topics: Optional[Iterable[str]] = None,
) -> Dict[str, Any]:
    """Decide whether a thread/target passes the Delimit-fit floor.

    Checks run in this order: empty text, topic cooldown, direct
    Delimit-domain hit, orbit hit plus technical-context word,
    high-engagement carve-out, and finally rejection.

    Args:
        text: full thread / target text. Empty or whitespace-only text
            returns a hard rejection (cannot evaluate).
        engagement_score: pre-computed engagement score (e.g. from
            ``x_ranker.score_target``). Used for the opportunity-cost
            carve-out: scores at or above ``high_engagement_floor`` pass
            the floor even without a keyword match, but with
            ``human_only=True``.
        high_engagement_floor: score threshold for the carve-out.
        recent_topics: topic fingerprints we've drafted on inside the
            cooldown window. Any iterable of strings is accepted (a set,
            or a list such as a previously returned
            ``topic_fingerprint``). When the candidate's matched signals
            intersect it, the candidate is rejected with
            ``reason="topic_cooldown"``. ``None`` or an empty collection
            skips the cooldown check.

    Returns:
        Dict with:
          - ``passed`` (bool)
          - ``reason`` (str): "delimit_domain" | "orbit_with_context"
            | "high_engagement_human_only" | "no_delimit_fit"
            | "topic_cooldown" | "empty_text"
          - ``human_only`` (bool): True only for the high-engagement
            carve-out — caller should NOT auto-draft.
          - ``matched_signals`` (list[str]): sorted, lowercased signal
            matches that fired, for audit / replay.
          - ``topic_fingerprint`` (list[str]): same terms, for downstream
            cooldown tracking.
          - ``cooldown_overlap`` (list[str]): present only on a
            ``topic_cooldown`` rejection; the terms that collided.
    """

    def _verdict(
        passed: bool,
        reason: str,
        signals: Iterable[str],
        human_only: bool = False,
        **extra: Any,
    ) -> Dict[str, Any]:
        # One builder keeps every exit path on the same key set and order.
        result: Dict[str, Any] = {
            "passed": passed,
            "reason": reason,
            "human_only": human_only,
            "matched_signals": list(signals),
            "topic_fingerprint": list(signals),
        }
        result.update(extra)
        return result

    if not text or not text.strip():
        return _verdict(False, "empty_text", [])

    domain_matches = {m.group(0).lower() for m in _DELIMIT_DOMAIN_RE.finditer(text)}
    orbit_matches = {m.group(0).lower() for m in _ORBIT_RE.finditer(text)}
    has_tech_context = _TECH_CONTEXT_RE.search(text) is not None
    matched_signals = sorted(domain_matches | orbit_matches)

    # Cooldown check — runs FIRST among the pass paths so a thread that
    # would have qualified by Delimit-domain signal is still rejected if
    # the topic is already covered.
    if recent_topics:
        # Coerce to a set: ``set & list`` raises TypeError, and callers may
        # feed back the list-typed ``topic_fingerprint`` returned here.
        overlap = set(matched_signals) & set(recent_topics)
        if overlap:
            return _verdict(
                False,
                "topic_cooldown",
                matched_signals,
                cooldown_overlap=sorted(overlap),
            )

    # Pass path 1: direct Delimit-domain hit.
    if domain_matches:
        return _verdict(True, "delimit_domain", matched_signals)

    # Pass path 2: orbit hit + technical-context word.
    if orbit_matches and has_tech_context:
        return _verdict(True, "orbit_with_context", matched_signals)

    # Pass path 3: high-engagement opportunity-cost carve-out. Pass
    # through but flag human_only so the caller doesn't auto-draft.
    if engagement_score >= high_engagement_floor:
        return _verdict(
            True, "high_engagement_human_only", matched_signals, human_only=True
        )

    # Default: reject.
    return _verdict(False, "no_delimit_fit", matched_signals)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
# ── Audit logging helpers ──────────────────────────────────────────
|
|
336
|
+
|
|
337
|
+
def append_jsonl(path: Path, payload: Dict[str, Any]) -> None:
    """Append ``payload`` to ``path`` as a single JSON line.

    Best-effort: audit logging must never crash the caller's pipeline, so
    both filesystem errors and serialization errors are logged as warnings
    and swallowed.

    Args:
        path: JSONL file to append to; its parent directory is created if
            missing.
        payload: JSON-serializable mapping to write. Non-ASCII characters
            are written as-is (UTF-8), not escaped.
    """
    try:
        # Serialize before touching the filesystem so an unserializable
        # payload cannot leave a directory or empty file behind.
        line = json.dumps(payload, ensure_ascii=False) + "\n"
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(line)
    except (OSError, TypeError, ValueError) as exc:  # pragma: no cover — best-effort logging
        # TypeError/ValueError come from json.dumps (unsupported types,
        # circular references); OSError from mkdir/open/write.
        logger.warning("fit_floor: failed to write %s: %s", path, exc)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
__all__ = [
|
|
350
|
+
"DEFAULT_COOLDOWN_DAYS",
|
|
351
|
+
"DEFAULT_HIGH_ENGAGEMENT_FLOOR",
|
|
352
|
+
"SOCIAL_LOG",
|
|
353
|
+
"DELIMIT_DOMAIN_SIGNALS",
|
|
354
|
+
"ORBIT_SIGNALS",
|
|
355
|
+
"TECH_CONTEXT_SIGNALS",
|
|
356
|
+
"evaluate_fit",
|
|
357
|
+
"append_jsonl",
|
|
358
|
+
"_extract_topic_fingerprint",
|
|
359
|
+
"_recent_topic_fingerprints",
|
|
360
|
+
]
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""Persistent scan queue for the social distribution pipeline (LED-216 Phase 2).
|
|
2
|
+
|
|
3
|
+
The queue decouples target *scanning* from draft *generation*. Scanners append
|
|
4
|
+
discovered targets here; the draft phase claims pending entries, generates a
|
|
5
|
+
post, and marks them drafted (or failed). This allows the Reddit scanner to
|
|
6
|
+
take 120s without blocking X/HN/devto/etc., and lets the draft stage run
|
|
7
|
+
under its own timeout against a stable backlog.
|
|
8
|
+
|
|
9
|
+
Storage: append-only JSONL at ``~/.delimit/social_scan_queue.jsonl``. Each
|
|
10
|
+
line is a single queue entry with the schema:
|
|
11
|
+
|
|
12
|
+
{
|
|
13
|
+
"queued_at": "2026-05-02T15:30:00Z",
|
|
14
|
+
"platform": "reddit",
|
|
15
|
+
"fingerprint": "reddit:vibecoding:abc123",
|
|
16
|
+
"source_id": "...",
|
|
17
|
+
"raw": { /* full target dict from the scanner */ },
|
|
18
|
+
"status": "pending", # pending | drafted | drafted_failed | expired
|
|
19
|
+
"drafted_at": null,
|
|
20
|
+
"draft_id": null,
|
|
21
|
+
"error": null
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
All writes are atomic (tmp + rename for full rewrites; append-only for the
|
|
25
|
+
hot path). Malformed lines are skipped with a warning rather than crashing
|
|
26
|
+
the whole queue. Dedupe-on-enqueue avoids re-queuing a fingerprint that is
|
|
27
|
+
already pending or was drafted within the last 7 days.
|
|
28
|
+
"""
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import logging
|
|
33
|
+
import os
|
|
34
|
+
import tempfile
|
|
35
|
+
from datetime import datetime, timedelta, timezone
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Configuration
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
QUEUE_FILE = Path.home() / ".delimit" / "social_scan_queue.jsonl"
|
|
47
|
+
|
|
48
|
+
DEFAULT_DEDUPE_HOURS = 24 * 7 # don't re-queue a fingerprint within 7 days
|
|
49
|
+
DEFAULT_EXPIRE_HOURS = 24 * 7 # entries older than 7 days roll to expired
|
|
50
|
+
|
|
51
|
+
PENDING = "pending"
|
|
52
|
+
DRAFTED = "drafted"
|
|
53
|
+
DRAFTED_FAILED = "drafted_failed"
|
|
54
|
+
EXPIRED = "expired"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Internal helpers
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _queue_path() -> Path:
    """Return the queue file location.

    The module-level ``QUEUE_FILE`` is looked up on every call, which is
    what lets tests monkeypatch it.
    """
    queue_file = QUEUE_FILE
    return queue_file
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _now_iso() -> str:
|
|
68
|
+
return datetime.now(timezone.utc).isoformat()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
|
|
72
|
+
if not value:
|
|
73
|
+
return None
|
|
74
|
+
try:
|
|
75
|
+
# ``fromisoformat`` accepts both naive and tz-aware strings.
|
|
76
|
+
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
|
77
|
+
except (TypeError, ValueError):
|
|
78
|
+
return None
|
|
79
|
+
if dt.tzinfo is None:
|
|
80
|
+
dt = dt.replace(tzinfo=timezone.utc)
|
|
81
|
+
return dt
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _ensure_parent(path: Path) -> None:
|
|
85
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _iter_entries(path: Optional[Path] = None) -> Iterator[Dict[str, Any]]:
|
|
89
|
+
"""Yield each parsable JSON line from the queue. Skip and log malformed."""
|
|
90
|
+
p = path or _queue_path()
|
|
91
|
+
if not p.exists():
|
|
92
|
+
return
|
|
93
|
+
try:
|
|
94
|
+
with open(p, "r", encoding="utf-8") as fh:
|
|
95
|
+
for ln_no, line in enumerate(fh, start=1):
|
|
96
|
+
line = line.strip()
|
|
97
|
+
if not line:
|
|
98
|
+
continue
|
|
99
|
+
try:
|
|
100
|
+
yield json.loads(line)
|
|
101
|
+
except (json.JSONDecodeError, ValueError) as exc:
|
|
102
|
+
logger.warning(
|
|
103
|
+
"social_queue: skipping malformed line %d in %s: %s",
|
|
104
|
+
ln_no, p, exc,
|
|
105
|
+
)
|
|
106
|
+
continue
|
|
107
|
+
except OSError as exc:
|
|
108
|
+
logger.warning("social_queue: failed to read %s: %s", p, exc)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _atomic_rewrite(entries: List[Dict[str, Any]], path: Optional[Path] = None) -> None:
    """Rewrite the queue file atomically via tmp + os.replace.

    The entries are written, one sorted-key JSON object per line, to a
    temporary file created in the same directory as the target, which is
    then swapped over the target with ``os.replace``.

    Args:
        entries: full queue contents to persist, in the order to write.
        path: queue file to replace; defaults to ``_queue_path()``.

    Raises:
        Whatever the write or replace raises; the temporary file is
        removed first.
    """
    p = path or _queue_path()
    _ensure_parent(p)
    # Same directory as the target so os.replace never crosses filesystems.
    fd, tmp_path = tempfile.mkstemp(prefix=".social_scan_queue.", dir=str(p.parent))
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            for entry in entries:
                fh.write(json.dumps(entry, sort_keys=True) + "\n")
        os.replace(tmp_path, p)
    except BaseException:
        # BaseException, not Exception: an interrupt (KeyboardInterrupt /
        # SystemExit) mid-write must not leave a stray temp file either.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _append_atomic(entry: Dict[str, Any], path: Optional[Path] = None) -> None:
    """Append one entry to the queue file as a sorted-key JSON line.

    The file is opened in append mode, written once, and closed again.
    ``path`` defaults to ``_queue_path()``; the parent directory is created
    when missing.
    """
    destination = path or _queue_path()
    _ensure_parent(destination)
    serialized = json.dumps(entry, sort_keys=True) + "\n"
    with open(destination, "a", encoding="utf-8") as queue_fh:
        queue_fh.write(serialized)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ---------------------------------------------------------------------------
|
|
139
|
+
# Public API
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def enqueue(target: Dict[str, Any], dedupe_hours: int = DEFAULT_DEDUPE_HOURS) -> Optional[str]:
    """Add a scanner target to the queue unless it is a duplicate.

    A target counts as a duplicate when an entry with the same fingerprint
    is still pending, or was drafted inside the last ``dedupe_hours``
    (judged by ``drafted_at``, falling back to ``queued_at``). Entries that
    are ``drafted_failed``, ``expired``, or drafted before the window do
    not block a re-enqueue.

    Returns:
        The fingerprint when a new pending entry was appended, otherwise
        ``None`` (duplicate, or the target carries no ``fingerprint``).
    """
    fingerprint = (target or {}).get("fingerprint")
    if not fingerprint:
        logger.warning("social_queue.enqueue: target missing fingerprint, skipping")
        return None

    window_start = datetime.now(timezone.utc) - timedelta(hours=dedupe_hours)

    # Walk the whole file: any single blocking entry is enough to dedupe.
    for queued in _iter_entries():
        if queued.get("fingerprint") != fingerprint:
            continue
        state = queued.get("status")
        if state == PENDING:
            return None
        if state != DRAFTED:
            continue
        stamp = _parse_iso(queued.get("drafted_at") or queued.get("queued_at"))
        if stamp and stamp >= window_start:
            return None

    _append_atomic(
        {
            "queued_at": _now_iso(),
            "platform": target.get("platform", ""),
            "fingerprint": fingerprint,
            "source_id": target.get("source_id", ""),
            "raw": target,
            "status": PENDING,
            "drafted_at": None,
            "draft_id": None,
            "error": None,
        }
    )
    return fingerprint
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def claim_pending(platform: Optional[str] = None, limit: int = 20) -> List[Dict[str, Any]]:
    """Return up to ``limit`` pending entries, optionally filtered by platform.

    Read-only — does NOT mutate state. The caller must call ``mark_drafted``
    or ``mark_failed`` once it processes the entry. Returns oldest-first
    (FIFO) so the queue drains in scan order.

    Args:
        platform: when truthy, only entries whose ``platform`` equals it
            are returned.
        limit: maximum number of entries to return. Zero or a negative
            value returns an empty list.

    Returns:
        Pending entries in file (append) order.
    """
    # Check up front: testing the limit only after an append would hand
    # back one entry even when the caller asked for none.
    if limit <= 0:
        return []
    out: List[Dict[str, Any]] = []
    # JSONL append order = FIFO, so the first ``limit`` hits are the oldest.
    for entry in _iter_entries():
        if entry.get("status") != PENDING:
            continue
        if platform and entry.get("platform") != platform:
            continue
        out.append(entry)
        if len(out) >= limit:
            break
    return out
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _update_entry(fingerprint: str, mutator) -> bool:
    """Mutate the newest pending entry for ``fingerprint`` and persist it.

    The queue is loaded in full and searched from the end backwards. The
    first pending entry with a matching fingerprint is replaced by
    ``mutator(entry)`` and the whole queue is rewritten atomically.

    Returns:
        ``True`` when an entry was updated, ``False`` when no pending entry
        matched (nothing is written in that case).
    """
    records = list(_iter_entries())
    for pos in reversed(range(len(records))):
        candidate = records[pos]
        if candidate.get("fingerprint") != fingerprint:
            continue
        if candidate.get("status") != PENDING:
            continue
        records[pos] = mutator(candidate)
        _atomic_rewrite(records)
        return True
    return False
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def mark_drafted(fingerprint: str, draft_id: str) -> bool:
    """Flip the newest pending entry for ``fingerprint`` to drafted.

    Records the draft id (empty string when falsy), stamps ``drafted_at``
    with the current time and clears any previous error. Returns ``True``
    when a pending entry was found and updated.
    """
    def _as_drafted(record: Dict[str, Any]) -> Dict[str, Any]:
        record.update(
            status=DRAFTED,
            drafted_at=_now_iso(),
            draft_id=draft_id or "",
            error=None,
        )
        return record

    return _update_entry(fingerprint, _as_drafted)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def mark_failed(fingerprint: str, error: str) -> bool:
    """Mark a pending entry as drafted_failed with the supplied error string.

    Args:
        fingerprint: fingerprint of the pending entry to update.
        error: failure description. It is converted with ``str()`` and
            truncated to 500 characters; a falsy value is stored as an
            empty string.

    Returns:
        ``True`` when a pending entry was found and updated.
    """
    # Coerce outside the mutator: callers often hand over the exception
    # object itself, and slicing a non-string raises TypeError, which would
    # leave the entry pending.
    message = str(error or "")[:500]

    def _mutate(entry: Dict[str, Any]) -> Dict[str, Any]:
        entry["status"] = DRAFTED_FAILED
        entry["drafted_at"] = _now_iso()
        entry["error"] = message
        return entry

    return _update_entry(fingerprint, _mutate)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def expire_older_than(hours: int = DEFAULT_EXPIRE_HOURS) -> int:
    """Expire pending entries queued more than ``hours`` ago.

    Matching entries stay in the file (for audit) but their status becomes
    expired and their error is set to ``"expired_after_window"``, so they
    stop surfacing through ``claim_pending``. Pending entries whose
    ``queued_at`` cannot be parsed are left untouched. The queue is only
    rewritten when at least one entry changed.

    Returns:
        The number of entries that were expired.
    """
    threshold = datetime.now(timezone.utc) - timedelta(hours=hours)
    records = list(_iter_entries())
    expired_count = 0
    for record in records:
        if record.get("status") != PENDING:
            continue
        queued_time = _parse_iso(record.get("queued_at"))
        if queued_time is not None and queued_time < threshold:
            record["status"] = EXPIRED
            record["error"] = "expired_after_window"
            expired_count += 1
    if expired_count:
        _atomic_rewrite(records)
    return expired_count
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def queue_stats() -> Dict[str, Any]:
    """Count queue entries by status, overall and per platform.

    Returns:
        ``{pending, drafted, drafted_failed, expired, by_platform}`` where
        ``by_platform`` maps each platform (``"unknown"`` when missing) to
        its own four status counters. Entries with any other status are
        not counted, although their platform still gets a bucket.
    """
    known_statuses = (PENDING, DRAFTED, DRAFTED_FAILED, EXPIRED)
    overall = dict.fromkeys(known_statuses, 0)
    per_platform: Dict[str, Dict[str, int]] = {}
    for record in _iter_entries():
        state = record.get("status") or "unknown"
        if state in overall:
            overall[state] += 1
        platform_name = record.get("platform") or "unknown"
        bucket = per_platform.setdefault(
            platform_name, dict.fromkeys(known_statuses, 0)
        )
        if state in bucket:
            bucket[state] += 1
    return {
        "pending": overall[PENDING],
        "drafted": overall[DRAFTED],
        "drafted_failed": overall[DRAFTED_FAILED],
        "expired": overall[EXPIRED],
        "by_platform": per_platform,
    }
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
__all__ = [
|
|
296
|
+
"QUEUE_FILE",
|
|
297
|
+
"PENDING",
|
|
298
|
+
"DRAFTED",
|
|
299
|
+
"DRAFTED_FAILED",
|
|
300
|
+
"EXPIRED",
|
|
301
|
+
"enqueue",
|
|
302
|
+
"claim_pending",
|
|
303
|
+
"mark_drafted",
|
|
304
|
+
"mark_failed",
|
|
305
|
+
"expire_older_than",
|
|
306
|
+
"queue_stats",
|
|
307
|
+
]
|
|
@@ -488,8 +488,20 @@ def sync_social_draft(draft: dict):
|
|
|
488
488
|
client.table("social_drafts").upsert(row).execute()
|
|
489
489
|
|
|
490
490
|
# ntfy on NEW pending drafts only, dedupe via the WO sent-marker file
|
|
491
|
-
# (reused — scoped by draft_id vs wo_id so no collision)
|
|
492
|
-
|
|
491
|
+
# (reused — scoped by draft_id vs wo_id so no collision).
|
|
492
|
+
# LED-216 Phase 1: suppress ntfy for capability-drift drafts and the
|
|
493
|
+
# existing rule-blocked / placeholder buckets — the row still lands
|
|
494
|
+
# in Supabase for audit, but the founder is NOT paged on a draft
|
|
495
|
+
# we already know to be unfit for posting.
|
|
496
|
+
_quality = (row.get("quality") or "").lower()
|
|
497
|
+
_suppressed_qualities = {
|
|
498
|
+
"rejected_capability_drift",
|
|
499
|
+
"placeholder",
|
|
500
|
+
}
|
|
501
|
+
if (
|
|
502
|
+
row["status"] == "pending"
|
|
503
|
+
and _quality not in _suppressed_qualities
|
|
504
|
+
):
|
|
493
505
|
try:
|
|
494
506
|
_push_draft_notification(row)
|
|
495
507
|
except Exception as exc:
|