delimit-cli 4.5.1 → 4.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +15 -5
  3. package/bin/delimit-cli.js +109 -24
  4. package/gateway/ai/content_engine.py +3 -4
  5. package/gateway/ai/inbox_classifier.py +215 -0
  6. package/gateway/ai/integrations/opensage_wrapper.py +4 -1
  7. package/gateway/ai/ledger_manager.py +218 -38
  8. package/gateway/ai/license.py +26 -0
  9. package/gateway/ai/notify.py +68 -3
  10. package/gateway/ai/reddit_proxy.py +93 -15
  11. package/gateway/ai/reddit_scanner.py +36 -18
  12. package/gateway/ai/remote_resolve.py +422 -0
  13. package/gateway/ai/server.py +301 -117
  14. package/gateway/ai/social_capability/__init__.py +6 -0
  15. package/gateway/ai/social_capability/capability_validator.py +367 -0
  16. package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
  17. package/gateway/ai/social_capability/fit_floor.py +360 -0
  18. package/gateway/ai/social_queue.py +307 -0
  19. package/gateway/ai/supabase_sync.py +14 -2
  20. package/gateway/ai/swarm.py +29 -11
  21. package/gateway/ai/tui.py +6 -2
  22. package/gateway/ai/vendor_news/__init__.py +14 -0
  23. package/gateway/ai/vendor_news/drafter.py +562 -0
  24. package/gateway/ai/vendor_news/sensor.py +509 -0
  25. package/gateway/ai/vendor_news/watchlist.yaml +71 -0
  26. package/gateway/ai/x_ranker.py +417 -0
  27. package/lib/attest-mcp.js +487 -0
  28. package/lib/attest-telemetry.js +48 -0
  29. package/lib/delimit-home.js +35 -0
  30. package/lib/delimit-template.js +14 -0
  31. package/package.json +25 -3
  32. package/scripts/postinstall.js +89 -40
  33. package/adapters/codex-security.js +0 -64
  34. package/adapters/codex-skill.js +0 -78
  35. package/gateway/ai/content_grounding/__init__.py +0 -98
  36. package/gateway/ai/content_grounding/build.py +0 -350
  37. package/gateway/ai/content_grounding/consume.py +0 -280
  38. package/gateway/ai/content_grounding/features.py +0 -218
  39. package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
  40. package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
  41. package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
  42. package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
  43. package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
  44. package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
  45. package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
  46. package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
  47. package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
  48. package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
  49. package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
  50. package/gateway/ai/content_grounding/schemas.py +0 -276
  51. package/gateway/ai/content_grounding/telemetry.py +0 -221
  52. package/gateway/ai/inbox_drafts/__init__.py +0 -61
  53. package/gateway/ai/inbox_drafts/registry.py +0 -412
  54. package/gateway/ai/inbox_drafts/schema.py +0 -374
  55. package/gateway/ai/inbox_executor.py +0 -565
@@ -0,0 +1,360 @@
1
+ """Delimit-fit floor (LED-1240 part B — selectivity bar, 2026-05-05).
2
+
3
+ The validator (``capability_validator``) hard-fails generic-claim drafts
4
+ AFTER the LLM has generated them. That catches drift but burns tokens and
5
+ surfaces weak drafts to founder review. This module pushes the bar
6
+ upstream: a thread/target must pass the Delimit-fit floor BEFORE we even
7
+ draft for it. If the floor rejects, we abstain. Abstain > weak draft.
8
+
9
+ The fit floor accepts a thread when ANY of these holds:
10
+
11
+ 1. The text matches a **Delimit-domain signal** (openapi, breaking change,
12
+ semver, schema diff, merge gate, mcp server, ci governance, soc 2, …).
13
+ 2. The text matches an **AI-coding-orbit signal** (claude code, codex cli,
14
+ cursor, gemini cli, mcp tool, …) AND a **technical-context word**
15
+ (ship, merge, deploy, release, PR, review, audit, test, …). Orbit-only
16
+ mentions without technical context (e.g. "I love Claude Code!") are
17
+ not enough — that's the signal that produces noise drafts.
18
+ 3. The engagement score is high enough that abstaining has clear
19
+ opportunity cost (default ≥ 50). High-engagement-but-off-topic threads
20
+ pass through with ``human_only=True`` so the orchestrator can choose
21
+ to surface them for human review without auto-drafting.
22
+
23
+ A failing thread returns a dict with ``reason="no_delimit_fit"`` and is
24
+ NOT drafted. Logging is the caller's job — this module is pure logic.
25
+
26
+ Topic-coverage cooldown (``_recent_topic_fingerprints``) is a separate function
27
+ that consults ``social_log.jsonl`` (the same source ``x_ranker`` uses for
28
+ author dedupe). If we drafted on the same topic within the cooldown
29
+ window (default 7 days), the next thread on that topic is rejected with
30
+ ``reason="topic_cooldown"``.
31
+
32
+ Centralizing this logic here means ``x_ranker`` (X candidate ordering) and
33
+ ``social_target`` (cross-platform target processing) both consume the same
34
+ gate — no drift between surfaces.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import json
40
+ import logging
41
+ import re
42
+ from datetime import datetime, timedelta, timezone
43
+ from pathlib import Path
44
+ from typing import Any, Dict, Iterable, Optional, Set, Tuple
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+ # Default cooldown window for topic coverage. Founder directive (2026-05-05):
49
+ # "if we've already drafted on a similar topic in the last 7 days, abstain on
50
+ # the next one to avoid spam-pattern detection."
51
+ DEFAULT_COOLDOWN_DAYS = 7
52
+
53
+ # Default high-engagement opportunity-cost threshold. Threads above this score
54
+ # pass the fit floor even without keyword match, but with ``human_only=True``
55
+ # so they don't auto-draft.
56
+ DEFAULT_HIGH_ENGAGEMENT_FLOOR = 50.0
57
+
58
+ # Same SOCIAL_LOG path as ai.x_ranker — single source of truth for "what we
59
+ # recently posted to". Indirection via a getter so tests can monkeypatch.
60
+ SOCIAL_LOG = Path.home() / ".delimit" / "social_log.jsonl"
61
+
62
+
63
def _social_log_path() -> Path:
    """Return the module-level ``SOCIAL_LOG`` path.

    Exists as a seam so tests can monkeypatch the log location.
    """
    log_file: Path = SOCIAL_LOG
    return log_file
66
+
67
+
68
+ # ── Signal vocabularies ──────────────────────────────────────────────
69
+
70
+ # Direct Delimit-domain signals. These are the canonical surfaces the
71
+ # product addresses. Any one of these is sufficient by itself.
72
+ #
73
+ # Keep this list narrow and concrete — it's the difference between
74
+ # "drafts that have something to say" and "drafts that name-drop a
75
+ # topic". Per the LED-1240 directive: abstain > weak draft.
76
+ DELIMIT_DOMAIN_SIGNALS = (
77
+ # API governance
78
+ r"\bopenapi\b",
79
+ r"\bopen[\s-]?api\b",
80
+ r"\bapi[\s-]?versioning\b",
81
+ r"\bbreaking[\s-]?change(?:s)?\b",
82
+ r"\bsemver\b",
83
+ r"\bsemantic[\s-]?versioning\b",
84
+ r"\bschema[\s-]?diff\b",
85
+ r"\bspec[\s-]?validation\b",
86
+ r"\bapi[\s-]?spec(?:s)?\b",
87
+ r"\bapi[\s-]?contract(?:s)?\b",
88
+ r"\bapi[\s-]?migration(?:s)?\b",
89
+ r"\brelease[\s-]?notes\b",
90
+ # Merge / governance
91
+ r"\bmerge[\s-]?gate\b",
92
+ r"\bci[\s-]?governance\b",
93
+ r"\bapi[\s-]?governance\b",
94
+ # MCP
95
+ r"\bmcp[\s-]?server(?:s)?\b",
96
+ r"\bmodel[\s-]?context[\s-]?protocol\b",
97
+ # Compliance / audit
98
+ r"\bsoc[\s-]?2\b",
99
+ r"\bcompliance\b",
100
+ r"\bauditor(?:s)?\b",
101
+ r"\battestation(?:s)?\b",
102
+ r"\baudit[\s-]?trail\b",
103
+ )
104
+
105
+ # Adjacent / orbit signals — these are common in the AI-coding-assistant
106
+ # ecosystem we live in. By themselves they're noise; combined with a
107
+ # technical-context word they're a real signal.
108
+ ORBIT_SIGNALS = (
109
+ r"\bclaude[\s-]?code\b",
110
+ r"\bcodex(?:[\s-]?cli)?\b",
111
+ r"\bcursor(?:[\s-]?ai)?\b",
112
+ r"\bgemini[\s-]?cli\b",
113
+ r"\bmcp[\s-]?tool(?:s)?\b",
114
+ r"\bmcp\b",
115
+ r"\bai[\s-]?coding[\s-]?assistant(?:s)?\b",
116
+ r"\bai[\s-]?agent(?:s)?\b",
117
+ r"\bai[\s-]?coder(?:s)?\b",
118
+ )
119
+
120
+ # Technical-context words — these qualify an orbit mention as real signal.
121
+ # The list is what an engineer would actually say when describing an
122
+ # integration / shipping / review / audit moment.
123
+ TECH_CONTEXT_SIGNALS = (
124
+ r"\bship(?:s|ped|ping)?\b",
125
+ r"\bmerg(?:e|ed|es|ing)\b",
126
+ r"\bdeploy(?:s|ed|ing|ment)?\b",
127
+ r"\brelease(?:s|d|ing)?\b",
128
+ r"\bpr(?:s)?\b",
129
+ r"\bpull[\s-]?request(?:s)?\b",
130
+ r"\breview(?:s|ed|ing)?\b",
131
+ r"\baudit(?:s|ed|ing)?\b",
132
+ r"\btest(?:s|ed|ing)?\b",
133
+ r"\bci(?:/cd)?\b",
134
+ r"\bgithub[\s-]?action(?:s)?\b",
135
+ r"\bspec(?:s)?\b",
136
+ r"\bschema\b",
137
+ r"\bbreaking\b",
138
+ r"\bregression(?:s)?\b",
139
+ )
140
+
141
+
142
+ # Compiled at module load — case-insensitive, matched against full text.
143
+ _DELIMIT_DOMAIN_RE = re.compile("|".join(DELIMIT_DOMAIN_SIGNALS), re.IGNORECASE)
144
+ _ORBIT_RE = re.compile("|".join(ORBIT_SIGNALS), re.IGNORECASE)
145
+ _TECH_CONTEXT_RE = re.compile("|".join(TECH_CONTEXT_SIGNALS), re.IGNORECASE)
146
+
147
+
148
+ # ── Topic extraction (for cooldown) ─────────────────────────────────
149
+
150
+ # Lightweight topic fingerprint: the union of matched Delimit-domain signals
151
+ # + matched orbit signals (lowercased, deduped). If two threads share a
152
+ # matched signal AND we drafted on one inside the cooldown window, the
153
+ # second one is rejected. This avoids 5-tweets-on-MCP-in-3-days drafts.
154
def _extract_topic_fingerprint(text: str) -> Set[str]:
    """Collect the signal terms that occur in ``text``.

    Every match of the Delimit-domain pattern and of the orbit pattern
    contributes its matched text, lowercased. The resulting set is the key
    for the topic-coverage cooldown. Falsy input (``""`` or ``None``) yields
    an empty set, so such threads can never collide with anything.
    """
    if not text:
        return set()
    domain_terms = {hit.group(0).lower() for hit in _DELIMIT_DOMAIN_RE.finditer(text)}
    orbit_terms = {hit.group(0).lower() for hit in _ORBIT_RE.finditer(text)}
    return domain_terms | orbit_terms
168
+
169
+
170
+ def _parse_iso(value: Optional[str]) -> Optional[datetime]:
171
+ if not value:
172
+ return None
173
+ try:
174
+ dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
175
+ except (TypeError, ValueError):
176
+ return None
177
+ if dt.tzinfo is None:
178
+ dt = dt.replace(tzinfo=timezone.utc)
179
+ return dt
180
+
181
+
182
def _recent_topic_fingerprints(
    cooldown_days: int = DEFAULT_COOLDOWN_DAYS,
    log_path: Optional[Path] = None,
) -> Set[str]:
    """Return the union of topic fingerprints logged inside the cooldown window.

    Reads the social log (``log_path`` or the default from
    ``_social_log_path()``) line by line as JSONL. Entries whose ``ts`` falls
    within the last ``cooldown_days`` days contribute the fingerprint of
    their ``text`` and ``thread_title`` fields.

    Tolerant of malformed input: blank lines, invalid JSON, JSON values that
    are not objects, missing or unparsable timestamps, non-string text fields
    and undecodable bytes are all skipped instead of raising.

    Args:
        cooldown_days: size of the look-back window in days.
        log_path: optional override of the log file location.

    Returns:
        Set of lowercased signal terms; empty when the log is missing or
        unreadable.
    """
    p = log_path or _social_log_path()
    if not p.exists():
        return set()
    cutoff = datetime.now(timezone.utc) - timedelta(days=cooldown_days)
    seen: Set[str] = set()
    try:
        # errors="replace" turns undecodable bytes into U+FFFD so a corrupt
        # line degrades to a per-line skip instead of aborting the whole read.
        with open(p, "r", encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except (json.JSONDecodeError, ValueError):
                    continue
                # A valid JSON line may still be a list, number or string.
                if not isinstance(entry, dict):
                    continue
                raw_ts = entry.get("ts")
                ts = _parse_iso(raw_ts) if isinstance(raw_ts, str) else None
                if ts is None or ts < cutoff:
                    continue
                # Use the post body + thread title (when present) as the
                # topic surface. Non-string values are treated as absent.
                blob = " ".join(
                    part if isinstance(part, str) else ""
                    for part in (entry.get("text"), entry.get("thread_title"))
                )
                seen.update(_extract_topic_fingerprint(blob))
    except OSError as exc:
        logger.warning("fit_floor: failed to read %s: %s", p, exc)
    return seen
221
+
222
+
223
+ # ── Fit-floor decision ──────────────────────────────────────────────
224
+
225
+
226
def evaluate_fit(
    text: str,
    engagement_score: float = 0.0,
    high_engagement_floor: float = DEFAULT_HIGH_ENGAGEMENT_FLOOR,
    recent_topics: Optional[Set[str]] = None,
) -> Dict[str, Any]:
    """Judge whether a thread/target clears the Delimit-fit floor.

    Args:
        text: full thread / target text. Empty or whitespace-only text is
            rejected outright with ``reason="empty_text"``.
        engagement_score: pre-computed engagement score. Scores at or above
            ``high_engagement_floor`` pass even without a keyword match, but
            are flagged ``human_only=True``.
        high_engagement_floor: threshold for that carve-out.
        recent_topics: topic fingerprints already drafted on inside the
            cooldown window. If the candidate's matched signals intersect
            this set the candidate is rejected with
            ``reason="topic_cooldown"``. ``None`` or an empty set disables
            the cooldown check.

    Returns:
        Dict with the keys:
            - ``passed`` (bool)
            - ``reason`` (str): "delimit_domain" | "orbit_with_context"
              | "high_engagement_human_only" | "no_delimit_fit"
              | "topic_cooldown" | "empty_text"
            - ``human_only`` (bool): True only for the high-engagement
              carve-out.
            - ``matched_signals`` (list[str]): sorted, lowercased matches of
              the domain and orbit patterns.
            - ``topic_fingerprint`` (list[str]): copy of ``matched_signals``.
            - ``cooldown_overlap`` (list[str]): present only on a
              ``topic_cooldown`` rejection.
    """

    def _verdict(passed, reason, signals, fingerprint, human_only=False):
        # Single place that fixes the shape and key order of every result.
        return {
            "passed": passed,
            "reason": reason,
            "human_only": human_only,
            "matched_signals": signals,
            "topic_fingerprint": fingerprint,
        }

    if not (text and text.strip()):
        return _verdict(False, "empty_text", [], [])

    domain_hits = {hit.group(0).lower() for hit in _DELIMIT_DOMAIN_RE.finditer(text)}
    orbit_hits = {hit.group(0).lower() for hit in _ORBIT_RE.finditer(text)}
    signals = sorted(domain_hits | orbit_hits)
    fingerprint = list(signals)

    # The cooldown is evaluated before any pass path, so a thread that would
    # otherwise qualify is still rejected when its topic is already covered.
    if recent_topics:
        covered = set(fingerprint) & recent_topics
        if covered:
            rejection = _verdict(False, "topic_cooldown", signals, fingerprint)
            rejection["cooldown_overlap"] = sorted(covered)
            return rejection

    # Pass path 1: any direct Delimit-domain signal is sufficient.
    if domain_hits:
        return _verdict(True, "delimit_domain", signals, fingerprint)

    # Pass path 2: an orbit mention qualified by a technical-context word.
    if orbit_hits and _TECH_CONTEXT_RE.search(text):
        return _verdict(True, "orbit_with_context", signals, fingerprint)

    # Pass path 3: high engagement passes, flagged for human handling only.
    if engagement_score >= high_engagement_floor:
        return _verdict(
            True, "high_engagement_human_only", signals, fingerprint, human_only=True
        )

    return _verdict(False, "no_delimit_fit", signals, fingerprint)
333
+
334
+
335
+ # ── Audit logging helpers ──────────────────────────────────────────
336
+
337
def append_jsonl(path: Path, payload: Dict[str, Any]) -> None:
    """Append ``payload`` as a single JSON line to ``path``.

    Best-effort audit logging: the parent directory is created on demand and
    any failure is logged as a warning rather than raised, so logging can
    never crash the caller's pipeline. This covers filesystem errors as well
    as payloads that cannot be serialised to JSON.

    Args:
        path: destination JSONL file.
        payload: JSON-serialisable mapping to record.
    """
    try:
        # Serialise before touching the filesystem so an unserialisable
        # payload leaves the log file untouched.
        line = json.dumps(payload, ensure_ascii=False) + "\n"
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(line)
    except (OSError, TypeError, ValueError) as exc:  # best-effort logging
        logger.warning("fit_floor: failed to write %s: %s", path, exc)
347
+
348
+
349
+ __all__ = [
350
+ "DEFAULT_COOLDOWN_DAYS",
351
+ "DEFAULT_HIGH_ENGAGEMENT_FLOOR",
352
+ "SOCIAL_LOG",
353
+ "DELIMIT_DOMAIN_SIGNALS",
354
+ "ORBIT_SIGNALS",
355
+ "TECH_CONTEXT_SIGNALS",
356
+ "evaluate_fit",
357
+ "append_jsonl",
358
+ "_extract_topic_fingerprint",
359
+ "_recent_topic_fingerprints",
360
+ ]
@@ -0,0 +1,307 @@
1
+ """Persistent scan queue for the social distribution pipeline (LED-216 Phase 2).
2
+
3
+ The queue decouples target *scanning* from draft *generation*. Scanners append
4
+ discovered targets here; the draft phase claims pending entries, generates a
5
+ post, and marks them drafted (or failed). This allows the Reddit scanner to
6
+ take 120s without blocking X/HN/devto/etc., and lets the draft stage run
7
+ under its own timeout against a stable backlog.
8
+
9
+ Storage: append-only JSONL at ``~/.delimit/social_scan_queue.jsonl``. Each
10
+ line is a single queue entry with the schema:
11
+
12
+ {
13
+ "queued_at": "2026-05-02T15:30:00Z",
14
+ "platform": "reddit",
15
+ "fingerprint": "reddit:vibecoding:abc123",
16
+ "source_id": "...",
17
+ "raw": { /* full target dict from the scanner */ },
18
+ "status": "pending", # pending | drafted | drafted_failed | expired
19
+ "drafted_at": null,
20
+ "draft_id": null,
21
+ "error": null
22
+ }
23
+
24
+ All writes are atomic (tmp + rename for full rewrites; append-only for the
25
+ hot path). Malformed lines are skipped with a warning rather than crashing
26
+ the whole queue. Dedupe-on-enqueue avoids re-queuing a fingerprint that is
27
+ already pending or was drafted within the last 7 days.
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import logging
33
+ import os
34
+ import tempfile
35
+ from datetime import datetime, timedelta, timezone
36
+ from pathlib import Path
37
+ from typing import Any, Dict, Iterator, List, Optional
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Configuration
44
+ # ---------------------------------------------------------------------------
45
+
46
+ QUEUE_FILE = Path.home() / ".delimit" / "social_scan_queue.jsonl"
47
+
48
+ DEFAULT_DEDUPE_HOURS = 24 * 7 # don't re-queue a fingerprint within 7 days
49
+ DEFAULT_EXPIRE_HOURS = 24 * 7 # entries older than 7 days roll to expired
50
+
51
+ PENDING = "pending"
52
+ DRAFTED = "drafted"
53
+ DRAFTED_FAILED = "drafted_failed"
54
+ EXPIRED = "expired"
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Internal helpers
59
+ # ---------------------------------------------------------------------------
60
+
61
+
62
def _queue_path() -> Path:
    """Return the module-level ``QUEUE_FILE`` path.

    Kept as a function so tests can monkeypatch the queue location.
    """
    queue_file: Path = QUEUE_FILE
    return queue_file
65
+
66
+
67
+ def _now_iso() -> str:
68
+ return datetime.now(timezone.utc).isoformat()
69
+
70
+
71
+ def _parse_iso(value: Optional[str]) -> Optional[datetime]:
72
+ if not value:
73
+ return None
74
+ try:
75
+ # ``fromisoformat`` accepts both naive and tz-aware strings.
76
+ dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
77
+ except (TypeError, ValueError):
78
+ return None
79
+ if dt.tzinfo is None:
80
+ dt = dt.replace(tzinfo=timezone.utc)
81
+ return dt
82
+
83
+
84
+ def _ensure_parent(path: Path) -> None:
85
+ path.parent.mkdir(parents=True, exist_ok=True)
86
+
87
+
88
+ def _iter_entries(path: Optional[Path] = None) -> Iterator[Dict[str, Any]]:
89
+ """Yield each parsable JSON line from the queue. Skip and log malformed."""
90
+ p = path or _queue_path()
91
+ if not p.exists():
92
+ return
93
+ try:
94
+ with open(p, "r", encoding="utf-8") as fh:
95
+ for ln_no, line in enumerate(fh, start=1):
96
+ line = line.strip()
97
+ if not line:
98
+ continue
99
+ try:
100
+ yield json.loads(line)
101
+ except (json.JSONDecodeError, ValueError) as exc:
102
+ logger.warning(
103
+ "social_queue: skipping malformed line %d in %s: %s",
104
+ ln_no, p, exc,
105
+ )
106
+ continue
107
+ except OSError as exc:
108
+ logger.warning("social_queue: failed to read %s: %s", p, exc)
109
+
110
+
111
def _atomic_rewrite(entries: List[Dict[str, Any]], path: Optional[Path] = None) -> None:
    """Replace the whole queue file with ``entries``, one JSON line each.

    The content is first written to a temporary file in the destination
    directory and then moved over the target with ``os.replace``. If anything
    fails, the temporary file is removed (best effort) and the exception is
    re-raised.
    """
    destination = path or _queue_path()
    _ensure_parent(destination)
    handle, scratch = tempfile.mkstemp(
        prefix=".social_scan_queue.", dir=str(destination.parent)
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.writelines(
                json.dumps(item, sort_keys=True) + "\n" for item in entries
            )
        os.replace(scratch, destination)
    except Exception:
        # Do not leave a stray temp file behind; cleanup failure is ignored
        # so the original error is the one that propagates.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
127
+
128
+
129
def _append_atomic(entry: Dict[str, Any], path: Optional[Path] = None) -> None:
    """Append ``entry`` to the queue file as one JSON line.

    The entry is serialised with sorted keys before the file is opened, then
    written in append mode and the file is closed. No explicit fsync is
    performed.
    """
    destination = path or _queue_path()
    _ensure_parent(destination)
    serialized = "".join((json.dumps(entry, sort_keys=True), "\n"))
    with open(destination, "a", encoding="utf-8") as sink:
        sink.write(serialized)
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # Public API
140
+ # ---------------------------------------------------------------------------
141
+
142
+
143
def enqueue(target: Dict[str, Any], dedupe_hours: int = DEFAULT_DEDUPE_HOURS) -> Optional[str]:
    """Add a scanner target to the queue as a new pending entry.

    Returns the target's fingerprint when an entry was appended. Returns
    ``None`` when the target has no ``fingerprint`` (a warning is logged), or
    when it is a duplicate: an entry with the same fingerprint is already
    pending, or was drafted within the last ``dedupe_hours`` hours.
    """
    fingerprint = target.get("fingerprint") if target else None
    if not fingerprint:
        logger.warning("social_queue.enqueue: target missing fingerprint, skipping")
        return None

    window_start = datetime.now(timezone.utc) - timedelta(hours=dedupe_hours)

    # Look through prior entries for this fingerprint. Entries that are
    # drafted_failed, expired, or drafted before the window do not block.
    for prior in _iter_entries():
        if prior.get("fingerprint") != fingerprint:
            continue
        prior_status = prior.get("status")
        if prior_status == PENDING:
            return None
        if prior_status != DRAFTED:
            continue
        stamp = _parse_iso(prior.get("drafted_at") or prior.get("queued_at"))
        if stamp and stamp >= window_start:
            return None

    _append_atomic(
        {
            "queued_at": _now_iso(),
            "platform": target.get("platform", ""),
            "fingerprint": fingerprint,
            "source_id": target.get("source_id", ""),
            "raw": target,
            "status": PENDING,
            "drafted_at": None,
            "draft_id": None,
            "error": None,
        }
    )
    return fingerprint
184
+
185
+
186
def claim_pending(platform: Optional[str] = None, limit: int = 20) -> List[Dict[str, Any]]:
    """Return up to ``limit`` pending entries, optionally filtered by platform.

    Read-only: does NOT mutate state. The caller must call ``mark_drafted``
    or ``mark_failed`` once it processes the entry. Entries are returned in
    file order, which is oldest-first (FIFO) because the queue is appended to.

    Args:
        platform: when truthy, only entries whose ``platform`` equals this
            value are returned.
        limit: maximum number of entries to return. Zero or a negative value
            yields an empty list.

    Returns:
        List of pending entry dicts, at most ``limit`` long.
    """
    # Guard first: the length check below runs only after an append, so
    # without this a limit of 0 would still hand back one entry.
    if limit <= 0:
        return []
    out: List[Dict[str, Any]] = []
    for entry in _iter_entries():
        if entry.get("status") != PENDING:
            continue
        if platform and entry.get("platform") != platform:
            continue
        out.append(entry)
        if len(out) >= limit:
            break
    return out
204
+
205
+
206
def _update_entry(fingerprint: str, mutator) -> bool:
    """Mutate the newest pending entry for ``fingerprint`` and persist it.

    Searches the queue from the end for an entry with a matching fingerprint
    and ``status == pending``. On a hit the entry is replaced by
    ``mutator(entry)``, the whole queue is rewritten atomically, and ``True``
    is returned. Returns ``False`` without writing when nothing matches.
    """
    snapshot = list(_iter_entries())
    position = next(
        (
            i
            for i in reversed(range(len(snapshot)))
            if snapshot[i].get("fingerprint") == fingerprint
            and snapshot[i].get("status") == PENDING
        ),
        None,
    )
    if position is None:
        return False
    snapshot[position] = mutator(snapshot[position])
    _atomic_rewrite(snapshot)
    return True
223
+
224
+
225
def mark_drafted(fingerprint: str, draft_id: str) -> bool:
    """Flip the pending entry for ``fingerprint`` to drafted.

    Records the current time as ``drafted_at``, stores ``draft_id`` (an empty
    string when falsy) and clears ``error``. Returns True when a pending
    entry was found and updated.
    """
    def _as_drafted(record: Dict[str, Any]) -> Dict[str, Any]:
        record.update(
            {
                "status": DRAFTED,
                "drafted_at": _now_iso(),
                "draft_id": draft_id or "",
                "error": None,
            }
        )
        return record

    return _update_entry(fingerprint, _as_drafted)
234
+
235
+
236
def mark_failed(fingerprint: str, error: str) -> bool:
    """Flip the pending entry for ``fingerprint`` to drafted_failed.

    Records the current time as ``drafted_at`` and stores ``error`` truncated
    to 500 characters (an empty string when falsy). Returns True when a
    pending entry was found and updated.
    """
    def _as_failed(record: Dict[str, Any]) -> Dict[str, Any]:
        record.update(
            {
                "status": DRAFTED_FAILED,
                "drafted_at": _now_iso(),
                "error": (error or "")[:500],
            }
        )
        return record

    return _update_entry(fingerprint, _as_failed)
244
+
245
+
246
def expire_older_than(hours: int = DEFAULT_EXPIRE_HOURS) -> int:
    """Mark pending entries queued more than ``hours`` ago as expired.

    Expired entries stay in the file, but their status changes, so
    ``claim_pending`` no longer returns them. Pending entries whose
    ``queued_at`` cannot be parsed are left untouched. The queue file is
    rewritten only when at least one entry changed.

    Returns:
        The number of entries that were expired.
    """
    threshold = datetime.now(timezone.utc) - timedelta(hours=hours)
    snapshot = list(_iter_entries())
    expired_count = 0
    for item in snapshot:
        if item.get("status") != PENDING:
            continue
        queued = _parse_iso(item.get("queued_at"))
        if queued is not None and queued < threshold:
            item["status"] = EXPIRED
            item["error"] = "expired_after_window"
            expired_count += 1
    if expired_count:
        _atomic_rewrite(snapshot)
    return expired_count
270
+
271
+
272
def queue_stats() -> Dict[str, Any]:
    """Count queue entries by status, overall and per platform.

    Returns ``{pending, drafted, drafted_failed, expired, by_platform}``,
    where ``by_platform`` maps each platform (``"unknown"`` when missing) to
    its own per-status counts. Entries with an unrecognised status are not
    counted, though their platform still gets a bucket.
    """
    tracked = (PENDING, DRAFTED, DRAFTED_FAILED, EXPIRED)
    overall: Dict[str, int] = dict.fromkeys(tracked, 0)
    per_platform: Dict[str, Dict[str, int]] = {}
    for item in _iter_entries():
        state = item.get("status") or "unknown"
        name = item.get("platform") or "unknown"
        bucket = per_platform.setdefault(name, dict.fromkeys(tracked, 0))
        if state in overall:
            overall[state] += 1
            bucket[state] += 1
    return {
        "pending": overall[PENDING],
        "drafted": overall[DRAFTED],
        "drafted_failed": overall[DRAFTED_FAILED],
        "expired": overall[EXPIRED],
        "by_platform": per_platform,
    }
293
+
294
+
295
+ __all__ = [
296
+ "QUEUE_FILE",
297
+ "PENDING",
298
+ "DRAFTED",
299
+ "DRAFTED_FAILED",
300
+ "EXPIRED",
301
+ "enqueue",
302
+ "claim_pending",
303
+ "mark_drafted",
304
+ "mark_failed",
305
+ "expire_older_than",
306
+ "queue_stats",
307
+ ]
@@ -488,8 +488,20 @@ def sync_social_draft(draft: dict):
488
488
  client.table("social_drafts").upsert(row).execute()
489
489
 
490
490
  # ntfy on NEW pending drafts only, dedupe via the WO sent-marker file
491
- # (reused — scoped by draft_id vs wo_id so no collision)
492
- if row["status"] == "pending":
491
+ # (reused — scoped by draft_id vs wo_id so no collision).
492
+ # LED-216 Phase 1: suppress ntfy for capability-drift drafts and the
493
+ # existing rule-blocked / placeholder buckets — the row still lands
494
+ # in Supabase for audit, but the founder is NOT paged on a draft
495
+ # we already know to be unfit for posting.
496
+ _quality = (row.get("quality") or "").lower()
497
+ _suppressed_qualities = {
498
+ "rejected_capability_drift",
499
+ "placeholder",
500
+ }
501
+ if (
502
+ row["status"] == "pending"
503
+ and _quality not in _suppressed_qualities
504
+ ):
493
505
  try:
494
506
  _push_draft_notification(row)
495
507
  except Exception as exc: