delimit-cli 4.5.2 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ """Delimit-fit floor (LED-1240 part B — selectivity bar, 2026-05-05).
2
+
3
+ The validator (``capability_validator``) hard-fails generic-claim drafts
4
+ AFTER the LLM has generated them. That catches drift but burns tokens and
5
+ surfaces weak drafts to founder review. This module pushes the bar
6
+ upstream: a thread/target must pass the Delimit-fit floor BEFORE we even
7
+ draft for it. If the floor rejects, we abstain. Abstain > weak draft.
8
+
9
+ The fit floor accepts a thread when ANY of these holds:
10
+
11
+ 1. The text matches a **Delimit-domain signal** (openapi, breaking change,
12
+ semver, schema diff, merge gate, mcp server, ci governance, soc 2, …).
13
+ 2. The text matches an **AI-coding-orbit signal** (claude code, codex cli,
14
+ cursor, gemini cli, mcp tool, …) AND a **technical-context word**
15
+ (ship, merge, deploy, release, PR, review, audit, test, …). Orbit-only
16
+ mentions without technical context (e.g. "I love Claude Code!") are
17
+ not enough — that's the signal that produces noise drafts.
18
+ 3. The engagement score is high enough that abstaining has clear
19
+ opportunity cost (default ≥ 50). High-engagement-but-off-topic threads
20
+ pass through with ``human_only=True`` so the orchestrator can choose
21
+ to surface them for human review without auto-drafting.
22
+
23
+ A failing thread returns a dict with ``reason="no_delimit_fit"`` and is
24
+ NOT drafted. Logging is the caller's job — this module is pure logic.
25
+
26
+ Topic-coverage cooldown is handled by ``_recent_topic_fingerprints``, which
27
+ consults ``social_log.jsonl`` (the same source ``x_ranker`` uses for
28
+ author dedupe). If we drafted on the same topic within the cooldown
29
+ window (default 7 days), ``evaluate_fit`` rejects the next thread on that
30
+ topic with ``reason="topic_cooldown"``.
31
+
32
+ Centralizing this logic here means ``x_ranker`` (X candidate ordering) and
33
+ ``social_target`` (cross-platform target processing) both consume the same
34
+ gate — no drift between surfaces.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import json
40
+ import logging
41
+ import re
42
+ from datetime import datetime, timedelta, timezone
43
+ from pathlib import Path
44
+ from typing import Any, Dict, Iterable, Optional, Set, Tuple
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
# Topic-coverage cooldown, in days. Founder directive (2026-05-05): once we
# have drafted on a similar topic within the last 7 days, abstain on the next
# one to avoid spam-pattern detection.
DEFAULT_COOLDOWN_DAYS: int = 7

# Engagement score at or above which a thread clears the fit floor even
# without a keyword match. Those threads come back with ``human_only=True``
# so they are never auto-drafted.
DEFAULT_HIGH_ENGAGEMENT_FLOOR: float = 50.0
57
+
58
# Same file ai.x_ranker reads: the single source of truth for "what we
# recently posted to". Readers go through the getter below so tests can
# monkeypatch the location.
SOCIAL_LOG: Path = Path.home().joinpath(".delimit", "social_log.jsonl")


def _social_log_path() -> Path:
    """Return the current value of ``SOCIAL_LOG`` (a seam for tests)."""
    return SOCIAL_LOG
66
+
67
+
68
+ # ── Signal vocabularies ──────────────────────────────────────────────
69
+
70
+ # Direct Delimit-domain signals. These are the canonical surfaces the
71
+ # product addresses. Any one of these is sufficient by itself.
72
+ #
73
+ # Keep this list narrow and concrete — it's the difference between
74
+ # "drafts that have something to say" and "drafts that name-drop a
75
+ # topic". Per the LED-1240 directive: abstain > weak draft.
76
DELIMIT_DOMAIN_SIGNALS = (
    # --- API governance ---
    r"\bopenapi\b",
    r"\bopen[\s-]?api\b",
    r"\bapi[\s-]?versioning\b",
    r"\bbreaking[\s-]?change(?:s)?\b",
    r"\bsemver\b",
    r"\bsemantic[\s-]?versioning\b",
    r"\bschema[\s-]?diff\b",
    r"\bspec[\s-]?validation\b",
    r"\bapi[\s-]?spec(?:s)?\b",
    r"\bapi[\s-]?contract(?:s)?\b",
    r"\bapi[\s-]?migration(?:s)?\b",
    r"\brelease[\s-]?notes\b",
    # --- Merge / governance ---
    r"\bmerge[\s-]?gate\b",
    r"\bci[\s-]?governance\b",
    r"\bapi[\s-]?governance\b",
    # --- MCP ---
    r"\bmcp[\s-]?server(?:s)?\b",
    r"\bmodel[\s-]?context[\s-]?protocol\b",
    # --- Compliance / audit ---
    r"\bsoc[\s-]?2\b",
    r"\bcompliance\b",
    r"\bauditor(?:s)?\b",
    r"\battestation(?:s)?\b",
    r"\baudit[\s-]?trail\b",
)

# Orbit signals: vocabulary of the AI-coding-assistant ecosystem. A bare
# mention is treated as noise; it only counts as a real signal when a
# technical-context word appears in the same text.
ORBIT_SIGNALS = (
    r"\bclaude[\s-]?code\b",
    r"\bcodex(?:[\s-]?cli)?\b",
    r"\bcursor(?:[\s-]?ai)?\b",
    r"\bgemini[\s-]?cli\b",
    r"\bmcp[\s-]?tool(?:s)?\b",
    r"\bmcp\b",
    r"\bai[\s-]?coding[\s-]?assistant(?:s)?\b",
    r"\bai[\s-]?agent(?:s)?\b",
    r"\bai[\s-]?coder(?:s)?\b",
)

# Technical-context words: what an engineer would actually say when
# describing an integration / shipping / review / audit moment. Their
# presence is what qualifies an orbit mention as real signal.
TECH_CONTEXT_SIGNALS = (
    r"\bship(?:s|ped|ping)?\b",
    r"\bmerg(?:e|ed|es|ing)\b",
    r"\bdeploy(?:s|ed|ing|ment)?\b",
    r"\brelease(?:s|d|ing)?\b",
    r"\bpr(?:s)?\b",
    r"\bpull[\s-]?request(?:s)?\b",
    r"\breview(?:s|ed|ing)?\b",
    r"\baudit(?:s|ed|ing)?\b",
    r"\btest(?:s|ed|ing)?\b",
    r"\bci(?:/cd)?\b",
    r"\bgithub[\s-]?action(?:s)?\b",
    r"\bspec(?:s)?\b",
    r"\bschema\b",
    r"\bbreaking\b",
    r"\bregression(?:s)?\b",
)


def _compile_any(patterns: Tuple[str, ...]) -> "re.Pattern[str]":
    """Compile ``patterns`` into one case-insensitive alternation."""
    return re.compile("|".join(patterns), re.IGNORECASE)


# Built once at import time; each is searched against the full thread text.
_DELIMIT_DOMAIN_RE = _compile_any(DELIMIT_DOMAIN_SIGNALS)
_ORBIT_RE = _compile_any(ORBIT_SIGNALS)
_TECH_CONTEXT_RE = _compile_any(TECH_CONTEXT_SIGNALS)
146
+
147
+
148
+ # ── Topic extraction (for cooldown) ─────────────────────────────────
149
+
150
+ # Lightweight topic fingerprint: the union of matched Delimit-domain signals
151
+ # + matched orbit signals (lowercased, deduped). If two threads share a
152
+ # matched signal AND we drafted on one inside the cooldown window, the
153
+ # second one is rejected. This avoids 5-tweets-on-MCP-in-3-days drafts.
154
def _extract_topic_fingerprint(text: str) -> Set[str]:
    """Collect the signal terms found in ``text`` as a topic fingerprint.

    The fingerprint is the set of lower-cased substrings matched by the
    Delimit-domain and orbit patterns; it is the key used for the
    topic-coverage cooldown. Falsy input (``""`` or ``None``) produces an
    empty set, so such threads can never collide with anything.
    """
    if not text:
        return set()
    return {
        hit.group(0).lower()
        for pattern in (_DELIMIT_DOMAIN_RE, _ORBIT_RE)
        for hit in pattern.finditer(text)
    }
168
+
169
+
170
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Args:
        value: timestamp text such as ``"2026-05-05T12:00:00Z"``. A ``Z``
            suffix is accepted as UTC.

    Returns:
        The parsed ``datetime``; naive inputs are assumed to be UTC.
        ``None`` when ``value`` is empty, is not a string (e.g. a numeric
        epoch read from a log line), or is not valid ISO-8601.
    """
    # Log entries come from JSON, so ``value`` may be any JSON type. Anything
    # that is not a non-empty string cannot be an ISO timestamp; reject it
    # here rather than letting ``.replace`` raise AttributeError.
    if not value or not isinstance(value, str):
        return None
    try:
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except (TypeError, ValueError):
        return None
    if dt.tzinfo is None:
        # Naive timestamps are treated as UTC so they can be compared
        # against timezone-aware cutoffs.
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
180
+
181
+
182
def _recent_topic_fingerprints(
    cooldown_days: int = DEFAULT_COOLDOWN_DAYS,
    log_path: Optional[Path] = None,
) -> Set[str]:
    """Return the union of topic fingerprints logged inside the cooldown window.

    Reads the social log line by line (JSON Lines) and fingerprints every
    entry whose ``ts`` falls within the last ``cooldown_days`` days.

    Args:
        cooldown_days: size of the look-back window, in days.
        log_path: log file to read; defaults to ``_social_log_path()``.

    Returns:
        Set of fingerprint terms. Empty when the file is missing or
        unreadable (a read failure is logged as a warning, not raised).

    Malformed input is skipped rather than raised on: blank lines, invalid
    JSON, JSON values that are not objects, entries whose ``ts`` is missing
    or unparseable, and non-string ``text`` / ``thread_title`` fields.
    """
    p = log_path or _social_log_path()
    if not p.exists():
        return set()
    cutoff = datetime.now(timezone.utc) - timedelta(days=cooldown_days)
    seen: Set[str] = set()
    try:
        # errors="replace": a stray undecodable byte should cost at most one
        # line, not abort the whole scan with UnicodeDecodeError.
        with open(p, "r", encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except ValueError:  # includes json.JSONDecodeError
                    continue
                # A line can be valid JSON without being an object
                # (e.g. ``[]`` or ``"x"``); such lines carry no entry.
                if not isinstance(entry, dict):
                    continue
                raw_ts = entry.get("ts")
                ts = _parse_iso(raw_ts) if isinstance(raw_ts, str) else None
                if ts is None or ts < cutoff:
                    continue
                # Use the post body + thread title (when present) as the
                # topic surface. Reddit entries log the thread title
                # separately; X entries don't have one but the post body
                # carries the @-prefixed reply context.
                parts = [
                    field
                    for field in (entry.get("text"), entry.get("thread_title"))
                    if isinstance(field, str)
                ]
                seen.update(_extract_topic_fingerprint(" ".join(parts)))
    except OSError as exc:
        logger.warning("fit_floor: failed to read %s: %s", p, exc)
    return seen
221
+
222
+
223
+ # ── Fit-floor decision ──────────────────────────────────────────────
224
+
225
+
226
def evaluate_fit(
    text: str,
    engagement_score: float = 0.0,
    high_engagement_floor: float = DEFAULT_HIGH_ENGAGEMENT_FLOOR,
    recent_topics: Optional[Set[str]] = None,
) -> Dict[str, Any]:
    """Apply the Delimit-fit floor to one thread/target.

    Args:
        text: full thread / target text. Empty or whitespace-only text is
            rejected outright with ``reason="empty_text"``.
        engagement_score: pre-computed engagement score (e.g. from
            ``x_ranker.score_target``). Feeds the opportunity-cost
            carve-out only.
        high_engagement_floor: a score at or above this value passes even
            without a keyword match, flagged ``human_only=True``.
        recent_topics: topic fingerprints already drafted on inside the
            cooldown window. Any overlap with this candidate's matched
            signals rejects it with ``reason="topic_cooldown"``. ``None``
            or an empty set disables the cooldown check.

    Returns:
        Dict with:
          - ``passed`` (bool)
          - ``reason`` (str): "delimit_domain" | "orbit_with_context"
            | "high_engagement_human_only" | "no_delimit_fit"
            | "topic_cooldown" | "empty_text"
          - ``human_only`` (bool): True only for the high-engagement
            carve-out — the caller should NOT auto-draft.
          - ``matched_signals`` (list[str]): sorted signal terms that
            fired, for audit / replay.
          - ``topic_fingerprint`` (list[str]): same terms, for downstream
            cooldown tracking.
          - ``cooldown_overlap`` (list[str]): present only on a
            ``topic_cooldown`` rejection; the colliding terms.
    """

    def verdict(
        passed: bool,
        reason: str,
        signals: list,
        human_only: bool = False,
        **extra: Any,
    ) -> Dict[str, Any]:
        # Every outcome shares the same five leading keys; extras go last.
        result: Dict[str, Any] = {
            "passed": passed,
            "reason": reason,
            "human_only": human_only,
            "matched_signals": signals,
            "topic_fingerprint": list(signals),
        }
        result.update(extra)
        return result

    if not (text and text.strip()):
        return verdict(False, "empty_text", [])

    domain_hits = {m.group(0).lower() for m in _DELIMIT_DOMAIN_RE.finditer(text)}
    orbit_hits = {m.group(0).lower() for m in _ORBIT_RE.finditer(text)}
    signals = sorted(domain_hits | orbit_hits)

    # The cooldown is checked before any pass path, so a thread that would
    # have qualified on a Delimit-domain signal is still rejected when its
    # topic was covered recently.
    if recent_topics:
        covered = set(signals) & recent_topics
        if covered:
            return verdict(
                False,
                "topic_cooldown",
                signals,
                cooldown_overlap=sorted(covered),
            )

    # Pass path 1: a direct Delimit-domain hit is sufficient on its own.
    if domain_hits:
        return verdict(True, "delimit_domain", signals)

    # Pass path 2: an orbit mention counts only alongside a
    # technical-context word.
    if orbit_hits and _TECH_CONTEXT_RE.search(text):
        return verdict(True, "orbit_with_context", signals)

    # Pass path 3: high-engagement carve-out — let it through, but flag it
    # human_only so the caller does not auto-draft.
    if engagement_score >= high_engagement_floor:
        return verdict(
            True, "high_engagement_human_only", signals, human_only=True
        )

    # Nothing matched: abstain.
    return verdict(False, "no_delimit_fit", signals)
333
+
334
+
335
+ # ── Audit logging helpers ──────────────────────────────────────────
336
+
337
def append_jsonl(path: Path, payload: Dict[str, Any]) -> None:
    """Append ``payload`` to ``path`` as a single JSON line, best-effort.

    Audit logging must never crash the caller's pipeline, so both failure
    modes are logged as warnings and swallowed:

      - serialization errors (``TypeError`` for non-JSON-serializable
        values, ``ValueError`` for circular references);
      - filesystem errors (``OSError``) while creating the parent
        directory or writing.

    Args:
        path: JSONL file to append to; parent directories are created
            when missing.
        payload: mapping to serialize. Non-ASCII text is written as-is
            (``ensure_ascii=False``) in UTF-8.
    """
    # Serialize before touching the filesystem: a bad payload then leaves
    # no directory, no empty file and no partial line behind.
    try:
        line = json.dumps(payload, ensure_ascii=False)
    except (TypeError, ValueError) as exc:
        logger.warning(
            "fit_floor: failed to serialize payload for %s: %s", path, exc
        )
        return
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError as exc:  # pragma: no cover — best-effort logging
        logger.warning("fit_floor: failed to write %s: %s", path, exc)
347
+
348
+
349
__all__ = [
    # Tunable defaults and the shared log location.
    "DEFAULT_COOLDOWN_DAYS",
    "DEFAULT_HIGH_ENGAGEMENT_FLOOR",
    "SOCIAL_LOG",
    # Signal vocabularies (raw regex sources).
    "DELIMIT_DOMAIN_SIGNALS",
    "ORBIT_SIGNALS",
    "TECH_CONTEXT_SIGNALS",
    # Entry points.
    "evaluate_fit",
    "append_jsonl",
    # Underscore-prefixed helpers that are nevertheless exported.
    "_extract_topic_fingerprint",
    "_recent_topic_fingerprints",
]
@@ -0,0 +1,14 @@
1
+ """Vendor-news riff system (LED-1250).
2
+
3
+ Sensor + drafter that detects high-engagement vendor announcements on X
4
+ and auto-drafts a brand-voice Delimit-POV riff that rides the news cycle
5
+ for algorithm boost.
6
+
7
+ Public surface:
8
+ from ai.vendor_news import scan_vendor_news, draft_vendor_riff
9
+ """
10
+
11
+ from ai.vendor_news.sensor import scan_vendor_news
12
+ from ai.vendor_news.drafter import draft_vendor_riff
13
+
14
+ __all__ = ["scan_vendor_news", "draft_vendor_riff"]