delimit-cli 4.5.1 → 4.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +15 -5
  3. package/bin/delimit-cli.js +109 -24
  4. package/gateway/ai/content_engine.py +3 -4
  5. package/gateway/ai/inbox_classifier.py +215 -0
  6. package/gateway/ai/integrations/opensage_wrapper.py +4 -1
  7. package/gateway/ai/ledger_manager.py +218 -38
  8. package/gateway/ai/license.py +26 -0
  9. package/gateway/ai/notify.py +68 -3
  10. package/gateway/ai/reddit_proxy.py +93 -15
  11. package/gateway/ai/reddit_scanner.py +36 -18
  12. package/gateway/ai/remote_resolve.py +422 -0
  13. package/gateway/ai/server.py +301 -117
  14. package/gateway/ai/social_capability/__init__.py +6 -0
  15. package/gateway/ai/social_capability/capability_validator.py +367 -0
  16. package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
  17. package/gateway/ai/social_capability/fit_floor.py +360 -0
  18. package/gateway/ai/social_queue.py +307 -0
  19. package/gateway/ai/supabase_sync.py +14 -2
  20. package/gateway/ai/swarm.py +29 -11
  21. package/gateway/ai/tui.py +6 -2
  22. package/gateway/ai/vendor_news/__init__.py +14 -0
  23. package/gateway/ai/vendor_news/drafter.py +562 -0
  24. package/gateway/ai/vendor_news/sensor.py +509 -0
  25. package/gateway/ai/vendor_news/watchlist.yaml +71 -0
  26. package/gateway/ai/x_ranker.py +417 -0
  27. package/lib/attest-mcp.js +487 -0
  28. package/lib/attest-telemetry.js +48 -0
  29. package/lib/delimit-home.js +35 -0
  30. package/lib/delimit-template.js +14 -0
  31. package/package.json +25 -3
  32. package/scripts/postinstall.js +89 -40
  33. package/adapters/codex-security.js +0 -64
  34. package/adapters/codex-skill.js +0 -78
  35. package/gateway/ai/content_grounding/__init__.py +0 -98
  36. package/gateway/ai/content_grounding/build.py +0 -350
  37. package/gateway/ai/content_grounding/consume.py +0 -280
  38. package/gateway/ai/content_grounding/features.py +0 -218
  39. package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
  40. package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
  41. package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
  42. package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
  43. package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
  44. package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
  45. package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
  46. package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
  47. package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
  48. package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
  49. package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
  50. package/gateway/ai/content_grounding/schemas.py +0 -276
  51. package/gateway/ai/content_grounding/telemetry.py +0 -221
  52. package/gateway/ai/inbox_drafts/__init__.py +0 -61
  53. package/gateway/ai/inbox_drafts/registry.py +0 -412
  54. package/gateway/ai/inbox_drafts/schema.py +0 -374
  55. package/gateway/ai/inbox_executor.py +0 -565
@@ -0,0 +1,417 @@
1
+ """X engagement ranker (LED-216 Phase 2 + LED-1240 part B selectivity bar).
2
+
3
+ Filters and orders X (Twitter) candidate posts so we reply to the highest-
4
+ yield originals only. The score formula and the 7-day author dedupe are the
5
+ panel's Q4 conclusion: Codex's age-normalized formula plus Opus's API filters
6
+ (``-is:retweet -is:reply lang:en``) and the founder's anti-spammy posture
7
+ ("don't hammer the same 5 accounts daily").
8
+
9
+ Score formula (per the LED-216 Phase 2 directive):
10
+
11
+ score = (likes + 2 * retweets + 0.5 * replies + 3 * quotes) / max(age_hours, 1)
12
+
13
+ Filter pipeline applied in order:
14
+ 1. ``is_op`` → drop reply-chain targets (we only reply to OPs)
15
+ 2. ``lang == 'en'`` → drop non-English (US/UK builder community is the wedge)
16
+ 3. ``is_retweet == False`` → drop retweets
17
+ 4. dedupe authors we replied to in the last ``replied_authors_window_hours``
18
+ 5. **Engagement-score floor** (LED-1240 part B) — drop low-yield threads
19
+ outright. Configured via ``MIN_ENGAGEMENT_SCORE`` (default 1.5, was 0).
20
+ 6. **Delimit-fit floor** (LED-1240 part B) — drop threads that don't
21
+ match Delimit-domain or orbit-with-context signals. High-engagement
22
+ threads above ``HIGH_ENGAGEMENT_OPPORTUNITY_COST`` pass through but
23
+ are flagged ``_human_only=True`` so callers do NOT auto-draft.
24
+ 7. **Topic-coverage cooldown** (LED-1240 part B) — drop threads whose
25
+ matched signals overlap topics we drafted on inside the last 7 days.
26
+ 8. sort by score DESC
27
+
28
+ Rejected candidates are logged to ``~/.delimit/x_rejected_targets.jsonl``
29
+ (append-only, one JSON line per rejection) so we can audit the bar over
30
+ time without changing the return shape.
31
+
32
+ Tolerant defaults: any field missing on a target is treated as "do not drop"
33
+ so partial Twttr241 payloads still rank instead of being silently filtered.
34
+ """
35
+ from __future__ import annotations
36
+
37
+ import json
38
+ import logging
39
+ from datetime import datetime, timedelta, timezone
40
+ from pathlib import Path
41
+ from typing import Any, Dict, Iterable, List, Optional, Set
42
+
43
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Both logs live under ``~/.delimit``.
SOCIAL_LOG = Path.home().joinpath(".delimit", "social_log.jsonl")
REJECTED_TARGETS_LOG = Path.home().joinpath(".delimit", "x_rejected_targets.jsonl")

# Author-dedupe window in hours: 7 days, per the founder's anti-spammy directive.
DEFAULT_WINDOW_HOURS = 7 * 24

# LED-1240 part B selectivity bar (founder directive 2026-05-05): raise the
# engagement floor and add a fit floor BEFORE LLM tokens are spent drafting
# for noise-grade threads.
#
# Moving the floor from 0 to 1.5 removes the long tail of ``_rank_score=0.0``
# candidates seen in the recent X target log (targets with 0 likes and
# 0 retweets scored 0.0). The value is deliberately conservative: a candidate
# scoring exactly 1.5 still passes, only lower scores are dropped.
MIN_ENGAGEMENT_SCORE = 1.5

# A thread scoring above this passes the Delimit-fit floor even without a
# keyword match, but is flagged ``_human_only=True`` so the caller does NOT
# auto-draft; the orchestrator decides whether to surface it.
HIGH_ENGAGEMENT_OPPORTUNITY_COST = 50.0

# Topic-coverage cooldown in days; same length as the author-dedupe window.
DEFAULT_COOLDOWN_DAYS = 7
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Helpers
73
+ # ---------------------------------------------------------------------------
74
+
75
+
76
def _social_log_path() -> Path:
    """Return the location of the social log.

    Exposed as a function rather than a bare constant read so tests can
    monkeypatch where the log lives.
    """
    log_location = SOCIAL_LOG
    return log_location
79
+
80
+
81
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    ``Z`` is rewritten to ``+00:00`` before parsing, and a naive result is
    assumed to be UTC.

    Returns:
        The parsed ``datetime``, or ``None`` when ``value`` is empty, is not
        a string (e.g. a numeric epoch found in a log entry), or cannot be
        parsed. This function never raises on bad input.
    """
    # Non-string values have no ``.replace`` and would otherwise escape as an
    # AttributeError, which callers that promise tolerance do not catch.
    if not value or not isinstance(value, str):
        return None
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except (TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
91
+
92
+
93
def _parse_twitter_created_at(value: Optional[str]) -> Optional[datetime]:
    """Parse a Twttr241 ``created_at`` value into a ``datetime``.

    ISO 8601 is tried first (via ``_parse_iso``) for caches that have already
    normalized the value. Otherwise the Twitter-style layout
    ``Wed Apr 30 14:23:55 +0000 2026`` is tried.

    Returns:
        The parsed ``datetime``, or ``None`` when ``value`` is empty, is not
        a string, or matches neither format.
    """
    # Reject non-strings up front so the result does not depend on how the
    # ISO parser reacts to e.g. a numeric epoch.
    if not value or not isinstance(value, str):
        return None
    iso = _parse_iso(value)
    if iso is not None:
        return iso
    try:
        # NOTE(review): %a / %b match locale-dependent day and month names;
        # this presumably runs under a C/English locale — confirm.
        return datetime.strptime(value, "%a %b %d %H:%M:%S %z %Y")
    except (TypeError, ValueError):
        return None
108
+
109
+
110
def _normalize_handle(handle: Optional[str]) -> str:
    """Canonicalize an X handle for set membership checks.

    The handle is trimmed, lowercased, and stripped of a single leading
    ``@``. Falsy input yields the empty string.
    """
    if handle:
        cleaned = handle.strip().lower()
        return cleaned[1:] if cleaned.startswith("@") else cleaned
    return ""
117
+
118
+
119
def _replied_authors_within(window_hours: int, log_path: Optional[Path] = None) -> Set[str]:
    """Return the authors we replied to on Twitter inside the window.

    Reads the JSONL social log line by line. Only entries whose ``platform``
    is ``"twitter"`` and whose ``ts`` falls inside the last ``window_hours``
    are considered.

    The log does not carry an id->author mapping for replies, so the author
    is recovered on a best-effort basis: a stored ``replying_to_user`` field
    when present, otherwise a leading ``@handle`` token in the post ``text``.
    Both are conservative forms of dedupe — better to skip a borderline
    candidate than to spam.

    Malformed input is skipped, never raised: blank lines, invalid JSON,
    JSON values that are not objects, and fields of the wrong type are all
    ignored.

    Args:
        window_hours: size of the look-back window, in hours.
        log_path: log file to read; defaults to ``_social_log_path()``.

    Returns:
        A set of lowercase handles without the leading ``@``. Empty when the
        log does not exist or cannot be read.
    """
    p = log_path or _social_log_path()
    if not p.exists():
        return set()
    cutoff = datetime.now(timezone.utc) - timedelta(hours=window_hours)
    authors: Set[str] = set()
    try:
        # errors="replace" confines an undecodable byte to its own line
        # instead of aborting the whole read with UnicodeDecodeError.
        with open(p, "r", encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except (json.JSONDecodeError, ValueError):
                    continue
                # A line can be valid JSON without being an object
                # (``[1, 2]``, ``"x"``, ``42``); those have no ``.get``.
                if not isinstance(entry, dict):
                    continue
                if entry.get("platform") != "twitter":
                    continue
                raw_ts = entry.get("ts")
                ts = _parse_iso(raw_ts) if isinstance(raw_ts, str) else None
                if ts is None or ts < cutoff:
                    continue
                # Preferred: explicit replying_to_user field.
                raw_explicit = entry.get("replying_to_user")
                explicit = _normalize_handle(raw_explicit) if isinstance(raw_explicit, str) else ""
                if explicit:
                    authors.add(explicit)
                    continue
                # Fallback: leading @handle in the post text. This is the way
                # X reply text starts when the client appends the reply prefix.
                raw_text = entry.get("text")
                text = raw_text.lstrip() if isinstance(raw_text, str) else ""
                if text.startswith("@"):
                    token = text.split()[0][1:]
                    token = "".join(c for c in token if c.isalnum() or c == "_").lower()
                    if token:
                        authors.add(token)
    except OSError as exc:
        logger.warning("x_ranker: failed to read %s: %s", p, exc)
    return authors
167
+
168
+
169
def _coerce_int(value: Any) -> int:
    """Coerce an engagement counter to ``int``, falling back to 0.

    Falsy values (``None``, ``""``, ``0``, empty containers) become 0.
    Anything ``int()`` rejects also becomes 0, including non-finite floats:
    ``int(float("inf"))`` raises ``OverflowError`` and ``int(float("nan"))``
    raises ``ValueError``.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0
174
+
175
+
176
def _age_hours(target: Dict[str, Any]) -> float:
    """Best-effort age of a target in hours, never less than 1.0.

    The timestamp is read from ``target['created_at']``, falling back to
    ``target['created']``, and parsed with ``_parse_twitter_created_at``
    (which tries ISO 8601 before the Twitter-style layout).

    Missing, non-string, or unparseable timestamps return ``1.0`` so the
    score is not divided by something pathological and the candidate stays
    scoreable rather than being dropped. Ages under one hour, including
    timestamps in the future, are clamped to ``1.0`` as well.
    """
    raw = target.get("created_at") or target.get("created") or ""
    # A non-string timestamp (e.g. a numeric epoch) is treated as
    # unparseable rather than handed to the string parsers.
    if not raw or not isinstance(raw, str):
        return 1.0
    created = _parse_twitter_created_at(raw)
    if created is None:
        return 1.0
    elapsed = datetime.now(timezone.utc) - created
    return max(elapsed.total_seconds() / 3600.0, 1.0)
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # Core API
195
+ # ---------------------------------------------------------------------------
196
+
197
+
198
def score_target(target: Dict[str, Any]) -> float:
    """Compute the age-normalized engagement score of one X candidate.

    score = (likes + 2*retweets + 0.5*replies + 3*quotes) / max(age_hours, 1)

    Each counter is read from its primary key with a fallback alias
    (``likes``/``favorite_count``, ``retweets``/``retweet_count``,
    ``reply_count``/``replies``, ``quote_count``/``quotes``) and coerced to
    an integer, so missing or malformed counters contribute 0.
    """

    def _counter(primary: str, alias: str) -> int:
        # A falsy primary value (missing or 0) defers to the alias.
        return _coerce_int(target.get(primary) or target.get(alias))

    like_count = _counter("likes", "favorite_count")
    retweet_count = _counter("retweets", "retweet_count")
    reply_count = _counter("reply_count", "replies")
    quote_count = _counter("quote_count", "quotes")
    hours_old = _age_hours(target)
    weighted = like_count + 2 * retweet_count + 0.5 * reply_count + 3 * quote_count
    return weighted / max(hours_old, 1.0)
210
+
211
+
212
def _is_op(target: Dict[str, Any]) -> bool:
    """Tell whether a target looks like an original post, not a reply.

    Missing flags are read as "is an OP" so partial payloads are not dropped.
    Any one of these marks the target as not an OP:

    * ``is_reply`` is exactly ``True``;
    * ``in_reply_to_status_id_str`` or ``in_reply_to_status_id`` is set;
    * ``is_op`` is exactly ``False``.

    Reply signals are honored even when ``is_op`` is ``True``, so a target
    carrying both (e.g. ``is_op`` set by an upstream scanner and ``is_reply``
    flipped by a later enrichment) is still treated as a reply.
    """
    flagged_as_reply = target.get("is_reply") is True
    parent_id = target.get("in_reply_to_status_id_str") or target.get("in_reply_to_status_id")
    explicitly_not_op = target.get("is_op") is False
    return not (flagged_as_reply or parent_id or explicitly_not_op)
229
+
230
+
231
def _is_english(target: Dict[str, Any]) -> bool:
    """Tell whether a target's ``lang`` is an accepted English tag.

    A missing ``lang`` counts as English, because Twttr241 doesn't always
    populate the field. Otherwise the value is compared case-insensitively
    against ``en``, ``en-us`` and ``en-gb``.
    """
    lang = target.get("lang")
    return lang is None or str(lang).lower() in {"en", "en-us", "en-gb"}
237
+
238
+
239
def _is_retweet(target: Dict[str, Any]) -> bool:
    """Tell whether a target is a retweet.

    True when ``is_retweet`` is exactly ``True``, or when the post text
    (``content_snippet``, falling back to ``text``) starts with ``RT @``
    after leading whitespace is removed.
    """
    # The explicit flag short-circuits, so the text is not inspected at all
    # for targets already marked as retweets.
    if target.get("is_retweet") is True:
        return True
    body = target.get("content_snippet") or target.get("text") or ""
    return body.lstrip().startswith("RT @")
246
+
247
+
248
def _log_rejection(target: Dict[str, Any], score: float, reason: str,
                   extra: Optional[Dict[str, Any]] = None,
                   log_path: Optional[Path] = None) -> None:
    """Append a single JSON line to the rejected-targets audit log.

    Best-effort — never raises into the caller's pipeline. Filesystem errors
    and payload-building errors (a non-numeric ``score``, values JSON cannot
    encode) are logged as a warning and swallowed.

    Args:
        target: the rejected candidate. The tweet id is taken from
            ``source_id``, ``id_str`` or ``id``; the snippet is the first
            200 characters of ``content_snippet`` or ``text``.
        score: engagement score of the candidate, stored rounded to 4 places.
        reason: machine-readable rejection reason.
        extra: optional additional keys merged into the record; they override
            the base keys on collision.
        log_path: file to append to; defaults to ``REJECTED_TARGETS_LOG``.
    """

    def _jsonable(obj: Any) -> Any:
        # Sets are emitted as sorted lists so records stay deterministic.
        # Any other non-JSON value is stringified on purpose: for an audit
        # log a lossy record is better than a lost one.
        if isinstance(obj, (set, frozenset)):
            return sorted(obj, key=str)
        return str(obj)

    p = log_path or REJECTED_TARGETS_LOG
    try:
        snippet = str(target.get("content_snippet") or target.get("text") or "")[:200]
        payload: Dict[str, Any] = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "tweet_id": target.get("source_id") or target.get("id_str") or target.get("id") or "",
            "fingerprint": target.get("fingerprint") or "",
            "author": target.get("author") or "",
            "score": round(float(score), 4),
            "snippet": snippet,
            "reason": reason,
        }
        if extra:
            payload.update(extra)
        # Serialize before touching the filesystem so a bad payload cannot
        # leave a partial line behind.
        record = json.dumps(payload, ensure_ascii=False, default=_jsonable)
        p.parent.mkdir(parents=True, exist_ok=True)
        with open(p, "a", encoding="utf-8") as fh:
            fh.write(record + "\n")
    except (OSError, TypeError, ValueError) as exc:  # pragma: no cover — best-effort logging
        logger.warning("x_ranker: failed to write rejection log %s: %s", p, exc)
274
+
275
+
276
def rank_x_targets(
    targets: Iterable[Dict[str, Any]],
    replied_authors_window_hours: int = DEFAULT_WINDOW_HOURS,
    replied_authors: Optional[Set[str]] = None,
    min_engagement_score: float = MIN_ENGAGEMENT_SCORE,
    high_engagement_floor: float = HIGH_ENGAGEMENT_OPPORTUNITY_COST,
    cooldown_days: int = DEFAULT_COOLDOWN_DAYS,
    recent_topics: Optional[Set[str]] = None,
    enable_fit_floor: bool = True,
    log_rejections: bool = True,
) -> List[Dict[str, Any]]:
    """Filter and sort X targets by engagement.

    Non-dict items and items carrying a truthy ``error`` key are skipped.
    The remaining candidates go through, in order: OP check, English check,
    retweet check, replied-author dedupe, engagement-score floor, and the
    Delimit-fit floor with topic cooldown (delegated to
    ``ai.social_capability.fit_floor.evaluate_fit``). Rejections from the
    last two stages are written to the rejection log; the earlier filters
    drop silently.

    Args:
        targets: iterable of candidate target dicts from ``_scan_x``.
        replied_authors_window_hours: dedupe window for already-replied
            authors. Default 7 days per founder directive.
        replied_authors: explicit author set; entries are normalized
            (lowercased, leading ``@`` removed) before use. When ``None``,
            the set is read from the social log. Tests inject an explicit
            set to avoid touching disk.
        min_engagement_score: hard floor on the engagement-rate score.
            Threads below this are dropped before fit-floor evaluation.
        high_engagement_floor: forwarded to ``evaluate_fit``; score above
            which a thread can pass the fit floor without a keyword match
            (flagged ``_human_only=True``).
        cooldown_days: topic-coverage cooldown window in days.
        recent_topics: explicit set of topic fingerprints already drafted on.
            ``None`` loads them through ``fit_floor`` when the fit floor is
            enabled. Tests inject directly.
        enable_fit_floor: kill switch for the LED-1240 part B gates. False
            preserves the legacy behavior (no fit floor, no min-score, no
            cooldown). Default True.
        log_rejections: when True, append rejection records to the rejected
            targets log. Tests disable this to keep the audit log clean.

    Returns:
        A new list of shallow copies sorted by engagement score DESC. Each
        item gets a ``_rank_score`` key. Filtered items are dropped (not kept
        with score=0) so the caller can blindly slice the first N. Items that
        went through the fit floor also carry ``_fit_reason``,
        ``_human_only``, ``_matched_signals`` and ``_topic_fingerprint``;
        ``_human_only=True`` marks items the caller should route to human
        review without auto-drafting.
    """
    if replied_authors is None:
        replied_authors = _replied_authors_within(replied_authors_window_hours)
    else:
        replied_authors = {_normalize_handle(a) for a in replied_authors}

    if enable_fit_floor and recent_topics is None:
        try:
            from ai.social_capability.fit_floor import _recent_topic_fingerprints
            recent_topics = _recent_topic_fingerprints(cooldown_days=cooldown_days)
        except Exception as exc:  # pragma: no cover — tolerant fallback
            logger.warning("x_ranker: cooldown bootstrap failed (%s) — proceeding without", exc)
            recent_topics = set()
    elif recent_topics is None:
        recent_topics = set()

    # ``evaluate_fit`` is resolved lazily, at most once per call. If the
    # import fails, the fit floor is switched off for the rest of this call
    # so the import is not retried (and the warning not repeated) for every
    # remaining candidate.
    evaluate_fit = None
    fit_floor_active = enable_fit_floor

    survivors: List[Dict[str, Any]] = []
    for t in targets or []:
        if not isinstance(t, dict):
            continue
        if t.get("error"):
            continue

        # 1. is_op
        if not _is_op(t):
            continue
        # 2. lang == 'en' (tolerant of missing field)
        if not _is_english(t):
            continue
        # 3. drop retweets
        if _is_retweet(t):
            continue
        # 4. dedupe authors we replied to in window
        author_norm = _normalize_handle(t.get("author"))
        if author_norm and author_norm in replied_authors:
            continue

        score = round(score_target(t), 4)

        # 5. engagement-score floor (LED-1240 part B). Skipped for legacy
        #    callers that pass enable_fit_floor=False.
        if enable_fit_floor and score < min_engagement_score:
            if log_rejections:
                _log_rejection(
                    t, score, "below_engagement_floor",
                    extra={"min_score": min_engagement_score},
                )
            continue

        # Work on a copy so the caller's dict is never mutated.
        scored = dict(t)
        scored["_rank_score"] = score

        # 6 + 7. Delimit-fit floor + topic cooldown.
        if fit_floor_active and evaluate_fit is None:
            try:
                from ai.social_capability.fit_floor import evaluate_fit
            except Exception as exc:  # pragma: no cover — tolerant fallback
                logger.warning("x_ranker: fit_floor import failed (%s) — passing through", exc)
                fit_floor_active = False

        if fit_floor_active:
            text = t.get("content_snippet") or t.get("text") or ""
            verdict = evaluate_fit(
                text,
                engagement_score=score,
                high_engagement_floor=high_engagement_floor,
                recent_topics=recent_topics,
            )
            if not verdict["passed"]:
                if log_rejections:
                    _log_rejection(
                        t, score, verdict["reason"],
                        extra={
                            "matched_signals": verdict.get("matched_signals", []),
                            "topic_fingerprint": verdict.get("topic_fingerprint", []),
                        },
                    )
                continue
            scored["_fit_reason"] = verdict["reason"]
            scored["_human_only"] = bool(verdict.get("human_only"))
            scored["_matched_signals"] = verdict.get("matched_signals", [])
            scored["_topic_fingerprint"] = verdict.get("topic_fingerprint", [])

        survivors.append(scored)

    # 8. sort score DESC, stable
    survivors.sort(key=lambda x: x.get("_rank_score", 0.0), reverse=True)
    return survivors
407
+
408
+
409
# Public surface of the module: the entry points, then the tunable constants.
__all__ = [
    "rank_x_targets",
    "score_target",
    "DEFAULT_COOLDOWN_DAYS",
    "DEFAULT_WINDOW_HOURS",
    "HIGH_ENGAGEMENT_OPPORTUNITY_COST",
    "MIN_ENGAGEMENT_SCORE",
    "REJECTED_TARGETS_LOG",
]