delimit-cli 4.5.0 → 4.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +108 -0
  2. package/README.md +2 -2
  3. package/adapters/cursor-rules.js +17 -4
  4. package/bin/delimit-cli.js +109 -24
  5. package/gateway/ai/content_engine.py +3 -4
  6. package/gateway/ai/inbox_classifier.py +215 -0
  7. package/gateway/ai/integrations/opensage_wrapper.py +4 -1
  8. package/gateway/ai/ledger_manager.py +218 -38
  9. package/gateway/ai/license.py +26 -0
  10. package/gateway/ai/notify.py +68 -3
  11. package/gateway/ai/reddit_proxy.py +93 -15
  12. package/gateway/ai/reddit_scanner.py +36 -18
  13. package/gateway/ai/server.py +128 -6
  14. package/gateway/ai/social_capability/__init__.py +6 -0
  15. package/gateway/ai/social_capability/capability_validator.py +273 -0
  16. package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
  17. package/gateway/ai/social_queue.py +307 -0
  18. package/gateway/ai/supabase_sync.py +14 -2
  19. package/gateway/ai/swarm.py +29 -11
  20. package/gateway/ai/tui.py +6 -2
  21. package/gateway/ai/x_ranker.py +276 -0
  22. package/lib/attest-mcp.js +487 -0
  23. package/lib/attest-telemetry.js +48 -0
  24. package/lib/delimit-home.js +35 -0
  25. package/lib/delimit-template.js +14 -0
  26. package/lib/managed-section.js +92 -0
  27. package/lib/trust-page-engine.js +6 -2
  28. package/lib/wrap-engine.js +21 -4
  29. package/package.json +8 -2
  30. package/scripts/postinstall.js +89 -40
  31. package/gateway/ai/content_grounding/__init__.py +0 -98
  32. package/gateway/ai/content_grounding/build.py +0 -350
  33. package/gateway/ai/content_grounding/consume.py +0 -280
  34. package/gateway/ai/content_grounding/features.py +0 -218
  35. package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
  36. package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
  37. package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
  38. package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
  39. package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
  40. package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
  41. package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
  42. package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
  43. package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
  44. package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
  45. package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
  46. package/gateway/ai/content_grounding/schemas.py +0 -276
  47. package/gateway/ai/content_grounding/telemetry.py +0 -221
  48. package/gateway/ai/inbox_drafts/__init__.py +0 -61
  49. package/gateway/ai/inbox_drafts/registry.py +0 -412
  50. package/gateway/ai/inbox_drafts/schema.py +0 -374
  51. package/gateway/ai/inbox_executor.py +0 -565
@@ -0,0 +1,307 @@
1
+ """Persistent scan queue for the social distribution pipeline (LED-216 Phase 2).
2
+
3
+ The queue decouples target *scanning* from draft *generation*. Scanners append
4
+ discovered targets here; the draft phase claims pending entries, generates a
5
+ post, and marks them drafted (or failed). This allows the Reddit scanner to
6
+ take 120s without blocking X/HN/devto/etc., and lets the draft stage run
7
+ under its own timeout against a stable backlog.
8
+
9
+ Storage: append-only JSONL at ``~/.delimit/social_scan_queue.jsonl``. Each
10
+ line is a single queue entry with the schema:
11
+
12
+ {
13
+ "queued_at": "2026-05-02T15:30:00Z",
14
+ "platform": "reddit",
15
+ "fingerprint": "reddit:vibecoding:abc123",
16
+ "source_id": "...",
17
+ "raw": { /* full target dict from the scanner */ },
18
+ "status": "pending", # pending | drafted | drafted_failed | expired
19
+ "drafted_at": null,
20
+ "draft_id": null,
21
+ "error": null
22
+ }
23
+
24
+ All writes are atomic (tmp + rename for full rewrites; append-only for the
25
+ hot path). Malformed lines are skipped with a warning rather than crashing
26
+ the whole queue. Dedupe-on-enqueue avoids re-queuing a fingerprint that is
27
+ already pending or was drafted within the last 7 days.
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import logging
33
+ import os
34
+ import tempfile
35
+ from datetime import datetime, timedelta, timezone
36
+ from pathlib import Path
37
+ from typing import Any, Dict, Iterator, List, Optional
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Configuration
44
+ # ---------------------------------------------------------------------------
45
+
46
+ QUEUE_FILE = Path.home() / ".delimit" / "social_scan_queue.jsonl"
47
+
48
+ DEFAULT_DEDUPE_HOURS = 24 * 7 # don't re-queue a fingerprint within 7 days
49
+ DEFAULT_EXPIRE_HOURS = 24 * 7 # entries older than 7 days roll to expired
50
+
51
+ PENDING = "pending"
52
+ DRAFTED = "drafted"
53
+ DRAFTED_FAILED = "drafted_failed"
54
+ EXPIRED = "expired"
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Internal helpers
59
+ # ---------------------------------------------------------------------------
60
+
61
+
62
def _queue_path() -> Path:
    """Return the location of the on-disk scan queue.

    Callers go through this function instead of reading ``QUEUE_FILE``
    directly so that tests have a single seam to monkeypatch.
    """
    return QUEUE_FILE
65
+
66
+
67
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
69
+
70
+
71
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    Args:
        value: timestamp text, optionally ending in ``Z``. Queue entries are
            read back from JSON, so the stored value is not guaranteed to be
            a string.

    Returns:
        The parsed ``datetime`` (naive inputs are assumed to be UTC), or
        ``None`` when ``value`` is empty, not a string, or not parsable.
    """
    # A non-string value (e.g. a numeric timestamp) has no ``.replace`` and
    # would raise AttributeError; treat it as unparsable instead.
    if not value or not isinstance(value, str):
        return None
    try:
        # ``fromisoformat`` accepts both naive and tz-aware strings; the
        # trailing ``Z`` is rewritten to an explicit UTC offset first.
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
82
+
83
+
84
def _ensure_parent(path: Path) -> None:
    """Create the directory that will hold ``path``, including missing ancestors."""
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
86
+
87
+
88
def _iter_entries(path: Optional[Path] = None) -> Iterator[Dict[str, Any]]:
    """Yield each queue entry (a JSON object) stored in the queue file.

    Args:
        path: file to read; defaults to the configured queue path.

    Yields:
        One ``dict`` per well-formed line, in file order.

    Blank lines are ignored. Lines that are not valid JSON, or that decode to
    something other than a JSON object, are skipped with a warning so one bad
    line cannot crash every caller (they all call ``.get`` on the result).
    A missing file yields nothing; a read error is logged and ends iteration.
    """
    p = path or _queue_path()
    if not p.exists():
        return
    try:
        with open(p, "r", encoding="utf-8") as fh:
            for ln_no, line in enumerate(fh, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except ValueError as exc:  # JSONDecodeError is a ValueError
                    logger.warning(
                        "social_queue: skipping malformed line %d in %s: %s",
                        ln_no, p, exc,
                    )
                    continue
                # Valid JSON that is not an object (list, number, null, ...)
                # is just as unusable as a syntax error for the queue.
                if not isinstance(entry, dict):
                    logger.warning(
                        "social_queue: skipping non-object line %d in %s",
                        ln_no, p,
                    )
                    continue
                yield entry
    except OSError as exc:
        logger.warning("social_queue: failed to read %s: %s", p, exc)
109
+
110
+
111
def _atomic_rewrite(entries: List[Dict[str, Any]], path: Optional[Path] = None) -> None:
    """Replace the whole queue file with ``entries``.

    The entries are serialized as JSONL (keys sorted) into a temporary file
    created next to the destination, which is then moved over the target with
    ``os.replace``. If any step fails the temporary file is removed on a
    best-effort basis and the original exception is re-raised.

    Args:
        entries: the complete list of queue entries to persist.
        path: destination file; defaults to the configured queue path.
    """
    destination = path or _queue_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        prefix=".social_scan_queue.", dir=str(destination.parent)
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.writelines(
                json.dumps(record, sort_keys=True) + "\n" for record in entries
            )
        os.replace(scratch, destination)
    except Exception:
        # Don't leave the scratch file behind; a cleanup failure must not
        # mask the error that got us here.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
127
+
128
+
129
def _append_atomic(entry: Dict[str, Any], path: Optional[Path] = None) -> None:
    """Append ``entry`` to the queue file as one JSON line (keys sorted).

    The parent directory is created if needed; the file is opened in append
    mode, receives a single ``write`` call, and is closed again.

    Args:
        entry: the queue entry to persist.
        path: destination file; defaults to the configured queue path.
    """
    destination = path or _queue_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(entry, sort_keys=True) + "\n"
    with open(destination, "a", encoding="utf-8") as out:
        out.write(serialized)
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # Public API
140
+ # ---------------------------------------------------------------------------
141
+
142
+
143
def enqueue(target: Dict[str, Any], dedupe_hours: int = DEFAULT_DEDUPE_HOURS) -> Optional[str]:
    """Add a scanner target to the queue as a new pending entry.

    Args:
        target: target dict produced by a scanner; must carry a truthy
            ``fingerprint``. The whole dict is stored under ``raw``.
        dedupe_hours: how far back a *drafted* entry with the same
            fingerprint still blocks re-queuing.

    Returns:
        The fingerprint when an entry was appended. ``None`` when the target
        has no fingerprint, or when it is a duplicate: an entry with the same
        fingerprint is already pending, or was drafted inside the window.
        Entries that are ``drafted_failed``, ``expired``, or drafted before
        the window do not block re-queuing.
    """
    fingerprint = (target or {}).get("fingerprint")
    if not fingerprint:
        logger.warning("social_queue.enqueue: target missing fingerprint, skipping")
        return None

    cutoff = datetime.now(timezone.utc) - timedelta(hours=dedupe_hours)

    def _blocks_requeue(existing: Dict[str, Any]) -> bool:
        """True when ``existing`` makes the new target a duplicate."""
        status = existing.get("status")
        if status == PENDING:
            return True
        if status != DRAFTED:
            return False
        # Prefer the draft time; fall back to the queue time.
        stamp = _parse_iso(existing.get("drafted_at") or existing.get("queued_at"))
        return bool(stamp and stamp >= cutoff)

    same_fingerprint = (
        existing
        for existing in _iter_entries()
        if existing.get("fingerprint") == fingerprint
    )
    if any(_blocks_requeue(existing) for existing in same_fingerprint):
        return None

    _append_atomic(
        {
            "queued_at": _now_iso(),
            "platform": target.get("platform", ""),
            "fingerprint": fingerprint,
            "source_id": target.get("source_id", ""),
            "raw": target,
            "status": PENDING,
            "drafted_at": None,
            "draft_id": None,
            "error": None,
        }
    )
    return fingerprint
184
+
185
+
186
def claim_pending(platform: Optional[str] = None, limit: int = 20) -> List[Dict[str, Any]]:
    """Return up to ``limit`` pending entries, oldest first.

    Read-only: queue state is NOT changed. The caller is expected to call
    ``mark_drafted`` or ``mark_failed`` once it has processed an entry.

    Args:
        platform: when truthy, only entries whose ``platform`` equals this
            value are returned.
        limit: maximum number of entries to return. Zero or a negative value
            returns an empty list.

    Returns:
        Pending entries in file (append) order, i.e. FIFO.
    """
    # The size check below runs after an append, so without this guard a
    # non-positive limit would still hand back one entry.
    if limit <= 0:
        return []
    out: List[Dict[str, Any]] = []
    for entry in _iter_entries():
        if entry.get("status") != PENDING:
            continue
        if platform and entry.get("platform") != platform:
            continue
        out.append(entry)
        if len(out) >= limit:
            break
    return out
204
+
205
+
206
def _update_entry(fingerprint: str, mutator) -> bool:
    """Rewrite the newest pending entry for ``fingerprint`` through ``mutator``.

    Args:
        fingerprint: fingerprint of the entry to update.
        mutator: callable taking the entry dict and returning its replacement.

    Returns:
        ``True`` when a pending entry with that fingerprint was found and the
        queue file was rewritten; ``False`` when there was nothing to update.
    """
    entries = list(_iter_entries())
    # Walk backwards so the most recently appended match wins.
    hit = next(
        (
            position
            for position in reversed(range(len(entries)))
            if entries[position].get("fingerprint") == fingerprint
            and entries[position].get("status") == PENDING
        ),
        None,
    )
    if hit is None:
        return False
    entries[hit] = mutator(entries[hit])
    _atomic_rewrite(entries)
    return True
223
+
224
+
225
def mark_drafted(fingerprint: str, draft_id: str) -> bool:
    """Flip the newest pending entry for ``fingerprint`` to ``drafted``.

    Records the draft time and ``draft_id`` (empty string when falsy) and
    clears any stored error. Returns ``True`` when an entry was updated.
    """
    def _apply(record: Dict[str, Any]) -> Dict[str, Any]:
        record.update(
            {
                "status": DRAFTED,
                "drafted_at": _now_iso(),
                "draft_id": draft_id or "",
                "error": None,
            }
        )
        return record

    return _update_entry(fingerprint, _apply)
234
+
235
+
236
def mark_failed(fingerprint: str, error: str) -> bool:
    """Flip the newest pending entry for ``fingerprint`` to ``drafted_failed``.

    Records the attempt time and the error text, truncated to 500 characters
    (empty string when ``error`` is falsy). Returns ``True`` when an entry
    was updated.
    """
    def _apply(record: Dict[str, Any]) -> Dict[str, Any]:
        record.update(
            {
                "status": DRAFTED_FAILED,
                "drafted_at": _now_iso(),
                "error": (error or "")[:500],
            }
        )
        return record

    return _update_entry(fingerprint, _apply)
244
+
245
+
246
def expire_older_than(hours: int = DEFAULT_EXPIRE_HOURS) -> int:
    """Mark pending entries queued more than ``hours`` ago as expired.

    Expired entries stay in the file but no longer match ``claim_pending``,
    which only returns entries whose status is pending. Pending entries whose
    ``queued_at`` cannot be parsed are left untouched. The file is rewritten
    only when at least one entry changed.

    Returns:
        The number of entries flipped to ``expired``.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
    entries = list(_iter_entries())

    stale = []
    for record in entries:
        if record.get("status") != PENDING:
            continue
        queued = _parse_iso(record.get("queued_at"))
        if queued is not None and queued < cutoff:
            stale.append(record)

    # ``stale`` holds references into ``entries``, so mutating here updates
    # the list that gets written back.
    for record in stale:
        record["status"] = EXPIRED
        record["error"] = "expired_after_window"

    if stale:
        _atomic_rewrite(entries)
    return len(stale)
270
+
271
+
272
def queue_stats() -> Dict[str, Any]:
    """Count queue entries per status, overall and per platform.

    Returns:
        ``{"pending", "drafted", "drafted_failed", "expired", "by_platform"}``
        where ``by_platform`` maps each platform name (``"unknown"`` when
        missing) to the same four status counters. Entries with any other
        status are not counted, though their platform still gets a bucket.
    """
    tracked = (PENDING, DRAFTED, DRAFTED_FAILED, EXPIRED)
    totals = dict.fromkeys(tracked, 0)
    by_platform: Dict[str, Dict[str, int]] = {}

    for record in _iter_entries():
        status = record.get("status") or "unknown"
        bucket = by_platform.setdefault(
            record.get("platform") or "unknown", dict.fromkeys(tracked, 0)
        )
        if status in totals:
            totals[status] += 1
            bucket[status] += 1

    return {
        "pending": totals[PENDING],
        "drafted": totals[DRAFTED],
        "drafted_failed": totals[DRAFTED_FAILED],
        "expired": totals[EXPIRED],
        "by_platform": by_platform,
    }
293
+
294
+
295
+ __all__ = [
296
+ "QUEUE_FILE",
297
+ "PENDING",
298
+ "DRAFTED",
299
+ "DRAFTED_FAILED",
300
+ "EXPIRED",
301
+ "enqueue",
302
+ "claim_pending",
303
+ "mark_drafted",
304
+ "mark_failed",
305
+ "expire_older_than",
306
+ "queue_stats",
307
+ ]
@@ -488,8 +488,20 @@ def sync_social_draft(draft: dict):
488
488
  client.table("social_drafts").upsert(row).execute()
489
489
 
490
490
  # ntfy on NEW pending drafts only, dedupe via the WO sent-marker file
491
- # (reused — scoped by draft_id vs wo_id so no collision)
492
- if row["status"] == "pending":
491
+ # (reused — scoped by draft_id vs wo_id so no collision).
492
+ # LED-216 Phase 1: suppress ntfy for capability-drift drafts and the
493
+ # existing rule-blocked / placeholder buckets — the row still lands
494
+ # in Supabase for audit, but the founder is NOT paged on a draft
495
+ # we already know to be unfit for posting.
496
+ _quality = (row.get("quality") or "").lower()
497
+ _suppressed_qualities = {
498
+ "rejected_capability_drift",
499
+ "placeholder",
500
+ }
501
+ if (
502
+ row["status"] == "pending"
503
+ and _quality not in _suppressed_qualities
504
+ ):
493
505
  try:
494
506
  _push_draft_notification(row)
495
507
  except Exception as exc:
@@ -946,27 +946,45 @@ def hot_reload(reason: str = "update") -> Dict[str, Any]:
946
946
  # full subprocess restart. Modules with global state are skipped.
947
947
  reloaded_modules: List[str] = []
948
948
  reload_errors: List[str] = []
949
+ # LED-2071f (2026-04-30): reload LEAVES (modules with no internal
950
+ # `from ai.X import ...` deps) BEFORE leaves' importers, so when an
951
+ # importer re-runs its `from` imports during its own reload, it
952
+ # picks up the freshly-reloaded binding rather than the stale one.
953
+ # Symptom of the prior order: ai.social_target reloaded before
954
+ # ai.social, so social_target's `from ai.social import save_draft,
955
+ # generate_tailored_draft, ...` rebound to the OLD social, then
956
+ # ai.social reloaded but social_target kept stale fn references.
957
+ # Fix: ai.social and ai.deliberation (the leaves) come first;
958
+ # ai.social_target (which imports from social) and ai.loop_engine
959
+ # (which imports from social_target) come after, in dependency
960
+ # order.
949
961
  HOT_RELOADABLE = [
950
- "ai.loop_engine",
951
- "ai.social_target",
952
962
  "ai.social",
963
+ "ai.deliberation", # added 2026-04-09 per LED-805 — CLI stdin fix needed hot reload
953
964
  "ai.reddit_scanner",
954
965
  "ai.ledger_manager",
955
- "ai.deliberation", # added 2026-04-09 per LED-805 — CLI stdin fix needed hot reload
956
966
  "ai.backends.repo_bridge",
957
967
  "ai.backends.tools_infra",
958
968
  "backends.repo_bridge", # alias used by server.py lazy imports
969
+ "ai.social_target", # depends on ai.social
970
+ "ai.loop_engine", # depends on ai.social_target
959
971
  "social", # alias
960
972
  "ai.swarm", # self — reload last
961
973
  ]
962
- for modname in HOT_RELOADABLE:
963
- if modname not in _sys.modules:
964
- continue
965
- try:
966
- importlib.reload(_sys.modules[modname])
967
- reloaded_modules.append(modname)
968
- except Exception as e:
969
- reload_errors.append(f"{modname}: {e}")
974
+ # Two-pass reload: pass 1 establishes the new leaf modules; pass 2
975
+ # forces importers to rebind against the now-reloaded leaves. Cheap
976
+ # (in-process module reload only) but kills the entire stale-binding
977
+ # class of bugs in one go.
978
+ for _pass in range(2):
979
+ for modname in HOT_RELOADABLE:
980
+ if modname not in _sys.modules:
981
+ continue
982
+ try:
983
+ importlib.reload(_sys.modules[modname])
984
+ if _pass == 0:
985
+ reloaded_modules.append(modname)
986
+ except Exception as e:
987
+ reload_errors.append(f"pass{_pass} {modname}: {e}")
970
988
 
971
989
  # 1. Capture current state for transfer
972
990
  state = {
package/gateway/ai/tui.py CHANGED
@@ -32,7 +32,11 @@ from typing import Any, Dict, List, Optional, Tuple
32
32
 
33
33
  # -- Data paths ---------------------------------------------------------------
34
34
 
35
- DELIMIT_HOME = Path.home() / ".delimit"
35
+ # LED-1188: route through the canonical resolver so $DELIMIT_HOME /
36
+ # $DELIMIT_NAMESPACE_ROOT overrides apply uniformly across npm + gateway.
37
+ from .continuity import get_namespace_root # noqa: E402
38
+
39
+ DELIMIT_HOME = get_namespace_root()
36
40
  LEDGER_DIR = DELIMIT_HOME / "ledger"
37
41
  SWARM_DIR = DELIMIT_HOME / "swarm"
38
42
  MEMORY_DIR = DELIMIT_HOME / "memory"
@@ -721,7 +725,7 @@ class GovernanceBar(Static):
721
725
  bar = self.query_one("#gov-bar", Static)
722
726
  ledger_count = len(_load_ledger_items("open", 999))
723
727
  swarm = _load_swarm_status()
724
- mode_file = Path.home() / ".delimit" / "enforcement_mode"
728
+ mode_file = DELIMIT_HOME / "enforcement_mode"
725
729
  mode = mode_file.read_text().strip() if mode_file.exists() else "default"
726
730
 
727
731
  # Notification badge
@@ -0,0 +1,276 @@
1
+ """X engagement ranker (LED-216 Phase 2, Q4 of the 2026-05-02 distribution panel).
2
+
3
+ Filters and orders X (Twitter) candidate posts so we reply to the highest-
4
+ yield originals only. The score formula and the 7-day author dedupe are the
5
+ panel's Q4 conclusion: Codex's age-normalized formula plus Opus's API filters
6
+ (``-is:retweet -is:reply lang:en``) and the founder's anti-spammy posture
7
+ ("don't hammer the same 5 accounts daily").
8
+
9
+ Score formula (per the LED-216 Phase 2 directive):
10
+
11
+ score = (likes + 2 * retweets + 0.5 * replies + 3 * quotes) / max(age_hours, 1)
12
+
13
+ Filter pipeline applied in order:
14
+ 1. ``is_op`` → drop reply-chain targets (we only reply to OPs)
15
+ 2. ``lang == 'en'`` → drop non-English (US/UK builder community is the wedge)
16
+ 3. ``is_retweet == False`` → drop retweets
17
+ 4. dedupe authors we replied to in the last ``replied_authors_window_hours``
18
+ 5. sort by score DESC
19
+
20
+ Tolerant defaults: any field missing on a target is treated as "do not drop"
21
+ so partial Twttr241 payloads still rank instead of being silently filtered.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import logging
27
+ from datetime import datetime, timedelta, timezone
28
+ from pathlib import Path
29
+ from typing import Any, Dict, Iterable, List, Optional, Set
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ SOCIAL_LOG = Path.home() / ".delimit" / "social_log.jsonl"
35
+
36
+ DEFAULT_WINDOW_HOURS = 24 * 7 # 7 days, per founder's anti-spammy directive
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Helpers
41
+ # ---------------------------------------------------------------------------
42
+
43
+
44
def _social_log_path() -> Path:
    """Return the social log location.

    A function rather than a direct ``SOCIAL_LOG`` read, so tests can
    monkeypatch where the log lives.
    """
    return SOCIAL_LOG
47
+
48
+
49
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    Args:
        value: timestamp text, optionally ending in ``Z``. Values come from
            JSON payloads, so they are not guaranteed to be strings.

    Returns:
        The parsed ``datetime`` (naive inputs are assumed to be UTC), or
        ``None`` when ``value`` is empty, not a string, or not parsable.
    """
    # Non-string input (e.g. an epoch integer) has no ``.replace`` and would
    # raise AttributeError; report it as unparsable instead.
    if not value or not isinstance(value, str):
        return None
    try:
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
59
+
60
+
61
def _parse_twitter_created_at(value: Optional[str]) -> Optional[datetime]:
    """Parse a tweet ``created_at`` value into a ``datetime``.

    ISO 8601 is tried first (for caches that already normalized the value),
    then the Twitter-style format, e.g. ``Wed Apr 30 14:23:55 +0000 2026``.

    Returns:
        The parsed ``datetime``, or ``None`` when ``value`` is empty, not a
        string, or matches neither format.
    """
    # Payload fields are not guaranteed to be strings; a non-string value
    # would otherwise blow up inside the string-based parsers below.
    if not value or not isinstance(value, str):
        return None
    iso = _parse_iso(value)
    if iso is not None:
        return iso
    try:
        return datetime.strptime(value, "%a %b %d %H:%M:%S %z %Y")
    except (TypeError, ValueError):
        return None
76
+
77
+
78
def _normalize_handle(handle: Optional[str]) -> str:
    """Normalize an X handle for comparison.

    Surrounding whitespace is stripped, the text is lower-cased, and one
    leading ``@`` is removed.

    Returns:
        The normalized handle, or ``""`` when ``handle`` is empty or not a
        string (so a malformed author field never matches and never raises).
    """
    if not handle or not isinstance(handle, str):
        return ""
    h = handle.strip().lower()
    if h.startswith("@"):
        h = h[1:]
    return h
85
+
86
+
87
def _replied_authors_within(window_hours: int, log_path: Optional[Path] = None) -> Set[str]:
    """Collect the handles we replied to on Twitter inside the window.

    Reads the social log (JSONL) and considers entries whose ``platform`` is
    ``"twitter"`` and whose ``ts`` falls within the last ``window_hours``.
    The author is taken from an explicit ``replying_to_user`` field when
    present; otherwise from a leading ``@handle`` token in the post text.
    Both are best-effort, conservative signals: it is better to skip a
    borderline candidate than to reply to the same account repeatedly.

    Malformed input is tolerated: lines that are not valid JSON, lines that
    are not JSON objects, and fields of an unexpected type are skipped rather
    than raising.

    Args:
        window_hours: size of the look-back window in hours.
        log_path: log file to read; defaults to the configured social log.

    Returns:
        Lower-cased handles without a leading ``@``. Empty when the log does
        not exist; a read error is logged and whatever was collected so far
        is returned.
    """
    p = log_path or _social_log_path()
    if not p.exists():
        return set()
    cutoff = datetime.now(timezone.utc) - timedelta(hours=window_hours)
    authors: Set[str] = set()
    try:
        with open(p, "r", encoding="utf-8") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except ValueError:  # JSONDecodeError is a ValueError
                    continue
                # Valid JSON that is not an object has no fields to inspect.
                if not isinstance(entry, dict):
                    continue
                if entry.get("platform") != "twitter":
                    continue
                raw_ts = entry.get("ts")
                ts = _parse_iso(raw_ts) if isinstance(raw_ts, str) else None
                if ts is None or ts < cutoff:
                    continue
                # Preferred: explicit replying_to_user field.
                raw_explicit = entry.get("replying_to_user")
                explicit = (
                    _normalize_handle(raw_explicit)
                    if isinstance(raw_explicit, str)
                    else ""
                )
                if explicit:
                    authors.add(explicit)
                    continue
                # Fallback: leading @handle in the post text, which is how
                # reply text starts when the client adds the reply prefix.
                raw_text = entry.get("text")
                text = raw_text.lstrip() if isinstance(raw_text, str) else ""
                if text.startswith("@"):
                    token = text.split()[0][1:]
                    token = "".join(c for c in token if c.isalnum() or c == "_").lower()
                    if token:
                        authors.add(token)
    except OSError as exc:
        logger.warning("x_ranker: failed to read %s: %s", p, exc)
    return authors
135
+
136
+
137
def _coerce_int(value: Any) -> int:
    """Convert ``value`` to ``int``, returning ``0`` when that is impossible.

    Falsy values (``None``, ``""``, ``0``) map to ``0``. Anything ``int()``
    rejects also maps to ``0``, including non-finite floats: ``int`` raises
    ``OverflowError`` for infinity and ``ValueError`` for NaN.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0
142
+
143
+
144
def _age_hours(target: Dict[str, Any]) -> float:
    """Return the target's age in hours, never less than ``1.0``.

    The timestamp is read from ``created_at`` (falling back to ``created``)
    and may be Twitter-style or ISO 8601. A missing or unparsable timestamp
    yields ``1.0`` so the candidate stays scoreable and the score's divisor
    stays sane.
    """
    stamp = target.get("created_at") or target.get("created") or ""
    created = _parse_twitter_created_at(stamp) or _parse_iso(stamp)
    if created is None:
        return 1.0
    elapsed = datetime.now(timezone.utc) - created
    # Clamp: posts younger than an hour (or dated in the future) count as 1h.
    return max(elapsed.total_seconds() / 3600.0, 1.0)
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Core API
163
+ # ---------------------------------------------------------------------------
164
+
165
+
166
def score_target(target: Dict[str, Any]) -> float:
    """Compute the age-normalized engagement score of one X candidate.

    score = (likes + 2*retweets + 0.5*replies + 3*quotes) / max(age_hours, 1)

    Each counter is read from its short key first, then from the
    Twitter-style ``*_count`` key (or vice versa for replies and quotes), and
    coerced to an integer.
    """
    def _metric(primary: str, fallback: str) -> int:
        return _coerce_int(target.get(primary) or target.get(fallback))

    like_count = _metric("likes", "favorite_count")
    retweet_count = _metric("retweets", "retweet_count")
    reply_count = _metric("reply_count", "replies")
    quote_count = _metric("quote_count", "quotes")
    hours_old = _age_hours(target)

    weighted = like_count + 2 * retweet_count + 0.5 * reply_count + 3 * quote_count
    return weighted / max(hours_old, 1.0)
178
+
179
+
180
def _is_op(target: Dict[str, Any]) -> bool:
    """Tell whether ``target`` looks like an original post, not a reply.

    Missing flags count as "original". A reply signal (``is_reply`` exactly
    ``True``, or a truthy ``in_reply_to_status_id_str`` /
    ``in_reply_to_status_id``) always wins, even when ``is_op`` is ``True``.
    Otherwise only an explicit ``is_op=False`` marks the target as a reply.
    """
    flagged_reply = target.get("is_reply") is True
    parent_id = (
        target.get("in_reply_to_status_id_str")
        or target.get("in_reply_to_status_id")
    )
    if flagged_reply or parent_id:
        return False
    return target.get("is_op") is not False
197
+
198
+
199
def _is_english(target: Dict[str, Any]) -> bool:
    """Tell whether ``target`` is tagged as English.

    A missing ``lang`` (``None``) is accepted, because the upstream payload
    does not always populate it. Otherwise the tag, compared
    case-insensitively, must be ``en``, ``en-us`` or ``en-gb``.
    """
    tag = target.get("lang")
    if tag is None:
        return True
    accepted = {"en", "en-us", "en-gb"}
    return str(tag).lower() in accepted
205
+
206
+
207
def _is_retweet(target: Dict[str, Any]) -> bool:
    """Tell whether ``target`` is a retweet.

    True when ``is_retweet`` is exactly ``True``, or when the post text
    (``content_snippet``, else ``text``) starts with ``RT @`` after leading
    whitespace is removed.
    """
    if target.get("is_retweet") is True:
        return True
    body = target.get("content_snippet") or target.get("text") or ""
    return body.lstrip().startswith("RT @")
214
+
215
+
216
def rank_x_targets(
    targets: Iterable[Dict[str, Any]],
    replied_authors_window_hours: int = DEFAULT_WINDOW_HOURS,
    replied_authors: Optional[Set[str]] = None,
) -> List[Dict[str, Any]]:
    """Filter X candidates and order them by engagement score.

    Args:
        targets: candidate target dicts. Non-dict items and items with a
            truthy ``error`` are ignored.
        replied_authors_window_hours: look-back window used when the set of
            already-replied authors is read from the social log.
        replied_authors: explicit set of handles to skip. Each handle is
            normalized before comparison. When ``None`` the set is read from
            the social log instead.

    Returns:
        A new list of shallow copies, highest score first, each carrying a
        ``_rank_score`` key (score rounded to 4 decimals). Candidates that
        are replies, non-English, retweets, or written by an already-replied
        author are dropped entirely, so callers can slice the first N.
    """
    if replied_authors is None:
        skip_authors = _replied_authors_within(replied_authors_window_hours)
    else:
        skip_authors = {_normalize_handle(name) for name in replied_authors}

    def _keep(candidate: Any) -> bool:
        """Apply the filter pipeline, in order, to one candidate."""
        if not isinstance(candidate, dict) or candidate.get("error"):
            return False
        # 1. originals only, 2. English only, 3. no retweets
        if not _is_op(candidate):
            return False
        if not _is_english(candidate):
            return False
        if _is_retweet(candidate):
            return False
        # 4. skip authors we already replied to inside the window
        author = _normalize_handle(candidate.get("author"))
        return not (author and author in skip_authors)

    ranked: List[Dict[str, Any]] = [
        {**candidate, "_rank_score": round(score_target(candidate), 4)}
        for candidate in (targets or [])
        if _keep(candidate)
    ]
    # 5. highest score first; list.sort is stable, so ties keep input order
    ranked.sort(key=lambda item: item.get("_rank_score", 0.0), reverse=True)
    return ranked
270
+
271
+
272
+ __all__ = [
273
+ "DEFAULT_WINDOW_HOURS",
274
+ "score_target",
275
+ "rank_x_targets",
276
+ ]