delimit-cli 4.5.1 → 4.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/README.md +2 -2
- package/bin/delimit-cli.js +109 -24
- package/gateway/ai/content_engine.py +3 -4
- package/gateway/ai/inbox_classifier.py +215 -0
- package/gateway/ai/integrations/opensage_wrapper.py +4 -1
- package/gateway/ai/ledger_manager.py +218 -38
- package/gateway/ai/license.py +26 -0
- package/gateway/ai/notify.py +68 -3
- package/gateway/ai/reddit_proxy.py +93 -15
- package/gateway/ai/reddit_scanner.py +36 -18
- package/gateway/ai/server.py +128 -6
- package/gateway/ai/social_capability/__init__.py +6 -0
- package/gateway/ai/social_capability/capability_validator.py +273 -0
- package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
- package/gateway/ai/social_queue.py +307 -0
- package/gateway/ai/supabase_sync.py +14 -2
- package/gateway/ai/swarm.py +29 -11
- package/gateway/ai/tui.py +6 -2
- package/gateway/ai/x_ranker.py +276 -0
- package/lib/attest-mcp.js +487 -0
- package/lib/attest-telemetry.js +48 -0
- package/lib/delimit-home.js +35 -0
- package/lib/delimit-template.js +14 -0
- package/package.json +8 -2
- package/scripts/postinstall.js +89 -40
- package/gateway/ai/content_grounding/__init__.py +0 -98
- package/gateway/ai/content_grounding/build.py +0 -350
- package/gateway/ai/content_grounding/consume.py +0 -280
- package/gateway/ai/content_grounding/features.py +0 -218
- package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
- package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
- package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
- package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
- package/gateway/ai/content_grounding/schemas.py +0 -276
- package/gateway/ai/content_grounding/telemetry.py +0 -221
- package/gateway/ai/inbox_drafts/__init__.py +0 -61
- package/gateway/ai/inbox_drafts/registry.py +0 -412
- package/gateway/ai/inbox_drafts/schema.py +0 -374
- package/gateway/ai/inbox_executor.py +0 -565
|
@@ -144,7 +144,54 @@ def _register_venture(info: Dict[str, str]):
|
|
|
144
144
|
VENTURES_FILE.write_text(json.dumps(ventures, indent=2))
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
|
|
147
|
+
# LED-1188 / Plan-C: env-aware home so DELIMIT_HOME / DELIMIT_NAMESPACE_ROOT
|
|
148
|
+
# overrides apply to the ledger paths same as everywhere else. Falls back
|
|
149
|
+
# to ~/.delimit when neither env var is set (back-compat with v4.5.1 and
|
|
150
|
+
# all prior versions).
|
|
151
|
+
def _delimit_home() -> Path:
    """Return the root directory for Delimit state, honoring env overrides.

    ``DELIMIT_HOME`` is consulted first, then ``DELIMIT_NAMESPACE_ROOT``; the
    first one holding a non-blank value wins. When neither is set the
    historical default ``~/.delimit`` is returned (back-compat with v4.5.1
    and earlier).

    Returns:
        The resolved home directory. A leading ``~`` in an override is
        expanded to the user's home directory, because values that come from
        ``.env`` files or service units never pass through a shell that
        would expand it.
    """
    for env_key in ("DELIMIT_HOME", "DELIMIT_NAMESPACE_ROOT"):
        val = os.environ.get(env_key, "").strip()
        if not val:
            continue
        override = Path(val)
        try:
            # Without expanduser() a value such as "~/work/.delimit" would be
            # treated as a literal "~" directory relative to the CWD.
            return override.expanduser()
        except RuntimeError:
            # The home directory could not be determined (e.g. "~nosuchuser");
            # fall back to the literal value rather than failing at import.
            return override
    return Path.home() / ".delimit"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
CENTRAL_LEDGER_DIR = _delimit_home() / "ledger"
|
|
160
|
+
LEDGER_V2_DIR = _delimit_home() / "ledger-v2"
|
|
161
|
+
|
|
162
|
+
# LED-1188 D3 (deliberation att_f86e1f51110e8ed6 follow-up, 2026-04-28):
|
|
163
|
+
# Plan-C migration partitions the central ledger into per-venture sub-ledgers
|
|
164
|
+
# under ledger-v2/<slug>/. The resolver below auto-detects which layout is
|
|
165
|
+
# present and reads from it. Slugs match the migration script's canonical
|
|
166
|
+
# names so a v4.5.2 install picks up an existing Plan-C-staged tree without
|
|
167
|
+
# requiring the swap to happen first.
|
|
168
|
+
# Alias (repo / package / directory name) -> canonical sub-ledger slug.
_VENTURE_CANONICAL = {
    **dict.fromkeys(
        (
            "delimit-mcp",
            "delimit-action",
            "delimit-ui",
            "delimit-cli",      # npm package name
            "delimit-gateway",  # gateway repo
            ".delimit",
        ),
        "delimit",
    ),
    "wirereport": "wire-report",
    "stakeone": "stake-one",
}

# Slugs that are accepted as per-venture sub-ledger directory names.
_KNOWN_VENTURE_SLUGS = {
    "delimit",
    "wire-report",
    "domainvested",
    "livetube",
    "stake-one",
    "root",
    "unsorted",
}


def _canonical_venture_slug(name: str) -> Optional[str]:
    """Translate a detected venture name into its canonical sub-ledger slug.

    The name is lower-cased and stripped, run through the alias table
    ``_VENTURE_CANONICAL``, and then checked against
    ``_KNOWN_VENTURE_SLUGS``.

    Returns:
        The canonical slug, or ``None`` when *name* is empty or does not
        resolve to a known venture. Callers treat ``None`` as "no
        per-venture sub-ledger, use the central layout."
    """
    if not name:
        return None
    key = name.lower().strip()
    slug = _VENTURE_CANONICAL.get(key, key)
    if slug not in _KNOWN_VENTURE_SLUGS:
        return None
    return slug
|
|
148
195
|
|
|
149
196
|
|
|
150
197
|
def _detect_model() -> str:
|
|
@@ -182,15 +229,45 @@ def _detect_model() -> str:
|
|
|
182
229
|
|
|
183
230
|
|
|
184
231
|
def _project_ledger_dir(project_path: str = ".") -> Path:
    """Pick the ledger directory for a project, auto-detecting Plan-C layouts.

    Lookup order (LED-1188 D3, deliberation att_f86e1f51110e8ed6):

    * If neither ``LEDGER_V2_DIR`` nor ``CENTRAL_LEDGER_DIR / "delimit"``
      exists there is no Plan-C tree on disk, so the legacy single-file
      layout in ``CENTRAL_LEDGER_DIR`` is returned immediately.
    * Otherwise the venture is detected from *project_path* and mapped to a
      canonical slug; an unknown venture falls back to ``CENTRAL_LEDGER_DIR``.
    * ``LEDGER_V2_DIR / <slug>`` is used when it holds ``operations.jsonl``
      (Plan-C staged but not yet swapped).
    * ``CENTRAL_LEDGER_DIR / <slug>`` is used when it holds
      ``operations.jsonl`` (Plan-C swapped).
    * Anything else falls back to ``CENTRAL_LEDGER_DIR``.

    Cross-model handoff fix (still enforced): Codex and Gemini were writing
    to ``$PWD/.delimit/ledger/``, which fragmented the ledger. The central
    tree (or its Plan-C-partitioned form) remains the single source of
    truth; per-project ``.delimit/`` directories are for policies and config
    only.
    """
    # Fast path for installs without any Plan-C tree on disk.
    plan_c_present = LEDGER_V2_DIR.exists() or (CENTRAL_LEDGER_DIR / "delimit").exists()
    if not plan_c_present:
        return CENTRAL_LEDGER_DIR

    venture_name = _detect_venture(project_path).get("name", "")
    slug = _canonical_venture_slug(venture_name)
    if slug is None:
        return CENTRAL_LEDGER_DIR

    # Staged tree (ledger-v2/<slug>/) takes precedence over the swapped tree
    # (ledger/<slug>/).
    for root in (LEDGER_V2_DIR, CENTRAL_LEDGER_DIR):
        sub_ledger = root / slug
        if (sub_ledger / "operations.jsonl").exists():
            return sub_ledger

    # No partitioned tree for this venture: use the central legacy layout
    # (operations.jsonl + strategy.jsonl directly in ledger/).
    return CENTRAL_LEDGER_DIR
|
|
195
272
|
|
|
196
273
|
|
|
@@ -235,6 +312,90 @@ def _append(path: Path, entry: Dict) -> Dict:
|
|
|
235
312
|
return entry
|
|
236
313
|
|
|
237
314
|
|
|
315
|
+
# ── LED-877 signal guard ─────────────────────────────────────────────
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# Sources that originate from sensed observations (social/strategy scans).
|
|
319
|
+
# Centralized so the guard logic is in one place even if more prefixes are
|
|
320
|
+
# added later (e.g. github_sense, reddit_sense).
|
|
321
|
+
_SENSED_SOURCE_PREFIXES = ("social_scan", "social_strategy")


def _check_source_is_ledger_item(
    source: str,
    *,
    purpose: str = "promote_to_ledger",
    title: str = "",
    ledger: str = "",
) -> None:
    """LED-877 signal guard: keep sensed observations out of the ledger.

    Sensed observations (``source='social_scan:...'``) MUST NOT land in the
    ledger by default; they belong in the intel signal store. A source is
    "sensed" when, after stripping and lower-casing, it starts with one of
    ``_SENSED_SOURCE_PREFIXES``. Any other source passes untouched.

    LED-216 Phase 1 split: callers declare *why* they are checking.

    ``purpose='promote_to_ledger'`` (default, original strict behavior)
        Used by ``add_item`` and any code path that actually writes a ledger
        row. Rejects a sensed source unless the
        ``_DELIMIT_SIGNAL_PROMOTED_BY`` env var is set (the explicit
        promote-to-ledger bypass).

    ``purpose='draft_only'``
        Used by code paths that produce a reply draft from a sensed
        observation without promoting it. The guard is a no-op here: it
        returns before the bypass or the guard mode are consulted, so
        nothing is raised and nothing is shadow-logged.

    Any other ``purpose`` value is handled like ``'promote_to_ledger'`` so a
    typo cannot weaken the guard.

    On the strict path, ``DELIMIT_SIGNAL_GUARD=shadow`` downgrades the
    rejection to a best-effort JSONL record in
    ``~/.delimit/logs/signal_guard_shadow.jsonl`` (LED-877 rollout fallback).

    Args:
        source: Origin tag of the item being checked; ``None``/empty is
            treated as not sensed.
        purpose: Why the caller is checking (see above).
        title: Item title, recorded only in the shadow log.
        ledger: Target ledger name, recorded only in the shadow log.

    Raises:
        ValueError: A sensed source hit the strict path with no bypass set
            and the guard mode is not ``shadow``.
    """
    normalized = (source or "").strip().lower()
    if not normalized.startswith(_SENSED_SOURCE_PREFIXES):
        return  # Not a sensed source; nothing to guard against.

    if purpose == "draft_only":
        # Drafts may legitimately reference a sensed observation. The guard
        # exists to prevent ledger writes, not draft generation.
        return

    # Every other purpose, known or not, takes the strict path below.

    if os.environ.get("_DELIMIT_SIGNAL_PROMOTED_BY", ""):
        return  # Explicit promote_to_ledger path; bypass authorized.

    msg = (
        f"LED-877 guard: source={source!r} is a sensed observation, not "
        f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
        f"Promote explicitly via promote_to_ledger(signal_id=...)."
    )

    guard_mode = os.environ.get("DELIMIT_SIGNAL_GUARD", "enforce").lower()
    if guard_mode != "shadow":
        raise ValueError(msg)

    # Shadow mode: record what would have been rejected, never raise.
    try:
        shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
        shadow_log.parent.mkdir(parents=True, exist_ok=True)
        record = {
            # The trailing "Z" denotes UTC, so format from gmtime(); the
            # default for strftime() is local time.
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "title": title,
            "source": source,
            "ledger": ledger,
            "purpose": purpose,
            "msg": msg,
        }
        with shadow_log.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(record) + "\n")
    except Exception:
        # Deliberately best-effort: a failure to write the shadow log must
        # not turn shadow mode into a hard failure for the caller.
        pass
|
|
397
|
+
|
|
398
|
+
|
|
238
399
|
def add_item(
|
|
239
400
|
title: str,
|
|
240
401
|
ledger: str = "ops",
|
|
@@ -259,44 +420,63 @@ def add_item(
|
|
|
259
420
|
observations cannot land in the ledger. Observations belong in the intel
|
|
260
421
|
signal store (ai/sensing/signal_store.py). Bypass via env var for the
|
|
261
422
|
promote_to_ledger path: _DELIMIT_SIGNAL_PROMOTED_BY=<who>.
|
|
423
|
+
|
|
424
|
+
LED-216 Phase 1: the guard is now reusable via
|
|
425
|
+
``_check_source_is_ledger_item(..., purpose='draft_only')`` for code
|
|
426
|
+
paths that produce reply drafts from sensed observations without
|
|
427
|
+
promoting the underlying signal to the ledger.
|
|
262
428
|
"""
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
f"LED-877 guard: source={source!r} is a sensed observation, not "
|
|
270
|
-
f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
|
|
271
|
-
f"Promote explicitly via promote_to_ledger(signal_id=...)."
|
|
272
|
-
)
|
|
273
|
-
if _guard_mode == "shadow":
|
|
274
|
-
try:
|
|
275
|
-
_shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
|
|
276
|
-
_shadow_log.parent.mkdir(parents=True, exist_ok=True)
|
|
277
|
-
with _shadow_log.open("a") as _f:
|
|
278
|
-
_f.write(json.dumps({
|
|
279
|
-
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
280
|
-
"title": title,
|
|
281
|
-
"source": source,
|
|
282
|
-
"ledger": ledger,
|
|
283
|
-
"msg": msg,
|
|
284
|
-
}) + "\n")
|
|
285
|
-
except Exception:
|
|
286
|
-
pass
|
|
287
|
-
# fall through
|
|
288
|
-
else:
|
|
289
|
-
raise ValueError(msg)
|
|
429
|
+
_check_source_is_ledger_item(
|
|
430
|
+
source,
|
|
431
|
+
purpose="promote_to_ledger",
|
|
432
|
+
title=title,
|
|
433
|
+
ledger=ledger,
|
|
434
|
+
)
|
|
290
435
|
|
|
291
436
|
_ensure(project_path)
|
|
292
437
|
venture = _detect_venture(project_path)
|
|
293
438
|
ledger_dir = _project_ledger_dir(project_path)
|
|
294
439
|
path = ledger_dir / ("strategy.jsonl" if ledger == "strategy" else "operations.jsonl")
|
|
295
440
|
|
|
441
|
+
# LED-824: ID-collision fix. The Plan-C resolver routes delimit-context
|
|
442
|
+
# queries to ledger-v2/<slug>/. Per-venture ID counters used to scan
|
|
443
|
+
# only the active sub-ledger, so newly-created items could collide with
|
|
444
|
+
# IDs already used in the legacy CENTRAL_LEDGER_DIR root files. Now we
|
|
445
|
+
# union all known IDs across (a) the resolved sub-ledger AND (b) every
|
|
446
|
+
# peer sub-ledger AND (c) the legacy root, then pick the next free.
|
|
296
447
|
items = _read_ledger(path)
|
|
297
448
|
prefix = "STR" if ledger == "strategy" else "LED"
|
|
298
|
-
existing_ids =
|
|
299
|
-
|
|
449
|
+
existing_ids = {i.get("id", "") for i in items if i.get("type") != "update"}
|
|
450
|
+
|
|
451
|
+
# Union with all peer files in ledger-v2/* and the legacy root files,
|
|
452
|
+
# for both strategy and operations ledgers (an LED-N could collide
|
|
453
|
+
# whether it lives in operations or strategy in any sub-ledger).
|
|
454
|
+
filename = "strategy.jsonl" if ledger == "strategy" else "operations.jsonl"
|
|
455
|
+
candidate_paths: list[Path] = []
|
|
456
|
+
if LEDGER_V2_DIR.exists():
|
|
457
|
+
for sub in LEDGER_V2_DIR.iterdir():
|
|
458
|
+
if sub.is_dir():
|
|
459
|
+
candidate_paths.append(sub / filename)
|
|
460
|
+
candidate_paths.append(CENTRAL_LEDGER_DIR / filename)
|
|
461
|
+
|
|
462
|
+
for cand in candidate_paths:
|
|
463
|
+
if cand == path:
|
|
464
|
+
continue # already scanned
|
|
465
|
+
if not cand.exists():
|
|
466
|
+
continue
|
|
467
|
+
try:
|
|
468
|
+
for entry in _read_ledger(cand):
|
|
469
|
+
if entry.get("type") == "update":
|
|
470
|
+
continue
|
|
471
|
+
eid = entry.get("id", "")
|
|
472
|
+
if eid:
|
|
473
|
+
existing_ids.add(eid)
|
|
474
|
+
except Exception:
|
|
475
|
+
# Best-effort: a malformed peer file shouldn't block id assignment
|
|
476
|
+
continue
|
|
477
|
+
|
|
478
|
+
# Walk forward from len()+1 until we find a non-colliding slot.
|
|
479
|
+
num = len(items) + 1
|
|
300
480
|
while f"{prefix}-{num:03d}" in existing_ids:
|
|
301
481
|
num += 1
|
|
302
482
|
item_id = f"{prefix}-{num:03d}"
|
package/gateway/ai/license.py
CHANGED
|
@@ -227,3 +227,29 @@ except ImportError:
|
|
|
227
227
|
LICENSE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
228
228
|
LICENSE_FILE.write_text(json.dumps(license_data, indent=2))
|
|
229
229
|
return {"status": "activated", "tier": "pro", "message": "Activated (offline fallback). Will validate on next network access."}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ─── LED-2060 (P1): test-mode license bypass ─────────────────────────────
|
|
233
|
+
# tests/conftest.py sets DELIMIT_TEST_MODE=1 at session start. Without this
|
|
234
|
+
# wrapper, every test that exercises a Pro tool got back a premium_required
|
|
235
|
+
# error and asserted-against-the-wrong-shape, blocking CI on every PR.
|
|
236
|
+
# Bypass is scoped: only active when the env var is explicitly set, only
|
|
237
|
+
# returns None (the "no gate" sentinel), and wraps both compiled-binary
|
|
238
|
+
# and fallback paths. Customers never hit this path because their
|
|
239
|
+
# environments don't set DELIMIT_TEST_MODE.
|
|
240
|
+
import os as _os
|
|
241
|
+
|
|
242
|
+
_original_require_premium = require_premium # type: ignore[has-type]
|
|
243
|
+
_original_is_premium = is_premium # type: ignore[has-type]
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def require_premium(tool_name: str):  # type: ignore[no-redef]
    """Apply the premium gate for *tool_name*, unless test mode is active.

    When the ``DELIMIT_TEST_MODE`` environment variable is exactly ``"1"``
    the gate is skipped and ``None`` (the "no gate" sentinel) is returned.
    Otherwise the call is forwarded unchanged to the implementation captured
    in ``_original_require_premium``.
    """
    in_test_mode = _os.environ.get("DELIMIT_TEST_MODE") == "1"
    return None if in_test_mode else _original_require_premium(tool_name)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def is_premium() -> bool:  # type: ignore[no-redef]
    """Report premium status, treating test mode as premium.

    Returns ``True`` when the ``DELIMIT_TEST_MODE`` environment variable is
    exactly ``"1"``; otherwise returns whatever the implementation captured
    in ``_original_is_premium`` reports.
    """
    in_test_mode = _os.environ.get("DELIMIT_TEST_MODE") == "1"
    return True if in_test_mode else _original_is_premium()
|
package/gateway/ai/notify.py
CHANGED
|
@@ -158,6 +158,30 @@ def _record_notification(entry: Dict[str, Any]) -> None:
|
|
|
158
158
|
logger.warning("Failed to record notification: %s", e)
|
|
159
159
|
|
|
160
160
|
|
|
161
|
+
_QUARANTINE_FILE = Path.home() / ".delimit" / "notifications_quarantine.jsonl"


def _quarantine_record(entry: Dict[str, Any]) -> None:
    """Append a suppressed notification to the quarantine audit log.

    Called for notifications blocked by the test-mode / skip-marker guard in
    send_notification(). The would-be message is NOT delivered; the JSONL
    file at ``_QUARANTINE_FILE`` exists for audit only.

    Added 2026-05-01 after gateway pytest runs were repeatedly leaking
    [Test] / [Test Subject] / [DELIMIT_TEST_MODE=1 skipped] emails into the
    founder's real inbox via test paths that called send_notification
    without stubbing.

    Args:
        entry: Details of the suppressed notification. It is copied, not
            mutated; a UTC ISO-8601 ``ts`` key is added to the copy
            (replacing any ``ts`` already present).

    This function never raises for logging problems: filesystem errors and
    entries that cannot be serialized are swallowed so the quarantine log can
    never crash the caller.
    """
    import datetime as _dt

    try:
        stamped = {**entry, "ts": _dt.datetime.now(_dt.timezone.utc).isoformat()}
        # default=str keeps the audit line even when a value (e.g. a set of
        # recipients or a Path) is not natively JSON-serializable.
        line = json.dumps(stamped, default=str)
        _QUARANTINE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(_QUARANTINE_FILE, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except (OSError, TypeError, ValueError):
        # OSError: filesystem failure. TypeError/ValueError: entry is not a
        # mapping, has unsupported keys, or contains a circular reference.
        # Quarantine log failure must not crash the caller.
        pass
|
|
183
|
+
|
|
184
|
+
|
|
161
185
|
def record_owner_action(entry: Dict[str, Any]) -> None:
|
|
162
186
|
"""Append an owner-action record for dashboard and async fanout."""
|
|
163
187
|
try:
|
|
@@ -1041,9 +1065,17 @@ def _enforce_email_protocol(subject: str, message: str, event_type: str) -> tupl
|
|
|
1041
1065
|
"""Validate and fix email against the protocol. Returns (subject, message, warnings)."""
|
|
1042
1066
|
warnings = []
|
|
1043
1067
|
|
|
1044
|
-
# 1. Subject must have
|
|
1045
|
-
|
|
1046
|
-
|
|
1068
|
+
# 1. Subject must have SOME bracket prefix (e.g. [DONE], [POSTED], [FIX])
|
|
1069
|
+
# so the founder can triage on mobile.
|
|
1070
|
+
#
|
|
1071
|
+
# Founder-tone fix 2026-04-28: previously the validator hard-rejected any
|
|
1072
|
+
# bracket prefix not in _VALID_SUBJECT_PREFIXES and injected [INFO] in
|
|
1073
|
+
# front, producing subjects like "[INFO] [DONE] LED-2056 fixed". The
|
|
1074
|
+
# injected prefix overrode the caller's intent and bloated the subject.
|
|
1075
|
+
# Now any `[WORD]` prefix (uppercase short tag) is accepted as-is, and
|
|
1076
|
+
# we only inject when there's no bracket at all.
|
|
1077
|
+
_has_any_bracket_prefix = bool(_re.match(r"^\[[A-Z][A-Z0-9_-]{0,15}\]\s", subject))
|
|
1078
|
+
if not _has_any_bracket_prefix:
|
|
1047
1079
|
# LED-969: customer-facing emails should not get bracket prefixes.
|
|
1048
1080
|
# Any event_type starting with "customer_" is external-facing and
|
|
1049
1081
|
# the subject should be sent as-is (clean, professional).
|
|
@@ -1135,6 +1167,39 @@ def send_notification(
|
|
|
1135
1167
|
if not message:
|
|
1136
1168
|
return {"error": "message is required"}
|
|
1137
1169
|
|
|
1170
|
+
# ── Contaminated-content guard ────────────────────────────────────
|
|
1171
|
+
# Every gateway pytest run was spamming the founder's real inbox via
|
|
1172
|
+
# tests that called send_notification without stubbing SMTP. Two
|
|
1173
|
+
# failure modes observed (2026-05-01):
|
|
1174
|
+
# 1. Bare test invocations (subject="Test", message="test")
|
|
1175
|
+
# 2. Social drafts where _call_model returned the
|
|
1176
|
+
# "[X skipped under DELIMIT_TEST_MODE=1 ...]" sentinel and the
|
|
1177
|
+
# sentinel string ended up as the draft body.
|
|
1178
|
+
# Either is a noise/leak event. Refuse to send; log to a quarantine
|
|
1179
|
+
# JSONL so the would-be content is auditable.
|
|
1180
|
+
#
|
|
1181
|
+
# Surgical match — only on the specific leaked shapes. Tests that
|
|
1182
|
+
# correctly mock smtplib.SMTP keep working (their mock fires inside
|
|
1183
|
+
# send_email, after this guard, and returns a fake delivered=True).
|
|
1184
|
+
if channel in ("email", "webhook", "slack", "telegram"):
|
|
1185
|
+
body = message or ""
|
|
1186
|
+
subj = subject or ""
|
|
1187
|
+
leak_match = (
|
|
1188
|
+
"skipped under DELIMIT_TEST_MODE" in body
|
|
1189
|
+
or "DELIMIT_TEST_MODE=1" in body
|
|
1190
|
+
or (subj.strip().lower() == "test" and body.strip().lower() == "test")
|
|
1191
|
+
or (subj.strip().lower() == "test subject" and body.strip().lower() == "test body")
|
|
1192
|
+
)
|
|
1193
|
+
if leak_match:
|
|
1194
|
+
_quarantine_record({
|
|
1195
|
+
"reason": "leaked_shape",
|
|
1196
|
+
"channel": channel,
|
|
1197
|
+
"subject": subj[:100],
|
|
1198
|
+
"event_type": event_type,
|
|
1199
|
+
"to": to,
|
|
1200
|
+
})
|
|
1201
|
+
return {"skipped": "leaked shape detected — not sent (audit: ~/.delimit/notifications_quarantine.jsonl)"}
|
|
1202
|
+
|
|
1138
1203
|
# Enforce email protocol for all email notifications
|
|
1139
1204
|
protocol_warnings = []
|
|
1140
1205
|
if channel == "email":
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
+
import time
|
|
4
5
|
import urllib.parse
|
|
5
6
|
import urllib.request
|
|
6
7
|
from pathlib import Path
|
|
@@ -8,6 +9,62 @@ from typing import Any, Dict, List, Optional
|
|
|
8
9
|
|
|
9
10
|
logger = logging.getLogger("delimit.ai.reddit_proxy")
|
|
10
11
|
|
|
12
|
+
# LED-2068: freshness ceiling. PullPush stopped ingesting around 2025-05-19;
|
|
13
|
+
# the residential proxy gets 403 from Reddit on datacenter IPs; direct fetch
|
|
14
|
+
# is blocked. ALL three tiers can return stale archive data on any given
|
|
15
|
+
# fetch, and stale data is worse than no data for engagement discovery
|
|
16
|
+
# (drafting against year-old threads burns trust). Default to a 14-day
|
|
17
|
+
# freshness ceiling — anything older is dropped before returning.
|
|
18
|
+
#
|
|
19
|
+
# Override via DELIMIT_REDDIT_MAX_AGE_DAYS (set to a large number to disable).
|
|
20
|
+
DEFAULT_MAX_AGE_DAYS = 14
TIER_PROXY = "proxy"
TIER_PULLPUSH = "pullpush"
TIER_DIRECT = "direct"


def _max_age_seconds() -> float:
    """Return the Reddit freshness ceiling in seconds.

    Reads ``DELIMIT_REDDIT_MAX_AGE_DAYS`` (a number of days, fractions
    allowed). Negative values are clamped to ``0.0``. A missing, blank,
    unparseable, or NaN value falls back to ``DEFAULT_MAX_AGE_DAYS``.
    """
    raw = os.environ.get("DELIMIT_REDDIT_MAX_AGE_DAYS", "").strip()
    if raw:
        try:
            days = float(raw)
        except ValueError:
            days = None
        # float() happily parses "nan", and max(0.0, nan) evaluates to 0.0,
        # which would silently turn a typo into "no ceiling". NaN is the only
        # float that is not equal to itself, so reject it here.
        if days is not None and days == days:
            return max(0.0, days) * 86400.0
    return DEFAULT_MAX_AGE_DAYS * 86400.0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _stamp_and_filter(posts: List[Dict[str, Any]], tier: str, subreddit: str) -> List[Dict[str, Any]]:
    """Keep only fresh posts and tag each kept post with its source tier.

    A post is kept when its ``created_utc`` parses to a non-zero number that
    is at or after the cutoff (now minus ``_max_age_seconds()``; the cutoff
    is ``0.0`` when the ceiling is not positive). Kept posts are mutated in
    place: ``_source_tier`` is set to *tier*. Posts with a missing, zero, or
    unparseable ``created_utc`` are dropped. The drop count is logged at
    INFO level for debugging stale-archive regressions (LED-2068);
    *subreddit* is used only in that log line.

    Returns:
        The kept posts, in their original order (empty list for no input).
    """
    if not posts:
        return []

    now = time.time()
    max_age = _max_age_seconds()
    cutoff = 0.0 if max_age <= 0 else now - max_age

    fresh: List[Dict[str, Any]] = []
    stale_count = 0
    for post in posts:
        try:
            created_at = float(post.get("created_utc") or 0)
        except (TypeError, ValueError):
            created_at = 0.0

        is_fresh = bool(created_at) and created_at >= cutoff
        if not is_fresh:
            stale_count += 1
            continue
        post["_source_tier"] = tier
        fresh.append(post)

    if stale_count:
        logger.info(
            "reddit_proxy: dropped %d/%d stale post(s) from %s tier for r/%s "
            "(freshness ceiling=%.1fd)",
            stale_count, len(posts), tier, subreddit, max_age / 86400.0,
        )
    return fresh
|
|
67
|
+
|
|
11
68
|
def _get_proxy_config() -> Dict[str, str]:
|
|
12
69
|
"""Load proxy config from private secrets or environment.
|
|
13
70
|
|
|
@@ -43,10 +100,12 @@ def _get_proxy_config() -> Dict[str, str]:
|
|
|
43
100
|
def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
|
|
44
101
|
"""
|
|
45
102
|
Fetch posts from a single subreddit with fallback chain.
|
|
46
|
-
Returns standardized post dicts.
|
|
103
|
+
Returns standardized post dicts. Each post is tagged with _source_tier
|
|
104
|
+
indicating which fallback served it, and stale posts (older than the
|
|
105
|
+
freshness ceiling per LED-2068) are dropped before returning.
|
|
47
106
|
"""
|
|
48
107
|
reddit_url = f"https://www.reddit.com/r/{subreddit}/{sort}.json?limit={limit}&raw_json=1"
|
|
49
|
-
|
|
108
|
+
|
|
50
109
|
# 1. Try Local Proxy (Residential IP)
|
|
51
110
|
proxy_cfg = _get_proxy_config()
|
|
52
111
|
proxy_url = proxy_cfg.get("proxy_url")
|
|
@@ -62,29 +121,48 @@ def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[
|
|
|
62
121
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
63
122
|
body = json.loads(resp.read().decode())
|
|
64
123
|
children = body.get("data", {}).get("children", [])
|
|
65
|
-
|
|
124
|
+
raw = [c.get("data", {}) for c in children if c.get("data")]
|
|
125
|
+
kept = _stamp_and_filter(raw, TIER_PROXY, subreddit)
|
|
126
|
+
if kept:
|
|
127
|
+
return kept
|
|
128
|
+
# If the proxy succeeded but returned only stale data, fall
|
|
129
|
+
# through to next tier rather than returning empty — gives
|
|
130
|
+
# us a chance to find fresh data elsewhere.
|
|
66
131
|
except Exception as e:
|
|
67
132
|
logger.debug(f"Local proxy failed for r/{subreddit}: {e}")
|
|
68
133
|
|
|
69
|
-
#
|
|
134
|
+
# 3. Try Direct (often blocked on datacenter IPs, but fast when it works
|
|
135
|
+
# and is the only tier currently capable of serving fresh data — PullPush
|
|
136
|
+
# stopped ingesting ~2025-05-19, residential proxy 403s from datacenter).
|
|
137
|
+
# Direct moved AHEAD of PullPush in the chain post-LED-2068 because a
|
|
138
|
+
# blocked direct fetch is recoverable via fallback, while a successful
|
|
139
|
+
# PullPush serves stale archive that pollutes downstream classifiers.
|
|
70
140
|
try:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
141
|
+
req = urllib.request.Request(reddit_url, headers={"User-Agent": "Mozilla/5.0 (Delimit)"})
|
|
142
|
+
with urllib.request.urlopen(req, timeout=5) as resp:
|
|
74
143
|
body = json.loads(resp.read().decode())
|
|
75
|
-
|
|
144
|
+
children = body.get("data", {}).get("children", [])
|
|
145
|
+
raw = [c.get("data", {}) for c in children if c.get("data")]
|
|
146
|
+
kept = _stamp_and_filter(raw, TIER_DIRECT, subreddit)
|
|
147
|
+
if kept:
|
|
148
|
+
return kept
|
|
76
149
|
except Exception as e:
|
|
77
|
-
logger.debug(f"
|
|
150
|
+
logger.debug(f"Direct fetch failed for r/{subreddit}: {e}")
|
|
78
151
|
|
|
79
|
-
#
|
|
152
|
+
# 2. Last-resort: PullPush archive. Currently stale (May 2025 ceiling)
|
|
153
|
+
# but the freshness filter will drop everything if so — leaves the door
|
|
154
|
+
# open for the day PullPush resumes ingesting fresh data.
|
|
80
155
|
try:
|
|
81
|
-
|
|
82
|
-
|
|
156
|
+
pp_url = f"https://api.pullpush.io/reddit/search/submission/?subreddit={subreddit}&size={limit}&sort=desc"
|
|
157
|
+
req = urllib.request.Request(pp_url, headers={"User-Agent": "Delimit/1.0"})
|
|
158
|
+
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
83
159
|
body = json.loads(resp.read().decode())
|
|
84
|
-
|
|
85
|
-
|
|
160
|
+
raw = body.get("data", []) or []
|
|
161
|
+
kept = _stamp_and_filter(raw, TIER_PULLPUSH, subreddit)
|
|
162
|
+
if kept:
|
|
163
|
+
return kept
|
|
86
164
|
except Exception as e:
|
|
87
|
-
logger.
|
|
165
|
+
logger.debug(f"PullPush fallback failed for r/{subreddit}: {e}")
|
|
88
166
|
|
|
89
167
|
return []
|
|
90
168
|
|
|
@@ -96,7 +96,34 @@ _PAIN_TO_RELEVANCE: Dict[str, str] = {
|
|
|
96
96
|
"cost": "new_opportunity", # pricing transparency / cost tracking
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
-
|
|
99
|
+
def _load_proxy_url() -> str:
    """Resolve the Reddit proxy URL from the reddit-proxy.json secrets file.

    Reads ``proxy_url`` from ``~/.delimit/secrets/reddit-proxy.json`` (the
    single source of truth shared with ai.reddit_proxy). If the file is
    missing, unreadable, malformed, or holds a blank value, the canonical
    SSH-tunnel localhost endpoint is returned instead.

    LED-2068b note: the residential proxy is reached through an SSH
    local-port-forward. `127.0.0.1:4819/reddit-fetch` is the LOCAL end of
    the tunnel into the *actual* residential machine that performs the
    Reddit fetch. A local Python wrapper also exists at `:8787/fetch`
    (systemd unit `delimit-reddit-proxy.service`), but it runs on this
    datacenter VM, gets 403 from Reddit's anti-bot wall, and serves nothing
    useful. Do not change the default away from 4819 without first
    confirming the SSH tunnel is no longer the canonical path.
    """
    fallback = "http://127.0.0.1:4819/reddit-fetch"
    try:
        secrets_file = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
        if not secrets_file.exists():
            return fallback
        config = json.loads(secrets_file.read_text())
        configured = (config.get("proxy_url") or "").strip()
    except Exception:
        # Any problem reading or parsing the secrets file means "use the
        # default endpoint".
        return fallback
    return configured if configured else fallback
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
PROXY_URL = _load_proxy_url()
|
|
100
127
|
SCANS_DIR = Path.home() / ".delimit" / "reddit_scans"
|
|
101
128
|
VENTURES_CONFIG_PATH = Path.home() / ".delimit" / "social_target_ventures.json"
|
|
102
129
|
|
|
@@ -143,29 +170,20 @@ def _fetch_subreddit(
|
|
|
143
170
|
The proxy endpoint expects a query parameter ``url`` containing the
|
|
144
171
|
actual Reddit JSON URL. Returns a list of extracted post dicts.
|
|
145
172
|
"""
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
)
|
|
153
|
-
|
|
173
|
+
# Delegate to ai.reddit_proxy.fetch_subreddit which has the canonical
|
|
174
|
+
# 3-tier fallback chain (residential proxy → direct → PullPush archive).
|
|
175
|
+
# Datacenter IPs get 403 from Reddit even with auth; the freshness filter
|
|
176
|
+
# in reddit_proxy drops stale-archive results so the scanner returns
|
|
177
|
+
# honest empty rather than fake old data.
|
|
178
|
+
from ai.reddit_proxy import fetch_subreddit as _proxy_fetch
|
|
154
179
|
try:
|
|
155
|
-
|
|
156
|
-
body = json.loads(resp.read().decode())
|
|
180
|
+
raw = _proxy_fetch(subreddit, sort=sort, limit=limit) or []
|
|
157
181
|
except Exception as exc:
|
|
158
182
|
logger.warning("Failed to fetch r/%s: %s", subreddit, exc)
|
|
159
183
|
return []
|
|
160
184
|
|
|
161
|
-
# Reddit returns {"data": {"children": [...]}}
|
|
162
|
-
children = []
|
|
163
|
-
if isinstance(body, dict):
|
|
164
|
-
children = body.get("data", {}).get("children", [])
|
|
165
|
-
|
|
166
185
|
posts: List[Dict[str, Any]] = []
|
|
167
|
-
for
|
|
168
|
-
d = child.get("data", {})
|
|
186
|
+
for d in raw:
|
|
169
187
|
if not d:
|
|
170
188
|
continue
|
|
171
189
|
# Skip stickied
|