npm - delimit-cli - Versions diffs - 4.5.1 → 4.5.2 - Mend

delimit-cli 4.5.1 → 4.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/CHANGELOG.md +87 -0
package/README.md +2 -2
package/bin/delimit-cli.js +109 -24
package/gateway/ai/content_engine.py +3 -4
package/gateway/ai/inbox_classifier.py +215 -0
package/gateway/ai/integrations/opensage_wrapper.py +4 -1
package/gateway/ai/ledger_manager.py +218 -38
package/gateway/ai/license.py +26 -0
package/gateway/ai/notify.py +68 -3
package/gateway/ai/reddit_proxy.py +93 -15
package/gateway/ai/reddit_scanner.py +36 -18
package/gateway/ai/server.py +128 -6
package/gateway/ai/social_capability/__init__.py +6 -0
package/gateway/ai/social_capability/capability_validator.py +273 -0
package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
package/gateway/ai/social_queue.py +307 -0
package/gateway/ai/supabase_sync.py +14 -2
package/gateway/ai/swarm.py +29 -11
package/gateway/ai/tui.py +6 -2
package/gateway/ai/x_ranker.py +276 -0
package/lib/attest-mcp.js +487 -0
package/lib/attest-telemetry.js +48 -0
package/lib/delimit-home.js +35 -0
package/lib/delimit-template.js +14 -0
package/package.json +8 -2
package/scripts/postinstall.js +89 -40
package/gateway/ai/content_grounding/__init__.py +0 -98
package/gateway/ai/content_grounding/build.py +0 -350
package/gateway/ai/content_grounding/consume.py +0 -280
package/gateway/ai/content_grounding/features.py +0 -218
package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
package/gateway/ai/content_grounding/schemas.py +0 -276
package/gateway/ai/content_grounding/telemetry.py +0 -221
package/gateway/ai/inbox_drafts/__init__.py +0 -61
package/gateway/ai/inbox_drafts/registry.py +0 -412
package/gateway/ai/inbox_drafts/schema.py +0 -374
package/gateway/ai/inbox_executor.py +0 -565

package/gateway/ai/ledger_manager.py CHANGED Viewed

@@ -144,7 +144,54 @@ def _register_venture(info: Dict[str, str]):
         VENTURES_FILE.write_text(json.dumps(ventures, indent=2))
-CENTRAL_LEDGER_DIR = Path.home() / ".delimit" / "ledger"
+# LED-1188 / Plan-C: env-aware home so DELIMIT_HOME / DELIMIT_NAMESPACE_ROOT
+# overrides apply to the ledger paths same as everywhere else. Falls back
+# to ~/.delimit when neither env var is set (back-compat with v4.5.1 and
+# all prior versions).
+def _delimit_home() -> Path:
+    for env_key in ("DELIMIT_HOME", "DELIMIT_NAMESPACE_ROOT"):
+        val = os.environ.get(env_key, "").strip()
+        if val:
+            return Path(val)
+    return Path.home() / ".delimit"
+CENTRAL_LEDGER_DIR = _delimit_home() / "ledger"
+LEDGER_V2_DIR = _delimit_home() / "ledger-v2"
+# LED-1188 D3 (deliberation att_f86e1f51110e8ed6 follow-up, 2026-04-28):
+# Plan-C migration partitions the central ledger into per-venture sub-ledgers
+# under ledger-v2/<slug>/. The resolver below auto-detects which layout is
+# present and reads from it. Slugs match the migration script's canonical
+# names so a v4.5.2 install picks up an existing Plan-C-staged tree without
+# requiring the swap to happen first.
+_VENTURE_CANONICAL = {
+    "delimit-mcp": "delimit",
+    "delimit-action": "delimit",
+    "delimit-ui": "delimit",
+    "delimit-cli": "delimit",        # npm package name
+    "delimit-gateway": "delimit",    # gateway repo
+    ".delimit": "delimit",
+    "wirereport": "wire-report",
+    "stakeone": "stake-one",
+}
+_KNOWN_VENTURE_SLUGS = {
+    "delimit", "wire-report", "domainvested",
+    "livetube", "stake-one", "root", "unsorted",
+}
+def _canonical_venture_slug(name: str) -> Optional[str]:
+    """Map a detected venture name to a canonical sub-ledger slug.
+    Returns None when the name doesn't match any known venture; callers
+    treat that as "no per-venture sub-ledger, use the central layout."
+    """
+    if not name:
+        return None
+    n = name.lower().strip()
+    n = _VENTURE_CANONICAL.get(n, n)
+    return n if n in _KNOWN_VENTURE_SLUGS else None
 def _detect_model() -> str:
@@ -182,15 +229,45 @@ def _detect_model() -> str:
 def _project_ledger_dir(project_path: str = ".") -> Path:
-    """Get the ledger directory — ALWAYS uses central ~/.delimit/ledger/.
-    Cross-model handoff fix: Codex and Gemini were writing to $PWD/.delimit/ledger/
-    which caused ledger fragmentation. All models must use the same central location
-    so Claude, Codex, and Gemini see the same items.
-    The central ledger at ~/.delimit/ledger/ is the source of truth.
-    Per-project .delimit/ dirs are for policies and config only, not ledger state.
+    """Resolve the ledger directory for a project, with Plan-C auto-detect.
+    Resolution order (LED-1188 D3, deliberation att_f86e1f51110e8ed6):
+      1. Detect venture from project_path -> canonical slug (delimit,
+         wire-report, domainvested, livetube, stake-one).
+      2. If LEDGER_V2_DIR / <slug> / operations.jsonl exists, return that
+         per-venture sub-ledger. (Plan-C staged but not yet swapped.)
+      3. If CENTRAL_LEDGER_DIR / <slug> / operations.jsonl exists, return
+         that per-venture sub-ledger. (Plan-C swapped.)
+      4. Fall back to CENTRAL_LEDGER_DIR (legacy single-file layout).
+    Cross-model handoff fix (still enforced): Codex and Gemini were writing to
+    $PWD/.delimit/ledger/ which caused ledger fragmentation. The central
+    ~/.delimit/ledger/ tree (or its Plan-C-partitioned form) remains the
+    single source of truth — per-project .delimit/ dirs are for policies and
+    config only.
     """
+    # Quick exit: when no Plan-C tree exists on disk (neither ledger-v2/ nor
+    # ledger/delimit/), every caller gets the original single-file layout.
+    if not LEDGER_V2_DIR.exists() and not (CENTRAL_LEDGER_DIR / "delimit").exists():
+        return CENTRAL_LEDGER_DIR
+    info = _detect_venture(project_path)
+    slug = _canonical_venture_slug(info.get("name", ""))
+    if slug is None:
+        return CENTRAL_LEDGER_DIR
+    # Plan-C staged: ledger-v2/<slug>/
+    staged = LEDGER_V2_DIR / slug
+    if (staged / "operations.jsonl").exists():
+        return staged
+    # Plan-C swapped: ledger/<slug>/
+    swapped = CENTRAL_LEDGER_DIR / slug
+    if (swapped / "operations.jsonl").exists():
+        return swapped
+    # No partitioned tree for this venture — fall back to the central
+    # legacy layout (operations.jsonl + strategy.jsonl directly in ledger/).
     return CENTRAL_LEDGER_DIR
@@ -235,6 +312,90 @@ def _append(path: Path, entry: Dict) -> Dict:
     return entry
+# ── LED-877 signal guard ─────────────────────────────────────────────
+# Sources that originate from sensed observations (social/strategy scans).
+# Centralized so the guard logic is in one place even if more prefixes are
+# added later (e.g. github_sense, reddit_sense).
+_SENSED_SOURCE_PREFIXES = ("social_scan", "social_strategy")
+def _check_source_is_ledger_item(
+    source: str,
+    *,
+    purpose: str = "promote_to_ledger",
+    title: str = "",
+    ledger: str = "",
+) -> None:
+    """LED-877 signal guard.
+    Sensed observations (``source='social_scan:...'``) MUST NOT land in
+    the ledger by default — they belong in the intel signal store.
+    LED-216 Phase 1 split: callers can declare *why* they are checking.
+    ``purpose='promote_to_ledger'`` (default, original strict behavior)
+        Used by ``add_item`` and any code path that actually writes a
+        ledger row. Raises ``ValueError`` on a sensed source unless the
+        ``_DELIMIT_SIGNAL_PROMOTED_BY`` bypass env var is set (which is
+        the explicit promote-to-ledger path).
+    ``purpose='draft_only'``
+        Used by code paths that produce a reply draft from a sensed
+        observation but do NOT promote the signal to the ledger. Drafts
+        are an acceptable consumer of sensed sources, so the guard is a
+        no-op for this purpose.
+    Only the strict path consults ``DELIMIT_SIGNAL_GUARD=shadow`` (the
+    shadow-log fallback used during the LED-877 rollout); ``draft_only``
+    returns before the guard mode is read.
+    """
+    _src_norm = (source or "").strip().lower()
+    if not any(_src_norm.startswith(p) for p in _SENSED_SOURCE_PREFIXES):
+        return  # Not a sensed source; nothing to guard against.
+    if purpose == "draft_only":
+        # Drafts may legitimately reference a sensed observation. The
+        # guard exists to prevent ledger writes, not draft generation.
+        return
+    if purpose != "promote_to_ledger":
+        # Defensive: unknown purpose ⇒ default to strict behavior so a
+        # typo can't accidentally weaken the guard.
+        pass
+    _promoted_by = os.environ.get("_DELIMIT_SIGNAL_PROMOTED_BY", "")
+    if _promoted_by:
+        return  # Explicit promote_to_ledger path; bypass authorized.
+    msg = (
+        f"LED-877 guard: source={source!r} is a sensed observation, not "
+        f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
+        f"Promote explicitly via promote_to_ledger(signal_id=...)."
+    )
+    _guard_mode = os.environ.get("DELIMIT_SIGNAL_GUARD", "enforce").lower()
+    if _guard_mode == "shadow":
+        try:
+            _shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
+            _shadow_log.parent.mkdir(parents=True, exist_ok=True)
+            with _shadow_log.open("a") as _f:
+                _f.write(json.dumps({
+                    "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+                    "title": title,
+                    "source": source,
+                    "ledger": ledger,
+                    "purpose": purpose,
+                    "msg": msg,
+                }) + "\n")
+        except Exception:
+            pass
+        # Shadow mode logs instead of raising; return so the caller continues.
+        return
+    raise ValueError(msg)
 def add_item(
     title: str,
     ledger: str = "ops",
@@ -259,44 +420,63 @@ def add_item(
     observations cannot land in the ledger. Observations belong in the intel
     signal store (ai/sensing/signal_store.py). Bypass via env var for the
     promote_to_ledger path: _DELIMIT_SIGNAL_PROMOTED_BY=<who>.
+    LED-216 Phase 1: the guard is now reusable via
+    ``_check_source_is_ledger_item(..., purpose='draft_only')`` for code
+    paths that produce reply drafts from sensed observations without
+    promoting the underlying signal to the ledger.
     """
-    _src_norm = (source or "").strip().lower()
-    _promoted_by = os.environ.get("_DELIMIT_SIGNAL_PROMOTED_BY", "")
-    _guard_mode = os.environ.get("DELIMIT_SIGNAL_GUARD", "enforce").lower()
-    if _src_norm.startswith("social_scan") or _src_norm.startswith("social_strategy"):
-        if not _promoted_by:
-            msg = (
-                f"LED-877 guard: source={source!r} is a sensed observation, not "
-                f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
-                f"Promote explicitly via promote_to_ledger(signal_id=...)."
-            )
-            if _guard_mode == "shadow":
-                try:
-                    _shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
-                    _shadow_log.parent.mkdir(parents=True, exist_ok=True)
-                    with _shadow_log.open("a") as _f:
-                        _f.write(json.dumps({
-                            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
-                            "title": title,
-                            "source": source,
-                            "ledger": ledger,
-                            "msg": msg,
-                        }) + "\n")
-                except Exception:
-                    pass
-                # fall through
-            else:
-                raise ValueError(msg)
+    _check_source_is_ledger_item(
+        source,
+        purpose="promote_to_ledger",
+        title=title,
+        ledger=ledger,
+    )
     _ensure(project_path)
     venture = _detect_venture(project_path)
     ledger_dir = _project_ledger_dir(project_path)
     path = ledger_dir / ("strategy.jsonl" if ledger == "strategy" else "operations.jsonl")
+    # LED-824: ID-collision fix. The Plan-C resolver routes delimit-context
+    # queries to ledger-v2/<slug>/. Per-venture ID counters used to scan
+    # only the active sub-ledger, so newly-created items could collide with
+    # IDs already used in the legacy CENTRAL_LEDGER_DIR root files. Now we
+    # union all known IDs across (a) the resolved sub-ledger AND (b) every
+    # peer sub-ledger AND (c) the legacy root, then pick the next free.
     items = _read_ledger(path)
     prefix = "STR" if ledger == "strategy" else "LED"
-    existing_ids = [i.get("id", "") for i in items if i.get("type") != "update"]
-    num = len(existing_ids) + 1
+    existing_ids = {i.get("id", "") for i in items if i.get("type") != "update"}
+    # Union with the matching file (strategy.jsonl for the strategy ledger,
+    # operations.jsonl otherwise) in every ledger-v2/* sub-ledger and in the
+    # legacy root; the other ledger type's file is not scanned.
+    filename = "strategy.jsonl" if ledger == "strategy" else "operations.jsonl"
+    candidate_paths: list[Path] = []
+    if LEDGER_V2_DIR.exists():
+        for sub in LEDGER_V2_DIR.iterdir():
+            if sub.is_dir():
+                candidate_paths.append(sub / filename)
+    candidate_paths.append(CENTRAL_LEDGER_DIR / filename)
+    for cand in candidate_paths:
+        if cand == path:
+            continue  # already scanned
+        if not cand.exists():
+            continue
+        try:
+            for entry in _read_ledger(cand):
+                if entry.get("type") == "update":
+                    continue
+                eid = entry.get("id", "")
+                if eid:
+                    existing_ids.add(eid)
+        except Exception:
+            # Best-effort: a malformed peer file shouldn't block id assignment
+            continue
+    # Walk forward from len()+1 until we find a non-colliding slot.
+    num = len(items) + 1
     while f"{prefix}-{num:03d}" in existing_ids:
         num += 1
     item_id = f"{prefix}-{num:03d}"

package/gateway/ai/license.py CHANGED Viewed

@@ -227,3 +227,29 @@ except ImportError:
         LICENSE_FILE.parent.mkdir(parents=True, exist_ok=True)
         LICENSE_FILE.write_text(json.dumps(license_data, indent=2))
         return {"status": "activated", "tier": "pro", "message": "Activated (offline fallback). Will validate on next network access."}
+# ─── LED-2060 (P1): test-mode license bypass ─────────────────────────────
+# tests/conftest.py sets DELIMIT_TEST_MODE=1 at session start. Without this
+# wrapper, every test that exercises a Pro tool got back a premium_required
+# error and asserted-against-the-wrong-shape, blocking CI on every PR.
+# Bypass is scoped: only active when DELIMIT_TEST_MODE is exactly "1";
+# require_premium then returns None (the "no gate" sentinel) and is_premium
+# returns True. It wraps both compiled-binary and fallback paths. Customers
+# never hit this path because their environments don't set DELIMIT_TEST_MODE.
+import os as _os
+_original_require_premium = require_premium  # type: ignore[has-type]
+_original_is_premium = is_premium  # type: ignore[has-type]
+def require_premium(tool_name: str):  # type: ignore[no-redef]
+    if _os.environ.get("DELIMIT_TEST_MODE") == "1":
+        return None
+    return _original_require_premium(tool_name)
+def is_premium() -> bool:  # type: ignore[no-redef]
+    if _os.environ.get("DELIMIT_TEST_MODE") == "1":
+        return True
+    return _original_is_premium()

package/gateway/ai/notify.py CHANGED Viewed

@@ -158,6 +158,30 @@ def _record_notification(entry: Dict[str, Any]) -> None:
         logger.warning("Failed to record notification: %s", e)
+_QUARANTINE_FILE = Path.home() / ".delimit" / "notifications_quarantine.jsonl"
+def _quarantine_record(entry: Dict[str, Any]) -> None:
+    """Log a notification that was suppressed by the test-mode / skip-marker
+    guard in send_notification(). The would-be email is NOT delivered;
+    this file is for audit only.
+    Added 2026-05-01 after gateway pytest runs were repeatedly leaking
+    [Test] / [Test Subject] / [DELIMIT_TEST_MODE=1 skipped] emails into
+    the founder's real inbox via test paths that called send_notification
+    without stubbing.
+    """
+    import datetime as _dt
+    try:
+        _QUARANTINE_FILE.parent.mkdir(parents=True, exist_ok=True)
+        entry = {**entry, "ts": _dt.datetime.now(_dt.timezone.utc).isoformat()}
+        with open(_QUARANTINE_FILE, "a", encoding="utf-8") as f:
+            f.write(json.dumps(entry) + "\n")
+    except OSError:
+        # Quarantine log failure must not crash the caller.
+        pass
 def record_owner_action(entry: Dict[str, Any]) -> None:
     """Append an owner-action record for dashboard and async fanout."""
     try:
@@ -1041,9 +1065,17 @@ def _enforce_email_protocol(subject: str, message: str, event_type: str) -> tupl
     """Validate and fix email against the protocol. Returns (subject, message, warnings)."""
     warnings = []
-    # 1. Subject must have a valid prefix bracket
-    if not any(subject.startswith(p) for p in _VALID_SUBJECT_PREFIXES):
-        # Try to infer from event_type
+    # 1. Subject must have SOME bracket prefix (e.g. [DONE], [POSTED], [FIX])
+    # so the founder can triage on mobile.
+    #
+    # Founder-tone fix 2026-04-28: previously the validator hard-rejected any
+    # bracket prefix not in _VALID_SUBJECT_PREFIXES and injected [INFO] in
+    # front, producing subjects like "[INFO] [DONE] LED-2056 fixed". The
+    # injected prefix overrode the caller's intent and bloated the subject.
+    # Now any `[WORD]` prefix (uppercase short tag) is accepted as-is, and
+    # we only inject when there's no bracket at all.
+    _has_any_bracket_prefix = bool(_re.match(r"^\[[A-Z][A-Z0-9_-]{0,15}\]\s", subject))
+    if not _has_any_bracket_prefix:
         # LED-969: customer-facing emails should not get bracket prefixes.
         # Any event_type starting with "customer_" is external-facing and
         # the subject should be sent as-is (clean, professional).
@@ -1135,6 +1167,39 @@ def send_notification(
     if not message:
         return {"error": "message is required"}
+    # ── Contaminated-content guard ────────────────────────────────────
+    # Every gateway pytest run was spamming the founder's real inbox via
+    # tests that called send_notification without stubbing SMTP. Two
+    # failure modes observed (2026-05-01):
+    #   1. Bare test invocations (subject="Test", message="test")
+    #   2. Social drafts where _call_model returned the
+    #      "[X skipped under DELIMIT_TEST_MODE=1 ...]" sentinel and the
+    #      sentinel string ended up as the draft body.
+    # Either is a noise/leak event. Refuse to send; log to a quarantine
+    # JSONL so the would-be content is auditable.
+    #
+    # Surgical match — only on the specific leaked shapes. Tests that
+    # correctly mock smtplib.SMTP keep working (their mock fires inside
+    # send_email, after this guard, and returns a fake delivered=True).
+    if channel in ("email", "webhook", "slack", "telegram"):
+        body = message or ""
+        subj = subject or ""
+        leak_match = (
+            "skipped under DELIMIT_TEST_MODE" in body
+            or "DELIMIT_TEST_MODE=1" in body
+            or (subj.strip().lower() == "test" and body.strip().lower() == "test")
+            or (subj.strip().lower() == "test subject" and body.strip().lower() == "test body")
+        )
+        if leak_match:
+            _quarantine_record({
+                "reason": "leaked_shape",
+                "channel": channel,
+                "subject": subj[:100],
+                "event_type": event_type,
+                "to": to,
+            })
+            return {"skipped": "leaked shape detected — not sent (audit: ~/.delimit/notifications_quarantine.jsonl)"}
     # Enforce email protocol for all email notifications
     protocol_warnings = []
     if channel == "email":

package/gateway/ai/reddit_proxy.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import time
 import urllib.parse
 import urllib.request
 from pathlib import Path
@@ -8,6 +9,62 @@ from typing import Any, Dict, List, Optional
 logger = logging.getLogger("delimit.ai.reddit_proxy")
+# LED-2068: freshness ceiling. PullPush stopped ingesting around 2025-05-19;
+# the residential proxy gets 403 from Reddit on datacenter IPs; direct fetch
+# is blocked. ALL three tiers can return stale archive data on any given
+# fetch, and stale data is worse than no data for engagement discovery
+# (drafting against year-old threads burns trust). Default to a 14-day
+# freshness ceiling — anything older is dropped before returning.
+#
+# Override via DELIMIT_REDDIT_MAX_AGE_DAYS (in days). Setting it to 0 — or a
+# negative value, which is clamped to 0 — disables the age cutoff.
+DEFAULT_MAX_AGE_DAYS = 14
+TIER_PROXY = "proxy"
+TIER_PULLPUSH = "pullpush"
+TIER_DIRECT = "direct"
+def _max_age_seconds() -> float:
+    raw = os.environ.get("DELIMIT_REDDIT_MAX_AGE_DAYS", "").strip()
+    if raw:
+        try:
+            return max(0.0, float(raw)) * 86400.0
+        except ValueError:
+            pass
+    return DEFAULT_MAX_AGE_DAYS * 86400.0
+def _stamp_and_filter(posts: List[Dict[str, Any]], tier: str, subreddit: str) -> List[Dict[str, Any]]:
+    """Tag each kept post with _source_tier and drop anything older than the
+    freshness ceiling, plus any post whose created_utc is missing, zero, or
+    unparseable. Returns kept posts. Also logs the drop count for
+    debugging stale-archive regressions (LED-2068)."""
+    if not posts:
+        return []
+    now = time.time()
+    max_age = _max_age_seconds()
+    if max_age <= 0:
+        cutoff = 0.0
+    else:
+        cutoff = now - max_age
+    kept: List[Dict[str, Any]] = []
+    dropped = 0
+    for p in posts:
+        try:
+            created = float(p.get("created_utc") or 0)
+        except (TypeError, ValueError):
+            created = 0.0
+        if created and created >= cutoff:
+            p["_source_tier"] = tier
+            kept.append(p)
+        else:
+            dropped += 1
+    if dropped:
+        logger.info(
+            "reddit_proxy: dropped %d/%d stale post(s) from %s tier for r/%s "
+            "(freshness ceiling=%.1fd)",
+            dropped, len(posts), tier, subreddit, max_age / 86400.0,
+        )
+    return kept
 def _get_proxy_config() -> Dict[str, str]:
     """Load proxy config from private secrets or environment.
@@ -43,10 +100,12 @@ def _get_proxy_config() -> Dict[str, str]:
 def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
     """
     Fetch posts from a single subreddit with fallback chain.
-    Returns standardized post dicts.
+    Returns standardized post dicts. Each post is tagged with _source_tier
+    indicating which fallback served it, and stale posts (older than the
+    freshness ceiling per LED-2068) are dropped before returning.
     """
     reddit_url = f"https://www.reddit.com/r/{subreddit}/{sort}.json?limit={limit}&raw_json=1"
     # 1. Try Local Proxy (Residential IP)
     proxy_cfg = _get_proxy_config()
     proxy_url = proxy_cfg.get("proxy_url")
@@ -62,29 +121,48 @@ def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[
             with urllib.request.urlopen(req, timeout=10) as resp:
                 body = json.loads(resp.read().decode())
                 children = body.get("data", {}).get("children", [])
-                return [c.get("data", {}) for c in children if c.get("data")]
+                raw = [c.get("data", {}) for c in children if c.get("data")]
+                kept = _stamp_and_filter(raw, TIER_PROXY, subreddit)
+                if kept:
+                    return kept
+                # If the proxy succeeded but returned only stale data, fall
+                # through to next tier rather than returning empty — gives
+                # us a chance to find fresh data elsewhere.
         except Exception as e:
             logger.debug(f"Local proxy failed for r/{subreddit}: {e}")
-    # 2. Fallback: PullPush API (Public Archive)
+    # 2. Try Direct (often blocked on datacenter IPs, but fast when it works
+    # and is the only tier currently capable of serving fresh data — PullPush
+    # stopped ingesting ~2025-05-19, residential proxy 403s from datacenter).
+    # Direct moved AHEAD of PullPush in the chain post-LED-2068 because a
+    # blocked direct fetch is recoverable via fallback, while a successful
+    # PullPush serves stale archive that pollutes downstream classifiers.
     try:
-        pp_url = f"https://api.pullpush.io/reddit/search/submission/?subreddit={subreddit}&size={limit}&sort=desc"
-        req = urllib.request.Request(pp_url, headers={"User-Agent": "Delimit/1.0"})
-        with urllib.request.urlopen(req, timeout=10) as resp:
+        req = urllib.request.Request(reddit_url, headers={"User-Agent": "Mozilla/5.0 (Delimit)"})
+        with urllib.request.urlopen(req, timeout=5) as resp:
             body = json.loads(resp.read().decode())
-            return body.get("data", [])
+            children = body.get("data", {}).get("children", [])
+            raw = [c.get("data", {}) for c in children if c.get("data")]
+            kept = _stamp_and_filter(raw, TIER_DIRECT, subreddit)
+            if kept:
+                return kept
     except Exception as e:
-        logger.debug(f"PullPush fallback failed for r/{subreddit}: {e}")
+        logger.debug(f"Direct fetch failed for r/{subreddit}: {e}")
-    # 3. Fallback: Direct (Often blocked on servers)
+    # 3. Last-resort: PullPush archive. Currently stale (May 2025 ceiling)
+    # but the freshness filter will drop everything if so — leaves the door
+    # open for the day PullPush resumes ingesting fresh data.
     try:
-        req = urllib.request.Request(reddit_url, headers={"User-Agent": "Mozilla/5.0 (Delimit)"})
-        with urllib.request.urlopen(req, timeout=5) as resp:
+        pp_url = f"https://api.pullpush.io/reddit/search/submission/?subreddit={subreddit}&size={limit}&sort=desc"
+        req = urllib.request.Request(pp_url, headers={"User-Agent": "Delimit/1.0"})
+        with urllib.request.urlopen(req, timeout=10) as resp:
             body = json.loads(resp.read().decode())
-            children = body.get("data", {}).get("children", [])
-            return [c.get("data", {}) for c in children if c.get("data")]
+            raw = body.get("data", []) or []
+            kept = _stamp_and_filter(raw, TIER_PULLPUSH, subreddit)
+            if kept:
+                return kept
     except Exception as e:
-        logger.warning(f"Direct fetch failed for r/{subreddit}: {e}")
+        logger.debug(f"PullPush fallback failed for r/{subreddit}: {e}")
     return []

package/gateway/ai/reddit_scanner.py CHANGED Viewed

@@ -96,7 +96,34 @@ _PAIN_TO_RELEVANCE: Dict[str, str] = {
     "cost": "new_opportunity",                 # pricing transparency / cost tracking
 }
-PROXY_URL = "http://127.0.0.1:4819/reddit-fetch"
+def _load_proxy_url() -> str:
+    """Load proxy URL from the canonical reddit-proxy.json secrets file.
+    Single source of truth shared with ai.reddit_proxy. Falls back to the
+    canonical SSH-tunnel localhost endpoint if the secrets file is missing.
+    LED-2068b note: the residential proxy is reached via an SSH local-port-
+    forward — `127.0.0.1:4819/reddit-fetch` is the LOCAL endpoint of the
+    tunnel into the *actual* residential machine that performs the Reddit
+    fetch. There is also a local Python wrapper at `:8787/fetch` (systemd
+    unit `delimit-reddit-proxy.service`) — that one runs on this datacenter
+    VM and gets 403 from Reddit's anti-bot wall, so it serves nothing
+    useful. Do not change the default away from 4819 without first
+    confirming the SSH tunnel is no longer the canonical path.
+    """
+    try:
+        secrets_path = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
+        if secrets_path.exists():
+            data = json.loads(secrets_path.read_text())
+            url = (data.get("proxy_url") or "").strip()
+            if url:
+                return url
+    except Exception:
+        pass
+    return "http://127.0.0.1:4819/reddit-fetch"
+PROXY_URL = _load_proxy_url()
 SCANS_DIR = Path.home() / ".delimit" / "reddit_scans"
 VENTURES_CONFIG_PATH = Path.home() / ".delimit" / "social_target_ventures.json"
@@ -143,29 +170,20 @@ def _fetch_subreddit(
     The proxy endpoint expects a query parameter ``url`` containing the
     actual Reddit JSON URL.  Returns a list of extracted post dicts.
     """
-    reddit_url = f"https://www.reddit.com/r/{subreddit}/{sort}.json?limit={limit}&raw_json=1"
-    fetch_url = f"{proxy_url}?url={urllib.request.quote(reddit_url, safe='')}"
-    req = urllib.request.Request(
-        fetch_url,
-        headers={"User-Agent": "delimit-scanner/1.0", "Accept": "application/json"},
-    )
+    # Delegate to ai.reddit_proxy.fetch_subreddit which has the canonical
+    # 3-tier fallback chain (residential proxy → direct → PullPush archive).
+    # Datacenter IPs get 403 from Reddit even with auth; the freshness filter
+    # in reddit_proxy drops stale-archive results so the scanner returns
+    # honest empty rather than fake old data.
+    from ai.reddit_proxy import fetch_subreddit as _proxy_fetch
     try:
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            body = json.loads(resp.read().decode())
+        raw = _proxy_fetch(subreddit, sort=sort, limit=limit) or []
     except Exception as exc:
         logger.warning("Failed to fetch r/%s: %s", subreddit, exc)
         return []
-    # Reddit returns {"data": {"children": [...]}}
-    children = []
-    if isinstance(body, dict):
-        children = body.get("data", {}).get("children", [])
     posts: List[Dict[str, Any]] = []
-    for child in children:
-        d = child.get("data", {})
+    for d in raw:
         if not d:
             continue
         # Skip stickied