npm - delimit-cli - Versions diffs - 4.6.0 → 4.6.2 - Mend

delimit-cli 4.6.0 → 4.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/CHANGELOG.md +71 -8
package/bin/delimit-cli.js +59 -9
package/bin/delimit-setup.js +7 -3
package/gateway/ai/agent_dispatch.py +5 -0
package/gateway/ai/backends/gateway_core.py +6 -0
package/gateway/ai/backends/git_health.py +175 -0
package/gateway/ai/backends/memory_bridge.py +210 -53
package/gateway/ai/backends/tools_infra.py +93 -0
package/gateway/ai/backends/tools_real.py +53 -7
package/gateway/ai/cli_contract.py +185 -0
package/gateway/ai/governance.py +181 -0
package/gateway/ai/heartbeat.py +290 -0
package/gateway/ai/ledger_manager.py +81 -4
package/gateway/ai/ledger_proof.py +127 -0
package/gateway/ai/license.py +132 -47
package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
package/gateway/ai/license_core.pyi +1 -1
package/gateway/ai/outreach_loop_daemon.py +349 -0
package/gateway/ai/outreach_substantive.py +768 -7
package/gateway/ai/pro_tools.yaml +167 -0
package/gateway/ai/reddit_scanner.py +7 -1
package/gateway/ai/server.py +295 -116
package/gateway/ai/session_phoenix.py +121 -0
package/gateway/ai/social_queue.py +166 -10
package/gateway/ai/tenant_auth.py +329 -0
package/gateway/ai/tenant_data.py +339 -0
package/gateway/ai/tenant_paths.py +150 -0
package/gateway/core/diff_engine_v2.py +517 -54
package/gateway/core/semver_classifier.py +52 -6
package/package.json +4 -1
package/scripts/build-license-core.sh +0 -85
package/scripts/security-check.sh +0 -66
package/scripts/test-license-core-so.sh +0 -107

package/gateway/ai/social_queue.py CHANGED Viewed

@@ -48,6 +48,55 @@ QUEUE_FILE = Path.home() / ".delimit" / "social_scan_queue.jsonl"
 DEFAULT_DEDUPE_HOURS = 24 * 7  # don't re-queue a fingerprint within 7 days
 DEFAULT_EXPIRE_HOURS = 24 * 7  # entries older than 7 days roll to expired
+# Per-platform freshness cap at claim_pending time. Reddit posts decay
+# in comment-visibility VERY fast (Boris-Cherny LED-1335: <6h high-yield,
+# <12h marginal, ~zero after 24h), so drafting on a 3-day-old post wastes
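+# a brand-account engagement. Platforms with longer half-lives (e.g. devto)
+# have no entry in the map below and therefore get no freshness cap. github
+# was originally left uncapped for the same reason — issue threads can be
+# relevant weeks later — but now carries a 24h cap; see the Phase C note on
+# its entry in the map below.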
+#
+# Founder regression report 2026-05-18: drafts were being generated on
+# posts queued 3+ days earlier because the queue is FIFO and the drafter
+# falls behind the scanner. This filter ensures claim_pending() never
+# returns entries whose `queued_at` is older than their platform's cap in
+# CLAIM_FRESHNESS_HOURS_BY_PLATFORM, regardless of queue position.
+CLAIM_FRESHNESS_HOURS_BY_PLATFORM: Dict[str, int] = {
+    "reddit": 24,
+    # Phase C (2026-05-18): github targets fail ~96% of the time
+    # (historical 3256/3389 marked drafted_failed) and the queue grew
+    # to 1122 pending dominating FIFO order. 24h cap drains stale crud
+    # while preserving fresh github targets the drafter would actually
+    # process. Without this, github starves reddit/x/hn even with the
+    # round-robin claim (see CLAIM_MAX_PER_PLATFORM).
+    "github": 24,
+}
+# Phase C (2026-05-18): round-robin claim_pending across platforms so a
+# noisy platform doesn't starve quieter ones. Drafter calls claim_pending
+# with limit=10; pre-Phase-C this returned 10 oldest entries regardless
+# of platform, which with github=1122 pending meant 10/10 github and
+# reddit drafts never fired. With this cap, drafter sees a balanced mix.
+# Within-platform order is FIFO (oldest pending first) EXCEPT for
+# platforms in CLAIM_LIFO_PLATFORMS — see below.
+CLAIM_MAX_PER_PLATFORM: int = 3
+# Phase D (2026-05-18 founder request): "we need first-poster advantage."
+# For time-critical engagement platforms, the drafter should pick the
+# FRESHEST pending entry, not the oldest. Reddit comment visibility decays
+# sharply after the first 15-30 minutes of a thread (the first 5-10 visible
+# comments capture the bulk of upvotes + clickthrough). Pre-Phase-D's
+# within-reddit FIFO meant the drafter pulled 22-24h-old entries (near
+# the freshness cap) instead of brand-new ones.
+#
+# LIFO-within-platform reverses that for the listed platforms: within the
+# eligible bucket, sort newest queued_at first. Across platforms, round-
+# robin still applies. Entries that get displaced by newer ones are
+# naturally cleaned up by the freshness cap groomer.
+#
+# Other platforms (github, devto, etc.) keep FIFO — their content has a
+# longer half-life and oldest-first is the right discipline there.
+CLAIM_LIFO_PLATFORMS: set = {"reddit"}
 PENDING = "pending"
 DRAFTED = "drafted"
 DRAFTED_FAILED = "drafted_failed"
@@ -184,23 +233,130 @@ def enqueue(target: Dict[str, Any], dedupe_hours: int = DEFAULT_DEDUPE_HOURS) ->
 def claim_pending(platform: Optional[str] = None, limit: int = 20) -> List[Dict[str, Any]]:
-    """Return up to ``limit`` pending entries, optionally filtered by platform.
+    """Return up to ``limit`` pending entries, with round-robin balancing
+    across platforms when ``platform`` is None.
     Read-only — does NOT mutate state. The caller must call ``mark_drafted``
-    or ``mark_failed`` once it processes the entry. Returns oldest-first
-    (FIFO) so the queue drains in scan order.
+    or ``mark_failed`` once it processes the entry.
+    Round-robin (Phase C, 2026-05-18): without a platform filter, returns
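+    at most CLAIM_MAX_PER_PLATFORM entries from any single platform per
+    call. Within each platform, oldest-first (FIFO) — except platforms
+    listed in CLAIM_LIFO_PLATFORMS, which are sorted newest ``queued_at``
+    first before the per-platform cap is applied. Across platforms,
+    interleaved so the drafter sees a balanced mix instead of saturating
+    on whichever platform has the deepest backlog.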
+    With an explicit ``platform`` filter, behaves as FIFO over that single
+    platform's pending entries (no per-platform cap, since the caller is
+    already targeting one platform).
+    Freshness cap (Phase A, 2026-05-18): entries whose platform has a
+    CLAIM_FRESHNESS_HOURS_BY_PLATFORM cap and whose ``queued_at`` is
+    older than that cap are skipped silently. Those stale entries stay
+    in the file with status=pending; the separate
+    ``expire_stale_for_freshness_caps`` pass flips them to expired so
+    they don't pile up forever.
     """
-    out: List[Dict[str, Any]] = []
-    # Build a list because we want oldest-first; JSONL append order = FIFO.
-    for entry in _iter_entries():
+    now = datetime.now(timezone.utc)
+    def _is_eligible(entry: Dict[str, Any]) -> bool:
         if entry.get("status") != PENDING:
-            continue
+            return False
         if platform and entry.get("platform") != platform:
+            return False
+        cap = CLAIM_FRESHNESS_HOURS_BY_PLATFORM.get(entry.get("platform"))
+        if cap is not None:
+            qts = _parse_iso(entry.get("queued_at"))
+            if qts is not None and (now - qts) > timedelta(hours=cap):
+                return False
+        return True
+    # Single-platform filter path: keep legacy strict-FIFO behavior.
+    if platform:
+        out: List[Dict[str, Any]] = []
+        for entry in _iter_entries():
+            if not _is_eligible(entry):
+                continue
+            out.append(entry)
+            if len(out) >= limit:
+                break
+        return out
+    # Round-robin path: group by platform first (preserving within-platform
+    # FIFO via iteration order), then cap per-platform and interleave.
+    # CRITICAL Phase D change: for CLAIM_LIFO_PLATFORMS, collect the full
+    # eligible set per platform first, then sort newest-first BEFORE
+    # truncation — otherwise the early-break truncation in the FIFO path
+    # would keep the oldest entries even when we want the newest.
+    by_platform: Dict[str, List[Dict[str, Any]]] = {}
+    for entry in _iter_entries():
+        if not _is_eligible(entry):
             continue
-        out.append(entry)
-        if len(out) >= limit:
+        plat = entry.get("platform") or "unknown"
+        by_platform.setdefault(plat, []).append(entry)
+    # Sort + truncate each bucket.
+    for plat in list(by_platform.keys()):
+        if plat in CLAIM_LIFO_PLATFORMS:
+            # Newest queued_at first. Parse-failures sort last so a
+            # corrupted-timestamp entry doesn't block legitimate fresh
+            # entries from being claimed.
+            by_platform[plat].sort(
+                key=lambda e: _parse_iso(e.get("queued_at")) or datetime.min.replace(tzinfo=timezone.utc),
+                reverse=True,
+            )
+        # FIFO platforms keep insertion order (which is JSONL-append =
+        # oldest first); no sort needed.
+        by_platform[plat] = by_platform[plat][:CLAIM_MAX_PER_PLATFORM]
+    # Interleave: round-robin across platforms in alphabetical order for
+    # determinism. Stop when limit is reached or all buckets are drained.
+    out2: List[Dict[str, Any]] = []
+    plat_order = sorted(by_platform.keys())
+    idx = {p: 0 for p in plat_order}
+    while len(out2) < limit:
+        added = False
+        for p in plat_order:
+            if idx[p] < len(by_platform[p]):
+                out2.append(by_platform[p][idx[p]])
+                idx[p] += 1
+                added = True
+                if len(out2) >= limit:
+                    break
+        if not added:
             break
-    return out
+    return out2
+def expire_stale_for_freshness_caps() -> Dict[str, int]:
+    """Roll pending entries past their platform's claim freshness cap to expired.
+    Companion to ``claim_pending``'s in-flight skip: without this, the
+    queue file fills up with pending-but-permanently-skipped entries
+    that we still re-scan on every claim. Returns a dict
+    ``{platform: count_expired}`` for observability.
+    """
+    now = datetime.now(timezone.utc)
+    entries = list(_iter_entries())
+    if not entries:
+        return {}
+    flipped: Dict[str, int] = {}
+    changed = False
+    for entry in entries:
+        if entry.get("status") != PENDING:
+            continue
+        plat = entry.get("platform")
+        cap = CLAIM_FRESHNESS_HOURS_BY_PLATFORM.get(plat)
+        if cap is None:
+            continue
+        qts = _parse_iso(entry.get("queued_at"))
+        if qts is None or (now - qts) <= timedelta(hours=cap):
+            continue
+        entry["status"] = EXPIRED
+        entry["error"] = f"expired_freshness_cap_{cap}h"
+        flipped[plat] = flipped.get(plat, 0) + 1
+        changed = True
+    if changed:
+        _atomic_rewrite(entries)
+    return flipped
 def _update_entry(fingerprint: str, mutator) -> bool:

package/gateway/ai/tenant_auth.py ADDED Viewed

@@ -0,0 +1,329 @@
+"""LED-2268 P0 Phase 0.1 — gateway-side tenant API key validator.
+The dashboard at app.delimit.ai (`/dashboard/api-keys`) issues per-user
+keys with the `dlmt_<43-char-base64url>` shape. Only the sha256 of the
+plaintext is stored — see supabase migration 034 + lib/user-api-keys.ts.
+This module owns the gateway side of that contract:
+  - parse `Authorization: ApiKey dlmt_xxx` from an HTTP header
+  - sha256-hash the plaintext
+  - look up the hash in `user_api_keys` via service-role Supabase REST
+  - return `{user_id, scope, key_id}` for a live (non-revoked) match
+  - return None for anything else (bad shape, no match, revoked, etc.)
+Phase 0.1 stays minimal on purpose:
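+  - no blocking `last_used_at` write (originally deferred because it adds
+    a write per call; since Phase 0.2 it is a fire-and-forget background
+    PATCH whose failures are counted and logged, never surfaced to auth)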
+  - no cache (every call hits Supabase; fine at current volume)
+  - no JWT, no rotation grace period — soft-delete is hard once set
+Phase 0.2 will add tenant-scoped data routing (per-user data root under
+~/.delimit/tenants/<user_id>/); this module only resolves identity.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import logging
+import os
+import threading
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from typing import Optional, TypedDict
+logger = logging.getLogger("delimit.tenant_auth")
+# Process-local counter for failed last_used_at PATCH writes. Lets
+# operators (and future /heartbeats-style health surfaces) see whether
+# the audit-write fire-and-forget is silently dropping a sustained
+# burst — debug log on every error is too quiet to notice in journalctl
+# during a Supabase outage. Reset only on process restart by design.
+_last_used_dropped_count = 0
+_last_used_dropped_lock = threading.Lock()
+# Log at INFO every Nth drop so a sustained outage surfaces without
+# flooding the journal on transient blips. First drop is also INFO so
+# the first sign of trouble is visible.
+_LAST_USED_DROP_LOG_EVERY = 10
+def get_last_used_dropped_count() -> int:
+    """How many last_used_at PATCH writes have been dropped since process start.
+    Read-only; intended for /heartbeats, future metrics endpoints, and
+    operational tooling. NOT a security signal — dropped writes don't
+    affect auth correctness, only audit completeness.
+    """
+    with _last_used_dropped_lock:
+        return _last_used_dropped_count
+class TenantIdentity(TypedDict):
+    """Resolved tenant identity for a presented API key."""
+    user_id: str
+    scope: str
+    key_id: str
+# The plaintext shape issued by lib/user-api-keys.ts is `dlmt_` + 43
+# base64url chars (32 random bytes encoded). Reject anything that doesn't
+# fit before hashing — saves a Supabase round-trip on malformed input.
+_KEY_PREFIX = "dlmt_"
+_KEY_PLAINTEXT_LEN_MIN = len(_KEY_PREFIX) + 32  # be lenient on lower bound
+_KEY_PLAINTEXT_LEN_MAX = len(_KEY_PREFIX) + 128  # cap to defeat absurd inputs
+def parse_auth_header(header: str) -> Optional[tuple[str, str]]:
+    """Parse `Authorization` into (scheme, token).
+    Recognizes two schemes:
+      - `Bearer <token>` — existing shared-bearer pattern (founder/system)
+      - `ApiKey <plaintext>` — per-user tenant key (this module's domain)
+    Returns (scheme_lowercase, token) on match, None on anything else.
+    Caller decides which scheme is acceptable for which endpoint.
+    """
+    if not header:
+        return None
+    parts = header.split(None, 1)
+    if len(parts) != 2:
+        return None
+    scheme, token = parts[0].strip().lower(), parts[1].strip()
+    if scheme in ("bearer", "apikey") and token:
+        return (scheme, token)
+    return None
+def _hash_key(plaintext: str) -> str:
+    """sha256(plaintext) as lowercase hex — matches lib/user-api-keys.ts."""
+    return hashlib.sha256(plaintext.encode("utf-8")).hexdigest()
+def _looks_like_tenant_key(plaintext: str) -> bool:
+    """Cheap shape check before we bother Supabase."""
+    if not plaintext.startswith(_KEY_PREFIX):
+        return False
+    n = len(plaintext)
+    return _KEY_PLAINTEXT_LEN_MIN <= n <= _KEY_PLAINTEXT_LEN_MAX
+def validate_api_key(plaintext: str) -> Optional[TenantIdentity]:
+    """Resolve `dlmt_xxx` plaintext to a tenant identity, or None.
+    Returns None for: malformed input, no Supabase config, network
+    failure, no row matched, row marked revoked. Caller treats None as
+    "unauthorized" — never leak why specifically.
+    This function is intentionally synchronous + fire-and-forget on
+    errors. Logs them at debug level. Production audit comes from the
+    request-log layer (each endpoint logs the resolved user_id, not
+    the validator).
+    """
+    if not _looks_like_tenant_key(plaintext):
+        return None
+    supabase_url = os.environ.get("SUPABASE_URL", "").rstrip("/")
+    service_key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
+    if not supabase_url or not service_key:
+        # If the gateway host hasn't been configured for Supabase, tenant
+        # auth simply doesn't work — the shared-bearer path stays intact.
+        logger.debug("validate_api_key: supabase env not configured")
+        return None
+    key_hash = _hash_key(plaintext)
+    # Active-only lookup: the partial index `idx_user_api_keys_active_hash`
+    # makes this O(log n) and guarantees revoked keys never match.
+    url = (
+        f"{supabase_url}/rest/v1/user_api_keys"
+        f"?select=id,user_id,scope"
+        f"&key_hash=eq.{urllib.parse.quote(key_hash, safe='')}"
+        f"&revoked_at=is.null"
+        f"&limit=1"
+    )
+    req = urllib.request.Request(
+        url,
+        headers={
+            "apikey": service_key,
+            "Authorization": f"Bearer {service_key}",
+            "Accept": "application/json",
+        },
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            body = resp.read()
+    except urllib.error.HTTPError as e:
+        logger.debug("validate_api_key supabase HTTP %s", getattr(e, "code", "?"))
+        return None
+    except (urllib.error.URLError, OSError, TimeoutError) as e:
+        logger.debug("validate_api_key supabase net err: %s", e)
+        return None
+    try:
+        rows = json.loads(body)
+    except json.JSONDecodeError:
+        logger.debug("validate_api_key non-json response")
+        return None
+    if not isinstance(rows, list) or not rows:
+        return None
+    row = rows[0]
+    if not isinstance(row, dict):
+        return None
+    user_id = row.get("user_id") or ""
+    if not user_id:
+        return None
+    key_id = str(row.get("id") or "")
+    # Phase 0.2: fire-and-forget last_used_at write. Lets operators see
+    # "this key was actually used in the last N hours" in the dashboard
+    # API-keys list, which is important for rotation hygiene (you can
+    # tell which keys are dead before deciding what to revoke).
+    # Backgrounded so the validate path stays as fast as it was in 0.1.
+    if key_id:
+        _fire_last_used_update(supabase_url, service_key, key_id)
+    return TenantIdentity(
+        user_id=str(user_id),
+        scope=str(row.get("scope") or ""),
+        key_id=key_id,
+    )
+def _fire_last_used_update(supabase_url: str, service_key: str, key_id: str) -> None:
+    """Background-thread PATCH to bump last_used_at on a successful validate.
+    Errors are swallowed; the validate path NEVER blocks on this and the
+    foreground response is unaffected. The point is best-effort audit
+    signal, not authorization.
+    The thread is daemonised so a hung Supabase call can't keep the
+    process alive past shutdown.
+    """
+    def _patch():
+        try:
+            url = (
+                f"{supabase_url.rstrip('/')}/rest/v1/user_api_keys"
+                f"?id=eq.{urllib.parse.quote(key_id, safe='')}"
+            )
+            body = json.dumps({
+                "last_used_at": datetime.now(timezone.utc).isoformat(),
+            }).encode("utf-8")
+            req = urllib.request.Request(
+                url,
+                data=body,
+                method="PATCH",
+                headers={
+                    "apikey": service_key,
+                    "Authorization": f"Bearer {service_key}",
+                    "Content-Type": "application/json",
+                    # Prefer: return=minimal — we don't need the row back.
+                    "Prefer": "return=minimal",
+                },
+            )
+            with urllib.request.urlopen(req, timeout=5):
+                pass
+        except Exception as e:  # noqa: BLE001 — fire-and-forget; never raise
+            # Bump the process-local dropped-write counter and log at
+            # INFO every Nth drop (plus the first). Lets a sustained
+            # outage surface in journalctl without spam on blips.
+            global _last_used_dropped_count
+            with _last_used_dropped_lock:
+                _last_used_dropped_count += 1
+                count = _last_used_dropped_count
+            if count == 1 or count % _LAST_USED_DROP_LOG_EVERY == 0:
+                logger.info(
+                    "last_used_at update dropped (cum_dropped=%d): %s",
+                    count, e,
+                )
+            else:
+                logger.debug(
+                    "last_used_at update dropped (cum_dropped=%d): %s",
+                    count, e,
+                )
+    t = threading.Thread(target=_patch, daemon=True, name="delimit-last-used-update")
+    t.start()
+def authenticate(
+    header: str,
+    shared_bearer: str = "",
+    impersonation_header: str = "",
+) -> Optional[dict]:
+    """End-to-end auth resolver for an HTTP request.
+    Returns a dict describing the resolved identity, or None if the
+    request should be rejected. Three accepted-request outcomes:
+      - `{"auth_mode": "bearer", "is_tenant_scoped": False}` — shared-
+        bearer match WITHOUT impersonation. Founder/system access to
+        the shared `~/.delimit/` view. No user_id field present.
+      - `{"auth_mode": "bearer", "is_tenant_scoped": True, "user_id":
+        ..., "scope": "", "key_id": "bearer-impersonation"}` — shared
+        bearer match WITH a valid impersonation header. The trusted
+        BFF/system is acting on behalf of a specific tenant (LED-2268
+        Phase 0.5a, lets the Vercel dashboard read/write tenant data
+        on behalf of a NextAuth-authenticated user without the user
+        ever exposing their plaintext API key to the BFF).
+      - `{"auth_mode": "apikey", "is_tenant_scoped": True, "user_id":
+        ..., "scope": ..., "key_id": ...}` — tenant key match.
+    Trust model: the shared bearer is held only by a SMALL set of
+    trusted clients (Vercel BFF + the gateway host). If it leaks, the
+    blast radius is already total (founder-class access to everything
+    the gateway serves). The impersonation header just lets that
+    bearer be more granular per-request; it does NOT grant access the
+    bearer didn't already have.
+    Order: Bearer first (cheap string compare), then ApiKey (Supabase
+    round-trip). A request can only present one Authorization header,
+    so the order is which-scheme-wins-when-the-shape-fits.
+    """
+    parsed = parse_auth_header(header)
+    if not parsed:
+        return None
+    scheme, token = parsed
+    if scheme == "bearer":
+        if not shared_bearer or token != shared_bearer:
+            return None
+        # Phase 0.5a — optional tenant impersonation. If the BFF/system
+        # presented a tenant header AND it sanitises to a valid segment,
+        # treat as tenant-scoped under that user_id. Validate via the
+        # SAME sanitiser tenant_paths uses for filesystem routing so the
+        # downstream code sees a consistent identity.
+        if impersonation_header:
+            # Lazy import to avoid circular: tenant_paths only needed when
+            # impersonation is actually requested.
+            from . import tenant_paths
+            seg = tenant_paths.safe_user_segment(impersonation_header)
+            if seg is None:
+                # Header was present but garbage. Reject the request
+                # entirely rather than silently falling back to shared
+                # scope — a confused BFF surfacing here is exactly the
+                # class of bug that header validation should catch.
+                logger.info(
+                    "authenticate: bearer + invalid impersonation header rejected: %r",
+                    impersonation_header[:64],
+                )
+                return None
+            # We pass the RAW header value (not the sanitised segment)
+            # downstream so callers see the same user_id shape as the
+            # ApiKey path. tenant_paths.safe_user_segment runs again
+            # inside tenant_data_root for actual fs routing.
+            return {
+                "auth_mode": "bearer",
+                "is_tenant_scoped": True,
+                "user_id": impersonation_header,
+                "scope": "",
+                "key_id": "bearer-impersonation",
+            }
+        return {"auth_mode": "bearer", "is_tenant_scoped": False}
+    if scheme == "apikey":
+        identity = validate_api_key(token)
+        if identity is None:
+            return None
+        return {
+            "auth_mode": "apikey",
+            "is_tenant_scoped": True,
+            "user_id": identity["user_id"],
+            "scope": identity["scope"],
+            "key_id": identity["key_id"],
+        }
+    return None