npm - delimit-cli - Versions diffs - 4.5.7 → 4.5.8 - Mend

delimit-cli 4.5.7 → 4.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/gateway/ai/backends/gateway_core.py +62 -1
package/gateway/ai/led193_daemon/__init__.py +61 -0
package/gateway/ai/led193_daemon/audit.py +174 -0
package/gateway/ai/led193_daemon/cost.py +133 -0
package/gateway/ai/led193_daemon/executor.py +683 -0
package/gateway/ai/led193_daemon/gate.py +300 -0
package/gateway/ai/led193_daemon/pause.py +83 -0
package/gateway/ai/led193_daemon/picker.py +236 -0
package/gateway/ai/social_capability/current_capabilities.yaml +1 -0
package/gateway/ai/workers/executor.py +18 -9
package/package.json +1 -1

package/gateway/ai/backends/gateway_core.py CHANGED Viewed

@@ -8,6 +8,8 @@ Adapter Boundary Contract v1.0:
 - No schema forking (gateway types are canonical)
 """
+import os
+import re
 import sys
 import json
 import logging
@@ -16,6 +18,58 @@ from typing import Any, Dict, List, Optional
 logger = logging.getLogger("delimit.ai.gateway_core")
+# LED-1265: identity-string redaction filter for changelog output. Patterns are
+# loaded from the DELIMIT_CHANGELOG_REDACT_PATTERNS env var (semicolon-separated
+# regex|replacement pairs, e.g. "FOO|[redacted];BAR|[redacted]"), NOT hardcoded.
+# Why env-var-driven: hardcoding the patterns inline would itself constitute the
+# leak the filter exists to prevent — the patterns must match the strings being
+# redacted, so committing them to source recreates the leak. When the env var is
+# unset (customer machines), this is a no-op pass-through. The internal gateway
+# sets the env var at process start with the exact patterns to scrub before any
+# auto-generated changelog reaches a public surface.
+def _load_redaction_patterns() -> List[tuple]:
+    """Parse DELIMIT_CHANGELOG_REDACT_PATTERNS env var into compiled patterns.
+    Format: semicolon-separated `regex|replacement` pairs. Empty / unset returns [].
+    Invalid regexes are warned and skipped (filter is fail-open: better to skip a
+    bad pattern than block all output).
+    """
+    raw = os.environ.get("DELIMIT_CHANGELOG_REDACT_PATTERNS", "").strip()
+    if not raw:
+        return []
+    patterns = []
+    for entry in raw.split(";"):
+        entry = entry.strip()
+        if not entry or "|" not in entry:
+            continue
+        pat, _, repl = entry.partition("|")
+        try:
+            patterns.append((re.compile(pat), repl))
+        except re.error as exc:
+            logger.warning("ignoring invalid changelog redaction pattern %r: %s", pat, exc)
+    return patterns
+_IDENTITY_STRING_PATTERNS = _load_redaction_patterns()
+def _redact_identity_strings(text: str) -> str:
+    """Apply env-var-loaded redaction patterns to text. No-op when env var unset.
+    Defensive filter for changelog generation: immutable commit messages on
+    public repos may contain identity strings the auto-generated CHANGELOG would
+    re-leak to a fresh public surface. The filter is opt-in per gateway
+    deployment via env var; customers don't need to set it.
+    """
+    if not text or not _IDENTITY_STRING_PATTERNS:
+        return text
+    for pattern, replacement in _IDENTITY_STRING_PATTERNS:
+        text = pattern.sub(replacement, text)
+    return text
 # Add gateway root to path so we can import core modules
 GATEWAY_ROOT = Path(__file__).resolve().parent.parent.parent
 if str(GATEWAY_ROOT) not in sys.path:
@@ -582,6 +636,11 @@ def run_changelog_from_git(
                     ctype = cat
                     break
+        # LED-1265: redact founder-holdco identity strings from the commit
+        # subject (msg) and author before they enter any output path.
+        msg = _redact_identity_strings(msg)
+        author = _redact_identity_strings(author)
         bucket = ctype if ctype in categories else "other"
         entry = {"sha": sha[:8], "message": msg, "author": author, "category": bucket}
         categories[bucket].append(entry)
@@ -641,9 +700,11 @@ def run_changelog_from_git(
                                     continue
                             except (ValueError, TypeError):
                                 pass  # If parsing fails, include the item
+                        # LED-1265: ledger titles may contain identity strings
+                        # (e.g. LED items filed before the doctrine bound).
                         ledger_items.append({
                             "id": item_id,
-                            "title": item.get("title", ""),
+                            "title": _redact_identity_strings(item.get("title", "")),
                             "priority": item.get("priority", ""),
                         })
         except Exception:

package/gateway/ai/led193_daemon/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""LED-193 autonomous daemon (MVP).
+Cron-spawn, stateless, append-only audit. Picks ledger items tagged
+``auto_execute=class_a:<profile>`` and executes a deterministic profile
+(``format_fix``, ``lockfile_refresh``, ``docs_typo``) on a feature branch.
+NEVER merges. Opens a PR for human review only after local pre-push gates
+pass (security_audit + test_smoke + lint when applicable).
+Panel decision (UNANIMOUS, 2026-05-07):
+    `/home/delimit/delimit-private/deliberations/2026-05-07-led-193-autonomous-daemon-shape.md`
+Design mirrors the cron pattern of its sibling, LED-1264 ``scan_bridge``:
+    - cron-spawn (no long-running process)
+    - lockfile concurrency=1
+    - append-only audit log
+    - kill switch via env var
+    - circuit breakers (consecutive failures, daily caps)
+Public entry points:
+- :func:`picker.pick_next_item`      — ledger-item selection
+- :func:`executor.execute_item`      — profile dispatch
+- :func:`gate.run_pre_push_gate`     — local pre-push validation
+- :func:`audit.log_execution`        — append-only execution log
+- :func:`pause.is_paused` / :func:`pause.pause` / :func:`pause.clear`
+- :func:`cost.check_caps` / :func:`cost.record_run`
+The cron entry is :mod:`scripts.led193_cron`. Founder applies the
+crontab line manually after review (NOT auto-installed).
+"""
+from ai.led193_daemon.audit import log_execution
+from ai.led193_daemon.cost import check_caps, record_run
+from ai.led193_daemon.executor import execute_item
+from ai.led193_daemon.gate import run_pre_push_gate
+from ai.led193_daemon.pause import clear as clear_pause
+from ai.led193_daemon.pause import is_paused, pause as pause_daemon
+from ai.led193_daemon.picker import pick_next_item
+# Re-export the submodules so callers can do
+# ``from ai.led193_daemon import audit, cost, executor, gate, pause, picker``
+# without the function-named exports above shadowing the ``pause`` module.
+from ai.led193_daemon import audit, cost, executor, gate, pause, picker  # noqa: E402,F401
+__all__ = [
+    "audit",
+    "check_caps",
+    "clear_pause",
+    "cost",
+    "execute_item",
+    "executor",
+    "gate",
+    "is_paused",
+    "log_execution",
+    "pause",
+    "pause_daemon",
+    "pick_next_item",
+    "picker",
+    "record_run",
+    "run_pre_push_gate",
+]

package/gateway/ai/led193_daemon/audit.py ADDED Viewed

@@ -0,0 +1,174 @@
+"""LED-193 append-only execution audit log.
+Every pickup attempt logs one JSON line — success or failure — so
+incidents can be replayed against the daemon's actual behaviour.
+Schema:
+    {
+        "ts": ISO8601 UTC,
+        "item_id": str,
+        "profile": "format_fix" | "lockfile_refresh" | "docs_typo" | "",
+        "branch": str | "",          # auto/{profile}-{item_id}-{short_hash}
+        "pr_url": str | "",          # populated only on success
+        "result": "success" | "failed" | "noop" | "skipped" | "ci_failed_after_open",
+        "reason": str,               # human-readable detail
+        "cost_estimate": float,      # USD; 0.0 for deterministic profiles
+        "files_changed": int,
+        # optional, populated when known:
+        "elapsed_s": float,
+        "gate_results": dict,
+    }
+The log is append-only — never rewritten. Path:
+    ``~/.delimit/led193_executions.jsonl``
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Optional
+logger = logging.getLogger("delimit.ai.led193_daemon.audit")
+AUDIT_LOG = Path.home() / ".delimit" / "led193_executions.jsonl"
+VALID_RESULTS = {
+    "success",
+    "failed",
+    "noop",
+    "skipped",
+    "ci_failed_after_open",
+}
+def log_execution(
+    *,
+    item_id: str,
+    profile: str = "",
+    branch: str = "",
+    pr_url: str = "",
+    result: str = "failed",
+    reason: str = "",
+    cost_estimate: float = 0.0,
+    files_changed: int = 0,
+    elapsed_s: Optional[float] = None,
+    gate_results: Optional[Dict[str, Any]] = None,
+    audit_log_path: Optional[Path] = None,
+) -> Dict[str, Any]:
+    """Write one append-only audit line.
+    Returns the record actually written (useful for tests + the cron
+    summary). Best-effort — a write failure logs a warning but never
+    raises (the daemon must not crash on disk-full).
+    """
+    if result not in VALID_RESULTS:
+        # Don't reject — coerce to "failed" with a clarifying reason so
+        # we never silently drop an audit row over a typo in caller code.
+        reason = f"invalid_result={result!r}; original_reason={reason!r}"
+        result = "failed"
+    record: Dict[str, Any] = {
+        "ts": datetime.now(timezone.utc).isoformat(),
+        "item_id": item_id,
+        "profile": profile,
+        "branch": branch,
+        "pr_url": pr_url,
+        "result": result,
+        "reason": reason,
+        "cost_estimate": float(cost_estimate),
+        "files_changed": int(files_changed),
+    }
+    if elapsed_s is not None:
+        record["elapsed_s"] = round(float(elapsed_s), 3)
+    if gate_results is not None:
+        record["gate_results"] = gate_results
+    target = audit_log_path or AUDIT_LOG
+    try:
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with target.open("a", encoding="utf-8") as fh:
+            fh.write(json.dumps(record, ensure_ascii=False) + "\n")
+    except OSError as exc:  # pragma: no cover — best-effort
+        logger.warning("led193_daemon: failed to write audit log %s: %s", target, exc)
+    return record
+def recent_results(
+    *,
+    audit_log_path: Optional[Path] = None,
+    limit: int = 100,
+) -> list:
+    """Read the most recent N records from the audit log (newest first).
+    Used by the consecutive-failures circuit breaker and by the cron
+    summary. Returns ``[]`` when the file doesn't exist or is empty.
+    """
+    target = audit_log_path or AUDIT_LOG
+    if not target.exists():
+        return []
+    try:
+        lines = target.read_text(encoding="utf-8").splitlines()
+    except OSError:
+        return []
+    out = []
+    for line in reversed(lines):
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            out.append(json.loads(line))
+        except (json.JSONDecodeError, ValueError):
+            continue
+        if len(out) >= limit:
+            break
+    return out
+def consecutive_failures(
+    *,
+    audit_log_path: Optional[Path] = None,
+) -> int:
+    """Count CONSECUTIVE failures from the most recent record backward.
+    Stops at the first non-failure (success/noop/skipped). Used by the
+    3-strike circuit breaker. ``ci_failed_after_open`` counts as a
+    failure for breaker purposes — if the daemon keeps opening PRs that
+    break CI, that's a signal to pause.
+    """
+    failures = 0
+    for rec in recent_results(audit_log_path=audit_log_path, limit=20):
+        if rec.get("result") in ("failed", "ci_failed_after_open"):
+            failures += 1
+            continue
+        break
+    return failures
+def prs_opened_today(
+    *,
+    audit_log_path: Optional[Path] = None,
+    now: Optional[datetime] = None,
+) -> int:
+    """Count successful PRs opened in the last 24 hours.
+    Used by the action-volume circuit breaker (max 5 PRs / day).
+    """
+    now = now or datetime.now(timezone.utc)
+    cutoff = now.timestamp() - 86400.0
+    n = 0
+    for rec in recent_results(audit_log_path=audit_log_path, limit=200):
+        if rec.get("result") != "success" or not rec.get("pr_url"):
+            continue
+        ts = rec.get("ts") or ""
+        try:
+            rec_dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
+            if rec_dt.timestamp() >= cutoff:
+                n += 1
+        except (ValueError, TypeError):
+            continue
+    return n

package/gateway/ai/led193_daemon/cost.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""LED-193 cost tracking + circuit breakers.
+MVP profiles are deterministic (no LLM): cost is always 0.0. The cost
+infrastructure exists ahead of time so when Class C ``bounded_patch``
+graduates, the breakers are wired and unit-tested rather than bolted
+on under pressure.
+Hard caps (panel-locked):
+    - Per-item LLM cost: $2  (DELIMIT_LED193_PER_ITEM_USD override)
+    - Daily LLM cost:    $10 (DELIMIT_LED193_DAILY_USD override)
+Daily window = trailing 24h, summed from the audit log
+(``cost_estimate`` field). Per-item is enforced by callers BEFORE
+incurring the cost — exceeded → return ``CapTriggered`` and the executor
+short-circuits.
+"""
+from __future__ import annotations
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+from ai.led193_daemon.audit import recent_results
+DEFAULT_PER_ITEM_USD = 2.00
+DEFAULT_DAILY_USD = 10.00
+def per_item_cap() -> float:
+    raw = os.environ.get("DELIMIT_LED193_PER_ITEM_USD", "")
+    if raw:
+        try:
+            v = float(raw)
+            if v >= 0:
+                return v
+        except (TypeError, ValueError):
+            pass
+    return DEFAULT_PER_ITEM_USD
+def daily_cap() -> float:
+    raw = os.environ.get("DELIMIT_LED193_DAILY_USD", "")
+    if raw:
+        try:
+            v = float(raw)
+            if v >= 0:
+                return v
+        except (TypeError, ValueError):
+            pass
+    return DEFAULT_DAILY_USD
+def daily_spend(
+    *,
+    audit_log_path: Optional[Path] = None,
+    now: Optional[datetime] = None,
+) -> float:
+    """Sum of cost_estimate across audit records in the last 24h."""
+    now = now or datetime.now(timezone.utc)
+    cutoff = now.timestamp() - 86400.0
+    total = 0.0
+    for rec in recent_results(audit_log_path=audit_log_path, limit=500):
+        ts = rec.get("ts") or ""
+        try:
+            rec_dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
+        except (ValueError, TypeError):
+            continue
+        if rec_dt.timestamp() < cutoff:
+            continue
+        try:
+            total += float(rec.get("cost_estimate") or 0.0)
+        except (TypeError, ValueError):
+            continue
+    return total
+def check_caps(
+    *,
+    estimated_cost: float = 0.0,
+    audit_log_path: Optional[Path] = None,
+    now: Optional[datetime] = None,
+) -> dict:
+    """Return ``{"ok": bool, "reason": str, ...}``.
+    Caller passes ``estimated_cost`` for the proposed item; we check
+    BOTH the per-item cap AND the projected daily total. Deterministic
+    profiles pass ``estimated_cost=0.0`` and always return ``ok=True``
+    unless the daily cap is already breached (which would only happen
+    under a misconfigured override).
+    """
+    per_cap = per_item_cap()
+    if estimated_cost > per_cap:
+        return {
+            "ok": False,
+            "reason": f"per_item_cap_exceeded: ${estimated_cost:.2f} > ${per_cap:.2f}",
+            "estimated_cost": estimated_cost,
+            "per_item_cap": per_cap,
+        }
+    spent = daily_spend(audit_log_path=audit_log_path, now=now)
+    d_cap = daily_cap()
+    projected = spent + estimated_cost
+    if projected > d_cap:
+        return {
+            "ok": False,
+            "reason": f"daily_cap_exceeded: ${spent:.2f} + ${estimated_cost:.2f} > ${d_cap:.2f}",
+            "daily_spend": spent,
+            "daily_cap": d_cap,
+            "estimated_cost": estimated_cost,
+        }
+    return {
+        "ok": True,
+        "reason": "",
+        "daily_spend": spent,
+        "daily_cap": d_cap,
+        "per_item_cap": per_cap,
+        "estimated_cost": estimated_cost,
+    }
+def record_run(actual_cost: float) -> float:
+    """Pass-through for callers that want to declare an actual cost.
+    The actual cost lands in the audit log via the ``cost_estimate``
+    field on the record. This helper exists so executor call-sites read
+    consistently. Returns the validated, clamped cost.
+    """
+    try:
+        v = float(actual_cost)
+    except (TypeError, ValueError):
+        return 0.0
+    return max(0.0, v)