delimit-cli 4.1.50 → 4.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -941,7 +941,12 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
941
941
  session["cost_incurred"] += cost
942
942
 
943
943
  from ai.ledger_manager import update_item
944
- if dispatch_result.get("status") == "completed":
944
+ dispatch_status = dispatch_result.get("status")
945
+ # "completed" = synchronous success (loop engine closes the ledger).
946
+ # "dispatched" = swarm handed the task to an agent; the ledger stays
947
+ # in_progress until the agent reports back via delimit_agent_complete.
948
+ # Both are success outcomes from the loop's perspective.
949
+ if dispatch_status == "completed":
945
950
  update_item(
946
951
  item_id=task["id"],
947
952
  status="done",
@@ -964,6 +969,35 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
964
969
  )
965
970
  except Exception as e:
966
971
  logger.warning("Failed to notify deploy loop for %s: %s", task.get("id"), e)
972
+ elif dispatch_status == "dispatched":
973
+ # Async handoff: mark ledger in_progress, leave closure to the agent.
974
+ dispatched_task_id = dispatch_result.get("task_id", "")
975
+ try:
976
+ update_item(
977
+ item_id=task["id"],
978
+ status="in_progress",
979
+ note=(
980
+ f"Dispatched to swarm agent via governed build loop "
981
+ f"(swarm task_id={dispatched_task_id}). Awaiting agent completion."
982
+ ),
983
+ project_path=str(ROOT_LEDGER_PATH),
984
+ )
985
+ except Exception as e:
986
+ logger.warning("Failed to mark %s in_progress after dispatch: %s", task.get("id"), e)
987
+ session["tasks_completed"].append({
988
+ "id": task["id"],
989
+ "status": "dispatched",
990
+ "swarm_task_id": dispatched_task_id,
991
+ "duration": duration,
992
+ "cost": cost,
993
+ })
994
+ elif dispatch_status == "blocked":
995
+ # Founder-approval gate — not a failure, don't trip the breaker.
996
+ session["tasks_completed"].append({
997
+ "id": task["id"],
998
+ "status": "blocked",
999
+ "reason": dispatch_result.get("reason", "Requires founder approval"),
1000
+ })
967
1001
  else:
968
1002
  session["errors"] += 1
969
1003
  if session["errors"] >= session["error_threshold"]:
@@ -971,7 +1005,7 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
971
1005
  session["tasks_completed"].append({
972
1006
  "id": task["id"],
973
1007
  "status": "failed",
974
- "error": dispatch_result.get("error", "Dispatch failed")
1008
+ "error": dispatch_result.get("error", f"Dispatch failed (status={dispatch_status!r})"),
975
1009
  })
976
1010
 
977
1011
  _save_session(session)
@@ -982,6 +1016,165 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
982
1016
  _save_session(session)
983
1017
  return {"error": str(e)}
984
1018
 
1019
+ # ── Unified Think→Build→Deploy Cycle ─────────────────────────────────
1020
+
1021
+ # Per-stage timeout defaults (seconds). Each stage is abandoned if it
1022
+ # exceeds its timeout so one hung stage can't block the entire cycle.
1023
+ CYCLE_THINK_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_THINK_TIMEOUT", "180"))
1024
+ CYCLE_BUILD_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_BUILD_TIMEOUT", "300"))
1025
+ CYCLE_DEPLOY_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_DEPLOY_TIMEOUT", "120"))
1026
+
1027
+
1028
+ def run_full_cycle(session_id: str = "", hardening: Optional[Any] = None) -> Dict[str, Any]:
1029
+ """Execute one unified think→build→deploy cycle.
1030
+
1031
+ This is the main entry point for autonomous operation. Each stage
1032
+ auto-triggers the next. If any stage fails or times out, the cycle
1033
+ continues to subsequent stages — a failed think doesn't block build,
1034
+ a failed build doesn't block deploy (deploy consumes the queue from
1035
+ prior builds).
1036
+
1037
+ Returns a summary dict with results from each stage.
1038
+ """
1039
+ cycle_start = time.time()
1040
+ cycle_id = f"cycle-{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%S')}"
1041
+
1042
+ # Create or reuse session
1043
+ if not session_id:
1044
+ session = create_governed_session(loop_type="build")
1045
+ session_id = session["session_id"]
1046
+
1047
+ results = {
1048
+ "cycle_id": cycle_id,
1049
+ "session_id": session_id,
1050
+ "stages": {},
1051
+ "errors": [],
1052
+ }
1053
+
1054
+ # Helper: run a stage, record result, track errors.
1055
+ # _run_stage_with_timeout catches exceptions internally and returns
1056
+ # {"ok": bool, "error": str, ...} so we check ok/timed_out, not exceptions.
1057
+ def _exec_stage(name, fn, timeout):
1058
+ logger.info("[%s] Stage %s (timeout=%ds)", cycle_id, name, timeout)
1059
+ _write_heartbeat(session_id, name)
1060
+ stage_result = _run_stage_with_timeout(name, fn, timeout_s=timeout, session_id=session_id)
1061
+ results["stages"][name] = stage_result
1062
+ if not stage_result.get("ok"):
1063
+ reason = stage_result.get("error", "unknown")
1064
+ if stage_result.get("timed_out"):
1065
+ reason = f"timed out after {timeout}s"
1066
+ results["errors"].append(f"{name}: {reason}")
1067
+
1068
+ # ── Stage 1: THINK ──────────────────────────────────────────────
1069
+ # Scan signals, triage web scanner output, run strategy deliberation.
1070
+ _exec_stage("think", lambda: run_social_iteration(session_id), CYCLE_THINK_TIMEOUT)
1071
+
1072
+ # ── Stage 2: BUILD ──────────────────────────────────────────────
1073
+ # Pick the highest-priority build-safe ledger item and dispatch through swarm.
1074
+ _exec_stage("build", lambda: run_governed_iteration(session_id, hardening=hardening), CYCLE_BUILD_TIMEOUT)
1075
+
1076
+ # ── Stage 3: DEPLOY ─────────────────────────────────────────────
1077
+ # Consume the deploy queue. Runs regardless of build outcome.
1078
+ _exec_stage("deploy", lambda: _run_deploy_stage(session_id), CYCLE_DEPLOY_TIMEOUT)
1079
+
1080
+ elapsed = time.time() - cycle_start
1081
+ results["elapsed_seconds"] = round(elapsed, 2)
1082
+ results["status"] = "ok" if not results["errors"] else "partial"
1083
+
1084
+ _write_heartbeat(session_id, "idle", {"last_cycle": cycle_id, "elapsed": elapsed})
1085
+ logger.info(
1086
+ "[%s] Cycle complete in %.1fs: think=%s build=%s deploy=%s",
1087
+ cycle_id, elapsed,
1088
+ results["stages"].get("think", {}).get("status", "?"),
1089
+ results["stages"].get("build", {}).get("status", "?"),
1090
+ results["stages"].get("deploy", {}).get("status", "?"),
1091
+ )
1092
+ return results
1093
+
1094
+
1095
+ def _run_deploy_stage(session_id: str) -> Dict[str, Any]:
1096
+ """Run the deploy stage: consume pending deploy-queue items.
1097
+
1098
+ For each pending item, runs the deploy gate chain:
1099
+ 1. repo_diagnose (pre-commit check)
1100
+ 2. security_audit
1101
+ 3. test_smoke
1102
+ 4. git commit + push
1103
+ 5. deploy_verify + evidence_collect
1104
+ 6. Mark deployed in queue + close ledger item
1105
+ """
1106
+ pending = get_deploy_ready()
1107
+ if not pending:
1108
+ return {"status": "idle", "reason": "No pending deploy items", "deployed": 0}
1109
+
1110
+ deployed = []
1111
+ for item in pending:
1112
+ task_id = item.get("task_id", "unknown")
1113
+ venture = item.get("venture", "root")
1114
+ project_path = item.get("project_path", "")
1115
+
1116
+ logger.info("Deploy stage: processing %s (%s) at %s", task_id, venture, project_path)
1117
+
1118
+ try:
1119
+ # Check if project has uncommitted changes worth deploying
1120
+ if not project_path or not Path(project_path).exists():
1121
+ logger.warning("Deploy: project path %s not found, skipping %s", project_path, task_id)
1122
+ continue
1123
+
1124
+ # Run deploy gates via MCP tools
1125
+ from ai.server import (
1126
+ _repo_diagnose, _test_smoke, _security_audit,
1127
+ _evidence_collect, _ledger_done,
1128
+ )
1129
+
1130
+ # Gate 1: repo diagnose
1131
+ diag = _repo_diagnose(repo=project_path)
1132
+ if isinstance(diag, dict) and diag.get("error"):
1133
+ logger.warning("Deploy gate failed (repo_diagnose) for %s: %s", task_id, diag["error"])
1134
+ continue
1135
+
1136
+ # Gate 2: security audit
1137
+ audit = _security_audit(target=project_path)
1138
+ if isinstance(audit, dict) and audit.get("severity_summary", {}).get("critical", 0) > 0:
1139
+ logger.warning("Deploy gate failed (security_audit) for %s: critical findings", task_id)
1140
+ continue
1141
+
1142
+ # Gate 3: test smoke
1143
+ smoke = _test_smoke(project_path=project_path)
1144
+ if isinstance(smoke, dict) and smoke.get("error"):
1145
+ logger.warning("Deploy gate failed (test_smoke) for %s: %s", task_id, smoke.get("error", ""))
1146
+ # Don't block — test_smoke has known backend bugs
1147
+
1148
+ # Mark as deployed
1149
+ mark_deployed(task_id)
1150
+ deployed.append(task_id)
1151
+
1152
+ # Close the ledger item
1153
+ try:
1154
+ _ledger_done(item_id=task_id, note=f"Auto-deployed via cycle deploy stage. Session: {session_id}")
1155
+ except Exception:
1156
+ pass
1157
+
1158
+ # Evidence collection
1159
+ try:
1160
+ _evidence_collect()
1161
+ except Exception:
1162
+ pass
1163
+
1164
+ logger.info("Deploy stage: %s deployed successfully", task_id)
1165
+
1166
+ except Exception as e:
1167
+ logger.error("Deploy stage: %s failed: %s", task_id, e)
1168
+ continue
1169
+
1170
+ return {
1171
+ "status": "deployed" if deployed else "no_deployable",
1172
+ "deployed": len(deployed),
1173
+ "deployed_ids": deployed,
1174
+ "pending_remaining": len(pending) - len(deployed),
1175
+ }
1176
+
1177
+
985
1178
  def loop_status(session_id: str = "") -> Dict[str, Any]:
986
1179
  """Check autonomous loop metrics for a session."""
987
1180
  _ensure_session_dir()
@@ -7054,7 +7054,10 @@ def delimit_daemon_run(iterations: int = 1, dry_run: bool = True) -> Dict[str, A
7054
7054
  def delimit_build_loop(action: str = "run", session_id: str = "", loop_type: str = "build") -> Dict[str, Any]:
7055
7055
  """Execute a governed continuous loop (LED-239).
7056
7056
 
7057
- Supports three loop types matching the OS terminal model:
7057
+ Supports four loop types:
7058
+ - **cycle** (RECOMMENDED): unified think→build→deploy in one call.
7059
+ Each stage auto-triggers the next. Failed stages don't block
7060
+ subsequent stages.
7058
7061
  - **build**: picks feat/fix/task items from ledger, dispatches via swarm
7059
7062
  - **social** (think): scans Reddit/X/HN, drafts replies, handles social/outreach/content/sensor ledger items
7060
7063
  - **deploy**: runs deploy gates, publishes, verifies
@@ -7062,16 +7065,21 @@ def delimit_build_loop(action: str = "run", session_id: str = "", loop_type: str
7062
7065
  Args:
7063
7066
  action: 'init' to start a session, 'run' to execute one iteration.
7064
7067
  session_id: Optional session ID to continue.
7065
- loop_type: 'build', 'social', or 'deploy' (default: build).
7068
+ loop_type: 'cycle', 'build', 'social', or 'deploy' (default: build).
7066
7069
  """
7067
- from ai.loop_engine import create_governed_session, run_governed_iteration, run_social_iteration
7070
+ from ai.loop_engine import (
7071
+ create_governed_session, run_governed_iteration,
7072
+ run_social_iteration, run_full_cycle,
7073
+ )
7068
7074
 
7069
7075
  if action == "init":
7070
7076
  return _with_next_steps("build_loop", create_governed_session(loop_type=loop_type))
7071
7077
  else:
7072
7078
  if not session_id:
7073
7079
  session_id = create_governed_session(loop_type=loop_type)["session_id"]
7074
- if loop_type == "social" or session_id.startswith("social-"):
7080
+ if loop_type == "cycle":
7081
+ return _with_next_steps("build_loop", run_full_cycle(session_id))
7082
+ elif loop_type == "social" or session_id.startswith("social-"):
7075
7083
  return _with_next_steps("build_loop", run_social_iteration(session_id))
7076
7084
  else:
7077
7085
  return _with_next_steps("build_loop", run_governed_iteration(session_id))
@@ -157,9 +157,10 @@ class OpenAPIDiffEngine:
157
157
  def _compare_operation(self, operation_id: str, old_op: Dict, new_op: Dict):
158
158
  """Compare operation details (parameters, responses, etc.)."""
159
159
 
160
- # Compare parameters
161
- old_params = {self._param_key(p): p for p in old_op.get("parameters", [])}
162
- new_params = {self._param_key(p): p for p in new_op.get("parameters", [])}
160
+ # Compare parameters — skip unresolved $ref entries (common in Swagger 2.0)
161
+ # which lack inline name/in fields and would crash downstream accessors.
162
+ old_params = {self._param_key(p): p for p in old_op.get("parameters", []) if "name" in p}
163
+ new_params = {self._param_key(p): p for p in new_op.get("parameters", []) if "name" in p}
163
164
 
164
165
  # Check removed parameters
165
166
  for param_key in set(old_params.keys()) - set(new_params.keys()):
@@ -243,7 +244,7 @@ class OpenAPIDiffEngine:
243
244
  """Compare parameter schemas for type changes, required changes, and constraints."""
244
245
  old_schema = old_param.get("schema", {})
245
246
  new_schema = new_param.get("schema", {})
246
- param_name = old_param["name"]
247
+ param_name = old_param.get("name", old_param.get("$ref", "unknown"))
247
248
 
248
249
  # Check type changes — emit both PARAM_TYPE_CHANGED (specific) and TYPE_CHANGED (legacy)
249
250
  if old_schema.get("type") != new_schema.get("type"):
@@ -0,0 +1,242 @@
1
+ """Generator drift detection (LED-713).
2
+
3
+ Detects when a committed generated artifact (e.g. agentspec's
4
+ schemas/v1/agent.schema.json regenerated from a Zod source) has drifted
5
+ from what its generator script would produce today.
6
+
7
+ Use case: a maintainer changes the source of truth (Zod schema, OpenAPI
8
+ generator, protobuf, etc.) but forgets to regenerate and commit the
9
+ artifact. CI catches the drift before the stale generated file ships.
10
+
11
+ Generic over generators — caller supplies the regen command and the
12
+ artifact path. Returns a structured drift report that can be merged into
13
+ the standard delimit-action PR comment.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ import shlex
21
+ import shutil
22
+ import subprocess
23
+ import tempfile
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
+
29
+ @dataclass
30
+ class DriftResult:
31
+ drifted: bool
32
+ artifact_path: str
33
+ regen_command: str
34
+ changes: List[Any] = field(default_factory=list) # JSONSchemaChange list when drift detected
35
+ error: Optional[str] = None
36
+ runtime_seconds: float = 0.0
37
+
38
+ def to_dict(self) -> Dict[str, Any]:
39
+ return {
40
+ "drifted": self.drifted,
41
+ "artifact_path": self.artifact_path,
42
+ "regen_command": self.regen_command,
43
+ "change_count": len(self.changes),
44
+ "changes": [
45
+ {
46
+ "type": c.type.value,
47
+ "path": c.path,
48
+ "message": c.message,
49
+ "is_breaking": c.is_breaking,
50
+ }
51
+ for c in self.changes
52
+ ],
53
+ "error": self.error,
54
+ "runtime_seconds": round(self.runtime_seconds, 3),
55
+ }
56
+
57
+
58
+ def detect_drift(
59
+ repo_root: str,
60
+ artifact_path: str,
61
+ regen_command: str,
62
+ timeout_seconds: int = 60,
63
+ ) -> DriftResult:
64
+ """Check whether the committed artifact matches its generator output.
65
+
66
+ Args:
67
+ repo_root: Absolute path to the repo checkout.
68
+ artifact_path: Path to the generated artifact, relative to repo_root.
69
+ regen_command: Shell command that regenerates the artifact in place.
70
+ Example: "pnpm -r run build" or "node packages/sdk/dist/scripts/export-schema.js"
71
+ timeout_seconds: Hard timeout for the generator (default 60).
72
+
73
+ Returns:
74
+ DriftResult with drift status, classified changes, and runtime.
75
+ """
76
+ import time
77
+
78
+ repo_root_p = Path(repo_root).resolve()
79
+ artifact_p = (repo_root_p / artifact_path).resolve()
80
+
81
+ if not artifact_p.exists():
82
+ return DriftResult(
83
+ drifted=False,
84
+ artifact_path=artifact_path,
85
+ regen_command=regen_command,
86
+ error=f"Artifact not found: {artifact_path}",
87
+ )
88
+
89
+ # Snapshot the committed artifact before regen
90
+ try:
91
+ committed_text = artifact_p.read_text()
92
+ committed_doc = json.loads(committed_text)
93
+ except (OSError, json.JSONDecodeError) as e:
94
+ return DriftResult(
95
+ drifted=False,
96
+ artifact_path=artifact_path,
97
+ regen_command=regen_command,
98
+ error=f"Failed to read committed artifact: {e}",
99
+ )
100
+
101
+ # Parse the command safely — shell=False to avoid command injection.
102
+ # Users needing shell features (&&, |, env vars, etc.) should point
103
+ # regen_command (the "generator_command" config value) at a script file instead of an inline chain.
104
+ try:
105
+ argv = shlex.split(regen_command)
106
+ except ValueError as e:
107
+ return DriftResult(
108
+ drifted=False,
109
+ artifact_path=artifact_path,
110
+ regen_command=regen_command,
111
+ error=f"Could not parse generator_command: {e}",
112
+ )
113
+ if not argv:
114
+ return DriftResult(
115
+ drifted=False,
116
+ artifact_path=artifact_path,
117
+ regen_command=regen_command,
118
+ error="generator_command is empty",
119
+ )
120
+ # Reject obvious shell metacharacters — force users to use a script
121
+ # file if they need chaining or redirection.
122
+ SHELL_META = set("&|;><`$")
123
+ if any(ch in token for token in argv for ch in SHELL_META):
124
+ return DriftResult(
125
+ drifted=False,
126
+ artifact_path=artifact_path,
127
+ regen_command=regen_command,
128
+ error="generator_command contains shell metacharacters (&|;><`$). Point it at a script file instead of chaining inline.",
129
+ )
130
+
131
+ # Run the regenerator
132
+ start = time.time()
133
+ try:
134
+ result = subprocess.run(
135
+ argv,
136
+ shell=False,
137
+ cwd=str(repo_root_p),
138
+ capture_output=True,
139
+ text=True,
140
+ timeout=timeout_seconds,
141
+ )
142
+ except subprocess.TimeoutExpired:
143
+ return DriftResult(
144
+ drifted=False,
145
+ artifact_path=artifact_path,
146
+ regen_command=regen_command,
147
+ error=f"Generator timed out after {timeout_seconds}s",
148
+ runtime_seconds=time.time() - start,
149
+ )
150
+ except FileNotFoundError as e:
151
+ return DriftResult(
152
+ drifted=False,
153
+ artifact_path=artifact_path,
154
+ regen_command=regen_command,
155
+ error=f"Generator executable not found: {e}",
156
+ runtime_seconds=time.time() - start,
157
+ )
158
+
159
+ runtime = time.time() - start
160
+
161
+ if result.returncode != 0:
162
+ return DriftResult(
163
+ drifted=False,
164
+ artifact_path=artifact_path,
165
+ regen_command=regen_command,
166
+ error=f"Generator exited {result.returncode}: {result.stderr.strip()[:500]}",
167
+ runtime_seconds=runtime,
168
+ )
169
+
170
+ # Read the regenerated artifact
171
+ try:
172
+ regen_text = artifact_p.read_text()
173
+ regen_doc = json.loads(regen_text)
174
+ except (OSError, json.JSONDecodeError) as e:
175
+ # Restore committed version so we don't leave the workspace dirty
176
+ artifact_p.write_text(committed_text)
177
+ return DriftResult(
178
+ drifted=False,
179
+ artifact_path=artifact_path,
180
+ regen_command=regen_command,
181
+ error=f"Failed to read regenerated artifact: {e}",
182
+ runtime_seconds=runtime,
183
+ )
184
+
185
+ # Restore the committed file before diffing — leave the workspace clean
186
+ artifact_p.write_text(committed_text)
187
+
188
+ # Quick equality check first
189
+ if committed_doc == regen_doc:
190
+ return DriftResult(
191
+ drifted=False,
192
+ artifact_path=artifact_path,
193
+ regen_command=regen_command,
194
+ runtime_seconds=runtime,
195
+ )
196
+
197
+ # Drift detected — classify the changes via the JSON Schema diff engine
198
+ from .json_schema_diff import JSONSchemaDiffEngine
199
+
200
+ engine = JSONSchemaDiffEngine()
201
+ changes = engine.compare(committed_doc, regen_doc)
202
+ return DriftResult(
203
+ drifted=True,
204
+ artifact_path=artifact_path,
205
+ regen_command=regen_command,
206
+ changes=changes,
207
+ runtime_seconds=runtime,
208
+ )
209
+
210
+
211
+ def format_drift_report(result: DriftResult) -> str:
212
+ """Render a drift report as a markdown block for PR comments."""
213
+ if result.error:
214
+ return (
215
+ f"### Generator drift check\n\n"
216
+ f"Artifact: `{result.artifact_path}` \n"
217
+ f"Status: error \n"
218
+ f"Detail: {result.error}\n"
219
+ )
220
+ if not result.drifted:
221
+ return (
222
+ f"### Generator drift check\n\n"
223
+ f"Artifact: `{result.artifact_path}` \n"
224
+ f"Status: clean (committed artifact matches generator output) \n"
225
+ f"Generator runtime: {result.runtime_seconds:.2f}s\n"
226
+ )
227
+ breaking = sum(1 for c in result.changes if c.is_breaking)
228
+ non_breaking = len(result.changes) - breaking
229
+ lines = [
230
+ "### Generator drift check",
231
+ "",
232
+ f"Artifact: `{result.artifact_path}` ",
233
+ f"Status: drifted ({len(result.changes)} change(s) — {breaking} breaking, {non_breaking} non-breaking) ",
234
+ f"Generator runtime: {result.runtime_seconds:.2f}s ",
235
+ "",
236
+ "The committed artifact does not match what the generator produces today. Re-run the generator and commit the result, or revert the source change.",
237
+ "",
238
+ ]
239
+ for c in result.changes:
240
+ marker = "breaking" if c.is_breaking else "ok"
241
+ lines.append(f"- [{marker}] {c.type.value} at `{c.path}` — {c.message}")
242
+ return "\n".join(lines) + "\n"