delimit-cli 4.1.50 → 4.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -941,7 +941,12 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
941
941
  session["cost_incurred"] += cost
942
942
 
943
943
  from ai.ledger_manager import update_item
944
- if dispatch_result.get("status") == "completed":
944
+ dispatch_status = dispatch_result.get("status")
945
+ # "completed" = synchronous success (loop engine closes the ledger).
946
+ # "dispatched" = swarm handed the task to an agent; the ledger stays
947
+ # in_progress until the agent reports back via delimit_agent_complete.
948
+ # Both are success outcomes from the loop's perspective.
949
+ if dispatch_status == "completed":
945
950
  update_item(
946
951
  item_id=task["id"],
947
952
  status="done",
@@ -964,6 +969,35 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
964
969
  )
965
970
  except Exception as e:
966
971
  logger.warning("Failed to notify deploy loop for %s: %s", task.get("id"), e)
972
+ elif dispatch_status == "dispatched":
973
+ # Async handoff: mark ledger in_progress, leave closure to the agent.
974
+ dispatched_task_id = dispatch_result.get("task_id", "")
975
+ try:
976
+ update_item(
977
+ item_id=task["id"],
978
+ status="in_progress",
979
+ note=(
980
+ f"Dispatched to swarm agent via governed build loop "
981
+ f"(swarm task_id={dispatched_task_id}). Awaiting agent completion."
982
+ ),
983
+ project_path=str(ROOT_LEDGER_PATH),
984
+ )
985
+ except Exception as e:
986
+ logger.warning("Failed to mark %s in_progress after dispatch: %s", task.get("id"), e)
987
+ session["tasks_completed"].append({
988
+ "id": task["id"],
989
+ "status": "dispatched",
990
+ "swarm_task_id": dispatched_task_id,
991
+ "duration": duration,
992
+ "cost": cost,
993
+ })
994
+ elif dispatch_status == "blocked":
995
+ # Founder-approval gate — not a failure, don't trip the breaker.
996
+ session["tasks_completed"].append({
997
+ "id": task["id"],
998
+ "status": "blocked",
999
+ "reason": dispatch_result.get("reason", "Requires founder approval"),
1000
+ })
967
1001
  else:
968
1002
  session["errors"] += 1
969
1003
  if session["errors"] >= session["error_threshold"]:
@@ -971,7 +1005,7 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
971
1005
  session["tasks_completed"].append({
972
1006
  "id": task["id"],
973
1007
  "status": "failed",
974
- "error": dispatch_result.get("error", "Dispatch failed")
1008
+ "error": dispatch_result.get("error", f"Dispatch failed (status={dispatch_status!r})"),
975
1009
  })
976
1010
 
977
1011
  _save_session(session)
@@ -982,6 +1016,165 @@ def run_governed_iteration(session_id: str, hardening: Optional[Any] = None) ->
982
1016
  _save_session(session)
983
1017
  return {"error": str(e)}
984
1018
 
1019
+ # ── Unified Think→Build→Deploy Cycle ─────────────────────────────────
1020
+
1021
+ # Per-stage timeout defaults (seconds). Each stage is abandoned if it
1022
+ # exceeds its timeout so one hung stage can't block the entire cycle.
1023
+ CYCLE_THINK_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_THINK_TIMEOUT", "180"))
1024
+ CYCLE_BUILD_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_BUILD_TIMEOUT", "300"))
1025
+ CYCLE_DEPLOY_TIMEOUT = int(os.environ.get("DELIMIT_CYCLE_DEPLOY_TIMEOUT", "120"))
1026
+
1027
+
1028
+ def run_full_cycle(session_id: str = "", hardening: Optional[Any] = None) -> Dict[str, Any]:
1029
+ """Execute one unified think→build→deploy cycle.
1030
+
1031
+ This is the main entry point for autonomous operation. Each stage
1032
+ auto-triggers the next. If any stage fails or times out, the cycle
1033
+ continues to subsequent stages — a failed think doesn't block build,
1034
+ a failed build doesn't block deploy (deploy consumes the queue from
1035
+ prior builds).
1036
+
1037
+ Returns a summary dict with results from each stage.
1038
+ """
1039
+ cycle_start = time.time()
1040
+ cycle_id = f"cycle-{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%S')}"
1041
+
1042
+ # Create or reuse session
1043
+ if not session_id:
1044
+ session = create_governed_session(loop_type="build")
1045
+ session_id = session["session_id"]
1046
+
1047
+ results = {
1048
+ "cycle_id": cycle_id,
1049
+ "session_id": session_id,
1050
+ "stages": {},
1051
+ "errors": [],
1052
+ }
1053
+
1054
+ # Helper: run a stage, record result, track errors.
1055
+ # _run_stage_with_timeout catches exceptions internally and returns
1056
+ # {"ok": bool, "error": str, ...} so we check ok/timed_out, not exceptions.
1057
+ def _exec_stage(name, fn, timeout):
1058
+ logger.info("[%s] Stage %s (timeout=%ds)", cycle_id, name, timeout)
1059
+ _write_heartbeat(session_id, name)
1060
+ stage_result = _run_stage_with_timeout(name, fn, timeout_s=timeout, session_id=session_id)
1061
+ results["stages"][name] = stage_result
1062
+ if not stage_result.get("ok"):
1063
+ reason = stage_result.get("error", "unknown")
1064
+ if stage_result.get("timed_out"):
1065
+ reason = f"timed out after {timeout}s"
1066
+ results["errors"].append(f"{name}: {reason}")
1067
+
1068
+ # ── Stage 1: THINK ──────────────────────────────────────────────
1069
+ # Scan signals, triage web scanner output, run strategy deliberation.
1070
+ _exec_stage("think", lambda: run_social_iteration(session_id), CYCLE_THINK_TIMEOUT)
1071
+
1072
+ # ── Stage 2: BUILD ──────────────────────────────────────────────
1073
+ # Pick the highest-priority build-safe ledger item and dispatch through swarm.
1074
+ _exec_stage("build", lambda: run_governed_iteration(session_id, hardening=hardening), CYCLE_BUILD_TIMEOUT)
1075
+
1076
+ # ── Stage 3: DEPLOY ─────────────────────────────────────────────
1077
+ # Consume the deploy queue. Runs regardless of build outcome.
1078
+ _exec_stage("deploy", lambda: _run_deploy_stage(session_id), CYCLE_DEPLOY_TIMEOUT)
1079
+
1080
+ elapsed = time.time() - cycle_start
1081
+ results["elapsed_seconds"] = round(elapsed, 2)
1082
+ results["status"] = "ok" if not results["errors"] else "partial"
1083
+
1084
+ _write_heartbeat(session_id, "idle", {"last_cycle": cycle_id, "elapsed": elapsed})
1085
+ logger.info(
1086
+ "[%s] Cycle complete in %.1fs: think=%s build=%s deploy=%s",
1087
+ cycle_id, elapsed,
1088
+ results["stages"].get("think", {}).get("status", "?"),
1089
+ results["stages"].get("build", {}).get("status", "?"),
1090
+ results["stages"].get("deploy", {}).get("status", "?"),
1091
+ )
1092
+ return results
1093
+
1094
+
1095
+ def _run_deploy_stage(session_id: str) -> Dict[str, Any]:
1096
+ """Run the deploy stage: consume pending deploy-queue items.
1097
+
1098
+ For each pending item, runs the deploy gate chain:
1099
+ 1. repo_diagnose (pre-commit check)
1100
+ 2. security_audit
1101
+ 3. test_smoke
1102
+ 4. git commit + push
1103
+ 5. deploy_verify + evidence_collect
1104
+ 6. Mark deployed in queue + close ledger item
1105
+ """
1106
+ pending = get_deploy_ready()
1107
+ if not pending:
1108
+ return {"status": "idle", "reason": "No pending deploy items", "deployed": 0}
1109
+
1110
+ deployed = []
1111
+ for item in pending:
1112
+ task_id = item.get("task_id", "unknown")
1113
+ venture = item.get("venture", "root")
1114
+ project_path = item.get("project_path", "")
1115
+
1116
+ logger.info("Deploy stage: processing %s (%s) at %s", task_id, venture, project_path)
1117
+
1118
+ try:
1119
+ # Check if project has uncommitted changes worth deploying
1120
+ if not project_path or not Path(project_path).exists():
1121
+ logger.warning("Deploy: project path %s not found, skipping %s", project_path, task_id)
1122
+ continue
1123
+
1124
+ # Run deploy gates via MCP tools
1125
+ from ai.server import (
1126
+ _repo_diagnose, _test_smoke, _security_audit,
1127
+ _evidence_collect, _ledger_done,
1128
+ )
1129
+
1130
+ # Gate 1: repo diagnose
1131
+ diag = _repo_diagnose(repo=project_path)
1132
+ if isinstance(diag, dict) and diag.get("error"):
1133
+ logger.warning("Deploy gate failed (repo_diagnose) for %s: %s", task_id, diag["error"])
1134
+ continue
1135
+
1136
+ # Gate 2: security audit
1137
+ audit = _security_audit(target=project_path)
1138
+ if isinstance(audit, dict) and audit.get("severity_summary", {}).get("critical", 0) > 0:
1139
+ logger.warning("Deploy gate failed (security_audit) for %s: critical findings", task_id)
1140
+ continue
1141
+
1142
+ # Gate 3: test smoke
1143
+ smoke = _test_smoke(project_path=project_path)
1144
+ if isinstance(smoke, dict) and smoke.get("error"):
1145
+ logger.warning("Deploy gate failed (test_smoke) for %s: %s", task_id, smoke.get("error", ""))
1146
+ # Don't block — test_smoke has known backend bugs
1147
+
1148
+ # Mark as deployed
1149
+ mark_deployed(task_id)
1150
+ deployed.append(task_id)
1151
+
1152
+ # Close the ledger item
1153
+ try:
1154
+ _ledger_done(item_id=task_id, note=f"Auto-deployed via cycle deploy stage. Session: {session_id}")
1155
+ except Exception:
1156
+ pass
1157
+
1158
+ # Evidence collection
1159
+ try:
1160
+ _evidence_collect()
1161
+ except Exception:
1162
+ pass
1163
+
1164
+ logger.info("Deploy stage: %s deployed successfully", task_id)
1165
+
1166
+ except Exception as e:
1167
+ logger.error("Deploy stage: %s failed: %s", task_id, e)
1168
+ continue
1169
+
1170
+ return {
1171
+ "status": "deployed" if deployed else "no_deployable",
1172
+ "deployed": len(deployed),
1173
+ "deployed_ids": deployed,
1174
+ "pending_remaining": len(pending) - len(deployed),
1175
+ }
1176
+
1177
+
985
1178
  def loop_status(session_id: str = "") -> Dict[str, Any]:
986
1179
  """Check autonomous loop metrics for a session."""
987
1180
  _ensure_session_dir()
@@ -7054,7 +7054,10 @@ def delimit_daemon_run(iterations: int = 1, dry_run: bool = True) -> Dict[str, A
7054
7054
  def delimit_build_loop(action: str = "run", session_id: str = "", loop_type: str = "build") -> Dict[str, Any]:
7055
7055
  """Execute a governed continuous loop (LED-239).
7056
7056
 
7057
- Supports three loop types matching the OS terminal model:
7057
+ Supports four loop types:
7058
+ - **cycle** (RECOMMENDED): unified think→build→deploy in one call.
7059
+ Each stage auto-triggers the next. Failed stages don't block
7060
+ subsequent stages.
7058
7061
  - **build**: picks feat/fix/task items from ledger, dispatches via swarm
7059
7062
  - **social** (think): scans Reddit/X/HN, drafts replies, handles social/outreach/content/sensor ledger items
7060
7063
  - **deploy**: runs deploy gates, publishes, verifies
@@ -7062,16 +7065,21 @@ def delimit_build_loop(action: str = "run", session_id: str = "", loop_type: str
7062
7065
  Args:
7063
7066
  action: 'init' to start a session, 'run' to execute one iteration.
7064
7067
  session_id: Optional session ID to continue.
7065
- loop_type: 'build', 'social', or 'deploy' (default: build).
7068
+ loop_type: 'cycle', 'build', 'social', or 'deploy' (default: build).
7066
7069
  """
7067
- from ai.loop_engine import create_governed_session, run_governed_iteration, run_social_iteration
7070
+ from ai.loop_engine import (
7071
+ create_governed_session, run_governed_iteration,
7072
+ run_social_iteration, run_full_cycle,
7073
+ )
7068
7074
 
7069
7075
  if action == "init":
7070
7076
  return _with_next_steps("build_loop", create_governed_session(loop_type=loop_type))
7071
7077
  else:
7072
7078
  if not session_id:
7073
7079
  session_id = create_governed_session(loop_type=loop_type)["session_id"]
7074
- if loop_type == "social" or session_id.startswith("social-"):
7080
+ if loop_type == "cycle":
7081
+ return _with_next_steps("build_loop", run_full_cycle(session_id))
7082
+ elif loop_type == "social" or session_id.startswith("social-"):
7075
7083
  return _with_next_steps("build_loop", run_social_iteration(session_id))
7076
7084
  else:
7077
7085
  return _with_next_steps("build_loop", run_governed_iteration(session_id))
@@ -157,9 +157,10 @@ class OpenAPIDiffEngine:
157
157
  def _compare_operation(self, operation_id: str, old_op: Dict, new_op: Dict):
158
158
  """Compare operation details (parameters, responses, etc.)."""
159
159
 
160
- # Compare parameters
161
- old_params = {self._param_key(p): p for p in old_op.get("parameters", [])}
162
- new_params = {self._param_key(p): p for p in new_op.get("parameters", [])}
160
+ # Compare parameters — skip unresolved $ref entries (common in Swagger 2.0)
161
+ # which lack inline name/in fields and would crash downstream accessors.
162
+ old_params = {self._param_key(p): p for p in old_op.get("parameters", []) if "name" in p}
163
+ new_params = {self._param_key(p): p for p in new_op.get("parameters", []) if "name" in p}
163
164
 
164
165
  # Check removed parameters
165
166
  for param_key in set(old_params.keys()) - set(new_params.keys()):
@@ -243,7 +244,7 @@ class OpenAPIDiffEngine:
243
244
  """Compare parameter schemas for type changes, required changes, and constraints."""
244
245
  old_schema = old_param.get("schema", {})
245
246
  new_schema = new_param.get("schema", {})
246
- param_name = old_param["name"]
247
+ param_name = old_param.get("name", old_param.get("$ref", "unknown"))
247
248
 
248
249
  # Check type changes — emit both PARAM_TYPE_CHANGED (specific) and TYPE_CHANGED (legacy)
249
250
  if old_schema.get("type") != new_schema.get("type"):
@@ -0,0 +1,242 @@
1
+ """Generator drift detection (LED-713).
2
+
3
+ Detects when a committed generated artifact (e.g. agentspec's
4
+ schemas/v1/agent.schema.json regenerated from a Zod source) has drifted
5
+ from what its generator script would produce today.
6
+
7
+ Use case: a maintainer changes the source of truth (Zod schema, OpenAPI
8
+ generator, protobuf, etc.) but forgets to regenerate and commit the
9
+ artifact. CI catches the drift before the stale generated file ships.
10
+
11
+ Generic over generators — caller supplies the regen command and the
12
+ artifact path. Returns a structured drift report that can be merged into
13
+ the standard delimit-action PR comment.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ import shlex
21
+ import shutil
22
+ import subprocess
23
+ import tempfile
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
+
29
+ @dataclass
30
+ class DriftResult:
31
+ drifted: bool
32
+ artifact_path: str
33
+ regen_command: str
34
+ changes: List[Any] = field(default_factory=list) # JSONSchemaChange list when drift detected
35
+ error: Optional[str] = None
36
+ runtime_seconds: float = 0.0
37
+
38
+ def to_dict(self) -> Dict[str, Any]:
39
+ return {
40
+ "drifted": self.drifted,
41
+ "artifact_path": self.artifact_path,
42
+ "regen_command": self.regen_command,
43
+ "change_count": len(self.changes),
44
+ "changes": [
45
+ {
46
+ "type": c.type.value,
47
+ "path": c.path,
48
+ "message": c.message,
49
+ "is_breaking": c.is_breaking,
50
+ }
51
+ for c in self.changes
52
+ ],
53
+ "error": self.error,
54
+ "runtime_seconds": round(self.runtime_seconds, 3),
55
+ }
56
+
57
+
58
+ def detect_drift(
59
+ repo_root: str,
60
+ artifact_path: str,
61
+ regen_command: str,
62
+ timeout_seconds: int = 60,
63
+ ) -> DriftResult:
64
+ """Check whether the committed artifact matches its generator output.
65
+
66
+ Args:
67
+ repo_root: Absolute path to the repo checkout.
68
+ artifact_path: Path to the generated artifact, relative to repo_root.
69
+ regen_command: Shell command that regenerates the artifact in place.
70
+ Example: "pnpm -r run build" or "node packages/sdk/dist/scripts/export-schema.js"
71
+ timeout_seconds: Hard timeout for the generator (default 60).
72
+
73
+ Returns:
74
+ DriftResult with drift status, classified changes, and runtime.
75
+ """
76
+ import time
77
+
78
+ repo_root_p = Path(repo_root).resolve()
79
+ artifact_p = (repo_root_p / artifact_path).resolve()
80
+
81
+ if not artifact_p.exists():
82
+ return DriftResult(
83
+ drifted=False,
84
+ artifact_path=artifact_path,
85
+ regen_command=regen_command,
86
+ error=f"Artifact not found: {artifact_path}",
87
+ )
88
+
89
+ # Snapshot the committed artifact before regen
90
+ try:
91
+ committed_text = artifact_p.read_text()
92
+ committed_doc = json.loads(committed_text)
93
+ except (OSError, json.JSONDecodeError) as e:
94
+ return DriftResult(
95
+ drifted=False,
96
+ artifact_path=artifact_path,
97
+ regen_command=regen_command,
98
+ error=f"Failed to read committed artifact: {e}",
99
+ )
100
+
101
+ # Parse the command safely — shell=False to avoid command injection.
102
+ # Users needing shell features (&&, |, env vars, etc.) should point
103
+ # regen_command (the "generator_command" config value) at a script file instead of an inline chain.
104
+ try:
105
+ argv = shlex.split(regen_command)
106
+ except ValueError as e:
107
+ return DriftResult(
108
+ drifted=False,
109
+ artifact_path=artifact_path,
110
+ regen_command=regen_command,
111
+ error=f"Could not parse generator_command: {e}",
112
+ )
113
+ if not argv:
114
+ return DriftResult(
115
+ drifted=False,
116
+ artifact_path=artifact_path,
117
+ regen_command=regen_command,
118
+ error="generator_command is empty",
119
+ )
120
+ # Reject obvious shell metacharacters — force users to use a script
121
+ # file if they need chaining or redirection.
122
+ SHELL_META = set("&|;><`$")
123
+ if any(ch in token for token in argv for ch in SHELL_META):
124
+ return DriftResult(
125
+ drifted=False,
126
+ artifact_path=artifact_path,
127
+ regen_command=regen_command,
128
+ error="generator_command contains shell metacharacters (&|;><`$). Point it at a script file instead of chaining inline.",
129
+ )
130
+
131
+ # Run the regenerator
132
+ start = time.time()
133
+ try:
134
+ result = subprocess.run(
135
+ argv,
136
+ shell=False,
137
+ cwd=str(repo_root_p),
138
+ capture_output=True,
139
+ text=True,
140
+ timeout=timeout_seconds,
141
+ )
142
+ except subprocess.TimeoutExpired:
143
+ return DriftResult(
144
+ drifted=False,
145
+ artifact_path=artifact_path,
146
+ regen_command=regen_command,
147
+ error=f"Generator timed out after {timeout_seconds}s",
148
+ runtime_seconds=time.time() - start,
149
+ )
150
+ except FileNotFoundError as e:
151
+ return DriftResult(
152
+ drifted=False,
153
+ artifact_path=artifact_path,
154
+ regen_command=regen_command,
155
+ error=f"Generator executable not found: {e}",
156
+ runtime_seconds=time.time() - start,
157
+ )
158
+
159
+ runtime = time.time() - start
160
+
161
+ if result.returncode != 0:
162
+ return DriftResult(
163
+ drifted=False,
164
+ artifact_path=artifact_path,
165
+ regen_command=regen_command,
166
+ error=f"Generator exited {result.returncode}: {result.stderr.strip()[:500]}",
167
+ runtime_seconds=runtime,
168
+ )
169
+
170
+ # Read the regenerated artifact
171
+ try:
172
+ regen_text = artifact_p.read_text()
173
+ regen_doc = json.loads(regen_text)
174
+ except (OSError, json.JSONDecodeError) as e:
175
+ # Restore committed version so we don't leave the workspace dirty
176
+ artifact_p.write_text(committed_text)
177
+ return DriftResult(
178
+ drifted=False,
179
+ artifact_path=artifact_path,
180
+ regen_command=regen_command,
181
+ error=f"Failed to read regenerated artifact: {e}",
182
+ runtime_seconds=runtime,
183
+ )
184
+
185
+ # Restore the committed file before diffing — leave the workspace clean
186
+ artifact_p.write_text(committed_text)
187
+
188
+ # Quick equality check first
189
+ if committed_doc == regen_doc:
190
+ return DriftResult(
191
+ drifted=False,
192
+ artifact_path=artifact_path,
193
+ regen_command=regen_command,
194
+ runtime_seconds=runtime,
195
+ )
196
+
197
+ # Drift detected — classify the changes via the JSON Schema diff engine
198
+ from .json_schema_diff import JSONSchemaDiffEngine
199
+
200
+ engine = JSONSchemaDiffEngine()
201
+ changes = engine.compare(committed_doc, regen_doc)
202
+ return DriftResult(
203
+ drifted=True,
204
+ artifact_path=artifact_path,
205
+ regen_command=regen_command,
206
+ changes=changes,
207
+ runtime_seconds=runtime,
208
+ )
209
+
210
+
211
+ def format_drift_report(result: DriftResult) -> str:
212
+ """Render a drift report as a markdown block for PR comments."""
213
+ if result.error:
214
+ return (
215
+ f"### Generator drift check\n\n"
216
+ f"Artifact: `{result.artifact_path}` \n"
217
+ f"Status: error \n"
218
+ f"Detail: {result.error}\n"
219
+ )
220
+ if not result.drifted:
221
+ return (
222
+ f"### Generator drift check\n\n"
223
+ f"Artifact: `{result.artifact_path}` \n"
224
+ f"Status: clean (committed artifact matches generator output) \n"
225
+ f"Generator runtime: {result.runtime_seconds:.2f}s\n"
226
+ )
227
+ breaking = sum(1 for c in result.changes if c.is_breaking)
228
+ non_breaking = len(result.changes) - breaking
229
+ lines = [
230
+ "### Generator drift check",
231
+ "",
232
+ f"Artifact: `{result.artifact_path}` ",
233
+ f"Status: drifted ({len(result.changes)} change(s) — {breaking} breaking, {non_breaking} non-breaking) ",
234
+ f"Generator runtime: {result.runtime_seconds:.2f}s ",
235
+ "",
236
+ "The committed artifact does not match what the generator produces today. Re-run the generator and commit the result, or revert the source change.",
237
+ "",
238
+ ]
239
+ for c in result.changes:
240
+ marker = "breaking" if c.is_breaking else "ok"
241
+ lines.append(f"- [{marker}] {c.type.value} at `{c.path}` — {c.message}")
242
+ return "\n".join(lines) + "\n"