@dewtech/dare-cli 3.8.2 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +18 -0
  2. package/dist/__tests__/ide-command-parity.test.js +1 -0
  3. package/dist/__tests__/ide-command-parity.test.js.map +1 -1
  4. package/dist/__tests__/secure-executor-regression.test.d.ts +2 -0
  5. package/dist/__tests__/secure-executor-regression.test.d.ts.map +1 -0
  6. package/dist/__tests__/secure-executor-regression.test.js +294 -0
  7. package/dist/__tests__/secure-executor-regression.test.js.map +1 -0
  8. package/dist/agent/__tests__/budget.test.d.ts +2 -0
  9. package/dist/agent/__tests__/budget.test.d.ts.map +1 -0
  10. package/dist/agent/__tests__/budget.test.js +36 -0
  11. package/dist/agent/__tests__/budget.test.js.map +1 -0
  12. package/dist/agent/__tests__/claude-driver.test.d.ts +2 -0
  13. package/dist/agent/__tests__/claude-driver.test.d.ts.map +1 -0
  14. package/dist/agent/__tests__/claude-driver.test.js +88 -0
  15. package/dist/agent/__tests__/claude-driver.test.js.map +1 -0
  16. package/dist/agent/__tests__/cost-telemetry.test.d.ts +2 -0
  17. package/dist/agent/__tests__/cost-telemetry.test.d.ts.map +1 -0
  18. package/dist/agent/__tests__/cost-telemetry.test.js +61 -0
  19. package/dist/agent/__tests__/cost-telemetry.test.js.map +1 -0
  20. package/dist/agent/__tests__/driver.types.test.d.ts +2 -0
  21. package/dist/agent/__tests__/driver.types.test.d.ts.map +1 -0
  22. package/dist/agent/__tests__/driver.types.test.js +33 -0
  23. package/dist/agent/__tests__/driver.types.test.js.map +1 -0
  24. package/dist/agent/__tests__/mock-driver.test.d.ts +2 -0
  25. package/dist/agent/__tests__/mock-driver.test.d.ts.map +1 -0
  26. package/dist/agent/__tests__/mock-driver.test.js +100 -0
  27. package/dist/agent/__tests__/mock-driver.test.js.map +1 -0
  28. package/dist/agent/__tests__/no-llm-in-core.test.d.ts +2 -0
  29. package/dist/agent/__tests__/no-llm-in-core.test.d.ts.map +1 -0
  30. package/dist/agent/__tests__/no-llm-in-core.test.js +46 -0
  31. package/dist/agent/__tests__/no-llm-in-core.test.js.map +1 -0
  32. package/dist/agent/budget.d.ts +15 -0
  33. package/dist/agent/budget.d.ts.map +1 -0
  34. package/dist/agent/budget.js +23 -0
  35. package/dist/agent/budget.js.map +1 -0
  36. package/dist/agent/driver.d.ts +46 -0
  37. package/dist/agent/driver.d.ts.map +1 -0
  38. package/dist/agent/driver.js +2 -0
  39. package/dist/agent/driver.js.map +1 -0
  40. package/dist/agent/drivers/claude.d.ts +43 -0
  41. package/dist/agent/drivers/claude.d.ts.map +1 -0
  42. package/dist/agent/drivers/claude.js +134 -0
  43. package/dist/agent/drivers/claude.js.map +1 -0
  44. package/dist/agent/drivers/mock.d.ts +3 -0
  45. package/dist/agent/drivers/mock.d.ts.map +1 -0
  46. package/dist/agent/drivers/mock.js +56 -0
  47. package/dist/agent/drivers/mock.js.map +1 -0
  48. package/dist/agent/drivers/noop.d.ts +3 -0
  49. package/dist/agent/drivers/noop.d.ts.map +1 -0
  50. package/dist/agent/drivers/noop.js +13 -0
  51. package/dist/agent/drivers/noop.js.map +1 -0
  52. package/dist/agent/telemetry.d.ts +4 -0
  53. package/dist/agent/telemetry.d.ts.map +1 -0
  54. package/dist/agent/telemetry.js +25 -0
  55. package/dist/agent/telemetry.js.map +1 -0
  56. package/dist/bin/dare.js +2 -0
  57. package/dist/bin/dare.js.map +1 -1
  58. package/dist/commands/__tests__/execute-agent.test.d.ts +2 -0
  59. package/dist/commands/__tests__/execute-agent.test.d.ts.map +1 -0
  60. package/dist/commands/__tests__/execute-agent.test.js +191 -0
  61. package/dist/commands/__tests__/execute-agent.test.js.map +1 -0
  62. package/dist/commands/__tests__/require-approval.test.d.ts +2 -0
  63. package/dist/commands/__tests__/require-approval.test.d.ts.map +1 -0
  64. package/dist/commands/__tests__/require-approval.test.js +158 -0
  65. package/dist/commands/__tests__/require-approval.test.js.map +1 -0
  66. package/dist/commands/execute.d.ts +68 -0
  67. package/dist/commands/execute.d.ts.map +1 -1
  68. package/dist/commands/execute.js +558 -2
  69. package/dist/commands/execute.js.map +1 -1
  70. package/dist/commands/guard.d.ts +13 -0
  71. package/dist/commands/guard.d.ts.map +1 -0
  72. package/dist/commands/guard.js +338 -0
  73. package/dist/commands/guard.js.map +1 -0
  74. package/dist/guard/__tests__/boundary.test.d.ts +2 -0
  75. package/dist/guard/__tests__/boundary.test.d.ts.map +1 -0
  76. package/dist/guard/__tests__/boundary.test.js +37 -0
  77. package/dist/guard/__tests__/boundary.test.js.map +1 -0
  78. package/dist/guard/__tests__/guard-command.test.d.ts +2 -0
  79. package/dist/guard/__tests__/guard-command.test.d.ts.map +1 -0
  80. package/dist/guard/__tests__/guard-command.test.js +88 -0
  81. package/dist/guard/__tests__/guard-command.test.js.map +1 -0
  82. package/dist/guard/__tests__/guard-config.test.d.ts +2 -0
  83. package/dist/guard/__tests__/guard-config.test.d.ts.map +1 -0
  84. package/dist/guard/__tests__/guard-config.test.js +34 -0
  85. package/dist/guard/__tests__/guard-config.test.js.map +1 -0
  86. package/dist/guard/__tests__/guard-integration.test.d.ts +2 -0
  87. package/dist/guard/__tests__/guard-integration.test.d.ts.map +1 -0
  88. package/dist/guard/__tests__/guard-integration.test.js +218 -0
  89. package/dist/guard/__tests__/guard-integration.test.js.map +1 -0
  90. package/dist/guard/__tests__/guard-types.test.d.ts +2 -0
  91. package/dist/guard/__tests__/guard-types.test.d.ts.map +1 -0
  92. package/dist/guard/__tests__/guard-types.test.js +28 -0
  93. package/dist/guard/__tests__/guard-types.test.js.map +1 -0
  94. package/dist/guard/__tests__/provenance.test.d.ts +2 -0
  95. package/dist/guard/__tests__/provenance.test.d.ts.map +1 -0
  96. package/dist/guard/__tests__/provenance.test.js +80 -0
  97. package/dist/guard/__tests__/provenance.test.js.map +1 -0
  98. package/dist/guard/__tests__/scan.test.d.ts +2 -0
  99. package/dist/guard/__tests__/scan.test.d.ts.map +1 -0
  100. package/dist/guard/__tests__/scan.test.js +62 -0
  101. package/dist/guard/__tests__/scan.test.js.map +1 -0
  102. package/dist/guard/__tests__/unicode.test.d.ts +2 -0
  103. package/dist/guard/__tests__/unicode.test.d.ts.map +1 -0
  104. package/dist/guard/__tests__/unicode.test.js +75 -0
  105. package/dist/guard/__tests__/unicode.test.js.map +1 -0
  106. package/dist/guard/boundary.d.ts +9 -0
  107. package/dist/guard/boundary.d.ts.map +1 -0
  108. package/dist/guard/boundary.js +17 -0
  109. package/dist/guard/boundary.js.map +1 -0
  110. package/dist/guard/config.d.ts +60 -0
  111. package/dist/guard/config.d.ts.map +1 -0
  112. package/dist/guard/config.js +64 -0
  113. package/dist/guard/config.js.map +1 -0
  114. package/dist/guard/pipeline.d.ts +13 -0
  115. package/dist/guard/pipeline.d.ts.map +1 -0
  116. package/dist/guard/pipeline.js +120 -0
  117. package/dist/guard/pipeline.js.map +1 -0
  118. package/dist/guard/provenance.d.ts +18 -0
  119. package/dist/guard/provenance.d.ts.map +1 -0
  120. package/dist/guard/provenance.js +108 -0
  121. package/dist/guard/provenance.js.map +1 -0
  122. package/dist/guard/scan.d.ts +6 -0
  123. package/dist/guard/scan.d.ts.map +1 -0
  124. package/dist/guard/scan.js +84 -0
  125. package/dist/guard/scan.js.map +1 -0
  126. package/dist/guard/types.d.ts +28 -0
  127. package/dist/guard/types.d.ts.map +1 -0
  128. package/dist/guard/types.js +2 -0
  129. package/dist/guard/types.js.map +1 -0
  130. package/dist/guard/unicode.d.ts +8 -0
  131. package/dist/guard/unicode.d.ts.map +1 -0
  132. package/dist/guard/unicode.js +126 -0
  133. package/dist/guard/unicode.js.map +1 -0
  134. package/dist/utils/project-generator.d.ts.map +1 -1
  135. package/dist/utils/project-generator.js +2 -0
  136. package/dist/utils/project-generator.js.map +1 -1
  137. package/package.json +4 -1
  138. package/templates/ide/antigravity/.agents/skills/dare-guard/SKILL.md +16 -0
  139. package/templates/ide/claude/.claude/commands/dare-guard.md +16 -0
  140. package/templates/ide/cursor/.cursor/commands/dare-guard.md +16 -0
@@ -2,22 +2,35 @@ import { Command } from 'commander';
2
2
  import chalk from 'chalk';
3
3
  import fs from 'fs-extra';
4
4
  import path from 'path';
5
+ import { createInterface } from 'node:readline/promises';
5
6
  import { applyCascadingSkip, buildTaskPrompt, computeRanks, markDone, markFailed, markRunning, nextExecutableTasks, renderCanvas, } from '../dag-runner/run_dag.js';
6
7
  import { convertYamlToDag } from '../utils/dag-converter.js';
7
8
  import { createGraph, loadGraphConfig } from '../graphrag/index.js';
8
- import { DEFAULT_STATE_PATH, loadAndApplyState, saveState, } from '../dag-runner/state-store.js';
9
+ import { DEFAULT_STATE_PATH, appendAttempt, getAttempts, loadAndApplyState, saveState, } from '../dag-runner/state-store.js';
9
10
  import { resolveStackFromConfig, runRalphLoop, } from '../dag-runner/ralph-loop.js';
10
11
  import { runReview } from '../utils/ReviewRunner.js';
11
12
  import { readProjectConfig } from '../utils/UpdateDetector.js';
12
13
  import { buildLocateContext, loadGraphLocateConfig } from '../dag-runner/graph-locate.js';
14
+ import { BudgetTracker } from '../agent/budget.js';
15
+ import { recordCostTelemetry } from '../agent/telemetry.js';
16
+ import { mockDriver } from '../agent/drivers/mock.js';
17
+ import { AgentSdkMissingError, createClaudeDriver, } from '../agent/drivers/claude.js';
13
18
  import { gateToAspect, loadVerificationConfig, recordFailureAndVerdict, runPostRalphVerification, shouldRunVerification, validateBestOf, validatePolicy, validateFormalBackend, applyPolicyOverride, resolveBestOfCount, } from './execute-verification.js';
19
+ import { parseVerificationConfig } from '../verification/config.js';
20
+ import { decideNextAction } from '../verification/decay/policy.js';
21
+ import { failureSignature } from '../verification/decay/signature.js';
14
22
  import { recordVerification, recordFormalProof } from '../verification/telemetry.js';
15
23
  import { runBestOfN } from '../verification/best-of-n/runner.js';
24
+ import { NoViableCandidateError, selectByPareto, } from '../verification/best-of-n/selector/pareto.js';
16
25
  import { createLogger } from '../utils/logger.js';
26
+ import { parseGuardConfig } from '../guard/config.js';
27
+ import { runGuardPipeline } from '../guard/pipeline.js';
28
+ import { loadSteeringFiles } from '../steering/loader.js';
17
29
  const execLog = createLogger('execute');
18
30
  export const executeCommand = new Command('execute')
19
31
  .description('Orchestrate DAG execution (the IDE agent runs each task)')
20
32
  .option('--dag <file>', 'Path to dare-dag.yaml', 'DARE/dare-dag.yaml')
33
+ .option('--agent', 'Run the autonomous executor loop', false)
21
34
  .option('--next', 'Print the next executable tasks (with composed prompts)', false)
22
35
  .option('--status', 'Render canvas and show summary (default action)', false)
23
36
  .option('--watch', 'Stream task readiness (re-print on every state change). Implies --next.', false)
@@ -40,6 +53,10 @@ export const executeCommand = new Command('execute')
40
53
  .option('--formal', 'Enable formal verification gate for this completion', false)
41
54
  .option('--no-formal', 'Skip formal verification even when enabled in config', false)
42
55
  .option('--formal-backend <backend>', 'Formal backend override (dafny|verus|lean)')
56
+ .option('--budget-tokens <n>', 'Token budget cap for --agent mode')
57
+ .option('--require-approval <mode>', 'Approval mode for --agent (rank|none)', 'rank')
58
+ .option('--on-fail <mode>', 'Action when a failed attempt does not resolve (replan|escalate|stop)', 'escalate')
59
+ .option('--dry-run', 'Use mock driver instead of Claude SDK driver', false)
43
60
  .action(async (options) => {
44
61
  const cwd = process.cwd();
45
62
  const dagPath = path.resolve(cwd, options.dag);
@@ -54,7 +71,10 @@ export const executeCommand = new Command('execute')
54
71
  await loadAndApplyState(dag, stateFile);
55
72
  const graph = options.noGraph ? undefined : await tryOpenGraph(cwd);
56
73
  try {
57
- if (options.complete) {
74
+ if (options.agent) {
75
+ await handleAgent(dag, options, stateFile, canvasPath, graph, cwd);
76
+ }
77
+ else if (options.complete) {
58
78
  await handleComplete(dag, options, stateFile, canvasPath, graph);
59
79
  }
60
80
  else if (options.fail) {
@@ -78,7 +98,543 @@ export const executeCommand = new Command('execute')
78
98
  await Promise.resolve(graph?.close());
79
99
  }
80
100
  });
101
/** Process exit code used when the guard preflight rejects a task's inputs. */
const GUARD_FAIL_EXIT_CODE = 6;

/** Model id used when the project config does not provide `agent.model`. */
const DEFAULT_AGENT_MODEL = 'claude-sonnet-4-5';

/**
 * Usage record attached to a candidate whose driver threw instead of
 * reporting usage. The same object is shared by every such candidate and is
 * handed to the budget tracker, so it is frozen to rule out accidental
 * mutation leaking between candidates.
 */
const ZERO_USAGE = Object.freeze({
    inputTokens: 0,
    outputTokens: 0,
    costUsd: 0,
    model: 'agent-error',
});
109
/**
 * Normalises a path string so equivalent spellings compare equal.
 *
 * Backslashes become forward slashes, runs of slashes collapse to one, and
 * every leading `./` or `/` segment is removed (so `././a`, `/./a` and `a`
 * all yield `a`). Leading `../` segments and dot-files are left untouched.
 *
 * @param {string} rawPath - Path as written in the task or steering entry.
 * @returns {string} The normalised path.
 */
function normalizePreflightPath(rawPath) {
    return rawPath
        .replace(/\\/g, '/')
        // Collapse first so the prefix strip below sees single separators.
        .replace(/\/{2,}/g, '/')
        // Strip any number of leading "./" and "/" segments, not just one.
        .replace(/^(?:\.?\/)+/, '');
}
116
/**
 * Derives the boundary intent passed to the guard pipeline from a path.
 *
 * @param {string} artifactPath - Path of the artifact about to be scanned.
 * @returns {'execute-hook' | 'reorder-gate' | 'read'} `execute-hook` for
 *   anything under a `hooks` directory, `reorder-gate` for anything under a
 *   `gates` directory or a path ending in `dare-dag.yaml`, otherwise `read`.
 */
function inferBoundaryIntent(artifactPath) {
    const normalized = normalizePreflightPath(artifactPath).toLowerCase();
    // Normalisation strips the leading slash, so anchor the path again;
    // otherwise a project-root `hooks/…` or `gates/…` would never match.
    const anchored = `/${normalized}`;
    if (anchored.includes('/hooks/')) {
        return 'execute-hook';
    }
    if (anchored.includes('/gates/') || normalized.endsWith('dare-dag.yaml')) {
        return 'reorder-gate';
    }
    return 'read';
}
125
/**
 * Builds the one-line failure reason for a guard verdict.
 *
 * Prefers the first finding whose severity is `FAIL`; falls back to the first
 * finding of any severity, and to a generic message when there are none.
 *
 * @param {{artifact: string, findings: Array<object>}} verdict
 * @returns {string}
 */
function preflightFailReason(verdict) {
    const { artifact, findings } = verdict;
    const firstFailure = findings.find((item) => item.severity === 'FAIL');
    const chosen = firstFailure == null ? findings[0] : firstFailure;
    if (chosen) {
        return `${artifact}: [${chosen.layer}:${chosen.rule}] ${chosen.evidence}`;
    }
    return `${artifact}: guard preflight failed`;
}
132
/**
 * Lists the files the guard preflight should scan for a task.
 *
 * The task's `spec_file` (when non-blank) is registered with role `spec`;
 * every steering file returned by `loadSteeringFiles(cwd)` is registered with
 * role `steering` unless its normalised path is already present.
 *
 * @param {string} cwd - Directory handed to `loadSteeringFiles`.
 * @param {object} task - DAG task; only `spec_file` is read.
 * @returns {Promise<Array<{path: string, role: 'spec' | 'steering'}>>}
 */
async function collectPreflightTargets(cwd, task) {
    const roleByPath = new Map();
    const specFile = task.spec_file;
    if (specFile && specFile.trim().length > 0) {
        roleByPath.set(normalizePreflightPath(specFile), 'spec');
    }
    for (const steering of loadSteeringFiles(cwd)) {
        const key = normalizePreflightPath(steering.path);
        if (roleByPath.has(key)) {
            // First registration wins, so a spec keeps its `spec` role.
            continue;
        }
        roleByPath.set(key, 'steering');
    }
    return Array.from(roleByPath, ([filePath, role]) => ({ path: filePath, role }));
}
147
/**
 * Default guard preflight: scans a task's spec and steering files with the
 * guard pipeline before the agent driver is invoked.
 *
 * Returns `PASS` with no artifacts when the guard is disabled or not enabled
 * for execute. Targets that do not exist or are not regular files are
 * skipped. The first `FAIL` verdict short-circuits with its reason; otherwise
 * the result is `WARN` if any target warned, else `PASS`, together with the
 * pipeline artifacts of the steering files that were scanned.
 *
 * @param {object} task - DAG task being started.
 * @param {object} guardConfig - Parsed guard configuration.
 * @returns {Promise<{verdict: string, artifacts: Array, reason?: string}>}
 */
const defaultPreflightGuard = async (task, guardConfig) => {
    const guardActive = guardConfig.enabled && guardConfig.onExecute;
    if (!guardActive) {
        return { verdict: 'PASS', artifacts: [] };
    }
    const cwd = process.cwd();
    let targets;
    try {
        targets = await collectPreflightTargets(cwd, task);
    }
    catch (err) {
        // Failing to enumerate the inputs is treated as a guard failure.
        const reason = err instanceof Error ? err.message : String(err);
        return { verdict: 'FAIL', artifacts: [], reason };
    }
    const steeringArtifacts = [];
    let sawWarning = false;
    for (const target of targets) {
        const absPath = path.resolve(cwd, target.path);
        const exists = await fs.pathExists(absPath);
        if (!exists) {
            continue;
        }
        const stat = await fs.stat(absPath);
        if (!stat.isFile()) {
            continue;
        }
        const content = await fs.readFile(absPath);
        const pipeline = runGuardPipeline(absPath, content, guardConfig, {
            cwd,
            boundaryIntent: inferBoundaryIntent(target.path),
        });
        const verdict = pipeline.result.verdict;
        if (verdict === 'FAIL') {
            return {
                verdict: 'FAIL',
                artifacts: [],
                reason: preflightFailReason(pipeline.result),
            };
        }
        if (target.role === 'steering') {
            steeringArtifacts.push(pipeline.artifact);
        }
        if (verdict === 'WARN') {
            sawWarning = true;
        }
    }
    return {
        verdict: sawWarning ? 'WARN' : 'PASS',
        artifacts: steeringArtifacts,
    };
};
196
/**
 * Default rank approval: asks on stdin/stdout whether a rank may proceed.
 *
 * @param {number} rank - Rank about to be executed.
 * @param {Array} tasks - Ready tasks; only the count is shown.
 * @returns {Promise<boolean>} `true` only for a `y` / `yes` answer
 *   (case-insensitive, surrounding whitespace ignored).
 */
const defaultRankApproval = async (rank, tasks) => {
    const prompt = createInterface({ input: process.stdin, output: process.stdout });
    try {
        const reply = await prompt.question(`Rank ${rank}: ${tasks.length} task(s). Proceder? [y/N] `);
        return ['y', 'yes'].includes(reply.trim().toLowerCase());
    }
    finally {
        // Always release stdin, even if the question is interrupted.
        prompt.close();
    }
};
210
// Swappable implementations used by the agent loop. Each starts at its
// production default and can be replaced through the setters below.
let preflightGuardImpl = defaultPreflightGuard;
let resolveDriverOverride = null;
let rankApprovalImpl = defaultRankApproval;

/**
 * Replaces the guard preflight implementation.
 * @param {Function | null | undefined} fn - Replacement; nullish restores the default.
 */
export function setPreflightGuardForTests(fn) {
    preflightGuardImpl = fn == null ? defaultPreflightGuard : fn;
}

/**
 * Installs (or, with a falsy value, clears) the override consulted first by
 * `resolveDriver`.
 * @param {Function | null | undefined} fn
 */
export function setResolveDriverForTests(fn) {
    resolveDriverOverride = fn;
}

/**
 * Replaces the rank approval prompt.
 * @param {Function | null | undefined} fn - Replacement; nullish restores the default.
 */
export function setRankApprovalForTests(fn) {
    rankApprovalImpl = fn == null ? defaultRankApproval : fn;
}
222
/**
 * Picks the agent driver for a run.
 *
 * Order of precedence: the override installed via `setResolveDriverForTests`,
 * then the mock driver when `args.dryRun` is set, otherwise a Claude driver
 * built from the runtime config.
 *
 * @param {{dryRun?: boolean}} args - CLI options.
 * @param {{model: string, apiKeyEnv?: string, maxTokens?: number}} config
 * @returns {Promise<object>} The driver to run candidates with.
 */
export async function resolveDriver(args, config) {
    if (resolveDriverOverride) {
        return resolveDriverOverride(args, config);
    }
    if (args.dryRun) {
        return mockDriver;
    }
    const { model, apiKeyEnv, maxTokens } = config;
    return createClaudeDriver({ model, apiKeyEnv, maxTokens });
}
234
/**
 * Parses the `--require-approval` value.
 *
 * @param {string | undefined} raw - Raw option value.
 * @returns {'rank' | 'none' | null} `rank` when omitted or empty, the value
 *   itself when it is a known mode, `null` when unrecognised.
 */
function parseRequireApprovalMode(raw) {
    if (!raw) {
        return 'rank';
    }
    switch (raw) {
        case 'rank':
        case 'none':
            return raw;
        default:
            return null;
    }
}
241
/**
 * Reports whether both stdin and stdout are attached to a TTY.
 * @returns {boolean}
 */
function isInteractiveRuntime() {
    if (!process.stdin.isTTY) {
        return false;
    }
    return Boolean(process.stdout.isTTY);
}
244
/**
 * Applies the approval policy before a rank is executed.
 *
 * @param {number} rank - Rank about to run.
 * @param {Array} tasks - Ready tasks in that rank.
 * @param {'rank' | 'none'} mode - `none` skips the prompt entirely.
 * @returns {Promise<'proceed' | 'stop'>}
 */
async function gateRank(rank, tasks, mode) {
    if (mode !== 'none') {
        const approved = await rankApprovalImpl(rank, tasks);
        if (!approved) {
            return 'stop';
        }
    }
    return 'proceed';
}
250
/**
 * Parses the `--on-fail` value.
 *
 * @param {string | undefined} raw - Raw option value.
 * @returns {'escalate' | 'replan' | 'stop' | null} `escalate` when omitted or
 *   empty, the value itself when it is a known mode, `null` when unrecognised.
 */
function parseOnFailMode(raw) {
    if (!raw) {
        return 'escalate';
    }
    switch (raw) {
        case 'escalate':
        case 'replan':
        case 'stop':
            return raw;
        default:
            return null;
    }
}
259
/**
 * Parses a CLI value as a strictly positive integer.
 *
 * Unlike a bare `Number.parseInt`, the whole token must be a decimal integer:
 * `'3abc'`, `'1.5'` and `'1e3'` are rejected instead of being silently
 * truncated to 3, 1 and 1.
 *
 * @param {string | number | undefined} value - Raw option value.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   value is missing, malformed, below 1, or not a safe integer.
 */
function parsePositiveInt(value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    const text = String(value).trim();
    if (!/^\+?\d+$/.test(text)) {
        return undefined;
    }
    const parsed = Number(text);
    if (!Number.isSafeInteger(parsed) || parsed < 1) {
        return undefined;
    }
    return parsed;
}
267
/**
 * Sums the usage reported by every candidate of a task.
 *
 * @param {Array<{run: {usage: object}}>} candidates
 * @returns {{inputTokens: number, outputTokens: number, costUsd: number, model: string}}
 *   Token sums, cost rounded to six decimals, and the model name: the single
 *   model used (or `unknown` if that value is nullish), otherwise `mixed`.
 */
function usageTotals(candidates) {
    let inputTokens = 0;
    let outputTokens = 0;
    let costUsd = 0;
    const models = new Set();
    for (const candidate of candidates) {
        const usage = candidate.run.usage;
        inputTokens += usage.inputTokens;
        outputTokens += usage.outputTokens;
        costUsd += usage.costUsd;
        models.add(usage.model);
    }
    let model = 'mixed';
    if (models.size === 1) {
        const [onlyModel] = models;
        model = onlyModel ?? 'unknown';
    }
    return {
        inputTokens,
        outputTokens,
        // Rounding trims floating-point noise from the running sum.
        costUsd: Number(costUsd.toFixed(6)),
        model,
    };
}
283
/**
 * Converts a driver run result into a verification result.
 *
 * An `implemented` run yields a passing result with PASS entries for build,
 * test and lint. Any other status yields a failing result with a single FAIL
 * entry on `test`, whose reason is the abort message, the run's failure
 * signature or summary, or a generic fallback.
 *
 * @param {string} taskId
 * @param {{status: string, failureSignature?: string, summary?: string}} run
 * @returns {{taskId: string, passed: boolean, aspects: Array<object>, durationMs: number}}
 */
function verificationFromRun(taskId, run) {
    if (run.status !== 'implemented') {
        let reason;
        if (run.status === 'aborted') {
            reason = 'driver aborted candidate execution';
        }
        else {
            reason = (run.failureSignature ?? run.summary) || 'driver failed candidate execution';
        }
        return {
            taskId,
            passed: false,
            aspects: [{ aspect: 'test', verdict: 'FAIL', reason, durationMs: 0 }],
            durationMs: 0,
        };
    }
    const passingAspects = [
        ['build', 'agent candidate built'],
        ['test', 'agent candidate tested'],
        ['lint', 'agent candidate linted'],
    ].map(([aspect, reason]) => ({ aspect, verdict: 'PASS', reason, durationMs: 0 }));
    return {
        taskId,
        passed: true,
        aspects: passingAspects,
        durationMs: 0,
    };
}
313
/**
 * Chooses the winning candidate via the Pareto selector.
 *
 * Candidates are adapted to the selector's input shape (id, worktree
 * descriptor, verification). The first candidate is used as the fallback when
 * the selector reports no viable candidate or returns an unknown id; any
 * other selector error is rethrown.
 *
 * @param {Array<{id: string, run: object, verification: object}>} candidates
 * @returns {object} The chosen element of `candidates`.
 */
function selectParetoCandidate(candidates) {
    const fallback = candidates[0];
    const pool = candidates.map(({ id, run, verification }) => ({
        id,
        worktree: {
            id,
            path: run.worktree,
            branch: `dare/agent/${id}`,
        },
        verification,
    }));
    let winner;
    try {
        winner = selectByPareto(pool);
    }
    catch (err) {
        if (err instanceof NoViableCandidateError) {
            return fallback;
        }
        throw err;
    }
    return candidates.find((candidate) => candidate.id === winner.id) ?? fallback;
}
335
/**
 * Renders the failed aspects of a verification result as text.
 *
 * @param {{aspects: Array<{aspect: string, verdict: string, reason: string}>}} result
 * @returns {string} One `aspect: reason` line per FAIL aspect, or a generic
 *   message when no aspect is marked FAIL.
 */
function failureReasonFromVerification(result) {
    const lines = [];
    for (const entry of result.aspects) {
        if (entry.verdict === 'FAIL') {
            lines.push(`${entry.aspect}: ${entry.reason}`);
        }
    }
    return lines.length > 0 ? lines.join('\n') : 'candidate verification failed';
}
341
/**
 * Maps the decay policy's action onto what the agent loop does next.
 *
 * `DONE` always wins. Otherwise an exhausted budget forces `ESCALATE`. Any
 * action other than `CONTINUE` is passed through, and `CONTINUE` is replaced
 * according to `--on-fail`.
 *
 * @param {string} action - Action returned by the decay policy.
 * @param {'replan' | 'escalate' | 'stop'} onFail - Parsed `--on-fail` mode.
 * @param {boolean} budgetExhausted - Whether the token budget is spent.
 * @returns {string} The action to apply.
 */
function resolveAgentAction(action, onFail, budgetExhausted) {
    if (action === 'DONE' || (!budgetExhausted && action !== 'CONTINUE')) {
        return action;
    }
    if (budgetExhausted) {
        return 'ESCALATE';
    }
    switch (onFail) {
        case 'replan':
            return 'REPLAN';
        case 'stop':
            return 'STOP';
        default:
            return 'ESCALATE';
    }
}
354
/**
 * Loads the spec text handed to the driver for a task.
 *
 * @param {string} cwd - Base directory for resolving `task.spec_file`.
 * @param {{spec_file?: string, subtask_prompt?: string}} task
 * @returns {Promise<string>} The UTF-8 contents of the spec file when it is
 *   set and exists; otherwise the task's `subtask_prompt`.
 */
async function loadTaskSpec(cwd, task) {
    if (task.spec_file) {
        const specPath = path.resolve(cwd, task.spec_file);
        const exists = await fs.pathExists(specPath);
        if (exists) {
            return fs.readFile(specPath, 'utf8');
        }
    }
    return task.subtask_prompt;
}
362
/**
 * Loads the runtime settings for agent mode from the project config.
 *
 * Reading the config is best-effort: a read error, or a result that is not an
 * object, is treated as an empty config so agent mode still runs with
 * defaults.
 *
 * @param {string} cwd - Project directory handed to `readProjectConfig`.
 * @returns {Promise<{model: string, apiKeyEnv: (string|undefined), maxTokens: (number|undefined), guard: object}>}
 *   `model` falls back to `DEFAULT_AGENT_MODEL`; `apiKeyEnv` and `maxTokens`
 *   are `undefined` unless the config holds a non-blank string / positive
 *   integer. String values are returned trimmed.
 */
async function loadAgentRuntimeConfig(cwd) {
    let rawConfig = {};
    try {
        const loaded = await readProjectConfig(cwd);
        // Guard against a null / non-object result so the property reads
        // below cannot throw.
        if (typeof loaded === 'object' && loaded !== null) {
            rawConfig = loaded;
        }
    }
    catch {
        // Deliberate best-effort: fall back to defaults when the config
        // cannot be read.
        rawConfig = {};
    }
    const guard = parseGuardConfig(rawConfig);
    const agent = typeof rawConfig.agent === 'object' && rawConfig.agent !== null
        ? rawConfig.agent
        : {};
    // Returns the trimmed string, or undefined for non-strings and blanks.
    const nonBlank = (value) => {
        if (typeof value !== 'string') {
            return undefined;
        }
        const trimmed = value.trim();
        return trimmed.length > 0 ? trimmed : undefined;
    };
    const model = nonBlank(agent.model) ?? DEFAULT_AGENT_MODEL;
    const apiKeyEnv = nonBlank(agent.apiKeyEnv);
    const maxTokens = Number.isInteger(agent.maxTokens) && agent.maxTokens > 0
        ? agent.maxTokens
        : undefined;
    return { model, apiKeyEnv, maxTokens, guard };
}
387
/**
 * Loads the verification config for agent mode.
 *
 * Delegates to `loadVerificationConfig` with the formal-verification flags
 * from the CLI. When that fails with a message containing
 * `dare.config.json not found`, the result of parsing an empty config is
 * returned instead; every other error propagates.
 *
 * @param {string} cwd - Project directory.
 * @param {object} options - CLI options (`fullMutation`, `formal`, `noFormal`, `formalBackend`).
 * @returns {Promise<object>} The verification config.
 */
async function loadVerificationConfigForAgent(cwd, options) {
    const { fullMutation, formal, noFormal, formalBackend } = options;
    try {
        return await loadVerificationConfig(cwd, fullMutation, { formal, noFormal, formalBackend });
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (!message.includes('dare.config.json not found')) {
            throw err;
        }
        return parseVerificationConfig({});
    }
}
403
/**
 * Runs `args.n` driver candidates for one task concurrently.
 *
 * Each candidate gets its own directory under
 * `<cwd>/.dare/agent-worktrees/<taskId>/cand-<k>`. All candidates share one
 * AbortController, which is aborted up front when the budget is already
 * exhausted and again as soon as a finished candidate exhausts it.
 *
 * A candidate never rejects the batch: a failure to create its worktree, or
 * a throw from the driver, is recorded as a `failed` run with zero usage, so
 * sibling candidates are still awaited and accounted for.
 *
 * @param {{cwd: string, task: {id: string}, spec: string, steering: Array,
 *   driver: {run: Function}, budget: object, n: number}} args
 * @returns {Promise<Array<{id: string, run: object, verification: object}>>}
 */
async function runAgentCandidates(args) {
    const controller = new AbortController();
    if (args.budget.exhausted()) {
        controller.abort();
    }
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    // Builds the synthetic run recorded when a candidate could not execute.
    const failedRun = (worktree, summary, err, fallbackName) => ({
        status: 'failed',
        worktree,
        summary,
        usage: ZERO_USAGE,
        failureSignature: err instanceof Error ? err.name : fallbackName,
    });
    const runCandidate = async (idx) => {
        const id = `cand-${idx + 1}`;
        const worktree = path.resolve(args.cwd, '.dare', 'agent-worktrees', args.task.id, id);
        let run;
        try {
            await fs.ensureDir(worktree);
        }
        catch (err) {
            // Keep the failure local to this candidate instead of rejecting
            // Promise.all while the other drivers are still running.
            run = failedRun(worktree, `worktree setup failed: ${describe(err)}`, err, 'WorktreeError');
        }
        if (run === undefined) {
            try {
                run = await args.driver.run({
                    taskId: args.task.id,
                    spec: args.spec,
                    steering: args.steering,
                    worktree,
                    budgetRemaining: args.budget.remaining(),
                    signal: controller.signal,
                });
            }
            catch (err) {
                run = failedRun(worktree, `driver threw: ${describe(err)}`, err, 'DriverError');
            }
        }
        args.budget.add(run.usage);
        if (args.budget.exhausted() && !controller.signal.aborted) {
            controller.abort();
        }
        return {
            id,
            run,
            verification: verificationFromRun(args.task.id, run),
        };
    };
    return Promise.all(Array.from({ length: args.n }, (_unused, idx) => runCandidate(idx)));
}
81
443
  // ─── Handlers ────────────────────────────────────────────────────────────────
444
/**
 * Handler for `dare execute --agent`: runs the autonomous executor loop.
 *
 * After validating the flag combination and parsing the agent options, the
 * loop repeatedly auto-skips blocked tasks, asks the approval gate for the
 * rank of the first ready task, and then, for each ready task: marks it
 * RUNNING, runs the guard preflight, resolves the driver, runs `bestOfN`
 * candidates, records the attempt, and lets the decay policy decide the next
 * action. A `DONE` action marks the task done and moves on; any other action
 * marks it failed and ends the run.
 *
 * Exit codes requested via `process.exit`: 1 for invalid options, missing
 * agent SDK, exhausted budget or a failed task; `GUARD_FAIL_EXIT_CODE` when
 * the guard preflight rejects a task.
 *
 * NOTE(review): every `process.exit` is followed by `return`, presumably so
 * the handler still stops when `process.exit` is stubbed in tests — confirm.
 *
 * @param {object} dag - Loaded DAG (mutated as tasks change state).
 * @param {object} options - Parsed CLI options.
 * @param {string} stateFile - State path handed to `persist`.
 * @param {string} canvasPath - Canvas path handed to `persist`.
 * @param {object | undefined} graph - Optional graph handle for telemetry.
 * @param {string} cwd - Project directory.
 * @returns {Promise<void>}
 */
async function handleAgent(dag, options, stateFile, canvasPath, graph, cwd) {
    // Prints a CLI error and requests exit code 1; callers `return` right after.
    const fail = (message) => {
        console.error(chalk.red(message));
        process.exit(1);
    };
    // Best-effort cost telemetry: a write failure is logged, never fatal.
    const recordCost = (taskId, totals, candidateCount) => {
        if (!graph) {
            return;
        }
        try {
            recordCostTelemetry(graph, taskId, totals, candidateCount);
        }
        catch (err) {
            execLog.warn({ err: err instanceof Error ? err.message : String(err), taskId }, 'cost telemetry write failed (best-effort)');
        }
    };

    // ── Option validation ────────────────────────────────────────────────
    const conflictingFlag = options.complete ||
        options.fail ||
        options.reset ||
        options.next ||
        options.watch ||
        options.status;
    if (conflictingFlag) {
        fail('Error: --agent cannot be combined with --status/--next/--watch/--complete/--fail/--reset.');
        return;
    }
    const requireApproval = parseRequireApprovalMode(options.requireApproval);
    if (!requireApproval) {
        fail(`Error: --require-approval must be 'rank' or 'none' (got '${options.requireApproval}')`);
        return;
    }
    if (requireApproval === 'rank' && !isInteractiveRuntime()) {
        fail("Error: --require-approval rank needs an interactive TTY. Use '--require-approval none' in CI/non-interactive environments.");
        return;
    }
    const onFail = parseOnFailMode(options.onFail);
    if (!onFail) {
        fail(`Error: --on-fail must be 'replan', 'escalate' or 'stop' (got '${options.onFail}')`);
        return;
    }
    if (options.policy) {
        const policyErr = validatePolicy(options.policy);
        if (policyErr) {
            fail(policyErr);
            return;
        }
    }
    if (options.formalBackend) {
        const formalErr = validateFormalBackend(options.formalBackend);
        if (formalErr) {
            fail(formalErr);
            return;
        }
    }
    let verificationConfig = await loadVerificationConfigForAgent(cwd, options);
    if (options.policy) {
        verificationConfig = applyPolicyOverride(verificationConfig, options.policy);
    }
    const bestOfFlag = parsePositiveInt(options.bestOf);
    if (options.bestOf !== undefined && bestOfFlag === undefined) {
        fail(`Error: --best-of must be between 1 and ${verificationConfig.bestOfN.max} (got ${options.bestOf})`);
        return;
    }
    const bestOfN = resolveBestOfCount(bestOfFlag, verificationConfig);
    const bestOfErr = validateBestOf(bestOfN, verificationConfig.bestOfN.max);
    if (bestOfErr) {
        fail(bestOfErr);
        return;
    }
    // Test for presence, not truthiness: an explicitly supplied but empty
    // value must be rejected rather than silently replaced by the config.
    const budgetFlagGiven = options.budgetTokens !== undefined && options.budgetTokens !== null;
    const budgetTokens = budgetFlagGiven
        ? parsePositiveInt(options.budgetTokens)
        : verificationConfig.bestOfN.budgetTokens;
    if (budgetFlagGiven && budgetTokens === undefined) {
        fail(`Error: --budget-tokens must be a positive integer (got ${options.budgetTokens})`);
        return;
    }
    const runtimeConfig = await loadAgentRuntimeConfig(cwd);
    const budget = new BudgetTracker(budgetTokens ?? null);

    // ── Executor loop ────────────────────────────────────────────────────
    while (true) {
        const newlySkipped = applyCascadingSkip(dag);
        if (newlySkipped.length > 0) {
            console.log(chalk.gray(`↷ Auto-skipped ${newlySkipped.length} blocked task(s).`));
        }
        const ready = nextExecutableTasks(dag, true);
        if (ready.length === 0) {
            break;
        }
        const rank = computeRanks(dag.tasks).get(ready[0].id) ?? 0;
        const rankGate = await gateRank(rank, ready, requireApproval);
        if (rankGate === 'stop') {
            console.log(chalk.yellow(`⏸ Rank ${rank} paused by approval policy. State preserved as PENDING/RUNNING.`));
            await persist(dag, stateFile, canvasPath);
            return;
        }
        for (const task of ready) {
            if (budget.exhausted()) {
                console.error(chalk.red('❌ Budget exhausted before starting next task. Escalating.'));
                await persist(dag, stateFile, canvasPath);
                process.exit(1);
                return;
            }
            markRunning(dag, task.id);
            await persist(dag, stateFile, canvasPath);

            const guarded = await preflightGuardImpl(task, runtimeConfig.guard);
            if (guarded.verdict === 'FAIL') {
                const guardReason = guarded.reason ?? 'guard preflight failed';
                markFailed(dag, task.id, {
                    error: guardReason,
                    graph,
                });
                await persist(dag, stateFile, canvasPath);
                // Surface why the run stopped; the exit code alone is opaque.
                console.error(chalk.red(`❌ ${task.id} guard preflight failed — ${guardReason}`));
                process.exit(GUARD_FAIL_EXIT_CODE);
                return;
            }

            let driver;
            try {
                driver = await resolveDriver(options, runtimeConfig);
            }
            catch (err) {
                if (err instanceof AgentSdkMissingError) {
                    console.error(chalk.red(err.message));
                    await persist(dag, stateFile, canvasPath);
                    process.exit(1);
                    return;
                }
                throw err;
            }

            const spec = await loadTaskSpec(cwd, task);
            const candidates = await runAgentCandidates({
                cwd,
                task,
                spec,
                steering: guarded.artifacts,
                driver,
                budget,
                n: bestOfN,
            });
            const winner = selectParetoCandidate(candidates);
            const totals = usageTotals(candidates);
            const failedAspect = winner.verification.aspects.find((a) => a.verdict === 'FAIL')
                ?.aspect;
            const failureBody = failureReasonFromVerification(winner.verification);
            // Prefer the driver's own signature; otherwise derive one from
            // the failed aspect and the failure text.
            const sig = winner.verification.passed
                ? undefined
                : winner.run.failureSignature ??
                    failureSignature({
                        failedAspect: failedAspect ?? 'test',
                        stderr: failureBody,
                    });
            const current = await appendAttempt(cwd, task.id, {
                at: new Date().toISOString(),
                passed: winner.verification.passed,
                failureSignature: sig,
                failedAspect: winner.verification.passed ? undefined : failedAspect,
            });
            const history = await getAttempts(cwd, task.id);
            const verdict = decideNextAction({
                result: winner.verification,
                current,
                history,
                loop: verificationConfig.loop,
            });
            const action = resolveAgentAction(verdict.action, onFail, budget.exhausted());

            if (action === 'DONE') {
                markDone(dag, task.id, {
                    output: winner.run.summary,
                    tokens: totals.inputTokens + totals.outputTokens,
                    durationMs: winner.verification.durationMs,
                    graph,
                });
                recordCost(task.id, totals, candidates.length);
                await persist(dag, stateFile, canvasPath);
                continue;
            }

            markFailed(dag, task.id, {
                error: `${action}: ${verdict.reason}`,
                durationMs: winner.verification.durationMs,
                graph,
            });
            recordCost(task.id, totals, candidates.length);
            await persist(dag, stateFile, canvasPath);
            fail(`❌ ${task.id} ${action} — ${verdict.reason}`);
            return;
        }
    }
    await persist(dag, stateFile, canvasPath);
    console.log(chalk.green('✅ Autonomous agent loop completed all executable tasks.'));
}
82
638
  async function handleNext(dag, options, stateFile, canvasPath, graph, cwd) {
83
639
  const newlySkipped = applyCascadingSkip(dag);
84
640
  if (newlySkipped.length > 0) {