@alevental/cccp 0.4.3 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/runner.js CHANGED
@@ -8,7 +8,7 @@ import { ConsoleLogger } from "./logger.js";
8
8
  import { writeMcpConfigFile } from "./mcp/mcp-config.js";
9
9
  import { runAutoresearchCycle } from "./autoresearch.js";
10
10
  import { loadPipeline } from "./pipeline.js";
11
- import { runPgeCycle } from "./pge.js";
11
+ import { runPgeCycle, dispatchEvaluatorWithFeedback } from "./pge.js";
12
12
  import { interpolate, resolveTaskBody, loadAgentMarkdown, buildTaskContext, writeSystemPromptFile } from "./prompt.js";
13
13
  import { updatePipelineStatus, notifyPipelineComplete } from "./tui/cmux.js";
14
14
  import { createState, flattenStageEntries, saveState, saveStateWithEvent, updateStageStatus, finishPipeline, findResumePoint, } from "./state.js";
@@ -25,19 +25,15 @@ function getDispatcher(ctx) {
25
25
  // ---------------------------------------------------------------------------
26
26
  // Stage dispatch — type: agent
27
27
  // ---------------------------------------------------------------------------
28
- async function runAgentStage(stage, ctx) {
28
+ async function runAgentStage(stage, ctx, state) {
29
29
  const start = Date.now();
30
30
  const vars = { ...ctx.variables, ...(stage.variables ?? {}) };
31
31
  const taskDescription = await resolveTaskBody(stage, vars, `Execute stage: ${stage.name}`);
32
32
  const output = stage.output ? interpolate(stage.output, vars) : undefined;
33
33
  const inputs = stage.inputs?.map((i) => interpolate(i, vars));
34
- const userPrompt = buildTaskContext({
35
- task: taskDescription,
36
- inputs,
37
- output,
38
- });
39
34
  // In dry-run mode, try to resolve but don't fail if files are missing.
40
35
  if (ctx.dryRun) {
36
+ const userPrompt = buildTaskContext({ task: taskDescription, inputs, output });
41
37
  let resolvedPath = stage.agent;
42
38
  try {
43
39
  const resolved = await resolveAgent(stage.agent, ctx.agentSearchPaths, stage.operation, ctx.projectDir);
@@ -75,67 +71,156 @@ async function runAgentStage(stage, ctx) {
75
71
  const mcpConfigFile = ctx.projectConfig
76
72
  ? await writeMcpConfigFile(stage.mcp_profile, ctx.projectConfig, ctx.tempTracker)
77
73
  : undefined;
78
- const result = await getDispatcher(ctx).dispatch({
79
- userPrompt,
80
- systemPromptFile,
81
- mcpConfigFile,
82
- expectedOutput: output ? resolve(ctx.projectDir, output) : undefined,
83
- cwd: ctx.projectDir,
84
- allowedTools: stage.allowed_tools,
85
- agentName: stage.agent.replace(/[/\\]/g, "-").replace(/\.md$/, ""),
86
- streamLogDir: resolve(ctx.artifactDir, ".cccp"),
87
- claudeConfigDir: ctx.projectConfig?.claude_config_dir,
88
- permissionMode: ctx.projectConfig?.permission_mode,
89
- onActivity: (activity) => activityBus.emit("activity", activity),
90
- quiet: ctx.quiet,
91
- });
92
- if (result.exitCode !== 0) {
93
- throw new AgentCrashError(stage.agent, result.exitCode);
94
- }
95
- if (output && !result.outputExists) {
96
- throw new MissingOutputError(stage.agent, output);
74
+ let gateFeedbackPath;
75
+ let gateRetries = 0;
76
+ // Retry loop for human_review feedback.
77
+ while (true) {
78
+ const userPrompt = buildTaskContext({
79
+ task: taskDescription,
80
+ inputs,
81
+ output,
82
+ gateFeedback: gateFeedbackPath,
83
+ });
84
+ const result = await getDispatcher(ctx).dispatch({
85
+ userPrompt,
86
+ systemPromptFile,
87
+ mcpConfigFile,
88
+ expectedOutput: output ? resolve(ctx.projectDir, output) : undefined,
89
+ cwd: ctx.projectDir,
90
+ allowedTools: stage.allowed_tools,
91
+ agentName: stage.agent.replace(/[/\\]/g, "-").replace(/\.md$/, ""),
92
+ streamLogDir: resolve(ctx.artifactDir, ".cccp"),
93
+ claudeConfigDir: ctx.projectConfig?.claude_config_dir,
94
+ permissionMode: ctx.projectConfig?.permission_mode,
95
+ onActivity: (activity) => activityBus.emit("activity", activity),
96
+ quiet: ctx.quiet,
97
+ });
98
+ if (result.exitCode !== 0) {
99
+ throw new AgentCrashError(stage.agent, result.exitCode);
100
+ }
101
+ if (output && !result.outputExists) {
102
+ throw new MissingOutputError(stage.agent, output);
103
+ }
104
+ // Check human_review gate.
105
+ if (stage.human_review && ctx.gateStrategy) {
106
+ const reviewGate = {
107
+ stageName: stage.name,
108
+ status: "pending",
109
+ prompt: `Agent stage "${stage.name}" completed. Review the output and approve or reject with feedback.`,
110
+ };
111
+ state.gate = reviewGate;
112
+ await saveState(state);
113
+ const reviewResponse = await ctx.gateStrategy.waitForGate(reviewGate);
114
+ state.gate = undefined;
115
+ await saveState(state);
116
+ if (!reviewResponse.approved && reviewResponse.feedbackPath && gateRetries < MAX_GATE_RETRIES) {
117
+ gateRetries++;
118
+ getLogger(ctx).log(` human review rejected with feedback — retrying agent (retry ${gateRetries}/${MAX_GATE_RETRIES})`);
119
+ gateFeedbackPath = reviewResponse.feedbackPath;
120
+ continue;
121
+ }
122
+ if (!reviewResponse.approved) {
123
+ return {
124
+ stageName: stage.name,
125
+ status: "failed",
126
+ result,
127
+ error: gateRetries >= MAX_GATE_RETRIES
128
+ ? `Failed and max gate retries (${MAX_GATE_RETRIES}) reached`
129
+ : `Human review rejected${reviewResponse.feedback ? `: ${reviewResponse.feedback}` : ""}`,
130
+ durationMs: Date.now() - start,
131
+ };
132
+ }
133
+ getLogger(ctx).log(` human review approved`);
134
+ }
135
+ return {
136
+ stageName: stage.name,
137
+ status: "passed",
138
+ result,
139
+ durationMs: Date.now() - start,
140
+ };
97
141
  }
98
- return {
99
- stageName: stage.name,
100
- status: "passed",
101
- result,
102
- durationMs: Date.now() - start,
103
- };
104
142
  }
105
143
  // ---------------------------------------------------------------------------
106
144
  // Stage dispatch — type: pge
107
145
  // ---------------------------------------------------------------------------
146
+ const MAX_GATE_RETRIES = 3;
108
147
  async function runPgeStage(stage, ctx, state) {
109
148
  const start = Date.now();
110
- const pgeResult = await runPgeCycle(stage, ctx, state, async (eventType, eventData) => {
149
+ let pgeOptions;
150
+ let gateRetries = 0;
151
+ const onProgress = async (eventType, eventData) => {
111
152
  if (eventType) {
112
153
  await saveStateWithEvent(state, eventType, stage.name, eventData);
113
154
  }
114
155
  else {
115
156
  await saveState(state);
116
157
  }
117
- });
118
- if (pgeResult.outcome === "pass") {
119
- return {
120
- stageName: stage.name,
121
- status: "passed",
122
- result: pgeResult,
123
- durationMs: Date.now() - start,
124
- };
125
- }
126
- if (pgeResult.outcome === "error") {
127
- return {
128
- stageName: stage.name,
129
- status: "error",
130
- result: pgeResult,
131
- error: "Evaluation parse error",
132
- durationMs: Date.now() - start,
133
- };
134
- }
135
- // outcome === "fail" — apply escalation strategy
136
- const strategy = stage.on_fail ?? "stop";
137
- switch (strategy) {
138
- case "stop":
158
+ };
159
+ // Retry loop: runs the PGE cycle, handles escalation, and retries with feedback.
160
+ while (true) {
161
+ const pgeResult = await runPgeCycle(stage, ctx, state, onProgress, pgeOptions);
162
+ if (pgeResult.outcome === "pass") {
163
+ // Check human_review gate — fire a gate after PGE passes for human quality review.
164
+ if (stage.human_review && ctx.gateStrategy && !ctx.dryRun) {
165
+ const reviewGate = {
166
+ stageName: stage.name,
167
+ status: "pending",
168
+ prompt: `PGE stage "${stage.name}" passed evaluation. Review the deliverable and approve or reject with feedback.`,
169
+ };
170
+ state.gate = reviewGate;
171
+ await saveState(state);
172
+ const reviewResponse = await ctx.gateStrategy.waitForGate(reviewGate);
173
+ state.gate = undefined;
174
+ await saveState(state);
175
+ if (!reviewResponse.approved && reviewResponse.feedbackPath && gateRetries < MAX_GATE_RETRIES) {
176
+ gateRetries++;
177
+ getLogger(ctx).log(` human review rejected with feedback — dispatching evaluator with feedback (retry ${gateRetries}/${MAX_GATE_RETRIES})`);
178
+ // Route feedback through the evaluator to produce a structured FAIL evaluation.
179
+ const humanEvalPath = await dispatchEvaluatorWithFeedback(stage, ctx, state, pgeResult, reviewResponse.feedbackPath, onProgress);
180
+ // Re-enter GE loop with the human-mediated evaluation.
181
+ pgeOptions = {
182
+ existingContractPath: pgeResult.contractPath,
183
+ existingTaskPlanPath: pgeResult.taskPlanPath,
184
+ };
185
+ // Inject the human evaluation as the "last eval" by setting it as gate feedback
186
+ // so the generator sees it alongside any previous evaluations.
187
+ // Actually, we need the generator to see this as previousEvaluation, not gateFeedback.
188
+ // The cleanest way: pass it as gateFeedbackPath (the generator will read both).
189
+ pgeOptions.gateFeedbackPath = humanEvalPath;
190
+ continue;
191
+ }
192
+ if (!reviewResponse.approved) {
193
+ return {
194
+ stageName: stage.name,
195
+ status: "failed",
196
+ result: pgeResult,
197
+ error: gateRetries >= MAX_GATE_RETRIES
198
+ ? `Failed and max gate retries (${MAX_GATE_RETRIES}) reached`
199
+ : `Human review rejected${reviewResponse.feedback ? `: ${reviewResponse.feedback}` : ""}`,
200
+ durationMs: Date.now() - start,
201
+ };
202
+ }
203
+ getLogger(ctx).log(` human review approved`);
204
+ }
205
+ return {
206
+ stageName: stage.name,
207
+ status: "passed",
208
+ result: pgeResult,
209
+ durationMs: Date.now() - start,
210
+ };
211
+ }
212
+ if (pgeResult.outcome === "error") {
213
+ return {
214
+ stageName: stage.name,
215
+ status: "error",
216
+ result: pgeResult,
217
+ error: "Evaluation parse error",
218
+ durationMs: Date.now() - start,
219
+ };
220
+ }
221
+ // outcome === "fail" — apply escalation strategy
222
+ const strategy = stage.on_fail ?? "stop";
223
+ if (strategy === "stop") {
139
224
  return {
140
225
  stageName: stage.name,
141
226
  status: "failed",
@@ -143,7 +228,8 @@ async function runPgeStage(stage, ctx, state) {
143
228
  error: `Failed after ${pgeResult.iterations}/${pgeResult.maxIterations} iterations`,
144
229
  durationMs: Date.now() - start,
145
230
  };
146
- case "skip":
231
+ }
232
+ if (strategy === "skip") {
147
233
  getLogger(ctx).log(` escalation: skip — continuing pipeline`);
148
234
  return {
149
235
  stageName: stage.name,
@@ -151,44 +237,59 @@ async function runPgeStage(stage, ctx, state) {
151
237
  result: pgeResult,
152
238
  durationMs: Date.now() - start,
153
239
  };
154
- case "human_gate":
155
- if (!ctx.gateStrategy) {
156
- getLogger(ctx).log(` escalation: human_gate — no gate strategy, stopping`);
157
- return {
158
- stageName: stage.name,
159
- status: "failed",
160
- result: pgeResult,
161
- error: `Failed after ${pgeResult.iterations} iterations (no gate strategy configured)`,
162
- durationMs: Date.now() - start,
163
- };
164
- }
165
- getLogger(ctx).log(` escalation: human_gate — awaiting approval`);
166
- const gateInfo = {
240
+ }
241
+ // strategy === "human_gate"
242
+ if (!ctx.gateStrategy) {
243
+ getLogger(ctx).log(` escalation: human_gate — no gate strategy, stopping`);
244
+ return {
167
245
  stageName: stage.name,
168
- status: "pending",
169
- prompt: `PGE stage "${stage.name}" failed after ${pgeResult.iterations} iterations. Approve to continue or reject to stop.`,
246
+ status: "failed",
247
+ result: pgeResult,
248
+ error: `Failed after ${pgeResult.iterations} iterations (no gate strategy configured)`,
249
+ durationMs: Date.now() - start,
170
250
  };
171
- state.gate = gateInfo;
172
- await saveState(state);
173
- const gateResponse = await ctx.gateStrategy.waitForGate(gateInfo);
174
- state.gate = undefined;
175
- await saveState(state);
176
- if (gateResponse.approved) {
177
- getLogger(ctx).log(` gate approved — continuing pipeline`);
178
- return {
179
- stageName: stage.name,
180
- status: "skipped",
181
- result: pgeResult,
182
- durationMs: Date.now() - start,
183
- };
184
- }
251
+ }
252
+ getLogger(ctx).log(` escalation: human_gate — awaiting approval`);
253
+ const gateInfo = {
254
+ stageName: stage.name,
255
+ status: "pending",
256
+ prompt: `PGE stage "${stage.name}" failed after ${pgeResult.iterations} iterations. Approve to skip and continue, reject to stop, or reject with feedback to retry the generation cycle.`,
257
+ };
258
+ state.gate = gateInfo;
259
+ await saveState(state);
260
+ const gateResponse = await ctx.gateStrategy.waitForGate(gateInfo);
261
+ state.gate = undefined;
262
+ await saveState(state);
263
+ if (gateResponse.approved) {
264
+ getLogger(ctx).log(` gate approved — continuing pipeline`);
185
265
  return {
186
266
  stageName: stage.name,
187
- status: "failed",
267
+ status: "skipped",
188
268
  result: pgeResult,
189
- error: `Failed and gate rejected${gateResponse.feedback ? `: ${gateResponse.feedback}` : ""}`,
190
269
  durationMs: Date.now() - start,
191
270
  };
271
+ }
272
+ // Rejected — check for feedback retry
273
+ if (gateResponse.feedbackPath && gateRetries < MAX_GATE_RETRIES) {
274
+ gateRetries++;
275
+ getLogger(ctx).log(` gate rejected with feedback — retrying PGE cycle (retry ${gateRetries}/${MAX_GATE_RETRIES})`);
276
+ pgeOptions = {
277
+ gateFeedbackPath: gateResponse.feedbackPath,
278
+ existingContractPath: pgeResult.contractPath,
279
+ existingTaskPlanPath: pgeResult.taskPlanPath,
280
+ };
281
+ continue; // Re-enter the PGE cycle with feedback
282
+ }
283
+ // Rejected without feedback, or max retries reached
284
+ return {
285
+ stageName: stage.name,
286
+ status: "failed",
287
+ result: pgeResult,
288
+ error: gateRetries >= MAX_GATE_RETRIES
289
+ ? `Failed and max gate retries (${MAX_GATE_RETRIES}) reached`
290
+ : `Failed and gate rejected${gateResponse.feedback ? `: ${gateResponse.feedback}` : ""}`,
291
+ durationMs: Date.now() - start,
292
+ };
192
293
  }
193
294
  }
194
295
  // ---------------------------------------------------------------------------
@@ -196,38 +297,42 @@ async function runPgeStage(stage, ctx, state) {
196
297
  // ---------------------------------------------------------------------------
197
298
  async function runAutoresearchStage(stage, ctx, state) {
198
299
  const start = Date.now();
199
- const result = await runAutoresearchCycle(stage, ctx, state, async (eventType, eventData) => {
300
+ let arOptions;
301
+ let gateRetries = 0;
302
+ const onProgress = async (eventType, eventData) => {
200
303
  if (eventType) {
201
304
  await saveStateWithEvent(state, eventType, stage.name, eventData);
202
305
  }
203
306
  else {
204
307
  await saveState(state);
205
308
  }
206
- });
207
- if (result.outcome === "pass") {
208
- return {
209
- stageName: stage.name,
210
- status: "passed",
211
- result,
212
- durationMs: Date.now() - start,
213
- };
214
- }
215
- if (result.outcome === "error") {
216
- return {
217
- stageName: stage.name,
218
- status: "error",
219
- result,
220
- error: "Evaluation parse error",
221
- durationMs: Date.now() - start,
222
- };
223
- }
224
- // outcome === "fail" — apply escalation strategy
225
- const strategy = stage.on_fail ?? "stop";
226
- const iterLabel = result.maxIterations
227
- ? `${result.iterations}/${result.maxIterations}`
228
- : `${result.iterations}`;
229
- switch (strategy) {
230
- case "stop":
309
+ };
310
+ // Retry loop: runs autoresearch cycle, handles escalation, retries with feedback.
311
+ while (true) {
312
+ const result = await runAutoresearchCycle(stage, ctx, state, onProgress, arOptions);
313
+ if (result.outcome === "pass") {
314
+ return {
315
+ stageName: stage.name,
316
+ status: "passed",
317
+ result,
318
+ durationMs: Date.now() - start,
319
+ };
320
+ }
321
+ if (result.outcome === "error") {
322
+ return {
323
+ stageName: stage.name,
324
+ status: "error",
325
+ result,
326
+ error: "Evaluation parse error",
327
+ durationMs: Date.now() - start,
328
+ };
329
+ }
330
+ // outcome === "fail" — apply escalation strategy
331
+ const strategy = stage.on_fail ?? "stop";
332
+ const iterLabel = result.maxIterations
333
+ ? `${result.iterations}/${result.maxIterations}`
334
+ : `${result.iterations}`;
335
+ if (strategy === "stop") {
231
336
  return {
232
337
  stageName: stage.name,
233
338
  status: "failed",
@@ -235,7 +340,8 @@ async function runAutoresearchStage(stage, ctx, state) {
235
340
  error: `Failed after ${iterLabel} iterations`,
236
341
  durationMs: Date.now() - start,
237
342
  };
238
- case "skip":
343
+ }
344
+ if (strategy === "skip") {
239
345
  getLogger(ctx).log(` escalation: skip — continuing pipeline`);
240
346
  return {
241
347
  stageName: stage.name,
@@ -243,44 +349,55 @@ async function runAutoresearchStage(stage, ctx, state) {
243
349
  result,
244
350
  durationMs: Date.now() - start,
245
351
  };
246
- case "human_gate":
247
- if (!ctx.gateStrategy) {
248
- getLogger(ctx).log(` escalation: human_gate — no gate strategy, stopping`);
249
- return {
250
- stageName: stage.name,
251
- status: "failed",
252
- result,
253
- error: `Failed after ${iterLabel} iterations (no gate strategy configured)`,
254
- durationMs: Date.now() - start,
255
- };
256
- }
257
- getLogger(ctx).log(` escalation: human_gate — awaiting approval`);
258
- const gateInfo = {
352
+ }
353
+ // strategy === "human_gate"
354
+ if (!ctx.gateStrategy) {
355
+ getLogger(ctx).log(` escalation: human_gate — no gate strategy, stopping`);
356
+ return {
259
357
  stageName: stage.name,
260
- status: "pending",
261
- prompt: `Autoresearch stage "${stage.name}" failed after ${iterLabel} iterations. Approve to continue or reject to stop.`,
358
+ status: "failed",
359
+ result,
360
+ error: `Failed after ${iterLabel} iterations (no gate strategy configured)`,
361
+ durationMs: Date.now() - start,
262
362
  };
263
- state.gate = gateInfo;
264
- await saveState(state);
265
- const gateResponse = await ctx.gateStrategy.waitForGate(gateInfo);
266
- state.gate = undefined;
267
- await saveState(state);
268
- if (gateResponse.approved) {
269
- getLogger(ctx).log(` gate approved — continuing pipeline`);
270
- return {
271
- stageName: stage.name,
272
- status: "skipped",
273
- result,
274
- durationMs: Date.now() - start,
275
- };
276
- }
363
+ }
364
+ getLogger(ctx).log(` escalation: human_gate — awaiting approval`);
365
+ const gateInfo = {
366
+ stageName: stage.name,
367
+ status: "pending",
368
+ prompt: `Autoresearch stage "${stage.name}" failed after ${iterLabel} iterations. Approve to skip and continue, reject to stop, or reject with feedback to retry.`,
369
+ };
370
+ state.gate = gateInfo;
371
+ await saveState(state);
372
+ const gateResponse = await ctx.gateStrategy.waitForGate(gateInfo);
373
+ state.gate = undefined;
374
+ await saveState(state);
375
+ if (gateResponse.approved) {
376
+ getLogger(ctx).log(` gate approved — continuing pipeline`);
277
377
  return {
278
378
  stageName: stage.name,
279
- status: "failed",
379
+ status: "skipped",
280
380
  result,
281
- error: `Failed and gate rejected${gateResponse.feedback ? `: ${gateResponse.feedback}` : ""}`,
282
381
  durationMs: Date.now() - start,
283
382
  };
383
+ }
384
+ // Rejected — check for feedback retry
385
+ if (gateResponse.feedbackPath && gateRetries < MAX_GATE_RETRIES) {
386
+ gateRetries++;
387
+ getLogger(ctx).log(` gate rejected with feedback — retrying autoresearch cycle (retry ${gateRetries}/${MAX_GATE_RETRIES})`);
388
+ arOptions = { gateFeedbackPath: gateResponse.feedbackPath };
389
+ continue;
390
+ }
391
+ // Rejected without feedback, or max retries reached
392
+ return {
393
+ stageName: stage.name,
394
+ status: "failed",
395
+ result,
396
+ error: gateRetries >= MAX_GATE_RETRIES
397
+ ? `Failed and max gate retries (${MAX_GATE_RETRIES}) reached`
398
+ : `Failed and gate rejected${gateResponse.feedback ? `: ${gateResponse.feedback}` : ""}`,
399
+ durationMs: Date.now() - start,
400
+ };
284
401
  }
285
402
  }
286
403
  // ---------------------------------------------------------------------------
@@ -469,7 +586,7 @@ async function runPipelineStage(stage, ctx, state) {
469
586
  async function runStage(stage, ctx, state) {
470
587
  switch (stage.type) {
471
588
  case "agent":
472
- return runAgentStage(stage, ctx);
589
+ return runAgentStage(stage, ctx, state);
473
590
  case "pge":
474
591
  return runPgeStage(stage, ctx, state);
475
592
  case "human_gate":
@@ -660,7 +777,7 @@ async function runStages(ctx, existingState) {
660
777
  }
661
778
  }
662
779
  else {
663
- state = createState(ctx.pipeline.name, ctx.project, ctx.pipelineFile, flattenStageEntries(ctx.pipeline.stages), ctx.artifactDir, ctx.projectDir);
780
+ state = createState(ctx.pipeline.name, ctx.project, ctx.pipelineFile, flattenStageEntries(ctx.pipeline.stages), ctx.artifactDir, ctx.projectDir, ctx.sessionId);
664
781
  }
665
782
  if (!ctx.dryRun) {
666
783
  await saveState(state);
@@ -732,7 +849,7 @@ export async function runPipeline(ctx, opts) {
732
849
  // Create gate strategy if not provided.
733
850
  if (!ctx.gateStrategy && !ctx.headless && !ctx.dryRun) {
734
851
  // Need a runId — use existing state or generate one via createState.
735
- const tempState = opts?.existingState ?? createState(ctx.pipeline.name, ctx.project, ctx.pipelineFile, flattenStageEntries(ctx.pipeline.stages), ctx.artifactDir, ctx.projectDir);
852
+ const tempState = opts?.existingState ?? createState(ctx.pipeline.name, ctx.project, ctx.pipelineFile, flattenStageEntries(ctx.pipeline.stages), ctx.artifactDir, ctx.projectDir, ctx.sessionId);
736
853
  ctx.gateStrategy = new FilesystemGateStrategy(tempState.runId, ctx.projectDir, ctx.quiet);
737
854
  }
738
855
  // Initialize visited pipelines for cycle detection.