planpong 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/config/defaults.js +1 -0
- package/dist/src/config/defaults.js.map +1 -1
- package/dist/src/config/loader.d.ts +1 -0
- package/dist/src/config/loader.js +3 -0
- package/dist/src/config/loader.js.map +1 -1
- package/dist/src/core/apply-edits.d.ts +40 -0
- package/dist/src/core/apply-edits.js +220 -0
- package/dist/src/core/apply-edits.js.map +1 -0
- package/dist/src/core/convergence.d.ts +57 -4
- package/dist/src/core/convergence.js +134 -6
- package/dist/src/core/convergence.js.map +1 -1
- package/dist/src/core/loop.js +3 -3
- package/dist/src/core/loop.js.map +1 -1
- package/dist/src/core/operations.d.ts +14 -1
- package/dist/src/core/operations.js +592 -56
- package/dist/src/core/operations.js.map +1 -1
- package/dist/src/core/plan-diff.d.ts +23 -0
- package/dist/src/core/plan-diff.js +135 -0
- package/dist/src/core/plan-diff.js.map +1 -0
- package/dist/src/core/session.d.ts +11 -0
- package/dist/src/core/session.js +51 -1
- package/dist/src/core/session.js.map +1 -1
- package/dist/src/mcp/tools/get-feedback.d.ts +16 -0
- package/dist/src/mcp/tools/get-feedback.js +118 -114
- package/dist/src/mcp/tools/get-feedback.js.map +1 -1
- package/dist/src/mcp/tools/revise.d.ts +16 -0
- package/dist/src/mcp/tools/revise.js +76 -61
- package/dist/src/mcp/tools/revise.js.map +1 -1
- package/dist/src/mcp/tools/status.js +15 -1
- package/dist/src/mcp/tools/status.js.map +1 -1
- package/dist/src/prompts/planner.d.ts +34 -1
- package/dist/src/prompts/planner.js +272 -17
- package/dist/src/prompts/planner.js.map +1 -1
- package/dist/src/prompts/reviewer.d.ts +14 -1
- package/dist/src/prompts/reviewer.js +84 -1
- package/dist/src/prompts/reviewer.js.map +1 -1
- package/dist/src/providers/claude.d.ts +3 -0
- package/dist/src/providers/claude.js +151 -13
- package/dist/src/providers/claude.js.map +1 -1
- package/dist/src/providers/codex.d.ts +3 -0
- package/dist/src/providers/codex.js +150 -14
- package/dist/src/providers/codex.js.map +1 -1
- package/dist/src/providers/types.d.ts +69 -3
- package/dist/src/schemas/config.d.ts +3 -0
- package/dist/src/schemas/config.js +6 -0
- package/dist/src/schemas/config.js.map +1 -1
- package/dist/src/schemas/json-schema.d.ts +21 -0
- package/dist/src/schemas/json-schema.js +172 -0
- package/dist/src/schemas/json-schema.js.map +1 -0
- package/dist/src/schemas/metrics.d.ts +171 -0
- package/dist/src/schemas/metrics.js +49 -0
- package/dist/src/schemas/metrics.js.map +1 -0
- package/dist/src/schemas/revision.d.ts +166 -2
- package/dist/src/schemas/revision.js +35 -2
- package/dist/src/schemas/revision.js.map +1 -1
- package/dist/src/schemas/session.d.ts +6 -0
- package/dist/src/schemas/session.js +10 -0
- package/dist/src/schemas/session.js.map +1 -1
- package/package.json +4 -2
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
|
3
3
|
import { relative, resolve } from "node:path";
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
4
|
+
import { isEditsRevision, isDirectionRevision, } from "../schemas/revision.js";
|
|
5
|
+
import { buildRevisionPrompt, buildEditsRetryPrompt, } from "../prompts/planner.js";
|
|
6
|
+
import { buildReviewPrompt, buildIncrementalReviewPrompt, formatPriorDecisions, getReviewPhase, } from "../prompts/reviewer.js";
|
|
7
|
+
import { buildPlanDiff } from "./plan-diff.js";
|
|
8
|
+
import { parseFeedbackForPhase, parseRevision, parseStructuredFeedbackForPhase, parseStructuredRevision, isConverged, StructuredOutputParseError, ZodValidationError, } from "./convergence.js";
|
|
9
|
+
import { getFeedbackJsonSchemaForPhase, getRevisionJsonSchema, } from "../schemas/json-schema.js";
|
|
10
|
+
import { applyEdits, logFailures, summarizeApply, } from "./apply-edits.js";
|
|
11
|
+
import { createSession, writeSessionState, writeRoundFeedback, writeRoundResponse, readRoundFeedback, readRoundResponse, writeInitialPlan, writeRoundMetrics, writeRoundPlanSnapshot, readRoundPlanSnapshot, } from "./session.js";
|
|
12
|
+
import { summarizeTiming, } from "../schemas/metrics.js";
|
|
8
13
|
// --- Utility functions ---
|
|
9
14
|
export function hashFile(path) {
|
|
10
15
|
const content = readFileSync(path, "utf-8");
|
|
@@ -226,6 +231,216 @@ function buildPriorDecisions(cwd, sessionId, currentRound) {
|
|
|
226
231
|
return null;
|
|
227
232
|
return formatPriorDecisions(priorRounds);
|
|
228
233
|
}
|
|
234
|
+
/**
 * Invocation state machine — single owner of all retry/downgrade logic for
 * provider invocations. Providers are single-shot; this function decides
 * when to downgrade from structured output to legacy mode.
 *
 * Strict 2-attempt cap: structured (1) -> legacy fallback (1) -> terminal.
 *
 * Failure handling:
 * - Provider `capability` error in structured mode → downgrade
 * - Provider `fatal` (or any other) error → terminal (no downgrade)
 * - StructuredOutputParseError on structured output → downgrade
 * - ZodValidationError from either parser → terminal, rethrown unchanged
 * - Any failure in legacy mode → terminal
 *
 * Observability: when `metricsContext` is provided, each attempt emits a
 * start/end line to stderr (via logStart/logEnd), an attempt record is
 * collected per attempt, and a round-metrics object is handed to
 * `writeRoundMetrics` on every exit path (success or throw). Exceptions
 * raised while building or writing metrics are swallowed so telemetry never
 * alters the invocation outcome.
 *
 * @param {object} args
 * @param {object} args.provider - Must expose `name`,
 *   `checkStructuredOutputSupport()`, `invoke(prompt, options)` and
 *   `markNonCapable()`.
 * @param {object} args.invokeOptions - Base options forwarded to
 *   `provider.invoke`; `cwd`, `model` and `effort` are also read here.
 * @param {object} args.jsonSchema - Added to the options in structured mode only.
 * @param {(structured: boolean) => string} args.buildPrompt - Builds the prompt
 *   for the current mode.
 * @param {(output: string) => unknown} args.parseStructured - Parser used in
 *   structured mode.
 * @param {(output: string) => unknown} args.parseLegacy - Parser used in legacy
 *   mode.
 * @param {string} args.roundLabel - Human-readable label used in error messages.
 * @param {object} [args.metricsContext] - `{ sessionId, round, phase, role }`;
 *   when omitted no telemetry is logged or persisted.
 * @returns {Promise<{result: unknown, metrics: object|null, sessionId: unknown}>}
 *   The parsed result, the in-memory metrics (null without `metricsContext`),
 *   and the `sessionId` reported by the provider response. The returned
 *   metrics object shares its `attempts` array with this function's internal
 *   list.
 * @throws {Error} On terminal provider errors and terminal parse failures; a
 *   ZodValidationError is rethrown as-is.
 */
async function invokeWithStateMachine(args) {
    const { provider, invokeOptions, jsonSchema, buildPrompt, parseStructured, parseLegacy, roundLabel, metricsContext, } = args;
    const supported = await provider.checkStructuredOutputSupport();
    let mode = supported ? "structured" : "legacy";
    let attempt = 0;
    const maxAttempts = 2;
    let lastError = null;
    // Attempt records are collected unconditionally; they are only surfaced
    // (returned / persisted) when metricsContext is provided.
    const attempts = [];
    const startedAt = new Date().toISOString();
    const startedAtMs = Date.now();
    const providerLabel = buildProviderLabel(provider.name, invokeOptions.model, invokeOptions.effort);
    // Single place where the metrics object is assembled; used both for the
    // value returned to the caller and for the copy written to disk.
    const buildMetrics = () => {
        if (!metricsContext) {
            return null;
        }
        try {
            return {
                schema_version: 1,
                session_id: metricsContext.sessionId,
                round: metricsContext.round,
                phase: metricsContext.phase,
                role: metricsContext.role,
                started_at: startedAt,
                completed_at: new Date().toISOString(),
                total_duration_ms: Date.now() - startedAtMs,
                attempts,
            };
        }
        catch {
            // Telemetry is fail-open: a malformed context must not fail the round.
            return null;
        }
    };
    const writeMetricsNow = () => {
        const metrics = buildMetrics();
        if (!metrics) {
            return;
        }
        try {
            writeRoundMetrics(invokeOptions.cwd, metricsContext.sessionId, metricsContext.round, metricsContext.role, metrics);
        }
        catch {
            // Belt-and-braces: an unexpected synchronous error while persisting
            // metrics is deliberately swallowed (telemetry is fail-open).
        }
    };
    try {
        while (attempt < maxAttempts) {
            attempt++;
            const structured = mode === "structured";
            const prompt = buildPrompt(structured);
            const promptChars = prompt.length;
            const promptLines = prompt.split("\n").length;
            const options = structured
                ? { ...invokeOptions, jsonSchema }
                : { ...invokeOptions };
            logStart(roundLabel, providerLabel, mode, promptChars, metricsContext);
            const response = await provider.invoke(prompt, options);
            const durationMs = response.duration ?? 0;
            // Base attempt record — completed by the success/failure paths below.
            const attemptRecord = {
                mode,
                provider: provider.name,
                model: invokeOptions.model ?? null,
                effort: invokeOptions.effort ?? null,
                prompt_chars: promptChars,
                prompt_lines: promptLines,
                output_chars: null,
                output_lines: null,
                duration_ms: durationMs,
                ok: false,
                error_kind: null,
                error_exit_code: null,
            };
            // Shared failure bookkeeping: mark the record, store it, emit the
            // end-of-attempt log line. Must run BEFORE `mode` is downgraded so
            // the log line reports the mode that actually failed.
            const recordFailure = (kind, message, outputChars) => {
                attemptRecord.ok = false;
                attemptRecord.error_kind = kind;
                attempts.push(attemptRecord);
                logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, durationMs, false, `${kind}: ${truncate(message, 200)}`, metricsContext);
            };
            // Downgrade is only possible from structured mode with an attempt left.
            const canDowngrade = structured && attempt < maxAttempts;
            if (!response.ok) {
                const { kind, exitCode, message } = response.error;
                attemptRecord.error_exit_code = exitCode;
                recordFailure(kind, message, null);
                if (canDowngrade && kind === "capability") {
                    provider.markNonCapable();
                    mode = "legacy";
                    continue;
                }
                // Fatal, or already in legacy mode — terminal
                throw new Error(`${roundLabel} failed (exit ${exitCode}, ${kind}):\n${message}`);
            }
            // Provider returned output — record output size, then try to parse.
            const outputChars = response.output.length;
            attemptRecord.output_chars = outputChars;
            attemptRecord.output_lines = response.output.split("\n").length;
            let parsed;
            try {
                parsed = structured
                    ? parseStructured(response.output)
                    : parseLegacy(response.output);
            }
            catch (parseError) {
                lastError = parseError instanceof Error ? parseError : new Error(String(parseError));
                // Zod validation failure is terminal — the model produced
                // semantically invalid content, retrying won't help.
                if (parseError instanceof ZodValidationError) {
                    recordFailure("zod", lastError.message, outputChars);
                    throw parseError;
                }
                recordFailure("parse", lastError.message, outputChars);
                // Unparseable structured output triggers the legacy downgrade.
                if (canDowngrade && parseError instanceof StructuredOutputParseError) {
                    provider.markNonCapable();
                    mode = "legacy";
                    continue;
                }
                // Legacy parse failure (or no attempts left) — terminal. Keep the
                // underlying error reachable via `cause`.
                throw new Error(`${roundLabel} parse failed in ${mode} mode: ${lastError.message}`, { cause: lastError });
            }
            // Success bookkeeping lives outside the parse try/catch so that a
            // fault here can never be misreported as a parse failure.
            attemptRecord.ok = true;
            attempts.push(attemptRecord);
            logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, durationMs, true, null, metricsContext);
            return {
                result: parsed,
                metrics: buildMetrics(),
                sessionId: response.sessionId,
            };
        }
        // Unreachable in normal flow — defensive
        throw lastError ?? new Error(`${roundLabel} exhausted all attempts`);
    }
    finally {
        // Persist metrics on every exit (success or throw); never throws.
        writeMetricsNow();
    }
}
|
|
410
|
+
/**
 * Produce the provider label used in log lines by delegating to
 * `formatProviderLabel`.
 *
 * @param {string} providerName - Passed through as `provider`.
 * @param {string|null|undefined} model - Nullish values are passed as `undefined`.
 * @param {string|null|undefined} effort - Nullish values are passed as `undefined`.
 * @returns {*} Whatever `formatProviderLabel` returns for these fields.
 */
function buildProviderLabel(providerName, model, effort) {
    // Normalise null to undefined before handing the fields over.
    const labelFields = {
        provider: providerName,
        model: model ?? undefined,
        effort: effort ?? undefined,
    };
    return formatProviderLabel(labelFields);
}
|
|
417
|
+
/**
 * Cap a string at `max` UTF-16 code units.
 *
 * @param {string} text - String to shorten.
 * @param {number} max - Maximum length to keep.
 * @returns {string} `text` unchanged when it is not longer than `max`,
 *   otherwise its first `max` code units (no ellipsis is appended).
 */
function truncate(text, max) {
    const overflows = text.length > max;
    if (!overflows) {
        return text;
    }
    return text.slice(0, max);
}
|
|
420
|
+
/**
 * Best-effort write of a single line to stderr.
 *
 * @param {string} line - Text to write; the caller supplies any trailing newline.
 * @returns {void} Never throws: any exception from the write is discarded.
 */
function safeStderr(line) {
    try {
        const { stderr } = process;
        stderr.write(line);
    }
    catch {
        // stderr unavailable — nothing else we can do
    }
}
|
|
428
|
+
/**
 * Emit the start-of-attempt log line to stderr.
 *
 * @param {string} roundLabel - Accepted for signature parity with the call
 *   sites; not used in the emitted line.
 * @param {string} providerLabel - Provider label shown in the line.
 * @param {string} mode - Invocation mode shown in the line.
 * @param {number} promptChars - Prompt length in characters.
 * @param {{round: *, role: *}|null|undefined} ctx - Metrics context; when
 *   falsy nothing is logged.
 * @returns {void}
 */
function logStart(roundLabel, providerLabel, mode, promptChars, ctx) {
    if (ctx) {
        const prefix = `[planpong] R${ctx.round} ${ctx.role}`;
        safeStderr(`${prefix} | ${providerLabel} | ${mode} | prompt=${promptChars}c\n`);
    }
}
|
|
433
|
+
/**
 * Emit the end-of-attempt log line to stderr.
 *
 * @param {string} roundLabel - Accepted for signature parity with the call
 *   sites; not used in the emitted line.
 * @param {string} providerLabel - Provider label shown in the line.
 * @param {string} mode - Invocation mode shown in the line.
 * @param {number} promptChars - Prompt length in characters.
 * @param {number|null} outputChars - Output length, or null when there was none.
 * @param {number} durationMs - Attempt duration, rendered via `formatDuration`.
 * @param {boolean} ok - Whether the attempt succeeded.
 * @param {string|null} failDetail - Failure description; "unknown" is printed
 *   when it is nullish.
 * @param {{round: *, role: *}|null|undefined} ctx - Metrics context; when
 *   falsy nothing is logged.
 * @returns {void}
 */
function logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, durationMs, ok, failDetail, ctx) {
    if (!ctx) {
        return;
    }
    const durationStr = formatDuration(durationMs);
    const head = `[planpong] R${ctx.round} ${ctx.role} | ${providerLabel} | ${mode} | prompt=${promptChars}c`;
    // The success form is only used when an output size is actually known.
    const succeeded = ok && outputChars !== null;
    const tail = succeeded
        ? `output=${outputChars}c duration=${durationStr} | ok`
        : `duration=${durationStr} | fail (${failDetail ?? "unknown"})`;
    safeStderr(`${head} ${tail}\n`);
}
|
|
229
444
|
/**
|
|
230
445
|
* Run a single review round: send current plan to the reviewer for critique.
|
|
231
446
|
*/
|
|
@@ -235,34 +450,68 @@ export async function runReviewRound(session, cwd, config, reviewerProvider) {
|
|
|
235
450
|
const planContent = readFileSync(planPath, "utf-8");
|
|
236
451
|
const phase = getReviewPhase(round);
|
|
237
452
|
const priorDecisions = buildPriorDecisions(cwd, session.id, round);
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
//
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
453
|
+
// Persist a snapshot of the plan as the reviewer is about to see it. On
|
|
454
|
+
// round N+1 we'll diff against this snapshot to produce the incremental
|
|
455
|
+
// "what changed" content for the resumed reviewer session.
|
|
456
|
+
writeRoundPlanSnapshot(cwd, session.id, round, planContent);
|
|
457
|
+
// Reviewer-side persistent sessions. Both claude and codex support this:
|
|
458
|
+
// - claude: we generate the UUID and pass it via --session-id (first)
|
|
459
|
+
// or --resume (subsequent).
|
|
460
|
+
// - codex: codex generates its own thread_id; we capture it from the
|
|
461
|
+
// `--json` event stream and pass it via `codex exec resume <id>`
|
|
462
|
+
// on subsequent calls.
|
|
463
|
+
// The canonical reviewer session ID is `session.reviewerSessionId` — for
|
|
464
|
+
// claude this is the pre-generated UUID; for codex it's overwritten
|
|
465
|
+
// after the first call with the captured thread_id.
|
|
466
|
+
const reviewerSessionInited = session.reviewerSessionInitialized === true;
|
|
467
|
+
const isResumedReviewerSession = reviewerSessionInited;
|
|
468
|
+
const priorPlanContent = isResumedReviewerSession
|
|
469
|
+
? readRoundPlanSnapshot(cwd, session.id, round - 1)
|
|
470
|
+
: null;
|
|
471
|
+
const planDiff = priorPlanContent
|
|
472
|
+
? buildPlanDiff(priorPlanContent, planContent)
|
|
473
|
+
: null;
|
|
474
|
+
const newSessionId = !reviewerSessionInited && reviewerProvider.name === "claude"
|
|
475
|
+
? session.reviewerSessionId
|
|
476
|
+
: undefined;
|
|
477
|
+
const resumeSessionId = reviewerSessionInited
|
|
478
|
+
? session.reviewerSessionId
|
|
479
|
+
: undefined;
|
|
480
|
+
const { result: feedback, metrics, sessionId: capturedSessionId, } = await invokeWithStateMachine({
|
|
481
|
+
provider: reviewerProvider,
|
|
482
|
+
invokeOptions: {
|
|
257
483
|
cwd,
|
|
258
484
|
model: config.reviewer.model,
|
|
259
485
|
effort: config.reviewer.effort,
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
486
|
+
newSessionId,
|
|
487
|
+
resumeSessionId,
|
|
488
|
+
},
|
|
489
|
+
jsonSchema: getFeedbackJsonSchemaForPhase(phase),
|
|
490
|
+
buildPrompt: (structuredOutput) => isResumedReviewerSession
|
|
491
|
+
? buildIncrementalReviewPrompt(planDiff ?? planContent, priorDecisions, phase, structuredOutput)
|
|
492
|
+
: buildReviewPrompt(planContent, priorDecisions, phase, structuredOutput),
|
|
493
|
+
parseStructured: (output) => parseStructuredFeedbackForPhase(output, phase),
|
|
494
|
+
parseLegacy: (output) => parseFeedbackForPhase(output, phase),
|
|
495
|
+
roundLabel: `Round ${round} review`,
|
|
496
|
+
metricsContext: {
|
|
497
|
+
sessionId: session.id,
|
|
498
|
+
round,
|
|
499
|
+
phase,
|
|
500
|
+
role: "review",
|
|
501
|
+
},
|
|
502
|
+
});
|
|
263
503
|
writeRoundFeedback(cwd, session.id, round, feedback);
|
|
264
504
|
const severity = severityFromFeedback(feedback);
|
|
265
505
|
const converged = isConverged(feedback);
|
|
506
|
+
const timing = metrics ? summarizeTiming(metrics) : undefined;
|
|
507
|
+
// Persist the canonical reviewer session ID. For claude this is the
|
|
508
|
+
// UUID we generated; for codex it's the thread_id captured from --json
|
|
509
|
+
// output. Either way, future rounds resume this conversation.
|
|
510
|
+
if (!reviewerSessionInited && capturedSessionId) {
|
|
511
|
+
session.reviewerSessionId = capturedSessionId;
|
|
512
|
+
session.reviewerSessionInitialized = true;
|
|
513
|
+
writeSessionState(cwd, session);
|
|
514
|
+
}
|
|
266
515
|
// Extract phase-specific extras for status line
|
|
267
516
|
const phaseExtras = {};
|
|
268
517
|
if (feedback.verdict === "blocked") {
|
|
@@ -281,7 +530,7 @@ export async function runReviewRound(session, cwd, config, reviewerProvider) {
|
|
|
281
530
|
phaseExtras.risks_promoted = feedback.issues.length;
|
|
282
531
|
}
|
|
283
532
|
}
|
|
284
|
-
return { round, feedback, severity, converged, phaseExtras };
|
|
533
|
+
return { round, feedback, severity, converged, phaseExtras, timing };
|
|
285
534
|
}
|
|
286
535
|
/**
|
|
287
536
|
* Run a single revision round: send plan + feedback to the planner for revision.
|
|
@@ -296,37 +545,86 @@ export async function runRevisionRound(session, cwd, config, plannerProvider) {
|
|
|
296
545
|
}
|
|
297
546
|
const phase = getReviewPhase(round);
|
|
298
547
|
const keyDecisions = extractKeyDecisions(planContent);
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
//
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
if (revisionResponse.exitCode !== 0) {
|
|
313
|
-
throw new Error(`Planner revision failed (exit ${revisionResponse.exitCode}):\n${revisionResponse.content.slice(0, 500)}`);
|
|
314
|
-
}
|
|
315
|
-
// Exit was 0 but parse failed — retry
|
|
316
|
-
const retryPrompt = `Your previous response could not be parsed. Please output ONLY a valid JSON object wrapped in <planpong-revision> tags. The error was: ${parseError instanceof Error ? parseError.message : "parse error"}\n\nOriginal prompt:\n${revisionPrompt}`;
|
|
317
|
-
const retryResponse = await plannerProvider.invoke(retryPrompt, {
|
|
548
|
+
// Direction phase always uses full-plan output. Risk + detail honor
|
|
549
|
+
// config.revision_mode. The shape decision is made once here and threaded
|
|
550
|
+
// through prompt + JSON schema + parser.
|
|
551
|
+
const useEdits = phase !== "direction" && config.revision_mode === "edits";
|
|
552
|
+
const revisionShape = useEdits ? "edits" : "full";
|
|
553
|
+
const jsonSchema = getRevisionJsonSchema(phase, config.revision_mode);
|
|
554
|
+
// Planner-side persistent sessions were tested and found to INCREASE wall
|
|
555
|
+
// time — the model used the spared context budget to do more work per
|
|
556
|
+
// round (more edits, deeper revisions), not to do the same work faster.
|
|
557
|
+
// Reviewer-side persistent sessions are kept (see runReviewRound).
|
|
558
|
+
const { result: revision, metrics } = await invokeWithStateMachine({
|
|
559
|
+
provider: plannerProvider,
|
|
560
|
+
invokeOptions: {
|
|
318
561
|
cwd,
|
|
319
562
|
model: config.planner.model,
|
|
320
563
|
effort: config.planner.effort,
|
|
564
|
+
},
|
|
565
|
+
jsonSchema,
|
|
566
|
+
buildPrompt: (structuredOutput) => buildRevisionPrompt(planContent, feedback, keyDecisions, null, phase, structuredOutput, config.revision_mode),
|
|
567
|
+
parseStructured: (output) => parseStructuredRevision(output, revisionShape),
|
|
568
|
+
parseLegacy: (output) => parseRevision(output, revisionShape),
|
|
569
|
+
roundLabel: `Round ${round} revision`,
|
|
570
|
+
metricsContext: {
|
|
571
|
+
sessionId: session.id,
|
|
572
|
+
round,
|
|
573
|
+
phase,
|
|
574
|
+
role: "revision",
|
|
575
|
+
},
|
|
576
|
+
});
|
|
577
|
+
writeRoundResponse(cwd, session.id, round, revision);
|
|
578
|
+
const timing = metrics ? summarizeTiming(metrics) : undefined;
|
|
579
|
+
// Apply revision to disk. Two paths: full (today's behavior) or edits
|
|
580
|
+
// (apply edit list, retry failures, atomic write).
|
|
581
|
+
let editTelemetry;
|
|
582
|
+
let finalRevision = revision;
|
|
583
|
+
if (useEdits && isEditsRevision(revision)) {
|
|
584
|
+
const result = await applyRevisionEdits({
|
|
585
|
+
session,
|
|
586
|
+
cwd,
|
|
587
|
+
planPath,
|
|
588
|
+
planContent,
|
|
589
|
+
revision,
|
|
590
|
+
plannerProvider,
|
|
591
|
+
config,
|
|
592
|
+
phase,
|
|
593
|
+
metrics,
|
|
321
594
|
});
|
|
322
|
-
|
|
595
|
+
finalRevision = result.revision;
|
|
596
|
+
editTelemetry = result.telemetry;
|
|
323
597
|
}
|
|
324
|
-
|
|
325
|
-
|
|
598
|
+
else if (isDirectionRevision(revision)) {
|
|
599
|
+
writeFileSync(planPath, revision.updated_plan);
|
|
600
|
+
editTelemetry = {
|
|
601
|
+
revision_mode: "full",
|
|
602
|
+
edits_attempted: null,
|
|
603
|
+
edits_applied: null,
|
|
604
|
+
edits_failed: null,
|
|
605
|
+
edits_retried: null,
|
|
606
|
+
edits_recovered: null,
|
|
607
|
+
retry_invoked: false,
|
|
608
|
+
};
|
|
609
|
+
persistRevisionMetrics({
|
|
610
|
+
cwd,
|
|
611
|
+
session,
|
|
612
|
+
round,
|
|
613
|
+
phase,
|
|
614
|
+
metrics,
|
|
615
|
+
telemetry: editTelemetry,
|
|
616
|
+
});
|
|
617
|
+
}
|
|
618
|
+
else {
|
|
619
|
+
throw new Error(`runRevisionRound: revision shape mismatch — expected ${useEdits ? "edits" : "full"} but got ${"updated_plan" in revision ? "full" : "edits"}`);
|
|
620
|
+
}
|
|
621
|
+
session.planHash = hashFile(planPath);
|
|
622
|
+
writeSessionState(cwd, session);
|
|
623
|
+
// Tally responses (use the possibly-downgraded responses from finalRevision).
|
|
326
624
|
let accepted = 0;
|
|
327
625
|
let rejected = 0;
|
|
328
626
|
let deferred = 0;
|
|
329
|
-
for (const resp of
|
|
627
|
+
for (const resp of finalRevision.responses) {
|
|
330
628
|
if (resp.action === "accepted")
|
|
331
629
|
accepted++;
|
|
332
630
|
else if (resp.action === "rejected")
|
|
@@ -334,19 +632,257 @@ export async function runRevisionRound(session, cwd, config, plannerProvider) {
|
|
|
334
632
|
else if (resp.action === "deferred")
|
|
335
633
|
deferred++;
|
|
336
634
|
}
|
|
337
|
-
// Write updated plan to disk
|
|
338
|
-
const updatedPlan = revision.updated_plan;
|
|
339
|
-
writeFileSync(planPath, updatedPlan);
|
|
340
|
-
session.planHash = hashFile(planPath);
|
|
341
|
-
writeSessionState(cwd, session);
|
|
342
635
|
return {
|
|
343
636
|
round,
|
|
344
|
-
revision,
|
|
637
|
+
revision: finalRevision,
|
|
345
638
|
accepted,
|
|
346
639
|
rejected,
|
|
347
640
|
deferred,
|
|
348
641
|
planUpdated: true,
|
|
642
|
+
timing,
|
|
643
|
+
edits: editTelemetry,
|
|
644
|
+
};
|
|
645
|
+
}
|
|
646
|
+
/**
 * Apply an edits-mode revision to the plan.
 *
 * Flow: first-pass apply → one targeted retry for the edits that failed →
 * single write of the resulting plan → response/edit consistency pass
 * (`downgradeOrphanedResponses`) → rewrite of the round response file →
 * telemetry persistence. The plan is only modified in memory until the one
 * `writeFileSync` call.
 *
 * @param {object} args
 * @param {object} args.session - Session state; `currentRound` and `id` are read.
 * @param {string} args.cwd - Working directory for session files.
 * @param {string} args.planPath - Path the final plan is written to.
 * @param {string} args.planContent - Plan text the edits are applied against.
 * @param {object} args.revision - Edits revision; `edits` is read here.
 * @param {object} args.plannerProvider - Provider used for the retry.
 * @param {object} args.config - Config forwarded to the retry.
 * @param {string} args.phase - Review phase forwarded to retry and metrics.
 * @param {object|null} args.metrics - In-memory round metrics; the retry's
 *   attempt record is appended to `metrics.attempts` when present.
 * @returns {Promise<{revision: object, telemetry: object}>} The revision as
 *   returned by `downgradeOrphanedResponses`, plus edit-application counters.
 */
async function applyRevisionEdits(args) {
    const { session, cwd, planPath, planContent, revision, plannerProvider, config, phase, metrics, } = args;
    const round = session.currentRound;
    const editsAttempted = revision.edits.length;
    // First-pass apply.
    const initialPass = applyEdits(planContent, revision.edits);
    const needsRetry = initialPass.failures.length > 0;
    if (needsRetry) {
        logFailures(`R${round} edits first-pass`, initialPass.failures);
    }
    safeStderr(`[planpong] R${round} edits | first-pass | ${summarizeApply(initialPass)}\n`);
    let working = initialPass.plan;
    const successfulEdits = initialPass.applied.map(({ edit }) => edit);
    const recoveredEdits = [];
    const unrecoverableFailures = [];
    const retryInvoked = needsRetry;
    const retriedCount = needsRetry ? initialPass.failures.length : 0;
    if (needsRetry) {
        try {
            const retryResult = await runEditsRetry({
                cwd,
                session,
                round,
                phase,
                plannerProvider,
                config,
                currentPlan: working,
                failures: initialPass.failures,
            });
            const retryPass = applyEdits(working, retryResult.edits);
            if (retryPass.failures.length > 0) {
                logFailures(`R${round} edits retry`, retryPass.failures);
            }
            safeStderr(`[planpong] R${round} edits | retry | ${summarizeApply(retryPass)}\n`);
            working = retryPass.plan;
            for (const { edit } of retryPass.applied) {
                recoveredEdits.push(edit);
            }
            unrecoverableFailures.push(...retryPass.failures);
            // Track the retry as an additional invocation attempt in metrics.
            if (metrics) {
                metrics.attempts.push(retryResult.attemptRecord);
            }
        }
        catch (err) {
            // The retry as a whole failed (provider error, parse error). Report
            // it and keep whatever has been applied so far.
            const reason = err instanceof Error ? err.message : String(err);
            safeStderr(`[planpong] R${round} edits | retry failed: ${reason}\n`);
            unrecoverableFailures.push(...initialPass.failures);
        }
    }
    // One write of the final plan state.
    writeFileSync(planPath, working);
    // Response/edit consistency: hand the surviving edits and the failures
    // that could not be recovered to downgradeOrphanedResponses, which returns
    // the (possibly adjusted) revision that is persisted and returned below.
    const survivingEdits = [...successfulEdits, ...recoveredEdits];
    const downgraded = downgradeOrphanedResponses(revision, survivingEdits, unrecoverableFailures);
    // Rewrite the round response file with the post-check revision.
    writeRoundResponse(cwd, session.id, round, downgraded);
    const telemetry = {
        revision_mode: "edits",
        edits_attempted: editsAttempted,
        edits_applied: successfulEdits.length,
        edits_failed: initialPass.failures.length,
        edits_retried: retriedCount,
        edits_recovered: recoveredEdits.length,
        retry_invoked: retryInvoked,
    };
    persistRevisionMetrics({
        cwd,
        session,
        round,
        phase,
        metrics,
        telemetry,
    });
    return { revision: downgraded, telemetry };
}
|
|
736
|
+
/**
 * One-shot retry for failed edits. Builds a targeted prompt from the failures
 * and the current (partially-edited) plan and asks the planner to re-express
 * each failed edit. There is no structured→legacy downgrade here: the mode is
 * chosen once from `checkStructuredOutputSupport()` and a single provider call
 * is made.
 *
 * @param {object} args
 * @param {object} args.plannerProvider - Must expose `name`,
 *   `checkStructuredOutputSupport()` and `invoke(prompt, options)`.
 * @param {object} args.config - `config.planner.model` / `.effort` are used.
 * @param {string} args.currentPlan - Plan text after the first-pass apply.
 * @param {Array<object>} args.failures - Failed edits; `edit`, `reason`,
 *   `section_searched` and `diagnostic` are forwarded to the prompt builder.
 * @param {string} args.cwd - Working directory passed to the provider.
 * @returns {Promise<{edits: *, attemptRecord: object}>} The edits extracted by
 *   `extractEditsFromRetryPayload` and an attempt record for metrics.
 * @throws {Error} "edits retry: provider error (…)" when the provider response
 *   is not ok; "edits retry: parse failed: …" when the output cannot be parsed
 *   (the underlying error is attached as `cause`).
 */
async function runEditsRetry(args) {
    const { plannerProvider, config, currentPlan, failures } = args;
    const useStructured = await plannerProvider.checkStructuredOutputSupport();
    const failureSummaries = failures.map((f) => ({
        edit: f.edit,
        reason: f.reason,
        section_searched: f.section_searched,
        diagnostic: f.diagnostic,
    }));
    const prompt = buildEditsRetryPrompt(currentPlan, failureSummaries, useStructured);
    // The retry reuses the full edits-revision schema rather than a dedicated
    // edits-only one. NOTE(review): per the original comment, the retry prompt
    // tells the planner to omit `responses` — confirm in buildEditsRetryPrompt.
    const jsonSchema = getRevisionJsonSchema("detail", "edits");
    const promptChars = prompt.length;
    const promptLines = prompt.split("\n").length;
    const options = {
        cwd: args.cwd,
        model: config.planner.model,
        effort: config.planner.effort,
        // The schema is only sent when the provider supports structured output.
        ...(useStructured ? { jsonSchema } : {}),
    };
    const response = await plannerProvider.invoke(prompt, options);
    const attemptRecord = {
        mode: useStructured ? "structured" : "legacy",
        provider: plannerProvider.name,
        model: config.planner.model ?? null,
        effort: config.planner.effort ?? null,
        prompt_chars: promptChars,
        prompt_lines: promptLines,
        output_chars: response.ok ? response.output.length : null,
        output_lines: response.ok ? response.output.split("\n").length : null,
        duration_ms: response.duration ?? 0,
        ok: false,
        // Kept as in the original: the value stays "edit-retry" even once `ok`
        // is flipped to true below. NOTE(review): looks like a tag marking
        // this record as the edits retry — confirm against the metrics schema.
        error_kind: "edit-retry",
        error_exit_code: null,
    };
    if (!response.ok) {
        throw new Error(`edits retry: provider error (${response.error.kind}: ${response.error.exitCode})`);
    }
    // Structured output is parsed as-is. Legacy output is unwrapped from its
    // <planpong-revision> tags when present, otherwise parsed whole.
    let edits;
    try {
        const rawJson = useStructured
            ? response.output
            : response.output.match(/<planpong-revision>([\s\S]*?)<\/planpong-revision>/i)?.[1] ??
                response.output;
        edits = extractEditsFromRetryPayload(JSON.parse(rawJson));
    }
    catch (err) {
        // Preserve the underlying error so the original stack stays reachable.
        throw new Error(`edits retry: parse failed: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    attemptRecord.ok = true;
    return { edits, attemptRecord };
}
|
|
811
|
+
/**
 * Pull the edits array out of a parsed retry payload.
 *
 * Two shapes are accepted: an object carrying an array-valued `edits`
 * property, or a bare array (returned as-is). The object form is checked
 * first.
 *
 * @param {unknown} payload - Parsed JSON from the planner's retry response.
 * @returns {Array} The edits array (same reference as found in the payload).
 * @throws {Error} When neither accepted shape matches.
 */
function extractEditsFromRetryPayload(payload) {
    const isObjectLike = payload !== null && typeof payload === "object";
    if (isObjectLike && "edits" in payload) {
        const { edits } = payload;
        if (Array.isArray(edits)) {
            return edits;
        }
    }
    if (Array.isArray(payload)) {
        return payload;
    }
    throw new Error("retry payload missing `edits` array");
}
|
|
822
|
+
/**
 * Heuristic response-edit consistency check.
 *
 * The schema has no explicit issue-to-edit mapping (an issue response carries
 * no section), so individual `accepted` responses cannot be matched to
 * individual edits. This function therefore guards only the worst case,
 * "everything accepted, nothing applied":
 *
 * - If there are no unrecoverable failures, the revision is returned
 *   untouched (same object) and the planner's accepts are taken at face value.
 * - If there are unrecoverable failures and at least one edit survived, the
 *   responses are left as they are (any surviving edit counts as "the planner
 *   did some work").
 * - If there are unrecoverable failures and no edit survived, every
 *   `accepted` response is downgraded to `deferred` with a rationale prefixed
 *   `edit_not_applied: ...`.
 *
 * This is deliberately conservative. An accepted response whose edit failed
 * keeps its `accepted` status whenever some other edit survived.
 *
 * Only the number of surviving edits is inspected, so edits with a missing or
 * non-string `section` cannot make this check throw.
 *
 * @param {{responses: Array<{action: string, rationale: string}>}} revision
 * @param {Array<object>} survivingEdits - Edits that were applied successfully.
 * @param {Array<object>} unrecoverableFailures - Edits that failed for good.
 * @returns {object} `revision` itself when there were no failures, otherwise
 *   a shallow copy with a fresh `responses` array. Inputs are never mutated.
 */
function downgradeOrphanedResponses(revision, survivingEdits, unrecoverableFailures) {
    if (unrecoverableFailures.length === 0) {
        return revision;
    }
    // Loop-invariant: downgrades only happen when the plan did not change.
    const noEditSurvived = survivingEdits.length === 0;
    const responses = revision.responses.map((resp) => {
        if (!noEditSurvived || resp.action !== "accepted") {
            return resp;
        }
        return {
            ...resp,
            action: "deferred",
            rationale: `edit_not_applied: corresponding plan edit failed and could not be recovered. Original rationale: ${resp.rationale}`,
        };
    });
    return { ...revision, responses };
}
|
|
860
|
+
/**
 * Rewrite the revision metrics for a round with edit telemetry merged in.
 *
 * Per the original note, the state machine has already written the basic
 * metrics file; this call overwrites it with the same data plus
 * `revision_mode`, the edit counters and `retry_invoked`.
 *
 * Fail-open: any error raised while building or writing the augmented record
 * is swallowed so telemetry can never break the run. Does nothing when
 * `metrics` is falsy.
 *
 * @param {object} args
 * @param {string} args.cwd - Forwarded to `writeRoundMetrics`.
 * @param {{id: string}} args.session - Only `session.id` is read.
 * @param {number} args.round - Round the metrics belong to.
 * @param {object} [args.metrics] - Base metrics to augment.
 * @param {object} args.telemetry - Source of the edit-telemetry fields.
 * @returns {void}
 */
function persistRevisionMetrics(args) {
    const { cwd, session, round, metrics, telemetry } = args;
    if (!metrics) {
        return;
    }
    try {
        const {
            revision_mode,
            edits_attempted,
            edits_applied,
            edits_failed,
            edits_retried,
            edits_recovered,
            retry_invoked,
        } = telemetry;
        const withEditTelemetry = {
            ...metrics,
            revision_mode,
            edits_attempted,
            edits_applied,
            edits_failed,
            edits_retried,
            edits_recovered,
            retry_invoked,
        };
        writeRoundMetrics(cwd, session.id, round, "revision", withEditTelemetry);
    }
    catch {
        // fail-open — telemetry never breaks the run
    }
}
|
|
351
887
|
/**
|
|
352
888
|
* Mark the session as approved and update the plan's status line.
|