ralphctl 0.4.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{add-YVXM34RP.mjs → add-67UFUI54.mjs} +2 -2
- package/dist/{chunk-ZLWSPLWI.mjs → chunk-62HYDA7L.mjs} +11 -0
- package/dist/{chunk-PYZEQ2VK.mjs → chunk-BT5FKIZX.mjs} +1 -1
- package/dist/{chunk-B3RCOHW3.mjs → chunk-D6QZNEYN.mjs} +125 -124
- package/dist/{chunk-O566EEDL.mjs → chunk-ZE2BRQA2.mjs} +5 -5
- package/dist/cli.mjs +7 -7
- package/dist/{mount-B3MLHNVY.mjs → mount-NCYR22SN.mjs} +4 -4
- package/dist/{start-FP7MVN5P.mjs → start-T34NI3LF.mjs} +2 -2
- package/package.json +1 -1
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import {
|
|
3
3
|
addCheckScriptToRepository,
|
|
4
4
|
projectAddCommand
|
|
5
|
-
} from "./chunk-PYZEQ2VK.mjs";
|
|
6
|
-
import "./chunk-ZLWSPLWI.mjs";
|
|
5
|
+
} from "./chunk-BT5FKIZX.mjs";
|
|
6
|
+
import "./chunk-62HYDA7L.mjs";
|
|
7
7
|
import "./chunk-CFUVE2BP.mjs";
|
|
8
8
|
import "./chunk-747KW2RW.mjs";
|
|
9
9
|
import "./chunk-BSB4EDGR.mjs";
|
|
@@ -1091,6 +1091,16 @@ ${ctx.existingAgentsMd}
|
|
|
1091
1091
|
FILE_NAME: ctx.fileName
|
|
1092
1092
|
});
|
|
1093
1093
|
}
|
|
1094
|
+
function buildEvaluationResumePrompt(ctx) {
|
|
1095
|
+
const template = loadTemplate("task-evaluation-resume");
|
|
1096
|
+
const commitInstruction = ctx.needsCommit ? "\n - **Then commit the fix** with a descriptive message before signaling completion." : "";
|
|
1097
|
+
return composePrompt(template, {
|
|
1098
|
+
HARNESS_CONTEXT: loadPartial("harness-context"),
|
|
1099
|
+
SIGNALS: loadPartial("signals-task"),
|
|
1100
|
+
CRITIQUE: ctx.critique,
|
|
1101
|
+
COMMIT_INSTRUCTION: commitInstruction
|
|
1102
|
+
});
|
|
1103
|
+
}
|
|
1094
1104
|
|
|
1095
1105
|
export {
|
|
1096
1106
|
buildInteractivePrompt,
|
|
@@ -1103,6 +1113,7 @@ export {
|
|
|
1103
1113
|
buildSprintFeedbackPrompt,
|
|
1104
1114
|
buildCheckScriptDiscoverPrompt,
|
|
1105
1115
|
buildRepoOnboardPrompt,
|
|
1116
|
+
buildEvaluationResumePrompt,
|
|
1106
1117
|
processLifecycleAdapter,
|
|
1107
1118
|
resolveProvider,
|
|
1108
1119
|
providerDisplayName,
|
|
@@ -3,6 +3,7 @@ import {
|
|
|
3
3
|
ProviderAiSessionAdapter,
|
|
4
4
|
SignalParser,
|
|
5
5
|
buildAutoPrompt,
|
|
6
|
+
buildEvaluationResumePrompt,
|
|
6
7
|
buildEvaluatorPrompt,
|
|
7
8
|
buildIdeateAutoPrompt,
|
|
8
9
|
buildIdeatePrompt,
|
|
@@ -13,7 +14,7 @@ import {
|
|
|
13
14
|
buildTicketRefinePrompt,
|
|
14
15
|
getActiveProvider,
|
|
15
16
|
spawnInteractive
|
|
16
|
-
} from "./chunk-ZLWSPLWI.mjs";
|
|
17
|
+
} from "./chunk-62HYDA7L.mjs";
|
|
17
18
|
import {
|
|
18
19
|
fetchIssueFromUrl,
|
|
19
20
|
formatIssueContext,
|
|
@@ -2488,10 +2489,9 @@ var EvaluateTaskUseCase = class {
|
|
|
2488
2489
|
options
|
|
2489
2490
|
);
|
|
2490
2491
|
if (!fixSuccess) {
|
|
2491
|
-
|
|
2492
|
-
await this.persistEvaluationStub(sprintId, taskId, i + 2, reason);
|
|
2493
|
-
break;
|
|
2492
|
+
log2.debug(`Fix attempt ${String(i + 1)}: generator did not signal completion \u2014 re-evaluating anyway`);
|
|
2494
2493
|
}
|
|
2494
|
+
if (i === maxIterations - 1) break;
|
|
2495
2495
|
const previousEvalResult = evalResult;
|
|
2496
2496
|
const stopReeval = log2.time("evaluator-re-spawn");
|
|
2497
2497
|
evalResult = await this.runSingleEvaluation(
|
|
@@ -2515,7 +2515,7 @@ var EvaluateTaskUseCase = class {
|
|
|
2515
2515
|
}
|
|
2516
2516
|
const finalStatus = plateaued ? "plateau" : evalResult.status;
|
|
2517
2517
|
await this.updateTaskEvaluation(sprintId, taskId, evalResult, finalStatus);
|
|
2518
|
-
this.reportResult(task.name, evalResult,
|
|
2518
|
+
this.reportResult(task.name, evalResult, totalIterations, plateaued);
|
|
2519
2519
|
return Result.ok({
|
|
2520
2520
|
taskId,
|
|
2521
2521
|
status: finalStatus,
|
|
@@ -2559,47 +2559,47 @@ var EvaluateTaskUseCase = class {
|
|
|
2559
2559
|
}
|
|
2560
2560
|
}
|
|
2561
2561
|
/**
|
|
2562
|
-
* Spawn a single evaluator session and parse the result.
|
|
2563
|
-
*
|
|
2564
|
-
*
|
|
2565
|
-
* pre-resolved — both are stable across fix-loop iterations, so the
|
|
2566
|
-
* caller computes them once and threads them through.
|
|
2562
|
+
* Spawn a single evaluator session and parse the result. Stable inputs
|
|
2563
|
+
* (`checkScriptSection`, `projectToolingSection`) are passed in
|
|
2564
|
+
* pre-resolved so the fix loop doesn't re-compute them per iteration.
|
|
2567
2565
|
*/
|
|
2568
2566
|
async runSingleEvaluation(task, sprint, repoPath, generatorModel, provider, checkScriptSection, projectToolingSection, options) {
|
|
2569
2567
|
const evaluatorModel = getEvaluatorModel(generatorModel, provider);
|
|
2570
|
-
const sprintDir = this.fs.getSprintDir(sprint.id);
|
|
2571
2568
|
const prompt = this.promptBuilder.buildTaskEvaluationPrompt(
|
|
2572
2569
|
task,
|
|
2573
2570
|
repoPath,
|
|
2574
2571
|
checkScriptSection,
|
|
2575
2572
|
projectToolingSection
|
|
2576
2573
|
);
|
|
2577
|
-
const args = ["--add-dir",
|
|
2574
|
+
const args = ["--add-dir", this.fs.getSprintDir(sprint.id)];
|
|
2578
2575
|
if (provider === "claude") {
|
|
2579
|
-
if (evaluatorModel)
|
|
2580
|
-
args.push("--model", evaluatorModel);
|
|
2581
|
-
}
|
|
2576
|
+
if (evaluatorModel) args.push("--model", evaluatorModel);
|
|
2582
2577
|
args.push("--max-turns", String(options?.maxTurns ?? EVALUATOR_MAX_TURNS));
|
|
2583
2578
|
}
|
|
2584
|
-
|
|
2579
|
+
const result = await this.spawnOrNull(prompt, {
|
|
2580
|
+
cwd: repoPath,
|
|
2581
|
+
args,
|
|
2582
|
+
env: this.aiSession.getSpawnEnv(),
|
|
2583
|
+
abortSignal: options?.abortSignal
|
|
2584
|
+
});
|
|
2585
|
+
if (!result.ok) {
|
|
2586
|
+
this.logger.warning(`Evaluator spawn failed for ${task.name}: ${result.message} \u2014 marking malformed`);
|
|
2587
|
+
return { status: "malformed", dimensions: [], rawOutput: `Evaluator spawn failed: ${result.message}` };
|
|
2588
|
+
}
|
|
2589
|
+
return this.parser.parseEvaluation(result.value.output);
|
|
2590
|
+
}
|
|
2591
|
+
/**
|
|
2592
|
+
* Wrap `spawnWithRetry` in a try/catch so callers can handle spawn
|
|
2593
|
+
* failures without nested error handling. Returns a small discriminated
|
|
2594
|
+
* union — ok with the session result, or !ok with the message.
|
|
2595
|
+
*/
|
|
2596
|
+
async spawnOrNull(prompt, opts) {
|
|
2585
2597
|
try {
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
args,
|
|
2589
|
-
env: this.aiSession.getSpawnEnv(),
|
|
2590
|
-
abortSignal: options?.abortSignal
|
|
2591
|
-
});
|
|
2598
|
+
const value = await this.aiSession.spawnWithRetry(prompt, opts);
|
|
2599
|
+
return { ok: true, value };
|
|
2592
2600
|
} catch (err) {
|
|
2593
|
-
|
|
2594
|
-
`Evaluator spawn failed for ${task.name}: ${err instanceof Error ? err.message : String(err)} \u2014 marking malformed`
|
|
2595
|
-
);
|
|
2596
|
-
return {
|
|
2597
|
-
status: "malformed",
|
|
2598
|
-
dimensions: [],
|
|
2599
|
-
rawOutput: `Evaluator spawn failed: ${err instanceof Error ? err.message : String(err)}`
|
|
2600
|
-
};
|
|
2601
|
+
return { ok: false, message: err instanceof Error ? err.message : String(err) };
|
|
2601
2602
|
}
|
|
2602
|
-
return this.parser.parseEvaluation(result.output);
|
|
2603
2603
|
}
|
|
2604
2604
|
/**
|
|
2605
2605
|
* Resolve the repo's `checkScript` and render it as the evaluator's
|
|
@@ -2628,33 +2628,39 @@ var EvaluateTaskUseCase = class {
|
|
|
2628
2628
|
}
|
|
2629
2629
|
/**
|
|
2630
2630
|
* Resume the generator session with the evaluator critique.
|
|
2631
|
-
*
|
|
2631
|
+
*
|
|
2632
|
+
* Two load-bearing properties (covered by `fix-loop fence` tests):
|
|
2633
|
+
* 1. Prompt comes from `buildTaskEvaluationResumePrompt` — full template
|
|
2634
|
+
* with signals / fix-protocol / commit instruction. A regression to
|
|
2635
|
+
* an inline string silently drops signal requirements.
|
|
2636
|
+
* 2. When `options.generatorSessionId` is set, `resumeSessionId` is
|
|
2637
|
+
* threaded so the fix continues the original session (`--resume`).
|
|
2638
|
+
* Absent an ID, spawn fresh and log at debug.
|
|
2639
|
+
*
|
|
2640
|
+
* Returns true iff the generator signaled `<task-complete>` — used as a
|
|
2641
|
+
* diagnostic only; the evaluator settles whether the fix actually worked.
|
|
2632
2642
|
*/
|
|
2633
2643
|
async resumeGeneratorWithCritique(task, sprint, repoPath, critique, options) {
|
|
2634
|
-
const
|
|
2635
|
-
const
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
});
|
|
2651
|
-
spinner.succeed(`Fix attempt completed: ${task.name}`);
|
|
2652
|
-
const signals = this.parser.parseExecutionSignals(result.output);
|
|
2653
|
-
return signals.complete;
|
|
2654
|
-
} catch {
|
|
2655
|
-
spinner?.fail(`Fix attempt failed: ${task.name}`);
|
|
2644
|
+
const resumePrompt = this.promptBuilder.buildTaskEvaluationResumePrompt(critique, options?.needsCommit ?? true);
|
|
2645
|
+
const resumeSessionId = options?.generatorSessionId;
|
|
2646
|
+
this.logger.debug(
|
|
2647
|
+
resumeSessionId ? `Resuming generator session ${resumeSessionId} for fix attempt: ${task.name}` : `No generator session ID \u2014 spawning fresh fix attempt: ${task.name}`
|
|
2648
|
+
);
|
|
2649
|
+
const spinner = this.logger.spinner(`Fixing evaluation issues: ${task.name}`);
|
|
2650
|
+
const result = await this.spawnOrNull(resumePrompt, {
|
|
2651
|
+
cwd: repoPath,
|
|
2652
|
+
args: ["--add-dir", this.fs.getSprintDir(sprint.id)],
|
|
2653
|
+
env: this.aiSession.getSpawnEnv(),
|
|
2654
|
+
maxTurns: options?.maxTurns,
|
|
2655
|
+
resumeSessionId,
|
|
2656
|
+
abortSignal: options?.abortSignal
|
|
2657
|
+
});
|
|
2658
|
+
if (!result.ok) {
|
|
2659
|
+
spinner.fail(`Fix attempt failed: ${task.name}`);
|
|
2656
2660
|
return false;
|
|
2657
2661
|
}
|
|
2662
|
+
spinner.succeed(`Fix attempt completed: ${task.name}`);
|
|
2663
|
+
return this.parser.parseExecutionSignals(result.value.output).complete;
|
|
2658
2664
|
}
|
|
2659
2665
|
/**
|
|
2660
2666
|
* Persist a real evaluation entry to the sidecar file.
|
|
@@ -2674,49 +2680,46 @@ var EvaluateTaskUseCase = class {
|
|
|
2674
2680
|
}
|
|
2675
2681
|
}
|
|
2676
2682
|
/**
|
|
2677
|
-
*
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
await this.persistence.writeEvaluation(sprintId, taskId, iteration, "failed", `_(no re-evaluation: ${reason})_`);
|
|
2682
|
-
} catch {
|
|
2683
|
-
this.logger.warning(`Could not persist evaluation stub for task ${taskId}`);
|
|
2684
|
-
}
|
|
2685
|
-
}
|
|
2686
|
-
/**
|
|
2687
|
-
* Update the task record with evaluation fields.
|
|
2688
|
-
*
|
|
2689
|
-
* `statusOverride` is set when plateau detection fires: the critique body
|
|
2690
|
-
* is still saved (truncated) for traceability, but the discriminator in
|
|
2691
|
-
* `tasks.json` records `'plateau'` so consumers can distinguish it from
|
|
2692
|
-
* a plain `'failed'` run.
|
|
2683
|
+
* Update the task record with evaluation fields. `statusOverride` is set
|
|
2684
|
+
* for plateau — the body is still the real critique, but the status
|
|
2685
|
+
* column records `'plateau'` so readers can distinguish it from a plain
|
|
2686
|
+
* failure.
|
|
2693
2687
|
*/
|
|
2694
2688
|
async updateTaskEvaluation(sprintId, taskId, evalResult, statusOverride) {
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
(t) => t.id === taskId ? {
|
|
2699
|
-
...t,
|
|
2689
|
+
await this.persistence.updateTask(
|
|
2690
|
+
taskId,
|
|
2691
|
+
{
|
|
2700
2692
|
evaluated: true,
|
|
2701
|
-
evaluationStatus: status,
|
|
2693
|
+
evaluationStatus: statusOverride ?? evalResult.status,
|
|
2702
2694
|
evaluationOutput: evalResult.rawOutput.slice(0, MAX_EVAL_OUTPUT)
|
|
2703
|
-
}
|
|
2695
|
+
},
|
|
2696
|
+
sprintId
|
|
2704
2697
|
);
|
|
2705
|
-
await this.persistence.saveTasks(updatedTasks, sprintId);
|
|
2706
2698
|
}
|
|
2707
2699
|
/**
|
|
2708
2700
|
* Report the evaluation outcome to the user.
|
|
2701
|
+
*
|
|
2702
|
+
* `totalIterations` is the *actual* number of evaluator spawns (initial +
|
|
2703
|
+
* any re-evaluations after fix attempts), NOT the configured maximum.
|
|
2704
|
+
* When the loop breaks early (plateau), runs out of fix budget, or skips
|
|
2705
|
+
* the final re-eval, these two diverge — and the log line must reflect
|
|
2706
|
+
* reality so "6 fix attempts" never shows up when only 1 actually ran.
|
|
2707
|
+
*
|
|
2708
|
+
* The evaluator is advisory: a failing outcome doesn't stop the task
|
|
2709
|
+
* from being marked done; the sprint proceeds. The critique is persisted
|
|
2710
|
+
* in the sidecar for later review, and the warning log lets the user
|
|
2711
|
+
* see what didn't pass without scrolling the evaluations directory.
|
|
2709
2712
|
*/
|
|
2710
|
-
reportResult(taskName, evalResult,
|
|
2713
|
+
reportResult(taskName, evalResult, totalIterations, plateaued) {
|
|
2711
2714
|
if (plateaued) {
|
|
2712
2715
|
this.logger.warning(
|
|
2713
|
-
`Evaluation plateaued on the same failures
|
|
2716
|
+
`Evaluation plateaued on the same failures after ${String(totalIterations)} iteration(s): ${taskName}`
|
|
2714
2717
|
);
|
|
2715
2718
|
} else if (evalResult.status === "malformed") {
|
|
2716
|
-
this.logger.warning(`Evaluator output was malformed for ${taskName}
|
|
2719
|
+
this.logger.warning(`Evaluator output was malformed for ${taskName}`);
|
|
2717
2720
|
} else if (!isPassed(evalResult)) {
|
|
2718
2721
|
this.logger.warning(
|
|
2719
|
-
`Evaluation did not pass after ${String(
|
|
2722
|
+
`Evaluation did not pass after ${String(totalIterations)} iteration(s) \u2014 marking done: ${taskName}`
|
|
2720
2723
|
);
|
|
2721
2724
|
} else {
|
|
2722
2725
|
this.logger.success(`Evaluation passed: ${taskName}`);
|
|
@@ -2739,40 +2742,24 @@ function loadTaskStep(persistence) {
|
|
|
2739
2742
|
function checkAlreadyEvaluatedStep(options) {
|
|
2740
2743
|
return step("check-already-evaluated", (ctx) => {
|
|
2741
2744
|
const task = ctx.tasks?.[0];
|
|
2742
|
-
if (!
|
|
2743
|
-
const
|
|
2744
|
-
return Result.ok(
|
|
2745
|
+
if (task && task.evaluated && !options.force) {
|
|
2746
|
+
const summary = { taskId: task.id, status: "skipped", iterations: 0 };
|
|
2747
|
+
return Result.ok({ evaluationSummary: summary });
|
|
2745
2748
|
}
|
|
2746
|
-
|
|
2747
|
-
const summary = {
|
|
2748
|
-
taskId: task.id,
|
|
2749
|
-
status: "skipped",
|
|
2750
|
-
iterations: 0
|
|
2751
|
-
};
|
|
2752
|
-
const partial = { evaluationSummary: summary };
|
|
2753
|
-
return Result.ok(partial);
|
|
2754
|
-
}
|
|
2755
|
-
const empty = {};
|
|
2756
|
-
return Result.ok(empty);
|
|
2749
|
+
return Result.ok({});
|
|
2757
2750
|
});
|
|
2758
2751
|
}
|
|
2759
2752
|
function runEvaluatorLoopStep(useCase, options) {
|
|
2760
2753
|
return step("run-evaluator-loop", async (ctx) => {
|
|
2761
2754
|
if (ctx.evaluationSummary?.status === "skipped") {
|
|
2762
|
-
|
|
2763
|
-
return Result.ok(empty);
|
|
2755
|
+
return Result.ok({});
|
|
2764
2756
|
}
|
|
2765
2757
|
const result = await useCase.execute(ctx.sprintId, ctx.taskId, {
|
|
2766
|
-
|
|
2767
|
-
maxTurns: options.maxTurns,
|
|
2768
|
-
fallbackModel: ctx.generatorModel ?? void 0,
|
|
2758
|
+
...options,
|
|
2769
2759
|
abortSignal: ctx.abortSignal ?? options.abortSignal
|
|
2770
2760
|
});
|
|
2771
|
-
if (!result.ok)
|
|
2772
|
-
|
|
2773
|
-
}
|
|
2774
|
-
const partial = { evaluationSummary: result.value };
|
|
2775
|
-
return Result.ok(partial);
|
|
2761
|
+
if (!result.ok) return Result.error(result.error);
|
|
2762
|
+
return Result.ok({ evaluationSummary: result.value });
|
|
2776
2763
|
});
|
|
2777
2764
|
}
|
|
2778
2765
|
function createEvaluatorPipeline(deps, options = {}) {
|
|
@@ -2798,10 +2785,7 @@ function createEvaluatorPipeline(deps, options = {}) {
|
|
|
2798
2785
|
function evaluateTask(deps) {
|
|
2799
2786
|
return step("evaluate-task", async (ctx) => {
|
|
2800
2787
|
const evalCfg = await deps.useCase.getEvaluationConfig(deps.options);
|
|
2801
|
-
if (!evalCfg.enabled) {
|
|
2802
|
-
const empty = {};
|
|
2803
|
-
return Result.ok(empty);
|
|
2804
|
-
}
|
|
2788
|
+
if (!evalCfg.enabled) return Result.ok({});
|
|
2805
2789
|
const innerPipeline = createEvaluatorPipeline(
|
|
2806
2790
|
{
|
|
2807
2791
|
persistence: deps.persistence,
|
|
@@ -2816,38 +2800,52 @@ function evaluateTask(deps) {
|
|
|
2816
2800
|
{
|
|
2817
2801
|
iterations: evalCfg.iterations,
|
|
2818
2802
|
maxTurns: deps.options.maxTurns,
|
|
2803
|
+
// noCommit (ExecutionOptions) inverted — if the generator committed
|
|
2804
|
+
// the initial work, the fix must commit too.
|
|
2805
|
+
needsCommit: !deps.options.noCommit,
|
|
2806
|
+
// Model ladder input: evaluator uses a cheaper model than the
|
|
2807
|
+
// generator's. Null when the generator didn't report one (Copilot,
|
|
2808
|
+
// blocked tasks).
|
|
2809
|
+
fallbackModel: ctx.generatorModel ?? void 0,
|
|
2810
|
+
// --resume <id> so the fix continues the generator's session
|
|
2811
|
+
// rather than cold-starting. Undefined → fresh spawn (rare fallback).
|
|
2812
|
+
generatorSessionId: ctx.executionResult?.sessionId,
|
|
2819
2813
|
abortSignal: ctx.abortSignal
|
|
2820
2814
|
}
|
|
2821
2815
|
);
|
|
2822
2816
|
const innerCtx = {
|
|
2823
2817
|
sprintId: ctx.sprint.id,
|
|
2824
2818
|
taskId: ctx.task.id,
|
|
2825
|
-
generatorModel: ctx.generatorModel ?? null,
|
|
2826
2819
|
abortSignal: ctx.abortSignal
|
|
2827
2820
|
};
|
|
2828
|
-
let stepNames = [];
|
|
2829
2821
|
try {
|
|
2830
|
-
const
|
|
2831
|
-
|
|
2832
|
-
// Even on failure the framework populates stepResults up to and
|
|
2833
|
-
// including the failing step. Extract them opportunistically —
|
|
2834
|
-
// if unavailable, proceed with an empty list.
|
|
2835
|
-
[]
|
|
2836
|
-
);
|
|
2837
|
-
if (!result.ok) {
|
|
2822
|
+
const innerResult = await executePipeline(innerPipeline, innerCtx);
|
|
2823
|
+
if (!innerResult.ok) {
|
|
2838
2824
|
deps.logger.warning(
|
|
2839
|
-
`
|
|
2825
|
+
`Evaluator pipeline errored for ${ctx.task.name}: ${innerResult.error.message} \u2014 proceeding with task completion`
|
|
2840
2826
|
);
|
|
2827
|
+
return Result.ok({ evaluationStepNames: [] });
|
|
2841
2828
|
}
|
|
2829
|
+
logIfNonTerminal(deps.logger, ctx.task.name, innerResult.value.context.evaluationSummary);
|
|
2830
|
+
return Result.ok({
|
|
2831
|
+
evaluationStepNames: innerResult.value.stepResults.map((r) => r.stepName)
|
|
2832
|
+
});
|
|
2842
2833
|
} catch (err) {
|
|
2843
2834
|
deps.logger.warning(
|
|
2844
|
-
`Evaluator threw for ${ctx.task.name}: ${err instanceof Error ? err.message : String(err)}
|
|
2835
|
+
`Evaluator threw for ${ctx.task.name}: ${err instanceof Error ? err.message : String(err)} \u2014 proceeding with task completion`
|
|
2845
2836
|
);
|
|
2837
|
+
return Result.ok({ evaluationStepNames: [] });
|
|
2846
2838
|
}
|
|
2847
|
-
const partial = { evaluationStepNames: stepNames };
|
|
2848
|
-
return Result.ok(partial);
|
|
2849
2839
|
});
|
|
2850
2840
|
}
|
|
2841
|
+
function logIfNonTerminal(logger, taskName, summary) {
|
|
2842
|
+
if (!summary) return;
|
|
2843
|
+
if (summary.status === "failed" || summary.status === "malformed" || summary.status === "plateau") {
|
|
2844
|
+
logger.warning(
|
|
2845
|
+
`Evaluation ${summary.status} for ${taskName} after ${String(summary.iterations)} iteration(s) \u2014 proceeding with task completion`
|
|
2846
|
+
);
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2851
2849
|
|
|
2852
2850
|
// src/business/pipelines/execute/steps/recover-dirty-tree.ts
|
|
2853
2851
|
function recoverDirtyTree2(deps) {
|
|
@@ -4033,6 +4031,9 @@ var TextPromptBuilderAdapter = class {
|
|
|
4033
4031
|
extraDimensions: task.extraDimensions ?? []
|
|
4034
4032
|
});
|
|
4035
4033
|
}
|
|
4034
|
+
buildTaskEvaluationResumePrompt(critique, needsCommit) {
|
|
4035
|
+
return buildEvaluationResumePrompt({ critique, needsCommit });
|
|
4036
|
+
}
|
|
4036
4037
|
buildFeedbackPrompt(sprintName, completedTasks, feedback, branch) {
|
|
4037
4038
|
return buildSprintFeedbackPrompt(sprintName, completedTasks, feedback, branch);
|
|
4038
4039
|
}
|
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
getAllConfigSchemaEntries,
|
|
7
7
|
getConfigDefaultValue,
|
|
8
8
|
parseConfigValue
|
|
9
|
-
} from "./chunk-PYZEQ2VK.mjs";
|
|
9
|
+
} from "./chunk-BT5FKIZX.mjs";
|
|
10
10
|
import {
|
|
11
11
|
editorInput
|
|
12
12
|
} from "./chunk-OGEXYSFS.mjs";
|
|
@@ -41,14 +41,14 @@ import {
|
|
|
41
41
|
updateTask,
|
|
42
42
|
updateTaskStatus,
|
|
43
43
|
validateImportTasks
|
|
44
|
-
} from "./chunk-B3RCOHW3.mjs";
|
|
44
|
+
} from "./chunk-D6QZNEYN.mjs";
|
|
45
45
|
import {
|
|
46
46
|
SignalParser,
|
|
47
47
|
buildTicketRefinePrompt,
|
|
48
48
|
processLifecycleAdapter,
|
|
49
49
|
providerDisplayName,
|
|
50
50
|
resolveProvider
|
|
51
|
-
} from "./chunk-ZLWSPLWI.mjs";
|
|
51
|
+
} from "./chunk-62HYDA7L.mjs";
|
|
52
52
|
import {
|
|
53
53
|
fetchIssueFromUrl,
|
|
54
54
|
formatIssueContext,
|
|
@@ -185,7 +185,7 @@ import {
|
|
|
185
185
|
// package.json
|
|
186
186
|
var package_default = {
|
|
187
187
|
name: "ralphctl",
|
|
188
|
-
version: "0.4.6",
|
|
188
|
+
version: "0.5.0",
|
|
189
189
|
description: "Agent harness for long-running AI coding tasks \u2014 orchestrates Claude Code & GitHub Copilot across repositories",
|
|
190
190
|
homepage: "https://github.com/lukas-grigis/ralphctl",
|
|
191
191
|
type: "module",
|
|
@@ -2214,7 +2214,7 @@ async function selectProject(message = "Select project:") {
|
|
|
2214
2214
|
default: true
|
|
2215
2215
|
});
|
|
2216
2216
|
if (create) {
|
|
2217
|
-
const { projectAddCommand } = await import("./add-YVXM34RP.mjs");
|
|
2217
|
+
const { projectAddCommand } = await import("./add-67UFUI54.mjs");
|
|
2218
2218
|
await projectAddCommand({ interactive: true });
|
|
2219
2219
|
const updated = await listProjects();
|
|
2220
2220
|
if (updated.length === 0) return null;
|
package/dist/cli.mjs
CHANGED
|
@@ -41,10 +41,10 @@ import {
|
|
|
41
41
|
ticketRefineCommand,
|
|
42
42
|
ticketRemoveCommand,
|
|
43
43
|
ticketShowCommand
|
|
44
|
-
} from "./chunk-O566EEDL.mjs";
|
|
44
|
+
} from "./chunk-ZE2BRQA2.mjs";
|
|
45
45
|
import {
|
|
46
46
|
projectAddCommand
|
|
47
|
-
} from "./chunk-PYZEQ2VK.mjs";
|
|
47
|
+
} from "./chunk-BT5FKIZX.mjs";
|
|
48
48
|
import {
|
|
49
49
|
sprintCreateCommand
|
|
50
50
|
} from "./chunk-FNAAA32W.mjs";
|
|
@@ -56,8 +56,8 @@ import {
|
|
|
56
56
|
executePipeline,
|
|
57
57
|
getTasks,
|
|
58
58
|
sprintStartCommand
|
|
59
|
-
} from "./chunk-B3RCOHW3.mjs";
|
|
60
|
-
import "./chunk-ZLWSPLWI.mjs";
|
|
59
|
+
} from "./chunk-D6QZNEYN.mjs";
|
|
60
|
+
import "./chunk-62HYDA7L.mjs";
|
|
61
61
|
import {
|
|
62
62
|
truncate
|
|
63
63
|
} from "./chunk-GQ2WFKBN.mjs";
|
|
@@ -756,7 +756,7 @@ async function main() {
|
|
|
756
756
|
const isBare = argv.length <= 2;
|
|
757
757
|
const isInteractive = argv[2] === "interactive";
|
|
758
758
|
if (isBare || isInteractive) {
|
|
759
|
-
const { mountInkApp } = await import("./mount-B3MLHNVY.mjs");
|
|
759
|
+
const { mountInkApp } = await import("./mount-NCYR22SN.mjs");
|
|
760
760
|
const { fallback } = await mountInkApp({ initialView: "repl" });
|
|
761
761
|
if (!fallback) return;
|
|
762
762
|
printBanner();
|
|
@@ -767,10 +767,10 @@ async function main() {
|
|
|
767
767
|
return;
|
|
768
768
|
}
|
|
769
769
|
if (argv[2] === "sprint" && argv[3] === "start") {
|
|
770
|
-
const { parseSprintStartArgs } = await import("./start-FP7MVN5P.mjs");
|
|
770
|
+
const { parseSprintStartArgs } = await import("./start-T34NI3LF.mjs");
|
|
771
771
|
const parsed = parseSprintStartArgs(argv.slice(4));
|
|
772
772
|
if (parsed.ok) {
|
|
773
|
-
const { mountInkApp } = await import("./mount-B3MLHNVY.mjs");
|
|
773
|
+
const { mountInkApp } = await import("./mount-NCYR22SN.mjs");
|
|
774
774
|
const { getSharedDeps: getSharedDeps2 } = await import("./bootstrap-FMHG6DRY.mjs");
|
|
775
775
|
let sprintId;
|
|
776
776
|
try {
|
|
@@ -62,7 +62,7 @@ import {
|
|
|
62
62
|
ticketRemoveCommand,
|
|
63
63
|
ticketShowCommand,
|
|
64
64
|
useCurrentPrompt
|
|
65
|
-
} from "./chunk-O566EEDL.mjs";
|
|
65
|
+
} from "./chunk-ZE2BRQA2.mjs";
|
|
66
66
|
import {
|
|
67
67
|
PromptCancelledError,
|
|
68
68
|
detectCheckScriptCandidates,
|
|
@@ -73,7 +73,7 @@ import {
|
|
|
73
73
|
projectAddCommand,
|
|
74
74
|
suggestCheckScript,
|
|
75
75
|
validateConfigValue
|
|
76
|
-
} from "./chunk-PYZEQ2VK.mjs";
|
|
76
|
+
} from "./chunk-BT5FKIZX.mjs";
|
|
77
77
|
import {
|
|
78
78
|
sprintCreateCommand
|
|
79
79
|
} from "./chunk-FNAAA32W.mjs";
|
|
@@ -99,7 +99,7 @@ import {
|
|
|
99
99
|
reorderTask,
|
|
100
100
|
sprintStartCommand,
|
|
101
101
|
updateTaskStatus
|
|
102
|
-
} from "./chunk-B3RCOHW3.mjs";
|
|
102
|
+
} from "./chunk-D6QZNEYN.mjs";
|
|
103
103
|
import {
|
|
104
104
|
ProviderAiSessionAdapter,
|
|
105
105
|
SignalParser,
|
|
@@ -107,7 +107,7 @@ import {
|
|
|
107
107
|
exitAltScreen,
|
|
108
108
|
registerTuiInstance,
|
|
109
109
|
withSuspendedTui
|
|
110
|
-
} from "./chunk-ZLWSPLWI.mjs";
|
|
110
|
+
} from "./chunk-62HYDA7L.mjs";
|
|
111
111
|
import {
|
|
112
112
|
addTicket,
|
|
113
113
|
allRequirementsApproved,
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import {
|
|
3
3
|
parseSprintStartArgs,
|
|
4
4
|
sprintStartCommand
|
|
5
|
-
} from "./chunk-B3RCOHW3.mjs";
|
|
6
|
-
import "./chunk-ZLWSPLWI.mjs";
|
|
5
|
+
} from "./chunk-D6QZNEYN.mjs";
|
|
6
|
+
import "./chunk-62HYDA7L.mjs";
|
|
7
7
|
import "./chunk-GQ2WFKBN.mjs";
|
|
8
8
|
import "./chunk-CFUVE2BP.mjs";
|
|
9
9
|
import "./chunk-747KW2RW.mjs";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ralphctl",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Agent harness for long-running AI coding tasks — orchestrates Claude Code & GitHub Copilot across repositories",
|
|
5
5
|
"homepage": "https://github.com/lukas-grigis/ralphctl",
|
|
6
6
|
"type": "module",
|