ralph-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +98 -0
- package/dist/adapters/extractor/command-extractor.d.ts +9 -0
- package/dist/adapters/extractor/command-extractor.js +93 -0
- package/dist/adapters/extractor/command-extractor.js.map +1 -0
- package/dist/adapters/extractor/llm-judge-extractor.d.ts +9 -0
- package/dist/adapters/extractor/llm-judge-extractor.js +12 -0
- package/dist/adapters/extractor/llm-judge-extractor.js.map +1 -0
- package/dist/adapters/fs/json-file-decision-store.d.ts +10 -0
- package/dist/adapters/fs/json-file-decision-store.js +53 -0
- package/dist/adapters/fs/json-file-decision-store.js.map +1 -0
- package/dist/adapters/fs/json-file-frontier-store.d.ts +8 -0
- package/dist/adapters/fs/json-file-frontier-store.js +29 -0
- package/dist/adapters/fs/json-file-frontier-store.js.map +1 -0
- package/dist/adapters/fs/json-file-run-store.d.ts +10 -0
- package/dist/adapters/fs/json-file-run-store.js +53 -0
- package/dist/adapters/fs/json-file-run-store.js.map +1 -0
- package/dist/adapters/fs/lockfile.d.ts +24 -0
- package/dist/adapters/fs/lockfile.js +110 -0
- package/dist/adapters/fs/lockfile.js.map +1 -0
- package/dist/adapters/fs/manifest-loader.d.ts +10 -0
- package/dist/adapters/fs/manifest-loader.js +43 -0
- package/dist/adapters/fs/manifest-loader.js.map +1 -0
- package/dist/adapters/git/git-client.d.ts +9 -0
- package/dist/adapters/git/git-client.js +23 -0
- package/dist/adapters/git/git-client.js.map +1 -0
- package/dist/adapters/index.d.ts +1 -0
- package/dist/adapters/index.js +3 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/judge/llm-judge-provider.d.ts +33 -0
- package/dist/adapters/judge/llm-judge-provider.js +90 -0
- package/dist/adapters/judge/llm-judge-provider.js.map +1 -0
- package/dist/adapters/proposer/command-proposer.d.ts +15 -0
- package/dist/adapters/proposer/command-proposer.js +29 -0
- package/dist/adapters/proposer/command-proposer.js.map +1 -0
- package/dist/app/context.d.ts +5 -0
- package/dist/app/context.js +7 -0
- package/dist/app/context.js.map +1 -0
- package/dist/app/services/manual-decision-service.d.ts +20 -0
- package/dist/app/services/manual-decision-service.js +143 -0
- package/dist/app/services/manual-decision-service.js.map +1 -0
- package/dist/app/services/project-state-service.d.ts +52 -0
- package/dist/app/services/project-state-service.js +92 -0
- package/dist/app/services/project-state-service.js.map +1 -0
- package/dist/app/services/run-cycle-service.d.ts +25 -0
- package/dist/app/services/run-cycle-service.js +69 -0
- package/dist/app/services/run-cycle-service.js.map +1 -0
- package/dist/cli/commands/accept.d.ts +10 -0
- package/dist/cli/commands/accept.js +54 -0
- package/dist/cli/commands/accept.js.map +1 -0
- package/dist/cli/commands/demo.d.ts +9 -0
- package/dist/cli/commands/demo.js +108 -0
- package/dist/cli/commands/demo.js.map +1 -0
- package/dist/cli/commands/frontier.d.ts +8 -0
- package/dist/cli/commands/frontier.js +48 -0
- package/dist/cli/commands/frontier.js.map +1 -0
- package/dist/cli/commands/init.d.ts +10 -0
- package/dist/cli/commands/init.js +123 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/inspect.d.ts +8 -0
- package/dist/cli/commands/inspect.js +55 -0
- package/dist/cli/commands/inspect.js.map +1 -0
- package/dist/cli/commands/reject.d.ts +10 -0
- package/dist/cli/commands/reject.js +54 -0
- package/dist/cli/commands/reject.js.map +1 -0
- package/dist/cli/commands/run.d.ts +13 -0
- package/dist/cli/commands/run.js +71 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/serve-mcp.d.ts +7 -0
- package/dist/cli/commands/serve-mcp.js +32 -0
- package/dist/cli/commands/serve-mcp.js.map +1 -0
- package/dist/cli/commands/status.d.ts +8 -0
- package/dist/cli/commands/status.js +53 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +11 -0
- package/dist/cli/commands/validate.js +56 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/main.d.ts +2 -0
- package/dist/cli/main.js +38 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/core/engine/anchor-checker.d.ts +35 -0
- package/dist/core/engine/anchor-checker.js +84 -0
- package/dist/core/engine/anchor-checker.js.map +1 -0
- package/dist/core/engine/audit-sampler.d.ts +16 -0
- package/dist/core/engine/audit-sampler.js +25 -0
- package/dist/core/engine/audit-sampler.js.map +1 -0
- package/dist/core/engine/change-budget.d.ts +11 -0
- package/dist/core/engine/change-budget.js +10 -0
- package/dist/core/engine/change-budget.js.map +1 -0
- package/dist/core/engine/cycle-runner.d.ts +39 -0
- package/dist/core/engine/cycle-runner.js +652 -0
- package/dist/core/engine/cycle-runner.js.map +1 -0
- package/dist/core/engine/experiment-runner.d.ts +13 -0
- package/dist/core/engine/experiment-runner.js +24 -0
- package/dist/core/engine/experiment-runner.js.map +1 -0
- package/dist/core/engine/history-compactor.d.ts +15 -0
- package/dist/core/engine/history-compactor.js +76 -0
- package/dist/core/engine/history-compactor.js.map +1 -0
- package/dist/core/engine/judge-pack.d.ts +44 -0
- package/dist/core/engine/judge-pack.js +111 -0
- package/dist/core/engine/judge-pack.js.map +1 -0
- package/dist/core/engine/parallel-proposer.d.ts +21 -0
- package/dist/core/engine/parallel-proposer.js +58 -0
- package/dist/core/engine/parallel-proposer.js.map +1 -0
- package/dist/core/engine/scope-checker.d.ts +35 -0
- package/dist/core/engine/scope-checker.js +166 -0
- package/dist/core/engine/scope-checker.js.map +1 -0
- package/dist/core/engine/workspace-manager.d.ts +32 -0
- package/dist/core/engine/workspace-manager.js +145 -0
- package/dist/core/engine/workspace-manager.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.js +3 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/manifest/defaults.d.ts +55 -0
- package/dist/core/manifest/defaults.js +56 -0
- package/dist/core/manifest/defaults.js.map +1 -0
- package/dist/core/manifest/schema.d.ts +647 -0
- package/dist/core/manifest/schema.js +254 -0
- package/dist/core/manifest/schema.js.map +1 -0
- package/dist/core/model/decision-record.d.ts +38 -0
- package/dist/core/model/decision-record.js +29 -0
- package/dist/core/model/decision-record.js.map +1 -0
- package/dist/core/model/frontier-entry.d.ts +24 -0
- package/dist/core/model/frontier-entry.js +15 -0
- package/dist/core/model/frontier-entry.js.map +1 -0
- package/dist/core/model/metric.d.ts +13 -0
- package/dist/core/model/metric.js +10 -0
- package/dist/core/model/metric.js.map +1 -0
- package/dist/core/model/run-record.d.ts +110 -0
- package/dist/core/model/run-record.js +104 -0
- package/dist/core/model/run-record.js.map +1 -0
- package/dist/core/ports/decision-store.d.ts +6 -0
- package/dist/core/ports/decision-store.js +2 -0
- package/dist/core/ports/decision-store.js.map +1 -0
- package/dist/core/ports/frontier-store.d.ts +5 -0
- package/dist/core/ports/frontier-store.js +2 -0
- package/dist/core/ports/frontier-store.js.map +1 -0
- package/dist/core/ports/run-store.d.ts +6 -0
- package/dist/core/ports/run-store.js +2 -0
- package/dist/core/ports/run-store.js.map +1 -0
- package/dist/core/state/constraint-engine.d.ts +18 -0
- package/dist/core/state/constraint-engine.js +42 -0
- package/dist/core/state/constraint-engine.js.map +1 -0
- package/dist/core/state/frontier-engine.d.ts +24 -0
- package/dist/core/state/frontier-engine.js +178 -0
- package/dist/core/state/frontier-engine.js.map +1 -0
- package/dist/core/state/ratchet-engine.d.ts +28 -0
- package/dist/core/state/ratchet-engine.js +177 -0
- package/dist/core/state/ratchet-engine.js.map +1 -0
- package/dist/core/state/run-state-machine.d.ts +17 -0
- package/dist/core/state/run-state-machine.js +94 -0
- package/dist/core/state/run-state-machine.js.map +1 -0
- package/dist/mcp/main.d.ts +1 -0
- package/dist/mcp/main.js +8 -0
- package/dist/mcp/main.js.map +1 -0
- package/dist/mcp/server.d.ts +6 -0
- package/dist/mcp/server.js +97 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/shared/fs-errors.d.ts +1 -0
- package/dist/shared/fs-errors.js +4 -0
- package/dist/shared/fs-errors.js.map +1 -0
- package/dist/shared/logger.d.ts +2 -0
- package/dist/shared/logger.js +5 -0
- package/dist/shared/logger.js.map +1 -0
- package/dist/shared/template-utils.d.ts +9 -0
- package/dist/shared/template-utils.js +50 -0
- package/dist/shared/template-utils.js.map +1 -0
- package/package.json +44 -0
- package/templates/writing/docs/draft.md +1 -0
- package/templates/writing/prompts/judge.md +15 -0
- package/templates/writing/ralph.yaml +63 -0
- package/templates/writing/scripts/experiment.mjs +6 -0
- package/templates/writing/scripts/metric.mjs +24 -0
- package/templates/writing/scripts/propose.mjs +13 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { RunCycleService } from "../../app/services/run-cycle-service.js";
|
|
2
|
+
/**
 * Default output sink for the `run` command: each message is written,
 * followed by a newline, to the process's standard output or standard error.
 */
const defaultCommandIO = {
    stdout(message) {
        process.stdout.write(`${message}\n`);
    },
    stderr(message) {
        process.stderr.write(`${message}\n`);
    },
};
|
|
10
|
+
/**
 * Executes the `run` command: runs up to `options.cycles` research cycles
 * (default 1) one after another through a RunCycleService and reports the
 * outcome through `io`.
 *
 * The loop stops at the first cycle whose status is "failed" or
 * "resume_required"; the results gathered so far are then printed (JSON on
 * stdout when `options.json` is set, otherwise a one-line message on stderr)
 * and 1 is returned. When every cycle finishes, a summary is written to stdout
 * and 0 is returned. Any thrown error is reported on stderr and yields 1.
 *
 * @param options - Reads `cycles`, `path` (forwarded as `manifestPath`), `resume` and `json`.
 * @param io - Output sink; defaults to the process's stdout/stderr.
 * @returns The exit code: 0 on success, 1 on failure.
 */
export async function runRunCommand(options, io = defaultCommandIO) {
    try {
        const cycles = options.cycles ?? 1;
        // The CLI parses `--cycles` with Number.parseInt, which yields NaN for
        // non-numeric input. NaN, negative or fractional counts used to make the
        // loop run zero or a mismatched number of times while still reporting
        // success, so reject them before doing any work.
        if (!Number.isInteger(cycles) || cycles < 0) {
            throw new Error(`Invalid cycle count: ${String(cycles)}; expected a non-negative integer`);
        }
        const service = new RunCycleService();
        const results = [];
        for (let index = 0; index < cycles; index += 1) {
            const result = await service.run({
                repoRoot: process.cwd(),
                // Optional keys are only included when set, so the service sees
                // them as absent rather than as explicit undefined/false values.
                ...(options.path ? { manifestPath: options.path } : {}),
                ...(options.resume ? { resume: options.resume } : {}),
            });
            results.push(result);
            if (result.status === "failed" || result.status === "resume_required") {
                // Stop at the first cycle that cannot continue; later cycles are not attempted.
                if (options.json) {
                    io.stdout(JSON.stringify({ ok: false, results }, null, 2));
                }
                else {
                    io.stderr(`Cycle ${index + 1} ended with status ${result.status}`);
                }
                return 1;
            }
        }
        if (options.json) {
            io.stdout(JSON.stringify({
                ok: true,
                cycles,
                results,
            }, null, 2));
        }
        else {
            const latest = results.at(-1);
            io.stdout(`Completed ${cycles} cycle(s); latest status=${latest?.status ?? "unknown"}`);
        }
        return 0;
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "Failed to run cycle";
        if (options.json) {
            io.stderr(JSON.stringify({ ok: false, error: message }, null, 2));
        }
        else {
            io.stderr(message);
        }
        return 1;
    }
}
|
|
56
|
+
/**
 * Attaches the `run` subcommand to the given commander program.
 *
 * Declares the `--path`, `--cycles` (parsed as a base-10 integer, default 1),
 * `--resume` and `--json` options. The action delegates to runRunCommand and
 * copies a non-zero result into `process.exitCode`.
 *
 * @param program - The commander program to extend.
 */
export function registerRunCommand(program) {
    const parseCycleCount = (raw) => Number.parseInt(raw, 10);
    const handleRun = async (options) => {
        const exitCode = await runRunCommand(options);
        if (exitCode === 0) {
            return;
        }
        process.exitCode = exitCode;
    };
    program
        .command("run")
        .description("Run one or more research cycles.")
        .option("-p, --path <path>", "Path to the manifest file")
        .option("-c, --cycles <count>", "Number of cycles to run", parseCycleCount, 1)
        .option("--resume", "Resume if the latest run is recoverable", false)
        .option("--json", "Emit machine-readable output", false)
        .action(handleRun);
}
|
|
71
|
+
//# sourceMappingURL=run.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/cli/commands/run.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,yCAAyC,CAAC;AAc1E,MAAM,gBAAgB,GAAc;IAClC,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAA0B,EAC1B,KAAgB,gBAAgB;IAEhC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAG,EAAE,CAAC;QAEnB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;YAC/C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBAC/B,QAAQ,EAAE,OAAO,CAAC,GAAG,EAAE;gBACvB,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvD,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACtD,CAAC,CAAC;YACH,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAErB,IAAI,MAAM,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,KAAK,iBAAiB,EAAE,CAAC;gBACtE,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;oBACjB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC7D,CAAC;qBAAM,CAAC;oBACN,EAAE,CAAC,MAAM,CAAC,SAAS,KAAK,GAAG,CAAC,sBAAsB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;gBACrE,CAAC;gBACD,OAAO,CAAC,CAAC;YACX,CAAC;QACH,CAAC;QAED,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CACP,IAAI,CAAC,SAAS,CACZ;gBACE,EAAE,EAAE,IAAI;gBACR,MAAM;gBACN,OAAO;aACR,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9B,EAAE,CAAC,MAAM,CAAC,aAAa,MAAM,4BAA4B,MAAM,EAAE,MAAM,IAAI,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;QAED,OAAO,CAAC,CAAC;IACX,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC;QAC/E,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE
,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACpE,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrB,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,OAAO;SACJ,OAAO,CAAC,KAAK,CAAC;SACd,WAAW,CAAC,kCAAkC,CAAC;SAC/C,MAAM,CAAC,mBAAmB,EAAE,2BAA2B,CAAC;SACxD,MAAM,CAAC,sBAAsB,EAAE,yBAAyB,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;SACnG,MAAM,CAAC,UAAU,EAAE,yCAAyC,EAAE,KAAK,CAAC;SACpE,MAAM,CAAC,QAAQ,EAAE,8BAA8B,EAAE,KAAK,CAAC;SACvD,MAAM,CAAC,KAAK,EAAE,OAA0B,EAAE,EAAE;QAC3C,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACnB,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC9B,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Command } from "commander";
|
|
2
|
+
import type { CommandIO } from "./run.js";
|
|
3
|
+
/** Options accepted by the `serve-mcp` command. */
export interface ServeMcpCommandOptions {
|
|
4
|
+
/** Transport switch. The command's implementation rejects an explicit `false`, because only stdio is supported in v0.1. */
stdio?: boolean;
|
|
5
|
+
}
|
|
6
|
+
/** Runs the `serve-mcp` command and resolves to its exit code (0 on success, 1 on failure); `io` overrides where messages are written. */
export declare function runServeMcpCommand(options: ServeMcpCommandOptions, io?: CommandIO): Promise<number>;
|
|
7
|
+
/** Registers the `serve-mcp` subcommand on the given commander program. */
export declare function registerServeMcpCommand(program: Command): void;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { startMcpServer } from "../../mcp/server.js";
|
|
2
|
+
/**
 * Fallback I/O used by the `serve-mcp` command when the caller supplies none:
 * messages go to the process's stdout/stderr, each terminated by a newline.
 */
const defaultCommandIO = {
    stdout(message) {
        const line = `${message}\n`;
        process.stdout.write(line);
    },
    stderr(message) {
        const line = `${message}\n`;
        process.stderr.write(line);
    },
};
|
|
10
|
+
/**
 * Executes the `serve-mcp` command.
 *
 * When `options.stdio` is exactly `false`, an explanatory message is written
 * to `io.stderr` and 1 is returned. Otherwise the MCP server is started for
 * the current working directory and 0 is returned once that call resolves.
 *
 * @param options - Reads `stdio`.
 * @param io - Output sink; defaults to the process's stdout/stderr.
 * @returns The exit code: 0 on success, 1 when stdio was disabled.
 */
export async function runServeMcpCommand(options, io = defaultCommandIO) {
    const stdioDisabled = options.stdio === false;
    if (!stdioDisabled) {
        const serverOptions = { repoRoot: process.cwd() };
        await startMcpServer(serverOptions);
        return 0;
    }
    io.stderr("Only stdio transport is supported in v0.1");
    return 1;
}
|
|
20
|
+
/**
 * Attaches the `serve-mcp` subcommand to the given commander program.
 *
 * Declares a `--stdio` flag that defaults to true. The action delegates to
 * runServeMcpCommand and copies a non-zero result into `process.exitCode`.
 *
 * @param program - The commander program to extend.
 */
export function registerServeMcpCommand(program) {
    const handleServe = async (options) => {
        const exitCode = await runServeMcpCommand(options);
        if (exitCode === 0) {
            return;
        }
        process.exitCode = exitCode;
    };
    program
        .command("serve-mcp")
        .description("Start the minimal MCP server over stdio.")
        .option("--stdio", "Use stdio transport", true)
        .action(handleServe);
}
|
|
32
|
+
//# sourceMappingURL=serve-mcp.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"serve-mcp.js","sourceRoot":"","sources":["../../../src/cli/commands/serve-mcp.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAOrD,MAAM,gBAAgB,GAAc;IAClC,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,OAA+B,EAC/B,KAAgB,gBAAgB;IAEhC,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;QAC5B,EAAE,CAAC,MAAM,CAAC,2CAA2C,CAAC,CAAC;QACvD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,cAAc,CAAC;QACnB,QAAQ,EAAE,OAAO,CAAC,GAAG,EAAE;KACxB,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC;AACX,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAgB;IACtD,OAAO;SACJ,OAAO,CAAC,WAAW,CAAC;SACpB,WAAW,CAAC,0CAA0C,CAAC;SACvD,MAAM,CAAC,SAAS,EAAE,qBAAqB,EAAE,IAAI,CAAC;SAC9C,MAAM,CAAC,KAAK,EAAE,OAA+B,EAAE,EAAE;QAChD,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACnD,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACnB,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC9B,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Command } from "commander";
|
|
2
|
+
import type { CommandIO } from "./run.js";
|
|
3
|
+
/** Options accepted by the `status` command. */
export interface StatusCommandOptions {
|
|
4
|
+
/** Path to the manifest file; when set, the implementation forwards it as `manifestPath`. */
path?: string;
|
|
5
|
+
/** When true, output is emitted as JSON instead of a human-readable summary. */
json?: boolean;
|
|
6
|
+
}
|
|
7
|
+
/** Runs the `status` command and resolves to its exit code (0 on success, 1 when loading the status failed); `io` overrides where output is written. */
export declare function runStatusCommand(options: StatusCommandOptions, io?: CommandIO): Promise<number>;
|
|
8
|
+
/** Registers the `status` subcommand on the given commander program. */
export declare function registerStatusCommand(program: Command): void;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { getProjectStatus } from "../../app/services/project-state-service.js";
|
|
2
|
+
/**
 * Process-backed I/O used by the `status` command by default: every message
 * is written with a trailing newline to stdout or stderr respectively.
 */
const defaultCommandIO = {
    stdout(message) {
        process.stdout.write(`${message}\n`);
    },
    stderr(message) {
        process.stderr.write(`${message}\n`);
    },
};
|
|
10
|
+
/**
 * Executes the `status` command.
 *
 * Loads the project status for the current working directory (optionally
 * with an explicit manifest path) and prints it: as pretty-printed JSON when
 * `options.json` is set, otherwise as a four-line summary of the manifest
 * path, latest run, frontier size and pending human-review count.
 *
 * @param options - Reads `path` and `json`.
 * @param io - Output sink; defaults to the process's stdout/stderr.
 * @returns 0 on success; 1 when an error was thrown (reported on stderr).
 */
export async function runStatusCommand(options, io = defaultCommandIO) {
    try {
        const repoRoot = process.cwd();
        // Only pass manifestPath when the caller supplied one.
        const request = options.path ? { repoRoot, manifestPath: options.path } : { repoRoot };
        const status = await getProjectStatus(request);
        if (!options.json) {
            const summaryLines = [
                `manifest: ${status.manifestPath}`,
                `latest run: ${status.latestRun?.runId ?? "none"} (${status.latestRun?.status ?? "n/a"})`,
                `frontier entries: ${status.frontier.length}`,
                `pending human: ${status.pendingHumanRuns.length}`,
            ];
            io.stdout(summaryLines.join("\n"));
            return 0;
        }
        io.stdout(JSON.stringify(status, null, 2));
        return 0;
    }
    catch (error) {
        let message = "Failed to load project status";
        if (error instanceof Error) {
            message = error.message;
        }
        const report = options.json ? JSON.stringify({ ok: false, error: message }, null, 2) : message;
        io.stderr(report);
        return 1;
    }
}
|
|
40
|
+
/**
 * Attaches the `status` subcommand to the given commander program.
 *
 * Declares the `--path` and `--json` options. The action delegates to
 * runStatusCommand and copies a non-zero result into `process.exitCode`.
 *
 * @param program - The commander program to extend.
 */
export function registerStatusCommand(program) {
    const handleStatus = async (options) => {
        const exitCode = await runStatusCommand(options);
        if (exitCode === 0) {
            return;
        }
        process.exitCode = exitCode;
    };
    program
        .command("status")
        .description("Show project status, current frontier, and pending human review.")
        .option("-p, --path <path>", "Path to the manifest file")
        .option("--json", "Emit machine-readable output", false)
        .action(handleStatus);
}
|
|
53
|
+
//# sourceMappingURL=status.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"status.js","sourceRoot":"","sources":["../../../src/cli/commands/status.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,gBAAgB,EAAE,MAAM,6CAA6C,CAAC;AAQ/E,MAAM,gBAAgB,GAAc;IAClC,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,OAA6B,EAC7B,KAAgB,gBAAgB;IAEhC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,QAAQ,EAAE,OAAO,CAAC,GAAG,EAAE;YACvB,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACxD,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,MAAM,CACP;gBACE,aAAa,MAAM,CAAC,YAAY,EAAE;gBAClC,eAAe,MAAM,CAAC,SAAS,EAAE,KAAK,IAAI,MAAM,KAAK,MAAM,CAAC,SAAS,EAAE,MAAM,IAAI,KAAK,GAAG;gBACzF,qBAAqB,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE;gBAC7C,kBAAkB,MAAM,CAAC,gBAAgB,CAAC,MAAM,EAAE;aACnD,CAAC,IAAI,CAAC,IAAI,CAAC,CACb,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,CAAC;IACX,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,+BAA+B,CAAC;QACzF,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACpE,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrB,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAgB;IACpD,OAAO;SACJ,OAAO,CAAC,QAAQ,CAAC;SACjB,WAAW,CAAC,kEAAkE,CAAC;SAC/E,MAAM,CAAC,mBAAmB,EAAE,2BAA2B,CAAC;SACxD,MAAM,CAAC,QAAQ,EAAE,8BAA8B,EAAE,KAAK,CAAC;SACvD,MAAM,CAAC,KAAK,EAAE,OAA6B,EAAE,EAAE;QAC9C,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;QACjD,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACnB,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC9B,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Command } from "commander";
|
|
2
|
+
/** Options accepted by the `validate` command. */
export interface ValidateCommandOptions {
|
|
3
|
+
/** Path of the manifest file to load and validate. */
path: string;
|
|
4
|
+
/** When true, the result is emitted as JSON instead of plain text. */
json?: boolean;
|
|
5
|
+
}
|
|
6
|
+
/** Output sink used by the `validate` command, so callers can capture what would otherwise go to the process streams. */
export interface ValidateCommandIO {
|
|
7
|
+
/** Receives regular output (the success report). */
stdout(message: string): void;
|
|
8
|
+
/** Receives error output (the failure report). */
stderr(message: string): void;
|
|
9
|
+
}
|
|
10
|
+
/** Runs the `validate` command and resolves to its exit code (0 when the manifest loaded, 1 otherwise); `io` overrides where output is written. */
export declare function runValidateCommand(options: ValidateCommandOptions, io?: ValidateCommandIO): Promise<number>;
|
|
11
|
+
/** Registers the `validate` subcommand on the given commander program. */
export declare function registerValidateCommand(program: Command): void;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { loadManifestFromFile, ManifestLoadError } from "../../adapters/fs/manifest-loader.js";
|
|
2
|
+
import { DEFAULT_MANIFEST_FILENAME } from "../../core/manifest/schema.js";
|
|
3
|
+
/**
 * Default I/O for the `validate` command: writes each message plus a newline
 * to the process's stdout or stderr.
 */
const defaultValidateCommandIO = {
    stdout(message) {
        process.stdout.write(`${message}\n`);
    },
    stderr(message) {
        process.stderr.write(`${message}\n`);
    },
};
|
|
11
|
+
/**
 * Executes the `validate` command: loads the manifest at `options.path` and
 * reports whether it is valid.
 *
 * On success the manifest path (plus project name and schema version in JSON
 * mode) is written to `io.stdout` and 0 is returned. On failure the error is
 * written to `io.stderr` and 1 is returned; for a ManifestLoadError its
 * `causeValue`, when present, is included as details.
 *
 * @param options - Reads `path` and `json`.
 * @param io - Output sink; defaults to the process's stdout/stderr.
 * @returns The exit code: 0 when the manifest loaded, 1 otherwise.
 */
export async function runValidateCommand(options, io = defaultValidateCommandIO) {
    try {
        const loaded = await loadManifestFromFile(options.path);
        if (options.json) {
            io.stdout(JSON.stringify({
                ok: true,
                path: loaded.path,
                project: loaded.manifest.project.name,
                schemaVersion: loaded.manifest.schemaVersion,
            }, null, 2));
        }
        else {
            io.stdout(`Manifest is valid: ${loaded.path}`);
        }
        return 0;
    }
    catch (error) {
        let message = "Unknown validation error";
        let causeValue;
        if (error instanceof ManifestLoadError) {
            message = error.message;
            causeValue = error.causeValue;
        }
        else if (error instanceof Error) {
            // Other failures used to be reported only as "Unknown validation error",
            // hiding the real cause; surface the message like the sibling commands do.
            message = error.message || message;
        }
        if (options.json) {
            // `details` is dropped by JSON.stringify when there is no cause value.
            io.stderr(JSON.stringify({
                ok: false,
                error: message,
                details: causeValue,
            }, null, 2));
        }
        else {
            const details = causeValue ? `\n${JSON.stringify(causeValue, null, 2)}` : "";
            io.stderr(`${message}${details}`);
        }
        return 1;
    }
}
|
|
43
|
+
/**
 * Attaches the `validate` subcommand to the given commander program.
 *
 * Declares `--path` (defaulting to DEFAULT_MANIFEST_FILENAME) and `--json`.
 * The action delegates to runValidateCommand and copies a non-zero result
 * into `process.exitCode`.
 *
 * @param program - The commander program to extend.
 */
export function registerValidateCommand(program) {
    const handleValidate = async (options) => {
        const exitCode = await runValidateCommand(options);
        if (exitCode === 0) {
            return;
        }
        process.exitCode = exitCode;
    };
    program
        .command("validate")
        .description("Validate a ralph-research manifest.")
        .option("-p, --path <path>", "Path to the manifest file", DEFAULT_MANIFEST_FILENAME)
        .option("--json", "Emit machine-readable output", false)
        .action(handleValidate);
}
|
|
56
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../../src/cli/commands/validate.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,oBAAoB,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AAC/F,OAAO,EAAE,yBAAyB,EAAE,MAAM,+BAA+B,CAAC;AAY1E,MAAM,wBAAwB,GAAsB;IAClD,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACvC,CAAC;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,OAA+B,EAC/B,KAAwB,wBAAwB;IAEhD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAExD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CACP,IAAI,CAAC,SAAS,CACZ;gBACE,EAAE,EAAE,IAAI;gBACR,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,OAAO,EAAE,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI;gBACrC,aAAa,EAAE,MAAM,CAAC,QAAQ,CAAC,aAAa;aAC7C,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,MAAM,CAAC,sBAAsB,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,CAAC,CAAC;IACX,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,iBAAiB,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;QAChG,MAAM,OAAO,GAAG,KAAK,YAAY,iBAAiB,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAE/H,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,EAAE,CAAC,MAAM,CACP,IAAI,CAAC,SAAS,CACZ;gBACE,EAAE,EAAE,KAAK;gBACT,KAAK,EAAE,OAAO;gBACd,OAAO,EAAE,KAAK,YAAY,iBAAiB,CAAC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;aAC3E,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,MAAM,CAAC,GAAG,OAAO,GAAG,OAAO,EAAE,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAgB;IACtD,OAAO;SACJ,OAAO,CAAC,UAAU,CAAC;SACnB,WAAW,CAAC,qCAAqC,CAAC;SAClD,MAAM,CAAC,mBAAmB,EAAE,2BAA2B,EAAE,yBAAyB,CAAC;SACnF,MAAM,CAAC,QAAQ,EAAE,8BAA8B,EAAE,KAAK,CAAC;SACvD,MAAM,CAAC,KAAK,EAAE,OAA+B,EAAE,EAAE;QAChD,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACnD,IAAI,QAAQ,KAAK,CAAC,EAAE,CAA
C;YACnB,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC9B,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
|
package/dist/cli/main.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { createAppContext } from "../app/context.js";
|
|
4
|
+
import { registerAcceptCommand } from "./commands/accept.js";
|
|
5
|
+
import { registerDemoCommand } from "./commands/demo.js";
|
|
6
|
+
import { registerFrontierCommand } from "./commands/frontier.js";
|
|
7
|
+
import { registerInitCommand } from "./commands/init.js";
|
|
8
|
+
import { registerInspectCommand } from "./commands/inspect.js";
|
|
9
|
+
import { registerRejectCommand } from "./commands/reject.js";
|
|
10
|
+
import { registerRunCommand } from "./commands/run.js";
|
|
11
|
+
import { registerServeMcpCommand } from "./commands/serve-mcp.js";
|
|
12
|
+
import { registerStatusCommand } from "./commands/status.js";
|
|
13
|
+
import { registerValidateCommand } from "./commands/validate.js";
|
|
14
|
+
import { logger } from "../shared/logger.js";
|
|
15
|
+
// Root `rrx` command-line program.
const program = new Command();
program
    .name("rrx")
    .description("Local-first runtime for recursive research improvement.")
    .version("0.1.0");
// `doctor` logs the app name and phase taken from the app context.
program
    .command("doctor")
    .description("Print scaffold status.")
    .action(() => {
        const { appName, phase } = createAppContext();
        logger.info({ appName, phase }, "scaffold ready");
    });
// Subcommands are registered in this fixed order.
const commandRegistrars = [
    registerValidateCommand,
    registerInitCommand,
    registerDemoCommand,
    registerRunCommand,
    registerStatusCommand,
    registerFrontierCommand,
    registerInspectCommand,
    registerAcceptCommand,
    registerRejectCommand,
    registerServeMcpCommand,
];
for (const register of commandRegistrars) {
    register(program);
}
await program.parseAsync(process.argv);
|
|
38
|
+
//# sourceMappingURL=main.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"main.js","sourceRoot":"","sources":["../../src/cli/main.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AAClE,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAE7C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,KAAK,CAAC;KACX,WAAW,CAAC,yDAAyD,CAAC;KACtE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,wBAAwB,CAAC;KACrC,MAAM,CAAC,GAAG,EAAE;IACX,MAAM,OAAO,GAAG,gBAAgB,EAAE,CAAC;IACnC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EAAE,gBAAgB,CAAC,CAAC;AACpF,CAAC,CAAC,CAAC;AAEL,uBAAuB,CAAC,OAAO,CAAC,CAAC;AACjC,mBAAmB,CAAC,OAAO,CAAC,CAAC;AAC7B,mBAAmB,CAAC,OAAO,CAAC,CAAC;AAC7B,kBAAkB,CAAC,OAAO,CAAC,CAAC;AAC5B,qBAAqB,CAAC,OAAO,CAAC,CAAC;AAC/B,uBAAuB,CAAC,OAAO,CAAC,CAAC;AACjC,sBAAsB,CAAC,OAAO,CAAC,CAAC;AAChC,qBAAqB,CAAC,OAAO,CAAC,CAAC;AAC/B,qBAAqB,CAAC,OAAO,CAAC,CAAC;AAC/B,uBAAuB,CAAC,OAAO,CAAC,CAAC;AAEjC,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { JudgePack, LlmJudgeMetricExtractorConfig } from "../manifest/schema.js";
|
|
3
|
+
import type { JudgeProvider } from "../../adapters/judge/llm-judge-provider.js";
|
|
4
|
+
/** Zod schema describing one anchor record: an `id`, a `prompt`, and the expected winner ("candidate", "incumbent" or "tie"). */
declare const anchorRecordSchema: z.ZodObject<{
|
|
5
|
+
id: z.ZodString;
|
|
6
|
+
prompt: z.ZodString;
|
|
7
|
+
expectedWinner: z.ZodEnum<{
|
|
8
|
+
candidate: "candidate";
|
|
9
|
+
incumbent: "incumbent";
|
|
10
|
+
tie: "tie";
|
|
11
|
+
}>;
|
|
12
|
+
}, z.core.$strip>;
|
|
13
|
+
/** A validated anchor record, as inferred from `anchorRecordSchema`. */
export type AnchorRecord = z.infer<typeof anchorRecordSchema>;
|
|
14
|
+
/** Outcome of comparing the judge pack's verdicts against the expected winners of the anchor records. */
export interface AnchorCheckResult {
|
|
15
|
+
/** False when the pack has no anchor configuration or no anchors were supplied, so nothing was evaluated. */
checked: boolean;
|
|
16
|
+
/** Whether `agreement` reached `minAgreement`; true when nothing was checked. */
passed: boolean;
|
|
17
|
+
/** Fraction of anchors whose aggregated winner matched the expected winner; 1 when nothing was checked. */
agreement: number;
|
|
18
|
+
/** Required agreement, taken from the pack's `anchors.minAgreementWithHuman` (0 when absent). */
minAgreement: number;
|
|
19
|
+
/** True when nothing was checked. NOTE(review): the checked-case value is set outside the visible code; confirm. */
autoAcceptAllowed: boolean;
|
|
20
|
+
/** Human-readable explanation, e.g. "no anchors configured". */
reason: string;
|
|
21
|
+
/** Number of anchors evaluated; 0 when nothing was checked. */
sampleCount: number;
|
|
22
|
+
}
|
|
23
|
+
/** Input for `evaluateAnchorAgreement`. */
export interface EvaluateAnchorAgreementInput {
|
|
24
|
+
/** Judge pack supplying the judges, the repeat count and the optional anchor settings. */
pack: JudgePack;
|
|
25
|
+
/** Extractor configuration; its `mode` is forwarded to the provider on every evaluation. */
extractor: LlmJudgeMetricExtractorConfig;
|
|
26
|
+
/** Provider that evaluates each anchor prompt for each judge model. */
provider: JudgeProvider;
|
|
27
|
+
/** Anchor records to evaluate; treated as an empty list when omitted. */
anchors?: AnchorRecord[];
|
|
28
|
+
}
|
|
29
|
+
/** Reads the file at `path` as UTF-8, parses each non-blank line as JSON and validates it as an anchor record. */
export declare function loadAnchorRecords(path: string): Promise<AnchorRecord[]>;
|
|
30
|
+
/** Runs every anchor through the judge pack and reports how often the aggregated winner matched the anchor's expected winner. */
export declare function evaluateAnchorAgreement(input: EvaluateAnchorAgreementInput): Promise<AnchorCheckResult>;
|
|
31
|
+
/**
 * Combines a tentative outcome with an anchor-check result and returns the resulting outcome plus a reason.
 * NOTE(review): the gating rule lives in the implementation, which is not visible from this declaration; confirm before relying on specifics.
 */
export declare function applyAnchorAgreementGate(tentativeOutcome: "accepted" | "rejected" | "needs_human", anchorCheck: AnchorCheckResult): {
|
|
32
|
+
outcome: "accepted" | "rejected" | "needs_human";
|
|
33
|
+
reason: string;
|
|
34
|
+
};
|
|
35
|
+
export {};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import { evaluateJudgePack } from "./judge-pack.js";
|
|
5
|
+
/**
 * Validation schema for one anchor record: non-empty `id` and `prompt`
 * strings, and an `expectedWinner` of "candidate", "incumbent" or "tie".
 */
const anchorRecordSchema = z.object({
    id: z.string().min(1),
    prompt: z.string().min(1),
    expectedWinner: z.enum(["candidate", "incumbent", "tie"]),
});
|
|
10
|
+
/**
 * Loads anchor records from a line-delimited JSON file.
 *
 * The file at `path` (resolved against the current working directory) is read
 * as UTF-8 and split on "\n". Each line is trimmed, blank lines are skipped,
 * and every remaining line is JSON-parsed and validated with
 * `anchorRecordSchema`.
 *
 * @param path Location of the anchor file.
 * @returns The validated records, in file order.
 * @throws Whatever `readFile`, `JSON.parse` or the schema's `parse` throws for
 *         an unreadable file, malformed JSON or an invalid record.
 */
export async function loadAnchorRecords(path) {
    const contents = await readFile(resolve(path), "utf8");
    const records = [];
    for (const rawLine of contents.split("\n")) {
        // Trimming also drops the "\r" left behind by CRLF line endings.
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        records.push(anchorRecordSchema.parse(JSON.parse(line)));
    }
    return records;
}
|
|
18
|
+
/**
 * Measures how often the judge pack agrees with the expected winner of each
 * anchor record.
 *
 * When the pack has no `anchors` config, or no anchor records are supplied,
 * the check is skipped and a passing result with `checked: false` is returned.
 * Otherwise, for every anchor, `input.provider.evaluate` is awaited once per
 * judge per repeat (sequentially), the collected samples are aggregated with
 * `evaluateJudgePack`, and the resulting winner is compared with
 * `anchor.expectedWinner`.
 *
 * @param input Pack, extractor config, provider and optional anchor records.
 * @returns Agreement ratio, threshold, pass flag, whether auto-accept is
 *          still allowed, and a human-readable reason.
 */
export async function evaluateAnchorAgreement(input) {
    const anchors = input.anchors ?? [];
    const minAgreement = input.pack.anchors?.minAgreementWithHuman ?? 0;
    if (!input.pack.anchors || anchors.length === 0) {
        return {
            checked: false,
            passed: true,
            agreement: 1,
            minAgreement,
            autoAcceptAllowed: true,
            reason: "no anchors configured",
            sampleCount: 0,
        };
    }
    let agreed = 0;
    for (const anchor of anchors) {
        const samples = [];
        for (let repeat = 0; repeat < input.pack.repeats; repeat += 1) {
            for (const judge of input.pack.judges) {
                samples.push(await input.provider.evaluate({
                    mode: input.extractor.mode,
                    prompt: anchor.prompt,
                    model: judge.model,
                }));
            }
        }
        const aggregation = evaluateJudgePack({ pack: input.pack, samples });
        // NOTE(review): outside pairwise mode the winner is derived from the score and
        // can never be "tie", so anchors expecting "tie" cannot agree — confirm intended.
        let winner;
        if (aggregation.mode === "pairwise") {
            winner = aggregation.winner;
        }
        else {
            winner = aggregation.score >= 0.5 ? "candidate" : "incumbent";
        }
        if (winner === anchor.expectedWinner) {
            agreed += 1;
        }
    }
    // The early return above guarantees at least one anchor, so no zero-division guard is needed.
    const sampleCount = anchors.length;
    const agreement = agreed / sampleCount;
    const passed = agreement >= minAgreement;
    const autoAcceptAllowed = passed || !input.pack.audit.freezeAutoAcceptIfAnchorFails;
    return {
        checked: true,
        passed,
        agreement,
        minAgreement,
        autoAcceptAllowed,
        reason: `anchor agreement ${agreement.toFixed(2)} ${passed ? "passed" : "below"} threshold ${minAgreement.toFixed(2)}`,
        sampleCount,
    };
}
|
|
66
|
+
/**
 * Gates a tentative outcome on the anchor check.
 *
 * Only an "accepted" outcome can be altered: if the anchor check does not
 * allow auto-accept it becomes "needs_human" and the reason is suffixed with
 * "; auto-accept disabled". Every other case returns the tentative outcome
 * together with the anchor check's reason unchanged.
 *
 * @param tentativeOutcome Outcome decided before the anchor gate.
 * @param anchorCheck Result of the anchor agreement check.
 * @returns The final outcome and the reason to report.
 */
export function applyAnchorAgreementGate(tentativeOutcome, anchorCheck) {
    const autoAcceptFrozen = tentativeOutcome === "accepted" && !anchorCheck.autoAcceptAllowed;
    if (autoAcceptFrozen) {
        return {
            outcome: "needs_human",
            reason: `${anchorCheck.reason}; auto-accept disabled`,
        };
    }
    return {
        outcome: tentativeOutcome,
        reason: anchorCheck.reason,
    };
}
|
|
84
|
+
//# sourceMappingURL=anchor-checker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anchor-checker.js","sourceRoot":"","sources":["../../../src/core/engine/anchor-checker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpD,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACrB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACzB,cAAc,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,WAAW,EAAE,KAAK,CAAC,CAAC;CAC1D,CAAC,CAAC;AAqBH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,IAAY;IAClD,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC;IAClD,OAAO,GAAG;SACP,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,OAAO,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,kBAAkB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,KAAmC;IAC/E,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,qBAAqB,IAAI,CAAC,CAAC;IAEpE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,OAAO;YACL,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,IAAI;YACZ,SAAS,EAAE,CAAC;YACZ,YAAY;YACZ,iBAAiB,EAAE,IAAI;YACvB,MAAM,EAAE,uBAAuB;YAC/B,WAAW,EAAE,CAAC;SACf,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,EAAE,CAAC;QACnB,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,IAAI,CAAC,EAAE,CAAC;YAC9D,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACtC,OAAO,CAAC,IAAI,CACV,MAAM,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;oBAC5B,IAAI,EAAE,KAAK,CAAC,SAAS,CAAC,IAAI;oBAC1B,MAAM,EAAE,MAAM,CAAC,MAAM;oBACrB,KAAK,EAAE,KAAK,CAAC,KAAK;iBACnB,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,iBAAiB,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,WAA
W,CAAC,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC;QAC3H,IAAI,MAAM,KAAK,MAAM,CAAC,cAAc,EAAE,CAAC;YACrC,MAAM,IAAI,CAAC,CAAC;QACd,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IACnC,MAAM,SAAS,GAAG,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,WAAW,CAAC;IAC/D,MAAM,MAAM,GAAG,SAAS,IAAI,YAAY,CAAC;IACzC,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,6BAA6B,CAAC;IAEpF,OAAO;QACL,OAAO,EAAE,IAAI;QACb,MAAM;QACN,SAAS;QACT,YAAY;QACZ,iBAAiB;QACjB,MAAM,EAAE,MAAM;YACZ,CAAC,CAAC,oBAAoB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACxF,CAAC,CAAC,oBAAoB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;QACzF,WAAW;KACZ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,gBAAyD,EACzD,WAA8B;IAE9B,IAAI,gBAAgB,KAAK,UAAU,EAAE,CAAC;QACpC,OAAO;YACL,OAAO,EAAE,gBAAgB;YACzB,MAAM,EAAE,WAAW,CAAC,MAAM;SAC3B,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;QACnC,OAAO;YACL,OAAO,EAAE,aAAa;YACtB,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,wBAAwB;SACtD,CAAC;IACJ,CAAC;IAED,OAAO;QACL,OAAO,EAAE,gBAAgB;QACzB,MAAM,EAAE,WAAW,CAAC,MAAM;KAC3B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { JudgePack } from "../manifest/schema.js";
|
|
2
|
+
/** A finished run that may be selected for audit. */
export interface AuditCandidate {
    /** Run identifier; used to derive the sampling hash and the audit id. */
    runId: string;
    /** Identifier of the associated decision, when one exists. */
    decisionId?: string;
    /** Outcome of the run; "needs_human" candidates are never sampled. */
    outcome: "accepted" | "rejected" | "needs_human";
    /** Identifier of the metric associated with this candidate. */
    metricId: string;
    /** Explanation recorded for the outcome. */
    reason: string;
}
|
|
9
|
+
/** An audit candidate that was selected by `sampleAuditQueue`. */
export interface AuditQueueItem extends AuditCandidate {
    /** Audit identifier, built as `audit-` followed by the run id. */
    auditId: string;
    /** Timestamp string recorded when the queue was sampled (ISO 8601 when defaulted). */
    sampledAt: string;
    /** The pack's `audit.sampleRate` in effect when the item was sampled. */
    sampleRate: number;
    /** Why the item was queued; sampling by rate is the only trigger. */
    trigger: "sample_rate";
}
|
|
15
|
+
/** Deterministically decides, from the run id and outcome, whether a candidate is sampled at `sampleRate`. */
export declare function shouldSampleAudit(candidate: Pick<AuditCandidate, "runId" | "outcome">, sampleRate: number): boolean;
|
|
16
|
+
/** Selects the candidates sampled at the pack's audit rate and turns them into audit queue items. */
export declare function sampleAuditQueue(candidates: AuditCandidate[], pack: Pick<JudgePack, "audit">, sampledAt?: string): AuditQueueItem[];
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Decides whether a candidate should be queued for audit.
 *
 * Nothing is sampled when `sampleRate` is zero or negative, or when the
 * candidate's outcome is "needs_human". Otherwise the string
 * `<runId>:<outcome>` is hashed to a fraction in [0, 1) and the candidate is
 * sampled when that fraction is below `sampleRate`, so the same candidate
 * always gets the same answer for a given rate.
 *
 * @param candidate Object carrying the `runId` and `outcome` to hash.
 * @param sampleRate Target sampling fraction.
 * @returns `true` when the candidate is selected.
 */
export function shouldSampleAudit(candidate, sampleRate) {
    if (sampleRate <= 0 || candidate.outcome === "needs_human") {
        return false;
    }
    const samplingKey = `${candidate.runId}:${candidate.outcome}`;
    return stableFraction(samplingKey) < sampleRate;
}
|
|
7
|
+
/**
 * Builds the audit queue for a batch of candidates.
 *
 * Candidates are kept when `shouldSampleAudit` selects them at
 * `pack.audit.sampleRate`; each kept candidate is copied and extended with an
 * `auditId`, the sampling timestamp, the sample rate and a "sample_rate"
 * trigger. Input order is preserved.
 *
 * @param candidates Candidates to consider.
 * @param pack Object exposing `audit.sampleRate`.
 * @param sampledAt Timestamp to record; defaults to the current time in ISO format.
 * @returns The queue items for the sampled candidates.
 */
export function sampleAuditQueue(candidates, pack, sampledAt = new Date().toISOString()) {
    const queue = [];
    for (const candidate of candidates) {
        if (!shouldSampleAudit(candidate, pack.audit.sampleRate)) {
            continue;
        }
        queue.push({
            ...candidate,
            auditId: `audit-${candidate.runId}`,
            sampledAt,
            sampleRate: pack.audit.sampleRate,
            trigger: "sample_rate",
        });
    }
    return queue;
}
|
|
18
|
+
/**
 * Maps a string to a deterministic fraction in [0, 1).
 *
 * Folds the UTF-16 code units into an unsigned 32-bit hash
 * (`hash * 31 + codeUnit`, truncated with `>>> 0` each step) and divides the
 * result by 2^32. The empty string maps to 0.
 *
 * @param input Text to hash.
 * @returns A value that is at least 0 and strictly below 1.
 */
function stableFraction(input) {
    let hash = 0;
    for (let index = 0; index < input.length; index += 1) {
        // `>>> 0` keeps the running hash within the unsigned 32-bit range.
        hash = (hash * 31 + input.charCodeAt(index)) >>> 0;
    }
    return hash / 4294967296;
}
|
|
25
|
+
//# sourceMappingURL=audit-sampler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audit-sampler.js","sourceRoot":"","sources":["../../../src/core/engine/audit-sampler.ts"],"names":[],"mappings":"AAiBA,MAAM,UAAU,iBAAiB,CAAC,SAAoD,EAAE,UAAkB;IACxG,IAAI,UAAU,IAAI,CAAC,IAAI,SAAS,CAAC,OAAO,KAAK,aAAa,EAAE,CAAC;QAC3D,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,cAAc,CAAC,GAAG,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC,GAAG,UAAU,CAAC;AAChF,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,UAA4B,EAC5B,IAA8B,EAC9B,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;IAEpC,OAAO,UAAU;SACd,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;SAC1E,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnB,GAAG,SAAS;QACZ,OAAO,EAAE,SAAS,SAAS,CAAC,KAAK,EAAE;QACnC,SAAS;QACT,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;QACjC,OAAO,EAAE,aAAa;KACvB,CAAC,CAAC,CAAC;AACR,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACrD,IAAI,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,IAAI,GAAG,aAAa,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ScopeConfig } from "../manifest/schema.js";
|
|
2
|
+
import type { ScopeCheckResult } from "./scope-checker.js";
|
|
3
|
+
/** Arguments accepted by `evaluateChangeBudget`. */
export interface EvaluateChangeBudgetInput {
    /** Path of the workspace whose diff summary is collected. */
    workspacePath: string;
    /** Scope configuration the diff summary is checked against. */
    scope: ScopeConfig;
    /** Outcome to report when the budget is exceeded; "rejected" is used when omitted. */
    violationOutcome?: "rejected" | "needs_human";
}
|
|
8
|
+
/** Scope check result extended with the outcome that the check implies. */
export interface ChangeBudgetDecision extends ScopeCheckResult {
    /** "none" when the change is within budget; otherwise the configured violation outcome. */
    outcome: "none" | "rejected" | "needs_human";
}
|
|
11
|
+
/** Collects the workspace diff summary, checks it against the scope and reports the resulting outcome. */
export declare function evaluateChangeBudget(input: EvaluateChangeBudgetInput): Promise<ChangeBudgetDecision>;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { checkScope, collectDiffSummary } from "./scope-checker.js";
|
|
2
|
+
/**
 * Checks the workspace's changes against the configured scope.
 *
 * Collects a diff summary for `input.workspacePath`, runs `checkScope` on it
 * with `input.scope`, and returns the scope check's fields plus an `outcome`:
 * "none" when the check reports `withinBudget`, otherwise
 * `input.violationOutcome`, falling back to "rejected" when it is null or undefined.
 *
 * @param input Workspace path, scope config and optional violation outcome.
 * @returns The scope check result extended with the derived outcome.
 */
export async function evaluateChangeBudget(input) {
    const summary = await collectDiffSummary(input.workspacePath);
    const scopeCheck = checkScope(summary, input.scope);
    const violationOutcome = input.violationOutcome ?? "rejected";
    return {
        ...scopeCheck,
        outcome: scopeCheck.withinBudget ? "none" : violationOutcome,
    };
}
|
|
10
|
+
//# sourceMappingURL=change-budget.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"change-budget.js","sourceRoot":"","sources":["../../../src/core/engine/change-budget.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAYpE,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAgC;IACzE,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC9D,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;IAEpD,OAAO;QACL,GAAG,UAAU;QACb,OAAO,EAAE,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,IAAI,UAAU,CAAC;KACnF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { DecisionStore } from "../ports/decision-store.js";
|
|
2
|
+
import type { FrontierStore } from "../ports/frontier-store.js";
|
|
3
|
+
import type { RunStore } from "../ports/run-store.js";
|
|
4
|
+
import type { RalphManifest } from "../manifest/schema.js";
|
|
5
|
+
import type { DecisionRecord } from "../model/decision-record.js";
|
|
6
|
+
import type { FrontierEntry } from "../model/frontier-entry.js";
|
|
7
|
+
import type { RunRecord } from "../model/run-record.js";
|
|
8
|
+
import { type AnchorCheckResult } from "./anchor-checker.js";
|
|
9
|
+
import { type AuditQueueItem } from "./audit-sampler.js";
|
|
10
|
+
import { type ChangeBudgetDecision } from "./change-budget.js";
|
|
11
|
+
import { GitWorktreeWorkspaceManager } from "./workspace-manager.js";
|
|
12
|
+
import { GitClient } from "../../adapters/git/git-client.js";
|
|
13
|
+
import type { JudgeProvider } from "../../adapters/judge/llm-judge-provider.js";
|
|
14
|
+
/**
 * Collaborators handed to `runCycle`.
 * NOTE(review): the runner implementation is not visible here; the member
 * notes below restate the declared types only.
 */
export interface CycleRunnerDependencies {
    /** Store for run records. */
    runStore: RunStore;
    /** Store for decision records. */
    decisionStore: DecisionStore;
    /** Store for frontier entries. */
    frontierStore: FrontierStore;
    /** Git-worktree based workspace manager. */
    workspaceManager: GitWorktreeWorkspaceManager;
    /** Git client adapter. */
    gitClient: GitClient;
    /** Optional LLM judge provider. */
    judgeProvider?: JudgeProvider;
    /** Optional clock function — presumably an override for the current time; confirm against the runner. */
    now?: () => Date;
}
|
|
23
|
+
/** Per-invocation input for `runCycle`. */
export interface RunCycleInput {
    /** Root directory of the repository. */
    repoRoot: string;
    /** Path of the manifest file. */
    manifestPath: string;
    /** The manifest as a `RalphManifest` object. */
    manifest: RalphManifest;
    /** Frontier entries supplied for this cycle. */
    currentFrontier: FrontierEntry[];
}
|
|
29
|
+
/** Status a cycle can end with: one of the three decision outcomes, or "failed". */
export type CycleRunStatus = "accepted" | "rejected" | "needs_human" | "failed";
|
|
30
|
+
/** Value resolved by `runCycle`. */
export interface CycleRunResult {
    /** Status of the cycle. */
    status: CycleRunStatus;
    /** Run record for the cycle. */
    run: RunRecord;
    /** Decision record, when one is present. */
    decision?: DecisionRecord;
    /** Frontier entries reported with the result. */
    frontier: FrontierEntry[];
    /** Audit queue items reported with the result. */
    auditQueue: AuditQueueItem[];
    /** Change-budget decision, when one is present. */
    changeBudget?: ChangeBudgetDecision;
    /** Anchor check result, when one is present. */
    anchorCheck?: AnchorCheckResult;
}
|
|
39
|
+
/** Runs one cycle with the given input and dependencies and resolves to its result. */
export declare function runCycle(input: RunCycleInput, dependencies: CycleRunnerDependencies): Promise<CycleRunResult>;
|