rad-experiment 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/cli/commands/list.d.ts +1 -0
- package/dist/cli/commands/list.js +35 -0
- package/dist/cli/commands/publish.d.ts +1 -0
- package/dist/cli/commands/publish.js +63 -0
- package/dist/cli/commands/reproduce.d.ts +1 -0
- package/dist/cli/commands/reproduce.js +45 -0
- package/dist/cli/commands/show.d.ts +1 -0
- package/dist/cli/commands/show.js +61 -0
- package/dist/cli/format.d.ts +9 -0
- package/dist/cli/format.js +21 -0
- package/dist/cli/helpers.d.ts +49 -0
- package/dist/cli/helpers.js +90 -0
- package/dist/cli/rad.d.ts +11 -0
- package/dist/cli/rad.js +64 -0
- package/dist/cob/actions.d.ts +35 -0
- package/dist/cob/actions.js +57 -0
- package/dist/cob/state.d.ts +7 -0
- package/dist/cob/state.js +97 -0
- package/dist/rad-cob-experiment.d.ts +2 -0
- package/dist/rad-cob-experiment.js +33 -0
- package/dist/rad-experiment.d.ts +2 -0
- package/dist/rad-experiment.js +74 -0
- package/dist/types.d.ts +102 -0
- package/dist/types.js +9 -0
- package/package.json +24 -0
- package/src/__tests__/actions.test.ts +122 -0
- package/src/__tests__/cob-protocol.test.ts +138 -0
- package/src/__tests__/fixtures.ts +119 -0
- package/src/__tests__/format.test.ts +55 -0
- package/src/__tests__/golden/publish-action.json +46 -0
- package/src/__tests__/golden/publish-minimal.json +25 -0
- package/src/__tests__/golden/publish-with-samples.json +38 -0
- package/src/__tests__/golden/reproduce-action.json +19 -0
- package/src/__tests__/golden/reproduce-minimal.json +18 -0
- package/src/__tests__/helpers.test.ts +138 -0
- package/src/__tests__/integration.test.ts +124 -0
- package/src/__tests__/serialization.test.ts +175 -0
- package/src/__tests__/state.test.ts +191 -0
- package/src/cli/commands/list.ts +45 -0
- package/src/cli/commands/publish.ts +68 -0
- package/src/cli/commands/reproduce.ts +52 -0
- package/src/cli/commands/show.ts +70 -0
- package/src/cli/format.ts +27 -0
- package/src/cli/helpers.ts +101 -0
- package/src/cli/rad.ts +87 -0
- package/src/cob/actions.ts +100 -0
- package/src/cob/state.ts +120 -0
- package/src/rad-cob-experiment.ts +39 -0
- package/src/rad-experiment.ts +85 -0
- package/src/types.ts +133 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// Shared CLI helpers for arg parsing and measurement construction.
|
|
2
|
+
|
|
3
|
+
import type { Measurement, MetricValue, Verdict } from "../types.js";
|
|
4
|
+
|
|
5
|
+
/** The verdict strings this CLI knows about: "confirmed", "failed" and "inconclusive". */
export const VERDICTS: Verdict[] = ["confirmed", "failed", "inconclusive"];
|
|
6
|
+
|
|
7
|
+
// parseArgs option specs for the baseline/candidate measurement flags.
// Every flag is string-typed. The "-std" flags default to "0" and the
// "-samples" flags to ""; median, n and delta have no default.
export const MEASUREMENT_OPTIONS = (() => {
  const text = () => ({ type: "string" as const });
  const textWithDefault = (fallback: string) => ({ type: "string" as const, default: fallback });

  return {
    "baseline-median": text(),
    "baseline-std": textWithDefault("0"),
    "baseline-samples": textWithDefault(""),
    "baseline-n": text(),
    "candidate-median": text(),
    "candidate-std": textWithDefault("0"),
    "candidate-samples": textWithDefault(""),
    "candidate-n": text(),
    delta: text(),
  };
})();
|
|
19
|
+
|
|
20
|
+
/**
 * Failure raised by die(). Its `name` is "CliError" so the CLI entry point
 * can tell these apart from other errors with `instanceof`.
 */
export class CliError extends Error {
  name = "CliError";

  constructor(message: string) {
    super(message);
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Abort the current command by throwing a CliError carrying `msg`.
 *
 * @param msg Text stored as the error's message.
 * @throws CliError always; this function never returns.
 */
export function die(msg: string): never {
  const failure = new CliError(msg);
  throw failure;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Fetch a mandatory option from parsed CLI values.
 *
 * @param values Parsed option map.
 * @param name Option name without the leading dashes.
 * @returns The option value converted with String().
 * @throws CliError (via die) when the value is null, undefined or "".
 */
export function requireArg(values: Record<string, unknown>, name: string): string {
  const raw = values[name];
  const present = raw !== null && raw !== undefined && raw !== "";
  if (!present) {
    die(`missing required argument: --${name}`);
  }
  return String(raw);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Fetch a mandatory option and parse it as a base-10 integer.
 *
 * The whole value (after trimming surrounding whitespace) must be an optionally
 * signed run of digits. A bare parseInt would accept "12abc" as 12 and "1.5"
 * as 1, silently recording a value the user never typed.
 *
 * @param values Parsed option map.
 * @param name Option name without the leading dashes.
 * @returns The parsed integer.
 * @throws CliError (via requireArg/die) when the option is missing, is not a
 *   plain integer, or is too large to be represented exactly.
 */
export function requireInt(values: Record<string, unknown>, name: string): number {
  const val = requireArg(values, name);
  const text = val.trim();
  if (!/^[+-]?\d+$/.test(text)) die(`invalid integer for --${name}: ${val}`);
  const n = Number.parseInt(text, 10);
  // Digit strings beyond 2^53 would lose precision when stored as a number.
  if (!Number.isSafeInteger(n)) die(`invalid integer for --${name}: ${val}`);
  return n;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Parse an optional option as a base-10 integer, falling back to a default.
 *
 * The whole value (after trimming surrounding whitespace) must be an optionally
 * signed run of digits. A bare parseInt would accept "12abc" as 12 and "1.5"
 * as 1, silently recording a value the user never typed.
 *
 * @param values Parsed option map.
 * @param name Option name without the leading dashes.
 * @param defaultVal Returned when the option is null, undefined or "".
 * @returns The parsed integer, or `defaultVal` when the option is absent.
 * @throws CliError (via die) when a value is present but is not a plain
 *   integer, or is too large to be represented exactly.
 */
export function optionalInt(values: Record<string, unknown>, name: string, defaultVal: number): number {
  const val = values[name];
  if (val == null || val === "") return defaultVal;
  const text = String(val).trim();
  if (!/^[+-]?\d+$/.test(text)) die(`invalid integer for --${name}: ${val}`);
  const n = Number.parseInt(text, 10);
  // Digit strings beyond 2^53 would lose precision when stored as a number.
  if (!Number.isSafeInteger(n)) die(`invalid integer for --${name}: ${val}`);
  return n;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Parse a comma-separated list of integer samples.
 *
 * Tokens are trimmed; any token that is not an optionally signed run of digits
 * is dropped. Matching the whole token keeps malformed input such as "12abc"
 * or "1.5" from being recorded as 12 or 1, which a bare parseInt would do.
 *
 * @param s Raw option value; empty or whitespace-only yields an empty list.
 * @returns The valid integers in their original order.
 */
function parseSamples(s: string): number[] {
  if (!s || s.trim() === "") return [];
  const integer = /^[+-]?\d+$/;
  return s
    .split(",")
    .map((token) => token.trim())
    .filter((token) => integer.test(token))
    .map((token) => Number.parseInt(token, 10));
}
|
|
62
|
+
|
|
63
|
+
/**
 * Assemble a Measurement from the `<prefix>-median`, `<prefix>-std`,
 * `<prefix>-n` and `<prefix>-samples` options.
 *
 * Median and n are mandatory; std falls back to 0. `samplesX1000` is only set
 * when at least one sample was parsed.
 *
 * @param values Parsed option map.
 * @param prefix Which option family to read: "baseline" or "candidate".
 * @returns The populated Measurement.
 * @throws CliError when a required option is missing or an integer is invalid.
 */
export function buildMeasurement(
  values: Record<string, unknown>,
  prefix: "baseline" | "candidate",
): Measurement {
  const optionName = (suffix: string): string => `${prefix}-${suffix}`;

  // Validation order (median, std, n) decides which error surfaces first.
  const medianX1000 = requireInt(values, optionName("median"));
  const stdX1000 = optionalInt(values, optionName("std"), 0);
  const n = requireInt(values, optionName("n"));
  const samplesX1000 = parseSamples((values[optionName("samples")] as string) ?? "");

  const measurement: Measurement = { n, medianX1000, stdX1000 };
  if (samplesX1000.length > 0) {
    measurement.samplesX1000 = samplesX1000;
  }
  return measurement;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Parse "name:unit:baseline_x1000:candidate_x1000:delta_x100[:regressed]"
 * Mirrors parse_secondary in the Rust CLI.
 *
 * The three numeric fields must each be an optionally signed run of digits
 * (surrounding whitespace allowed). A bare parseInt would accept "12abc" as 12
 * and "1.5" as 1, silently recording a value the user never typed.
 *
 * `regressed` is true only when the sixth field is exactly "true"; it is false
 * when the field is missing or holds anything else. Both nested measurements
 * get n = 1 and stdX1000 = 0.
 *
 * @param s Colon-separated secondary metric spec.
 * @returns The parsed MetricValue.
 * @throws CliError (via die) when fewer than 5 fields are present or a numeric
 *   field is not an integer.
 */
export function parseSecondary(s: string): MetricValue {
  const parts = s.split(":");
  if (parts.length < 5) {
    die(`secondary metric needs at least 5 colon-separated fields: ${s}`);
  }

  // Whole-field integer parse; NaN marks anything that is not a safe integer.
  const toInt = (text: string): number => {
    const trimmed = text.trim();
    if (!/^[+-]?\d+$/.test(trimmed)) return Number.NaN;
    const parsed = Number.parseInt(trimmed, 10);
    return Number.isSafeInteger(parsed) ? parsed : Number.NaN;
  };

  const baselineMedian = toInt(parts[2]);
  const candidateMedian = toInt(parts[3]);
  const delta = toInt(parts[4]);
  if (Number.isNaN(baselineMedian) || Number.isNaN(candidateMedian) || Number.isNaN(delta)) {
    die(`secondary metric has non-integer numeric fields: ${s}`);
  }

  const regressed = parts[5] === "true";
  return {
    name: parts[0],
    unit: parts[1],
    baseline: { n: 1, medianX1000: baselineMedian, stdX1000: 0 },
    candidate: { n: 1, medianX1000: candidateMedian, stdX1000: 0 },
    deltaPctX100: delta,
    regressed,
  };
}
|
package/src/cli/rad.ts
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// Wrapper around `rad` CLI commands for COB operations.
|
|
2
|
+
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
4
|
+
import { writeFileSync, rmSync, mkdtempSync } from "node:fs";
|
|
5
|
+
import { join, dirname } from "node:path";
|
|
6
|
+
import { tmpdir } from "node:os";
|
|
7
|
+
import type { Action, Experiment } from "../types.js";
|
|
8
|
+
|
|
9
|
+
// COB type name passed as `--type` to the `rad cob` invocations in this module.
const COB_TYPE = "cc.experiment";
|
|
10
|
+
|
|
11
|
+
/**
 * Resolve the RID of a repository by running `rad inspect --rid`.
 *
 * @param repoPath Directory to run the command in; the current working
 *   directory is used when omitted or empty.
 * @returns The command's stdout with surrounding whitespace removed.
 * @throws Whatever execFileSync throws when `rad` fails or cannot be started.
 */
export function getRepoId(repoPath?: string): string {
  const cwdOption = repoPath ? { cwd: repoPath } : {};
  const output = execFileSync("rad", ["inspect", "--rid"], { encoding: "utf-8", ...cwdOption });
  return output.trim();
}
|
|
17
|
+
|
|
18
|
+
/**
 * Write actions to a temp JSONL file, return its path.
 *
 * A fresh directory prefixed "rad-exp-" is created under the OS temp dir and
 * the actions are written to "actions.jsonl" inside it, one JSON document per
 * line followed by a trailing newline. If serialization or the write fails,
 * the directory is removed before the error is rethrown so a failed call does
 * not leave it behind.
 *
 * @param actions Actions to serialize.
 * @returns Path of the written file; the caller must delete its parent directory.
 * @throws Any error raised while creating the directory, serializing, or writing.
 */
function writeTempActions(actions: Action[]): string {
  const dir = mkdtempSync(join(tmpdir(), "rad-exp-"));
  const path = join(dir, "actions.jsonl");
  try {
    const content = actions.map((a) => JSON.stringify(a)).join("\n") + "\n";
    writeFileSync(path, content);
  } catch (err: unknown) {
    // Nobody else knows about this directory yet, so clean it up here.
    rmSync(dir, { recursive: true, force: true });
    throw err;
  }
  return path;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Best-effort removal of a temp file together with its parent directory.
 * Errors from the removal are swallowed.
 *
 * @param path Path of a file whose parent directory should be deleted recursively.
 */
function cleanupTemp(path: string): void {
  try {
    const parentDir = dirname(path);
    rmSync(parentDir, { recursive: true });
  } catch {
    // A failed cleanup is deliberately ignored.
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Shared implementation behind cobCreate and cobUpdate.
 *
 * Writes the actions to a temp file, runs
 * `rad cob <subcommand> --repo <rid> --type <COB_TYPE> [--object <id>] --message <message> <file>`,
 * and removes the temp file whether or not the command succeeded.
 *
 * @param rid Repository id passed to `--repo`.
 * @param subcommand Either "create" or "update".
 * @param actions Actions written to the temp file.
 * @param message Value passed to `--message`.
 * @param objectId When truthy, passed as `--object`.
 * @returns Trimmed stdout of the `rad` command.
 */
function cobExec(
  rid: string,
  subcommand: "create" | "update",
  actions: Action[],
  message: string,
  objectId?: string,
): string {
  const tmpFile = writeTempActions(actions);
  try {
    const objectArgs = objectId ? ["--object", objectId] : [];
    const args = [
      "cob",
      subcommand,
      "--repo",
      rid,
      "--type",
      COB_TYPE,
      ...objectArgs,
      "--message",
      message,
      tmpFile,
    ];
    const stdout = execFileSync("rad", args, { encoding: "utf-8" });
    return stdout.trim();
  } finally {
    cleanupTemp(tmpFile);
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Create a new COB from the given actions.
 *
 * @param rid Repository id.
 * @param actions Actions for the new object.
 * @param message Message passed to `rad cob create`.
 * @returns Trimmed stdout of `rad cob create` (the ObjectId).
 */
export function cobCreate(rid: string, actions: Action[], message: string): string {
  const objectId = cobExec(rid, "create", actions, message);
  return objectId;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Append actions to an existing COB.
 *
 * @param rid Repository id.
 * @param objectId Id of the object to update.
 * @param actions Actions to apply.
 * @param message Message passed to `rad cob update`.
 * @returns Trimmed stdout of `rad cob update`.
 */
export function cobUpdate(rid: string, objectId: string, actions: Action[], message: string): string {
  const output = cobExec(rid, "update", actions, message, objectId);
  return output;
}
|
|
65
|
+
|
|
66
|
+
/**
 * List the ids of all COBs of the experiment type in a repository.
 *
 * @param rid Repository id passed to `--repo`.
 * @returns One entry per non-blank line of `rad cob list` output, trimmed.
 */
export function cobList(rid: string): string[] {
  const args = ["cob", "list", "--repo", rid, "--type", COB_TYPE];
  const stdout = execFileSync("rad", args, { encoding: "utf-8" });

  const ids: string[] = [];
  for (const line of stdout.split("\n")) {
    const id = line.trim();
    if (id.length > 0) ids.push(id);
  }
  return ids;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Fetch a COB's state via `rad cob show --format json`.
 *
 * The parsed JSON is returned as an Experiment without any validation.
 *
 * @param rid Repository id passed to `--repo`.
 * @param objectId Id passed to `--object`.
 * @returns The parsed command output.
 * @throws SyntaxError when the output is not valid JSON, or whatever
 *   execFileSync throws when `rad` fails.
 */
export function cobShow(rid: string, objectId: string): Experiment {
  const args = [
    "cob",
    "show",
    "--repo",
    rid,
    "--type",
    COB_TYPE,
    "--object",
    objectId,
    "--format",
    "json",
  ];
  const raw = execFileSync("rad", args, { encoding: "utf-8" });
  const experiment: Experiment = JSON.parse(raw);
  return experiment;
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// Action builders — constructs action objects with correct field naming.
|
|
2
|
+
//
|
|
3
|
+
// Actions use snake_case for Action-level fields (matching Rust serde output),
|
|
4
|
+
// but camelCase for nested Measurement/MetricValue fields.
|
|
5
|
+
// Optional/empty fields are omitted (matching Rust skip_serializing_if).
|
|
6
|
+
|
|
7
|
+
import type {
|
|
8
|
+
Measurement,
|
|
9
|
+
MetricValue,
|
|
10
|
+
PublishAction,
|
|
11
|
+
ReproduceAction,
|
|
12
|
+
Verdict,
|
|
13
|
+
} from "../types.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Copy a Measurement, keeping only its known fields and leaving out
 * `samplesX1000` when it is missing or empty.
 *
 * @param m Measurement to normalize; it is not modified.
 * @returns A new object. A non-empty samples array is reused, not copied.
 */
export function cleanMeasurement(m: Measurement): Measurement {
  const { n, medianX1000, stdX1000, samplesX1000 } = m;
  const core: Measurement = { n, medianX1000, stdX1000 };
  const hasSamples = samplesX1000 && samplesX1000.length > 0;
  return hasSamples ? { ...core, samplesX1000 } : core;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Build a Publish action with snake_case action-level keys.
 *
 * Keys are inserted in a fixed order so the serialized JSON is stable.
 * `description` is omitted when null/undefined, `os`/`cpu` when they are the
 * empty string, and `secondary_metrics` when the list is empty. Baseline and
 * candidate are passed through cleanMeasurement.
 *
 * @param opts Camel-cased inputs for the action.
 * @returns The assembled action object, cast to PublishAction.
 */
export function buildPublishAction(opts: {
  description?: string;
  base: string;
  oid: string;
  metricName: string;
  metricUnit: string;
  direction: string;
  runnerClass: string;
  os: string;
  cpu: string;
  baseline: Measurement;
  candidate: Measurement;
  deltaPctX100: number;
  buildOk: boolean;
  testsOk: boolean;
  sanitizersOk: boolean;
  agentSystem: string;
  agentModel: string;
  secondaryMetrics: MetricValue[];
}): PublishAction {
  // Property order below is the serialized key order; do not reorder.
  const payload: Record<string, unknown> = {
    type: "publish",
    ...(opts.description != null ? { description: opts.description } : {}),
    // Git object dependencies — prevents GC from pruning commits.
    parents: [opts.base, opts.oid],
    base: opts.base,
    oid: opts.oid,
    metric_name: opts.metricName,
    metric_unit: opts.metricUnit,
    direction: opts.direction,
    runner_class: opts.runnerClass,
    ...(opts.os !== "" ? { os: opts.os } : {}),
    ...(opts.cpu !== "" ? { cpu: opts.cpu } : {}),
    baseline: cleanMeasurement(opts.baseline),
    candidate: cleanMeasurement(opts.candidate),
    // NOTE(review): the sign of the delta is discarded here — confirm this is intended.
    delta_pct_x100: Math.abs(opts.deltaPctX100),
    build_ok: opts.buildOk,
    tests_ok: opts.testsOk,
    sanitizers_ok: opts.sanitizersOk,
    agent_system: opts.agentSystem,
    agent_model: opts.agentModel,
    ...(opts.secondaryMetrics.length > 0 ? { secondary_metrics: opts.secondaryMetrics } : {}),
  };

  return payload as unknown as PublishAction;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build a Reproduce action with snake_case action-level keys.
 *
 * Keys are inserted in a fixed order so the serialized JSON is stable.
 * `notes` is omitted when null/undefined. Baseline and candidate are passed
 * through cleanMeasurement.
 *
 * @param opts Camel-cased inputs for the action.
 * @returns The assembled action object, cast to ReproduceAction.
 */
export function buildReproduceAction(opts: {
  verdict: Verdict;
  runnerClass: string;
  baseline: Measurement;
  candidate: Measurement;
  deltaPctX100: number;
  buildOk: boolean;
  testsOk: boolean;
  notes?: string;
}): ReproduceAction {
  // Property order below is the serialized key order; do not reorder.
  const payload: Record<string, unknown> = {
    type: "reproduce",
    verdict: opts.verdict,
    runner_class: opts.runnerClass,
    baseline: cleanMeasurement(opts.baseline),
    candidate: cleanMeasurement(opts.candidate),
    // NOTE(review): the sign of the delta is discarded here — confirm this is intended.
    delta_pct_x100: Math.abs(opts.deltaPctX100),
    build_ok: opts.buildOk,
    tests_ok: opts.testsOk,
    ...(opts.notes != null ? { notes: opts.notes } : {}),
  };

  return payload as unknown as ReproduceAction;
}
|
package/src/cob/state.ts
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// State evaluation: applies actions to build Experiment state.
|
|
2
|
+
// Mirrors the Rust from_root / apply_action / op logic in lib.rs.
|
|
3
|
+
//
|
|
4
|
+
// Actions arrive with snake_case field names (matching Rust serde).
|
|
5
|
+
// State output uses camelCase field names (matching Experiment struct serde).
|
|
6
|
+
|
|
7
|
+
import type {
|
|
8
|
+
Author,
|
|
9
|
+
Experiment,
|
|
10
|
+
Measurement,
|
|
11
|
+
MetricValue,
|
|
12
|
+
Op,
|
|
13
|
+
OpMessage,
|
|
14
|
+
Verdict,
|
|
15
|
+
} from "../types.js";
|
|
16
|
+
|
|
17
|
+
// Loosely-typed view of an action, used so fields can be read under either
// their snake_case or camelCase key.
type RawAction = Record<string, unknown>;
|
|
18
|
+
|
|
19
|
+
/**
 * Wrap a public key string in an Author.
 *
 * @param key Either a bare key or a value already prefixed with "did:key:".
 * @returns An Author whose id always carries the "did:key:" prefix.
 */
function authorFromKey(key: string): Author {
  const DID_PREFIX = "did:key:";
  if (key.startsWith(DID_PREFIX)) {
    return { id: key };
  }
  return { id: DID_PREFIX + key };
}
|
|
24
|
+
|
|
25
|
+
/**
 * Convert a snake_case identifier to camelCase.
 *
 * Each underscore that is directly followed by a lowercase letter or digit is
 * removed and that character upper-cased; other underscores are left alone.
 *
 * @param s Identifier to convert.
 * @returns The converted identifier.
 */
function snakeToCamel(s: string): string {
  const upperAfterUnderscore = (_whole: string, next: string): string => next.toUpperCase();
  return s.replace(/_([a-z0-9])/g, upperAfterUnderscore);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Look up an action field by its snake_case name, falling back to the
 * camelCase spelling when the snake_case key is not present.
 * This covers both Rust-created (snake_case) and legacy TS-created
 * (camelCase) COBs.
 *
 * @param action Raw action object.
 * @param snakeCase Field name in snake_case.
 * @returns The stored value, or undefined when neither key exists.
 */
function field(action: RawAction, snakeCase: string): unknown {
  const key = snakeCase in action ? snakeCase : snakeToCamel(snakeCase);
  return action[key];
}
|
|
37
|
+
|
|
38
|
+
/**
 * View an op's actions as loosely-typed records.
 *
 * @param op Operation whose actions are wanted.
 * @returns The same array instance as `op.actions`, retyped.
 */
function rawActions(op: Op): RawAction[] {
  const untyped: unknown = op.actions;
  return untyped as RawAction[];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Fold one action into an experiment, mutating it in place.
 *
 * Only "reproduce" actions have an effect: they append a reproduction entry.
 * Every other action type is ignored.
 *
 * @param exp Experiment to update.
 * @param action Raw action to apply.
 * @param author Author key of the op carrying the action.
 * @param timestamp Timestamp of the op carrying the action.
 */
function applyAction(exp: Experiment, action: RawAction, author: string, timestamp: number): void {
  if (action.type === "reproduce") {
    const reproduction: Experiment["reproductions"][number] = {
      verdict: action.verdict as Verdict,
      runnerClass: (field(action, "runner_class") ?? "") as string,
      baseline: action.baseline as Measurement,
      candidate: action.candidate as Measurement,
      deltaPctX100: (field(action, "delta_pct_x100") ?? 0) as number,
      author: authorFromKey(author),
      timestamp,
    };
    exp.reproductions.push(reproduction);
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Build the initial Experiment from a root operation. Mirrors Experiment::from_root.
 *
 * The first action must be a "publish"; its fields seed the experiment and any
 * remaining actions in the same op are then applied in order. Missing fields
 * default to "" for strings, 0 for the delta, true for build/tests and false
 * for sanitizers. `description` and `secondaryMetrics` are left out when
 * absent or empty.
 *
 * @param op Root operation.
 * @returns The newly created experiment state.
 * @throws Error when the op has no actions or the first one is not "publish".
 */
export function fromRoot(op: Op): Experiment {
  const [first, ...rest] = rawActions(op);
  if (!first || first.type !== "publish") {
    throw new Error("the first action must be of type `publish`");
  }

  // Small readers that apply the per-type defaults.
  const text = (name: string): string => (field(first, name) ?? "") as string;
  const flag = (name: string, fallback: boolean): boolean => (field(first, name) ?? fallback) as boolean;

  const description = first.description as string | undefined;
  const secondaryMetrics = (field(first, "secondary_metrics") ?? []) as MetricValue[];

  // Property order below is the serialized key order; do not reorder.
  const exp: Experiment = {
    ...(description != null ? { description } : {}),
    base: first.base as string,
    oid: first.oid as string,
    metricName: text("metric_name"),
    metricUnit: text("metric_unit"),
    direction: first.direction as string,
    runnerClass: text("runner_class"),
    os: (first.os ?? "") as string,
    cpu: (first.cpu ?? "") as string,
    baseline: first.baseline as Measurement,
    candidate: first.candidate as Measurement,
    deltaPctX100: (field(first, "delta_pct_x100") ?? 0) as number,
    buildOk: flag("build_ok", true),
    testsOk: flag("tests_ok", true),
    sanitizersOk: flag("sanitizers_ok", false),
    agentSystem: text("agent_system"),
    agentModel: text("agent_model"),
    ...(secondaryMetrics.length > 0 ? { secondaryMetrics } : {}),
    reproductions: [],
    author: authorFromKey(op.author),
    createdAt: op.timestamp,
  };

  for (const followUp of rest) {
    applyAction(exp, followUp, op.author, op.timestamp);
  }
  return exp;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Apply every action of a follow-up operation to an experiment, in order.
 *
 * @param exp Experiment to mutate.
 * @param op Operation whose actions are applied with the op's author and timestamp.
 */
export function applyOp(exp: Experiment, op: Op): void {
  const { author, timestamp } = op;
  for (const action of rawActions(op)) {
    applyAction(exp, action, author, timestamp);
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Process one OpMessage from Radicle's external COB protocol.
 *
 * When `value` is null/undefined or an object with no keys, the op is treated
 * as the root and a fresh experiment is built from it. Otherwise `value` is
 * taken as the current experiment and the op is applied to it in place. The
 * concurrent ops are then applied in order.
 *
 * @param msg Incoming message.
 * @returns The resulting experiment (the same object as `msg.value` when one was supplied).
 */
export function handleOpMessage(msg: OpMessage): Experiment {
  const { value, op, concurrent } = msg;
  const noPriorState = value == null || (typeof value === "object" && Object.keys(value).length === 0);

  let exp: Experiment;
  if (noPriorState) {
    exp = fromRoot(op);
  } else {
    exp = value as unknown as Experiment;
    applyOp(exp, op);
  }

  for (const concurrentOp of concurrent) {
    applyOp(exp, concurrentOp);
  }
  return exp;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// rad-cob-experiment: External COB helper for the Radicle external COB protocol.
|
|
4
|
+
//
|
|
5
|
+
// Protocol (JSON Lines on stdin/stdout):
|
|
6
|
+
// 1. Read one JSON Line: { value, op, concurrent }
|
|
7
|
+
// 2. Apply op (and concurrent ops) to value -> new state
|
|
8
|
+
// 3. Write new state as JSON Line to stdout
|
|
9
|
+
// 4. Repeat until stdin closes
|
|
10
|
+
// 5. Exit 0 on success, non-zero on failure
|
|
11
|
+
|
|
12
|
+
import { createInterface } from "node:readline";
|
|
13
|
+
import { handleOpMessage } from "./cob/state.js";
|
|
14
|
+
import type { OpMessage } from "./types.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Report an error on stderr, prefixed with the program name, and exit with status 1.
 *
 * @param err Anything that was thrown; non-Error values are stringified.
 */
function fatal(err: unknown): never {
  let msg: string;
  if (err instanceof Error) {
    msg = err.message;
  } else {
    msg = String(err);
  }
  process.stderr.write(`rad-cob-experiment: ${msg}\n`);
  process.exit(1);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Read JSON Lines from stdin until it closes. Each non-blank line is parsed as
 * an OpMessage, evaluated with handleOpMessage, and the resulting state is
 * written to stdout as one JSON line. Any parse or evaluation error is fatal.
 */
async function main(): Promise<void> {
  const lines = createInterface({ input: process.stdin });

  for await (const rawLine of lines) {
    const payload = rawLine.trim();
    if (payload.length === 0) continue;

    try {
      const message: OpMessage = JSON.parse(payload);
      const state = handleOpMessage(message);
      process.stdout.write(`${JSON.stringify(state)}\n`);
    } catch (err: unknown) {
      fatal(err);
    }
  }
}
|
|
38
|
+
|
|
39
|
+
// Start the read loop; a rejection from main() is reported by fatal(), which exits with status 1.
main().catch(fatal);
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// rad-experiment CLI — thin wrapper around rad cob commands.
|
|
4
|
+
// Subcommands live in cli/commands/; shared helpers in cli/helpers.ts.
|
|
5
|
+
|
|
6
|
+
import { CliError } from "./cli/helpers.js";
|
|
7
|
+
import { cmdPublish } from "./cli/commands/publish.js";
|
|
8
|
+
import { cmdList } from "./cli/commands/list.js";
|
|
9
|
+
import { cmdShow } from "./cli/commands/show.js";
|
|
10
|
+
import { cmdReproduce } from "./cli/commands/reproduce.js";
|
|
11
|
+
|
|
12
|
+
// Dispatch table: subcommand name -> handler. Each handler receives the
// arguments that follow the subcommand name.
const COMMANDS: Record<string, (args: string[]) => void> = {
  publish: cmdPublish,
  list: cmdList,
  show: cmdShow,
  reproduce: cmdReproduce,
};
|
|
18
|
+
|
|
19
|
+
/**
 * Print the usage text to stderr and terminate the process with exit code 1.
 * Never returns.
 */
function usage(): never {
  console.error(
    `Usage: rad-experiment [--repo <repo_path>] <command> [options]

Commands:
publish Publish a new optimization experiment
list List all experiments
show Show experiment details
reproduce Add a reproduction to an experiment`,
  );
  process.exit(1);
}
|
|
31
|
+
|
|
32
|
+
/**
 * CLI entry: split off a global `--repo`/`-r` given before the subcommand,
 * resolve the subcommand, and hand it the remaining arguments.
 *
 * A global repo is forwarded to the subcommand as a leading `--repo <path>`
 * unless the subcommand arguments already contain `--repo` or `-r`. With no
 * arguments, or with an unknown first argument, the usage text is printed and
 * the process exits.
 *
 * Command names are matched against COMMANDS' own keys only. A plain `in` or
 * index lookup would also accept inherited names such as "toString" or
 * "constructor" and invoke an Object.prototype member as if it were a handler.
 */
function main(): void {
  const rawArgs = process.argv.slice(2);
  const isCommand = (name: string): boolean => Object.prototype.hasOwnProperty.call(COMMANDS, name);

  // Extract global --repo/-r before subcommand.
  let globalRepo: string | undefined;
  const subArgs: string[] = [];
  let foundCommand = false;

  for (let i = 0; i < rawArgs.length; i++) {
    const arg = rawArgs[i];
    if (!foundCommand && (arg === "--repo" || arg === "-r")) {
      // Consume the option's value as well.
      globalRepo = rawArgs[++i];
      continue;
    }
    if (!foundCommand && isCommand(arg)) {
      foundCommand = true;
    }
    subArgs.push(arg);
  }

  const command = subArgs.shift();
  if (command === undefined || !isCommand(command)) usage();

  const handler = COMMANDS[command];

  if (globalRepo && !subArgs.includes("--repo") && !subArgs.includes("-r")) {
    subArgs.unshift("--repo", globalRepo);
  }

  handler(subArgs);
}
|
|
65
|
+
|
|
66
|
+
// Run the CLI. Every failure is reported on stderr as "error: <detail>" and
// ends the process with exit status 1.
try {
  main();
} catch (err: unknown) {
  let detail: string;
  if (err instanceof CliError) {
    // Validation failures raised through die().
    detail = err.message;
  } else if (err instanceof Error && "status" in err) {
    // Child process failures from execFileSync: prefer the child's own stderr.
    const captured = (err as Error & { stderr?: Buffer }).stderr?.toString().trim();
    detail = captured || err.message;
  } else {
    // Unexpected errors.
    detail = err instanceof Error ? err.message : String(err);
  }
  process.stderr.write(`error: ${detail}\n`);
  process.exit(1);
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// Types matching the Rust serde serialization exactly.
|
|
2
|
+
//
|
|
3
|
+
// IMPORTANT naming conventions (matching Rust serde):
|
|
4
|
+
// - Action enum fields: snake_case (enum-level rename_all only affects variant names)
|
|
5
|
+
// - Measurement struct fields: camelCase (has its own rename_all = "camelCase")
|
|
6
|
+
// - MetricValue struct fields: camelCase (has its own rename_all = "camelCase")
|
|
7
|
+
// - Experiment state fields: camelCase (has its own rename_all = "camelCase")
|
|
8
|
+
// - Reproduction state fields: camelCase (has its own rename_all = "camelCase")
|
|
9
|
+
|
|
10
|
+
// --- Shared types (camelCase — these structs have rename_all = "camelCase") ---
|
|
11
|
+
|
|
12
|
+
/**
 * One measured result: count, median, spread and optional raw samples.
 * NOTE(review): the X1000 suffix presumably means values scaled by 1000 — confirm.
 */
export interface Measurement {
  n: number;
  medianX1000: number;
  stdX1000: number;
  samplesX1000?: number[]; // omitted when empty (skip_serializing_if)
}
|
|
18
|
+
|
|
19
|
+
/** Outcome recorded on a reproduction. */
export type Verdict = "confirmed" | "failed" | "inconclusive";
|
|
20
|
+
|
|
21
|
+
/** A named secondary metric with baseline/candidate measurements, a delta and a regression flag. */
export interface MetricValue {
  name: string;
  unit: string;
  baseline: Measurement;
  candidate: Measurement;
  deltaPctX100: number;
  regressed: boolean;
}
|
|
29
|
+
|
|
30
|
+
// --- State types (camelCase — Experiment has rename_all = "camelCase") ---
|
|
31
|
+
|
|
32
|
+
/**
 * State entry appended for each "reproduce" action. `author` and `timestamp`
 * are taken from the op that carried the action.
 */
export interface Reproduction {
  verdict: Verdict;
  runnerClass: string;
  baseline: Measurement;
  candidate: Measurement;
  deltaPctX100: number;
  author: Author;
  timestamp: number;
}
|
|
41
|
+
|
|
42
|
+
/** Identity of whoever authored an op, expressed as a DID. */
export interface Author {
  id: string; // "did:key:z6Mk..."
}
|
|
45
|
+
|
|
46
|
+
/**
 * Evaluated state of one experiment COB: the published fields plus the
 * reproductions accumulated from later ops.
 */
export interface Experiment {
  description?: string;
  base: string;
  oid: string;
  metricName: string;
  metricUnit: string;
  direction: string;
  runnerClass: string;
  os: string;
  cpu: string;
  baseline: Measurement;
  candidate: Measurement;
  deltaPctX100: number;
  buildOk: boolean;
  testsOk: boolean;
  sanitizersOk: boolean;
  agentSystem: string;
  agentModel: string;
  secondaryMetrics?: MetricValue[]; // left out when there are none
  reproductions: Reproduction[];
  author: Author;
  createdAt: number;
}
|
|
69
|
+
|
|
70
|
+
// --- Action types (snake_case for Action fields, camelCase for nested structs) ---
|
|
71
|
+
// The Rust enum has #[serde(tag = "type", rename_all = "camelCase")] but
|
|
72
|
+
// rename_all on an enum only affects variant names, NOT field names within variants.
|
|
73
|
+
// Field names stay as their Rust snake_case names.
|
|
74
|
+
|
|
75
|
+
/**
 * Root action that creates an experiment. Action-level fields are snake_case.
 * NOTE(review): buildPublishAction also emits a `parents` array that is not
 * declared here — confirm whether it belongs in this type.
 */
export interface PublishAction {
  type: "publish";
  description?: string;
  base: string;
  oid: string;
  metric_name: string;
  metric_unit: string;
  direction: string;
  runner_class: string;
  os?: string;
  cpu?: string;
  baseline: Measurement; // Measurement itself is camelCase
  candidate: Measurement;
  delta_pct_x100: number;
  build_ok: boolean;
  tests_ok: boolean;
  sanitizers_ok: boolean;
  agent_system: string;
  agent_model: string;
  secondary_metrics?: MetricValue[]; // MetricValue itself is camelCase
}
|
|
96
|
+
|
|
97
|
+
/** Follow-up action recording a reproduction. Action-level fields are snake_case. */
export interface ReproduceAction {
  type: "reproduce";
  verdict: Verdict;
  runner_class: string;
  baseline: Measurement;
  candidate: Measurement;
  delta_pct_x100: number;
  build_ok: boolean;
  tests_ok: boolean;
  notes?: string;
}
|
|
108
|
+
|
|
109
|
+
/** Any action an op may carry, discriminated by its `type` field. */
export type Action = PublishAction | ReproduceAction;
|
|
110
|
+
|
|
111
|
+
// --- Op structure matching radicle::cob::Op<Action> serialization ---
|
|
112
|
+
|
|
113
|
+
/** Manifest carried on each Op: a type name and a version number. */
export interface Manifest {
  typeName: string;
  version: number;
}
|
|
117
|
+
|
|
118
|
+
/**
 * One operation in a COB's history. State evaluation reads `actions`,
 * `author` and `timestamp`.
 */
export interface Op {
  id: string;
  actions: Action[];
  author: string; // public key string; may or may not already carry the "did:key:" prefix
  timestamp: number;
  parents: string[];
  related: string[];
  identity: string | null;
  manifest: Manifest;
}
|
|
128
|
+
|
|
129
|
+
/**
 * One request line of the external COB protocol: the current state, the op to
 * apply, and the ops to apply after it.
 */
export interface OpMessage {
  // Current state. handleOpMessage treats an empty object (and also
  // null/undefined) as "no state yet" and builds the state from `op`.
  value: Record<string, unknown>;
  op: Op;
  concurrent: Op[];
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "node16",
|
|
5
|
+
"moduleResolution": "node16",
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"declaration": true,
|
|
11
|
+
"sourceMap": false,
|
|
12
|
+
"skipLibCheck": true
|
|
13
|
+
},
|
|
14
|
+
"include": ["src"],
|
|
15
|
+
"exclude": ["src/__tests__"]
|
|
16
|
+
}
|