rad-experiment 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/cli/commands/list.d.ts +1 -0
- package/dist/cli/commands/list.js +35 -0
- package/dist/cli/commands/publish.d.ts +1 -0
- package/dist/cli/commands/publish.js +63 -0
- package/dist/cli/commands/reproduce.d.ts +1 -0
- package/dist/cli/commands/reproduce.js +45 -0
- package/dist/cli/commands/show.d.ts +1 -0
- package/dist/cli/commands/show.js +61 -0
- package/dist/cli/format.d.ts +9 -0
- package/dist/cli/format.js +21 -0
- package/dist/cli/helpers.d.ts +49 -0
- package/dist/cli/helpers.js +90 -0
- package/dist/cli/rad.d.ts +11 -0
- package/dist/cli/rad.js +64 -0
- package/dist/cob/actions.d.ts +35 -0
- package/dist/cob/actions.js +57 -0
- package/dist/cob/state.d.ts +7 -0
- package/dist/cob/state.js +97 -0
- package/dist/rad-cob-experiment.d.ts +2 -0
- package/dist/rad-cob-experiment.js +33 -0
- package/dist/rad-experiment.d.ts +2 -0
- package/dist/rad-experiment.js +74 -0
- package/dist/types.d.ts +102 -0
- package/dist/types.js +9 -0
- package/package.json +24 -0
- package/src/__tests__/actions.test.ts +122 -0
- package/src/__tests__/cob-protocol.test.ts +138 -0
- package/src/__tests__/fixtures.ts +119 -0
- package/src/__tests__/format.test.ts +55 -0
- package/src/__tests__/golden/publish-action.json +46 -0
- package/src/__tests__/golden/publish-minimal.json +25 -0
- package/src/__tests__/golden/publish-with-samples.json +38 -0
- package/src/__tests__/golden/reproduce-action.json +19 -0
- package/src/__tests__/golden/reproduce-minimal.json +18 -0
- package/src/__tests__/helpers.test.ts +138 -0
- package/src/__tests__/integration.test.ts +124 -0
- package/src/__tests__/serialization.test.ts +175 -0
- package/src/__tests__/state.test.ts +191 -0
- package/src/cli/commands/list.ts +45 -0
- package/src/cli/commands/publish.ts +68 -0
- package/src/cli/commands/reproduce.ts +52 -0
- package/src/cli/commands/show.ts +70 -0
- package/src/cli/format.ts +27 -0
- package/src/cli/helpers.ts +101 -0
- package/src/cli/rad.ts +87 -0
- package/src/cob/actions.ts +100 -0
- package/src/cob/state.ts +120 -0
- package/src/rad-cob-experiment.ts +39 -0
- package/src/rad-experiment.ts +85 -0
- package/src/types.ts +133 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
// Golden-file tests: verify TS action builders produce JSON identical to the Rust binary.
|
|
2
|
+
// The golden files in ./golden/ were generated by running the Rust rad-experiment CLI
|
|
3
|
+
// and extracting the raw action JSON via `rad cob log --format json`.
|
|
4
|
+
|
|
5
|
+
import { describe, it } from "node:test";
|
|
6
|
+
import assert from "node:assert/strict";
|
|
7
|
+
import { readFileSync } from "node:fs";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
9
|
+
import { dirname, join } from "node:path";
|
|
10
|
+
|
|
11
|
+
import { buildPublishAction, buildReproduceAction } from "../cob/actions.js";
|
|
12
|
+
import { makeMeasurement, makeMetricValue } from "./fixtures.js";
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
const goldenDir = join(__dirname, "golden");
|
|
16
|
+
|
|
17
|
+
/**
 * Read a golden fixture from the `golden` directory next to this test file
 * and return its parsed JSON content.
 *
 * @param name - File name of the fixture inside the golden directory.
 * @returns The parsed JSON value (not validated; callers cast individual fields).
 */
function loadGolden(name: string): Record<string, unknown> {
  const fixturePath = join(goldenDir, name);
  const rawText = readFileSync(fixturePath, "utf-8");
  return JSON.parse(rawText);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Produce a canonical JSON string for comparison.
 *
 * Every non-array object encountered during serialization is replaced by a
 * copy whose keys are inserted in sorted order, and any key named `parents`
 * is dropped (per the original note, `parents` is only present in TS-created
 * actions via the external COB path). Arrays and primitives pass through
 * unchanged.
 *
 * @param obj - Any JSON-serializable value.
 * @returns The canonical JSON text.
 */
function normalize(obj: unknown): string {
  return JSON.stringify(obj, (_key, value) => {
    const isKeyedObject =
      Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isKeyedObject) {
      return value;
    }

    // Rebuild the object with keys inserted in sorted order so that the
    // serialized output no longer depends on construction order.
    const ordered: Record<string, unknown> = {};
    Object.keys(value)
      .sort()
      .forEach((name) => {
        if (name === "parents") return;
        ordered[name] = value[name];
      });
    return ordered;
  });
}
|
|
37
|
+
|
|
38
|
+
// --- Publish: full (all optional fields present) ---
|
|
39
|
+
|
|
40
|
+
describe("golden: publish-action (all fields)", () => {
  // Fixture is loaded once when the suite is registered.
  const expected = loadGolden("publish-action.json");

  it("matches Rust output", () => {
    // Measurements are built first, in the same order the action uses them.
    const baseline = makeMeasurement();
    const candidate = makeMeasurement({ n: 5, medianX1000: 45200, stdX1000: 950 });
    const secondaryMetrics = [makeMetricValue()];

    const built = buildPublishAction({
      description: "SIMD vectorization in parser loop",
      base: expected.base as string,
      oid: expected.oid as string,
      metricName: "wall_time",
      metricUnit: "ms",
      direction: "lower_is_better",
      runnerClass: "arm64",
      os: "linux",
      cpu: "apple-m2",
      baseline,
      candidate,
      deltaPctX100: 2378,
      buildOk: true,
      testsOk: true,
      sanitizersOk: false,
      agentSystem: "claude-code",
      agentModel: "claude-opus-4-6",
      secondaryMetrics,
    });

    const actualJson = normalize(built);
    const expectedJson = normalize(expected);
    assert.equal(actualJson, expectedJson);
  });
});
|
|
67
|
+
|
|
68
|
+
// --- Publish: minimal (no description, no os/cpu, no secondary_metrics) ---
|
|
69
|
+
|
|
70
|
+
describe("golden: publish-minimal (skip_serializing_if paths)", () => {
  // Fixture is loaded once when the suite is registered.
  const expected = loadGolden("publish-minimal.json");

  it("matches Rust output — omitted fields absent", () => {
    const baseline = makeMeasurement({ n: 10, medianX1000: 50000, stdX1000: 0 });
    const candidate = makeMeasurement({ n: 10, medianX1000: 55000, stdX1000: 0 });

    // No description, empty os/cpu and no secondary metrics: the builder
    // output must still equal the fixture, which lacks those keys.
    const built = buildPublishAction({
      base: expected.base as string,
      oid: expected.oid as string,
      metricName: "throughput",
      metricUnit: "ops/s",
      direction: "higher_is_better",
      runnerClass: "amd64",
      os: "",
      cpu: "",
      baseline,
      candidate,
      deltaPctX100: 1000,
      buildOk: true,
      testsOk: true,
      sanitizersOk: false,
      agentSystem: "claude-code",
      agentModel: "claude-opus-4-6",
      secondaryMetrics: [],
    });

    const actualJson = normalize(built);
    const expectedJson = normalize(expected);
    assert.equal(actualJson, expectedJson);
  });

  it("has no description, os, cpu, or secondary_metrics keys", () => {
    // Sanity check on the fixture itself.
    for (const key of ["description", "os", "cpu", "secondary_metrics"]) {
      assert.equal(key in expected, false);
    }
  });
});
|
|
103
|
+
|
|
104
|
+
// --- Publish: with samplesX1000 in Measurements ---
|
|
105
|
+
|
|
106
|
+
describe("golden: publish-with-samples (nested Measurement samples)", () => {
  // Fixture is loaded once when the suite is registered.
  const expected = loadGolden("publish-with-samples.json");

  it("matches Rust output — samplesX1000 preserved", () => {
    const baseline = makeMeasurement({
      n: 3,
      medianX1000: 59340,
      stdX1000: 1200,
      samplesX1000: [58000, 59340, 60680],
    });
    const candidate = makeMeasurement({
      n: 3,
      medianX1000: 45200,
      stdX1000: 950,
      samplesX1000: [44500, 45200, 45900],
    });

    const built = buildPublishAction({
      description: "With per-run samples",
      base: expected.base as string,
      oid: expected.oid as string,
      metricName: "wall_time",
      metricUnit: "ms",
      direction: "lower_is_better",
      runnerClass: "arm64",
      os: "linux",
      cpu: "apple-m2",
      baseline,
      candidate,
      deltaPctX100: 2378,
      buildOk: true,
      testsOk: true,
      sanitizersOk: false,
      agentSystem: "claude-code",
      agentModel: "claude-opus-4-6",
      secondaryMetrics: [],
    });

    const actualJson = normalize(built);
    const expectedJson = normalize(expected);
    assert.equal(actualJson, expectedJson);
  });
});
|
|
133
|
+
|
|
134
|
+
// --- Reproduce: full (with notes) ---
|
|
135
|
+
|
|
136
|
+
describe("golden: reproduce-action (with notes)", () => {
  // Fixture is loaded once when the suite is registered.
  const expected = loadGolden("reproduce-action.json");

  it("matches Rust output", () => {
    const baseline = makeMeasurement({ n: 10, medianX1000: 59000, stdX1000: 800 });
    const candidate = makeMeasurement({ n: 10, medianX1000: 45500, stdX1000: 600 });

    const built = buildReproduceAction({
      verdict: "confirmed",
      runnerClass: "amd64",
      baseline,
      candidate,
      deltaPctX100: 2288,
      buildOk: true,
      testsOk: true,
      notes: "Reproduced on CI",
    });

    const actualJson = normalize(built);
    const expectedJson = normalize(expected);
    assert.equal(actualJson, expectedJson);
  });
});
|
|
153
|
+
|
|
154
|
+
// --- Reproduce: minimal (no notes) ---
|
|
155
|
+
|
|
156
|
+
describe("golden: reproduce-minimal (no notes)", () => {
  // Fixture is loaded once when the suite is registered.
  const expected = loadGolden("reproduce-minimal.json");

  it("matches Rust output — notes absent", () => {
    const baseline = makeMeasurement({ n: 5, medianX1000: 50500, stdX1000: 0 });
    const candidate = makeMeasurement({ n: 5, medianX1000: 50800, stdX1000: 0 });

    // `notes` is deliberately not supplied.
    const built = buildReproduceAction({
      verdict: "inconclusive",
      runnerClass: "arm64",
      baseline,
      candidate,
      deltaPctX100: 59,
      buildOk: true,
      testsOk: true,
    });

    const actualJson = normalize(built);
    const expectedJson = normalize(expected);
    assert.equal(actualJson, expectedJson);
  });

  it("has no notes key", () => {
    // Sanity check on the fixture itself.
    const hasNotes = "notes" in expected;
    assert.equal(hasNotes, false);
  });
});
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
|
|
4
|
+
import { fromRoot, applyOp, handleOpMessage } from "../cob/state.js";
|
|
5
|
+
import {
|
|
6
|
+
makeOp,
|
|
7
|
+
makeRawPublishAction,
|
|
8
|
+
makeRawReproduceAction,
|
|
9
|
+
makeExperiment,
|
|
10
|
+
SAMPLE_AUTHOR_KEY,
|
|
11
|
+
} from "./fixtures.js";
|
|
12
|
+
|
|
13
|
+
describe("fromRoot", () => {
  // Error shape expected whenever the root op does not start with a publish action.
  const notPublishError = { message: "the first action must be of type `publish`" };

  it("creates Experiment from a publish op", () => {
    const experiment = fromRoot(makeOp());

    assert.equal(experiment.metricName, "wall_time");
    assert.equal(experiment.metricUnit, "ms");
    assert.equal(experiment.direction, "lower_is_better");
    assert.equal(experiment.runnerClass, "arm64");
    assert.equal(experiment.deltaPctX100, 2378);
    assert.equal(experiment.buildOk, true);
    assert.equal(experiment.agentSystem, "claude-code");
    assert.deepEqual(experiment.reproductions, []);
  });

  it("prefixes author with did:key:", () => {
    const rootOp = makeOp({ author: "z6Mktest" });
    const experiment = fromRoot(rootOp);
    assert.equal(experiment.author.id, "did:key:z6Mktest");
  });

  it("does not double-prefix did:key:", () => {
    const rootOp = makeOp({ author: "did:key:z6Mktest" });
    const experiment = fromRoot(rootOp);
    assert.equal(experiment.author.id, "did:key:z6Mktest");
  });

  it("preserves description when present", () => {
    const publish = makeRawPublishAction({ description: "hello" });
    const experiment = fromRoot(makeOp({ actions: [publish] }));
    assert.equal(experiment.description, "hello");
  });

  it("omits description when absent", () => {
    const publish = makeRawPublishAction();
    delete publish.description;

    const experiment = fromRoot(makeOp({ actions: [publish] }));
    assert.equal("description" in experiment, false);
  });

  it("reads snake_case fields from Rust-created actions", () => {
    const publish = makeRawPublishAction({ metric_name: "throughput", runner_class: "x86" });
    const experiment = fromRoot(makeOp({ actions: [publish] }));

    assert.equal(experiment.metricName, "throughput");
    assert.equal(experiment.runnerClass, "x86");
  });

  it("reads camelCase fields from legacy TS-created actions", () => {
    const publish = makeRawPublishAction();
    const legacy = publish as Record<string, unknown>;

    // Replace snake_case with camelCase (simulate legacy format).
    const snakeCaseKeys = [
      "metric_name",
      "runner_class",
      "delta_pct_x100",
      "build_ok",
      "tests_ok",
      "sanitizers_ok",
      "agent_system",
      "agent_model",
    ];
    for (const key of snakeCaseKeys) {
      delete legacy[key];
    }
    Object.assign(legacy, {
      metricName: "throughput",
      runnerClass: "x86",
      deltaPctX100: 500,
      buildOk: true,
      testsOk: true,
      sanitizersOk: false,
      agentSystem: "claude-code",
      agentModel: "claude-opus-4-6",
    });

    const experiment = fromRoot(makeOp({ actions: [publish] }));
    assert.equal(experiment.metricName, "throughput");
    assert.equal(experiment.runnerClass, "x86");
    assert.equal(experiment.deltaPctX100, 500);
  });

  it("propagates secondary_metrics when non-empty", () => {
    const publish = makeRawPublishAction({
      secondary_metrics: [
        {
          name: "rss",
          unit: "KB",
          baseline: { n: 1, medianX1000: 100, stdX1000: 0 },
          candidate: { n: 1, medianX1000: 90, stdX1000: 0 },
          deltaPctX100: -1000,
          regressed: false,
        },
      ],
    });

    const experiment = fromRoot(makeOp({ actions: [publish] }));
    assert.equal(experiment.secondaryMetrics?.length, 1);
    assert.equal(experiment.secondaryMetrics?.[0].name, "rss");
  });

  it("omits secondaryMetrics when empty", () => {
    const publish = makeRawPublishAction({ secondary_metrics: [] });
    const experiment = fromRoot(makeOp({ actions: [publish] }));
    assert.equal("secondaryMetrics" in experiment, false);
  });

  it("applies remaining actions in root op", () => {
    const publish = makeRawPublishAction();
    const reproduce = makeRawReproduceAction();

    const experiment = fromRoot(makeOp({ actions: [publish, reproduce] }));
    assert.equal(experiment.reproductions.length, 1);
    assert.equal(experiment.reproductions[0].verdict, "confirmed");
  });

  it("throws on non-publish first action", () => {
    const build = () => fromRoot(makeOp({ actions: [makeRawReproduceAction()] }));
    assert.throws(build, notPublishError);
  });

  it("throws on empty actions", () => {
    const build = () => fromRoot(makeOp({ actions: [] }));
    assert.throws(build, notPublishError);
  });
});
|
|
120
|
+
|
|
121
|
+
describe("applyOp", () => {
  it("adds reproduction to experiment", () => {
    const experiment = makeExperiment();
    const reproduceOp = makeOp({ actions: [makeRawReproduceAction()] });

    applyOp(experiment, reproduceOp);

    assert.equal(experiment.reproductions.length, 1);
    const [first] = experiment.reproductions;
    assert.equal(first.verdict, "confirmed");
    assert.equal(first.runnerClass, "amd64");
  });

  it("ignores publish actions (no-op)", () => {
    const experiment = makeExperiment();
    const publishOp = makeOp({ actions: [makeRawPublishAction()] });

    applyOp(experiment, publishOp);

    assert.equal(experiment.reproductions.length, 0);
  });

  it("applies multiple actions in one op", () => {
    const experiment = makeExperiment();
    const confirmed = makeRawReproduceAction({ verdict: "confirmed" });
    const failed = makeRawReproduceAction({ verdict: "failed" });

    applyOp(experiment, makeOp({ actions: [confirmed, failed] }));

    // Reproductions are expected in the same order as the actions.
    assert.equal(experiment.reproductions.length, 2);
    assert.equal(experiment.reproductions[0].verdict, "confirmed");
    assert.equal(experiment.reproductions[1].verdict, "failed");
  });
});
|
|
149
|
+
|
|
150
|
+
describe("handleOpMessage", () => {
  it("creates experiment from empty value (root)", () => {
    const state = handleOpMessage({
      value: {},
      op: makeOp(),
      concurrent: [],
    });

    assert.equal(state.metricName, "wall_time");
    assert.deepEqual(state.reproductions, []);
  });

  it("creates experiment from null value", () => {
    // `null` is forced through the type system to exercise the runtime path.
    const noState = null as unknown as Record<string, unknown>;
    const state = handleOpMessage({
      value: noState,
      op: makeOp(),
      concurrent: [],
    });

    assert.equal(state.metricName, "wall_time");
  });

  it("applies op to existing state", () => {
    const current = makeExperiment() as unknown as Record<string, unknown>;
    const reproduceOp = makeOp({ actions: [makeRawReproduceAction()] });

    const state = handleOpMessage({
      value: current,
      op: reproduceOp,
      concurrent: [],
    });

    assert.equal(state.reproductions.length, 1);
  });

  it("applies concurrent ops", () => {
    const confirmedOp = makeOp({ actions: [makeRawReproduceAction({ verdict: "confirmed" })] });
    const failedOp = makeOp({ actions: [makeRawReproduceAction({ verdict: "failed" })] });

    const state = handleOpMessage({
      value: {},
      op: makeOp(),
      concurrent: [confirmedOp, failedOp],
    });

    assert.equal(state.reproductions.length, 2);
  });
});
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { parseArgs } from "node:util";
|
|
2
|
+
import { deltaDisplay, shortId, confirmedCount } from "../format.js";
|
|
3
|
+
import { getRepoId, cobList, cobShow } from "../rad.js";
|
|
4
|
+
import type { Experiment } from "../../types.js";
|
|
5
|
+
|
|
6
|
+
/**
 * `list` command: print one summary line per experiment, ordered by
 * descending `createdAt`.
 *
 * Flags: `--repo`/`-r` is forwarded to `getRepoId`; `--reproduced` hides
 * experiments with no confirmed reproduction; `--unverified` hides
 * experiments that have at least one. Prints "No experiments found." when
 * nothing passes the filters.
 *
 * @param args - Command-line arguments following the subcommand name.
 */
export function cmdList(args: string[]): void {
  const parsed = parseArgs({
    args,
    options: {
      repo: { type: "string", short: "r" },
      reproduced: { type: "boolean", default: false },
      unverified: { type: "boolean", default: false },
    },
    strict: true,
  });
  const flags = parsed.values;

  const rid = getRepoId(flags.repo as string | undefined);

  // Load every experiment up front so they can be sorted before printing.
  const entries: { id: string; exp: Experiment }[] = [...cobList(rid)].map((id) => ({
    id,
    exp: cobShow(rid, id),
  }));
  entries.sort((left, right) => right.exp.createdAt - left.exp.createdAt);

  let printed = 0;
  for (const entry of entries) {
    const confirmed = confirmedCount(entry.exp);

    const hiddenByReproduced = flags.reproduced && confirmed === 0;
    const hiddenByUnverified = flags.unverified && confirmed > 0;
    if (hiddenByReproduced || hiddenByUnverified) {
      continue;
    }

    printed += 1;
    const suffix = confirmed > 0 ? ` [${confirmed} verified]` : "";
    console.log(
      `${shortId(entry.id)}  ${entry.exp.metricName}  ${deltaDisplay(entry.exp.deltaPctX100)}${suffix}`,
    );
  }

  if (printed === 0) {
    console.log("No experiments found.");
  }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { parseArgs } from "node:util";
|
|
2
|
+
import { buildPublishAction } from "../../cob/actions.js";
|
|
3
|
+
import { MEASUREMENT_OPTIONS, buildMeasurement, parseSecondary, requireArg, requireInt } from "../helpers.js";
|
|
4
|
+
import { deltaDisplay, measurementDisplay } from "../format.js";
|
|
5
|
+
import { getRepoId, cobCreate } from "../rad.js";
|
|
6
|
+
|
|
7
|
+
/**
 * `publish` command: build a publish action from command-line flags, create
 * a new experiment object through `cobCreate`, and print a short summary.
 *
 * `base`, `head`, `metric`, `unit`, `direction`, `runner` and `delta` are
 * read through `requireArg`/`requireInt`; measurements come from
 * `buildMeasurement`. `buildOk`/`testsOk` are always sent as `true` and
 * `sanitizersOk` as `false` — there are no flags for them here.
 *
 * @param args - Command-line arguments following the subcommand name.
 */
export function cmdPublish(args: string[]): void {
  const { values: opts } = parseArgs({
    args,
    options: {
      repo: { type: "string", short: "r" },
      description: { type: "string", short: "d" },
      base: { type: "string" },
      head: { type: "string" },
      metric: { type: "string" },
      unit: { type: "string" },
      direction: { type: "string" },
      runner: { type: "string" },
      ...MEASUREMENT_OPTIONS,
      secondary: { type: "string", multiple: true, default: [] },
      "agent-system": { type: "string", default: "claude-code" },
      "agent-model": { type: "string", default: "claude-opus-4-6" },
      os: { type: "string", default: "" },
      cpu: { type: "string", default: "" },
    },
    strict: true,
  });

  const rid = getRepoId(opts.repo as string | undefined);

  // Inputs are read in a fixed order, which also fixes the order in which
  // the helpers get a chance to reject a missing or malformed flag.
  const base = requireArg(opts, "base");
  const head = requireArg(opts, "head");
  const metric = requireArg(opts, "metric");
  const unit = requireArg(opts, "unit");
  const direction = requireArg(opts, "direction");
  const runner = requireArg(opts, "runner");
  const delta = requireInt(opts, "delta");
  const baseline = buildMeasurement(opts, "baseline");
  const candidate = buildMeasurement(opts, "candidate");

  const rawSecondary = (opts.secondary ?? []) as string[];
  const secondaryMetrics = rawSecondary.map(parseSecondary);

  const publishAction = buildPublishAction({
    description: (opts.description as string) ?? undefined,
    base,
    oid: head,
    metricName: metric,
    metricUnit: unit,
    direction,
    runnerClass: runner,
    os: (opts.os as string) ?? "",
    cpu: (opts.cpu as string) ?? "",
    baseline,
    candidate,
    deltaPctX100: delta,
    buildOk: true,
    testsOk: true,
    sanitizersOk: false,
    agentSystem: (opts["agent-system"] as string) ?? "claude-code",
    agentModel: (opts["agent-model"] as string) ?? "claude-opus-4-6",
    secondaryMetrics,
  });

  const objectId = cobCreate(rid, [publishAction], "Publish experiment");

  const summary = [
    `Experiment published: ${objectId}`,
    `  metric:    ${metric} ${deltaDisplay(delta)}`,
    `  baseline:  ${measurementDisplay(baseline, unit)}`,
    `  candidate: ${measurementDisplay(candidate, unit)}`,
  ];
  for (const line of summary) {
    console.log(line);
  }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { parseArgs } from "node:util";
|
|
2
|
+
import { buildReproduceAction } from "../../cob/actions.js";
|
|
3
|
+
import { MEASUREMENT_OPTIONS, VERDICTS, buildMeasurement, die, requireArg, requireInt } from "../helpers.js";
|
|
4
|
+
import { shortId } from "../format.js";
|
|
5
|
+
import { getRepoId, cobUpdate } from "../rad.js";
|
|
6
|
+
import type { Verdict } from "../../types.js";
|
|
7
|
+
|
|
8
|
+
/**
 * `reproduce` command: attach a reproduction to an existing experiment.
 *
 * The first positional argument is the experiment ID. `--verdict` is
 * lower-cased and must be one of `VERDICTS`; `--runner`, `delta` and the
 * measurement flags are read through the shared helpers. `buildOk` and
 * `testsOk` are always sent as `true` — there are no flags for them here.
 *
 * @param args - Command-line arguments following the subcommand name.
 */
export function cmdReproduce(args: string[]): void {
  const parsed = parseArgs({
    args,
    options: {
      repo: { type: "string", short: "r" },
      verdict: { type: "string" },
      runner: { type: "string" },
      ...MEASUREMENT_OPTIONS,
      notes: { type: "string" },
    },
    allowPositionals: true,
    strict: true,
  });
  const opts = parsed.values;
  const targets = parsed.positionals;

  if (targets.length === 0) die("missing experiment ID");
  const id = targets[0];
  const rid = getRepoId(opts.repo as string | undefined);

  // Verdicts are matched case-insensitively against the known list.
  const requested = requireArg(opts, "verdict").toLowerCase();
  const isKnownVerdict = VERDICTS.includes(requested as Verdict);
  if (!isKnownVerdict) {
    die(`Invalid verdict: unknown verdict: ${requested}`);
  }
  const verdict = requested as Verdict;

  const runner = requireArg(opts, "runner");
  const delta = requireInt(opts, "delta");
  const baseline = buildMeasurement(opts, "baseline");
  const candidate = buildMeasurement(opts, "candidate");

  const reproduceAction = buildReproduceAction({
    verdict,
    runnerClass: runner,
    baseline,
    candidate,
    deltaPctX100: delta,
    buildOk: true,
    testsOk: true,
    notes: (opts.notes as string) ?? undefined,
  });

  cobUpdate(rid, id, [reproduceAction], "Reproduce");

  console.log(`Reproduction added to ${shortId(id)}`);
  console.log(`  verdict: ${verdict}`);
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { parseArgs } from "node:util";
|
|
2
|
+
import { die } from "../helpers.js";
|
|
3
|
+
import { deltaDisplay, measurementDisplay } from "../format.js";
|
|
4
|
+
import { getRepoId, cobShow } from "../rad.js";
|
|
5
|
+
import type { Experiment } from "../../types.js";
|
|
6
|
+
|
|
7
|
+
/**
 * `show` command: print a single experiment, either as pretty-printed JSON
 * (`--json`) or as a human-readable summary followed by its reproductions.
 *
 * The first positional argument is the experiment ID; `--repo`/`-r` is
 * forwarded to `getRepoId`. Any error thrown by `cobShow` is reported as
 * "Experiment not found".
 *
 * @param args - Command-line arguments following the subcommand name.
 */
export function cmdShow(args: string[]): void {
  const { values, positionals } = parseArgs({
    args,
    options: {
      repo: { type: "string", short: "r" },
      json: { type: "boolean", default: false },
    },
    allowPositionals: true,
    strict: true,
  });

  if (positionals.length === 0) die("missing experiment ID");
  const id = positionals[0];
  const rid = getRepoId(values.repo as string | undefined);

  let exp: Experiment;
  try {
    exp = cobShow(rid, id);
  } catch {
    die(`Experiment not found: ${id}`);
  }

  if (values.json) {
    console.log(JSON.stringify(exp, null, 2));
  } else {
    console.log(`Experiment ${id}`);
    if (exp.description) {
      console.log();
      console.log(`  ${exp.description}`);
    }
    console.log();
    console.log(`  base:      ${exp.base}`);
    console.log(`  head:      ${exp.oid}`);
    console.log();
    console.log(`  metric:    ${exp.metricName} (${exp.metricUnit})`);
    console.log(`  direction: ${exp.direction}`);
    console.log();
    console.log(
      `  baseline:  ${measurementDisplay(exp.baseline, exp.metricUnit)} (n=${exp.baseline.n})`,
    );
    console.log(
      `  candidate: ${measurementDisplay(exp.candidate, exp.metricUnit)} (n=${exp.candidate.n})`,
    );
    console.log(`  delta:     ${deltaDisplay(exp.deltaPctX100)}`);
    console.log();
    console.log(`  runner:    ${exp.runnerClass} (${exp.os}, ${exp.cpu})`);
    console.log(`  build:     ${exp.buildOk ? "ok" : "FAIL"}`);
    console.log(`  tests:     ${exp.testsOk ? "ok" : "FAIL"}`);
    console.log(`  agent:     ${exp.agentSystem}/${exp.agentModel}`);
    console.log(`  author:    ${exp.author.id}`);

    if (exp.reproductions && exp.reproductions.length > 0) {
      console.log();
      console.log(`  Reproductions (${exp.reproductions.length}):`);
      for (const r of exp.reproductions) {
        // Use the shared formatter so small negative deltas keep their sign:
        // a hand-rolled `Math.trunc(d / 100)` yields -0 for d in (-100, 0),
        // which prints as "0" and turns e.g. -50 into "0.50%".
        console.log(
          `    ${r.verdict} by ${r.author.id} on ${r.runnerClass} (${deltaDisplay(r.deltaPctX100)})`,
        );
      }
    }
  }
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Display formatting helpers — mirrors the Rust Display impls.
|
|
2
|
+
|
|
3
|
+
import type { Experiment, Measurement } from "../types.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Render a delta given in hundredths of a percent as a signed percentage
 * with two decimals, e.g. `547` -> "+5.47%" and `-78` -> "-0.78%".
 * Zero is rendered with a "+" sign.
 *
 * @param deltaPctX100 - Delta in hundredths of a percent.
 * @returns The formatted percentage string.
 */
export function deltaDisplay(deltaPctX100: number): string {
  const prefix = deltaPctX100 >= 0 ? "+" : "-";
  const magnitude = Math.abs(deltaPctX100);
  const wholePercent = Math.floor(magnitude / 100);
  const hundredths = String(magnitude % 100).padStart(2, "0");
  return `${prefix}${wholePercent}.${hundredths}%`;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Render a measurement's median, stored in thousandths of `unit`, as a
 * decimal with exactly three fractional digits followed by the unit,
 * e.g. `59340` -> "59.340 ms" and `59034` -> "59.034 ms".
 *
 * The fractional part is zero-padded (otherwise 59034 would print as
 * "59.34", which reads as 59.340), and negative medians are formatted from
 * their magnitude with a leading "-" (flooring the signed value would turn
 * -1500 into "-2.500").
 *
 * @param m - Measurement whose `medianX1000` is displayed.
 * @param unit - Unit label appended after a single space.
 * @returns The formatted string.
 */
export function measurementDisplay(m: Measurement, unit: string): string {
  const sign = m.medianX1000 < 0 ? "-" : "";
  const magnitude = Math.abs(m.medianX1000);
  const whole = Math.trunc(magnitude / 1000);
  const frac = String(magnitude % 1000).padStart(3, "0");
  return `${sign}${whole}.${frac} ${unit}`;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Abbreviate an ID to its first seven characters. IDs shorter than seven
 * characters are returned unchanged.
 *
 * @param id - Full identifier string.
 * @returns The leading (at most) seven characters of `id`.
 */
export function shortId(id: string): string {
  const SHORT_LENGTH = 7;
  return id.slice(0, SHORT_LENGTH);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Count the reproductions of an experiment whose verdict is exactly
 * "confirmed".
 *
 * @param exp - Experiment whose `reproductions` list is inspected.
 * @returns Number of confirmed reproductions (0 for an empty list).
 */
export function confirmedCount(exp: Experiment): number {
  return exp.reproductions.reduce(
    (total, repro) => (repro.verdict === "confirmed" ? total + 1 : total),
    0,
  );
}
|