rad-experiment 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +87 -0
  2. package/dist/cli/commands/list.d.ts +1 -0
  3. package/dist/cli/commands/list.js +35 -0
  4. package/dist/cli/commands/publish.d.ts +1 -0
  5. package/dist/cli/commands/publish.js +63 -0
  6. package/dist/cli/commands/reproduce.d.ts +1 -0
  7. package/dist/cli/commands/reproduce.js +45 -0
  8. package/dist/cli/commands/show.d.ts +1 -0
  9. package/dist/cli/commands/show.js +61 -0
  10. package/dist/cli/format.d.ts +9 -0
  11. package/dist/cli/format.js +21 -0
  12. package/dist/cli/helpers.d.ts +49 -0
  13. package/dist/cli/helpers.js +90 -0
  14. package/dist/cli/rad.d.ts +11 -0
  15. package/dist/cli/rad.js +64 -0
  16. package/dist/cob/actions.d.ts +35 -0
  17. package/dist/cob/actions.js +57 -0
  18. package/dist/cob/state.d.ts +7 -0
  19. package/dist/cob/state.js +97 -0
  20. package/dist/rad-cob-experiment.d.ts +2 -0
  21. package/dist/rad-cob-experiment.js +33 -0
  22. package/dist/rad-experiment.d.ts +2 -0
  23. package/dist/rad-experiment.js +74 -0
  24. package/dist/types.d.ts +102 -0
  25. package/dist/types.js +9 -0
  26. package/package.json +24 -0
  27. package/src/__tests__/actions.test.ts +122 -0
  28. package/src/__tests__/cob-protocol.test.ts +138 -0
  29. package/src/__tests__/fixtures.ts +119 -0
  30. package/src/__tests__/format.test.ts +55 -0
  31. package/src/__tests__/golden/publish-action.json +46 -0
  32. package/src/__tests__/golden/publish-minimal.json +25 -0
  33. package/src/__tests__/golden/publish-with-samples.json +38 -0
  34. package/src/__tests__/golden/reproduce-action.json +19 -0
  35. package/src/__tests__/golden/reproduce-minimal.json +18 -0
  36. package/src/__tests__/helpers.test.ts +138 -0
  37. package/src/__tests__/integration.test.ts +124 -0
  38. package/src/__tests__/serialization.test.ts +175 -0
  39. package/src/__tests__/state.test.ts +191 -0
  40. package/src/cli/commands/list.ts +45 -0
  41. package/src/cli/commands/publish.ts +68 -0
  42. package/src/cli/commands/reproduce.ts +52 -0
  43. package/src/cli/commands/show.ts +70 -0
  44. package/src/cli/format.ts +27 -0
  45. package/src/cli/helpers.ts +101 -0
  46. package/src/cli/rad.ts +87 -0
  47. package/src/cob/actions.ts +100 -0
  48. package/src/cob/state.ts +120 -0
  49. package/src/rad-cob-experiment.ts +39 -0
  50. package/src/rad-experiment.ts +85 -0
  51. package/src/types.ts +133 -0
  52. package/tsconfig.json +16 -0
@@ -0,0 +1,97 @@
1
+ // State evaluation: applies actions to build Experiment state.
2
+ // Mirrors the Rust from_root / apply_action / op logic in lib.rs.
3
+ //
4
+ // Actions arrive with snake_case field names (matching Rust serde).
5
+ // State output uses camelCase field names (matching Experiment struct serde).
6
/**
 * Wrap a raw public key string in an Author record.
 *
 * The returned `id` always carries the `did:key:` scheme: a key that already
 * starts with it is used unchanged, any other key gets the scheme prepended.
 */
function authorFromKey(key) {
    if (key.startsWith("did:key:")) {
        return { id: key };
    }
    return { id: `did:key:${key}` };
}
11
/**
 * Convert a snake_case name to camelCase.
 *
 * Every underscore that is directly followed by a lowercase ASCII letter or a
 * digit is dropped and that following character is upper-cased; all other
 * characters (including underscores not followed by such a character) are
 * left as they are.
 */
function snakeToCamel(s) {
    const upperCaseCaptured = (_whole, captured) => captured.toUpperCase();
    return s.replace(/_([a-z0-9])/g, upperCaseCaptured);
}
14
/**
 * Look up a field on an action object by its snake_case name.
 *
 * If the snake_case key is present on the action its value is returned as-is
 * (even when that value is null or undefined). Otherwise the camelCase
 * spelling of the same name is read instead, which covers both Rust-created
 * (snake_case) and legacy TS-created (camelCase) COBs.
 */
function field(action, snakeCase) {
    const key = snakeCase in action ? snakeCase : snakeToCamel(snakeCase);
    return action[key];
}
23
/**
 * Return the `actions` list of an operation exactly as stored on it
 * (the same array instance, not a copy).
 */
function rawActions(op) {
    const { actions } = op;
    return actions;
}
26
/**
 * Apply one action to an experiment, mutating the experiment in place.
 *
 * Only `reproduce` actions have an effect: they append a reproduction record
 * to `exp.reproductions`. Actions of any other type are ignored.
 *
 * `runner_class` falls back to "" and `delta_pct_x100` to 0 when absent; the
 * record's author and timestamp are the ones passed in by the caller.
 */
function applyAction(exp, action, author, timestamp) {
    if (action.type === "reproduce") {
        // Key order is kept stable because the state is serialized as JSON.
        const reproduction = {
            verdict: action.verdict,
            runnerClass: field(action, "runner_class") ?? "",
            baseline: action.baseline,
            candidate: action.candidate,
            deltaPctX100: field(action, "delta_pct_x100") ?? 0,
            author: authorFromKey(author),
            timestamp: timestamp,
        };
        exp.reproductions.push(reproduction);
    }
}
40
/**
 * Build the initial Experiment state from a COB's root operation.
 * Mirrors Experiment::from_root.
 *
 * The first action of the op must be a `publish`; its fields seed the state
 * (read snake_case first, camelCase as a fallback, with defaults for missing
 * values). `description` and `secondaryMetrics` only appear on the result
 * when the action provides a non-null description / a non-empty list. Any
 * remaining actions of the same op are then applied with the op's author and
 * timestamp.
 *
 * @throws Error when the op has no actions or its first action is not `publish`.
 */
export function fromRoot(op) {
    const all = [...rawActions(op)];
    const publish = all[0];
    if (!publish || publish.type !== "publish") {
        throw new Error("the first action must be of type `publish`");
    }
    // Field lookup on the publish action with a fallback for missing values.
    const read = (name, fallback) => field(publish, name) ?? fallback;
    const description = publish.description;
    const secondaryMetrics = read("secondary_metrics", []);
    // Key order is kept stable because the state is serialized as JSON.
    const exp = {
        ...(description == null ? {} : { description }),
        base: publish.base,
        oid: publish.oid,
        metricName: read("metric_name", ""),
        metricUnit: read("metric_unit", ""),
        direction: publish.direction,
        runnerClass: read("runner_class", ""),
        os: publish.os ?? "",
        cpu: publish.cpu ?? "",
        baseline: publish.baseline,
        candidate: publish.candidate,
        deltaPctX100: read("delta_pct_x100", 0),
        buildOk: read("build_ok", true),
        testsOk: read("tests_ok", true),
        sanitizersOk: read("sanitizers_ok", false),
        agentSystem: read("agent_system", ""),
        agentModel: read("agent_model", ""),
        ...(secondaryMetrics.length > 0 ? { secondaryMetrics } : {}),
        reproductions: [],
        author: authorFromKey(op.author),
        createdAt: op.timestamp,
    };
    all.slice(1).forEach((followUp) => {
        applyAction(exp, followUp, op.author, op.timestamp);
    });
    return exp;
}
77
/**
 * Apply a follow-up operation to an existing experiment.
 *
 * Each action of the op is applied in order, attributed to the op's author
 * and timestamp. The experiment is mutated in place; nothing is returned.
 */
export function applyOp(exp, op) {
    const actions = rawActions(op);
    for (const current of actions) {
        applyAction(exp, current, op.author, op.timestamp);
    }
}
83
/**
 * Process one OpMessage from Radicle's external COB protocol and return the
 * resulting experiment state.
 *
 * When the message carries no prior state (`value` is null/undefined or an
 * object without own enumerable keys) the state is created from `msg.op` via
 * fromRoot. Otherwise `msg.value` itself is used as the state and `msg.op`
 * is applied to it in place. In both cases every op in `msg.concurrent` is
 * applied afterwards, in order.
 */
export function handleOpMessage(msg) {
    const previous = msg.value;
    const startsFresh = previous == null ||
        (typeof previous === "object" && Object.keys(previous).length === 0);
    let exp;
    if (startsFresh) {
        exp = fromRoot(msg.op);
    }
    else {
        exp = previous;
        applyOp(exp, msg.op);
    }
    for (const concurrentOp of msg.concurrent) {
        applyOp(exp, concurrentOp);
    }
    return exp;
}
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env node
2
+ // rad-cob-experiment: External COB helper for the Radicle external COB protocol.
3
+ //
4
+ // Protocol (JSON Lines on stdin/stdout):
5
+ // 1. Read one JSON Line: { value, op, concurrent }
6
+ // 2. Apply op (and concurrent ops) to value -> new state
7
+ // 3. Write new state as JSON Line to stdout
8
+ // 4. Repeat until stdin closes
9
+ // 5. Exit 0 on success, non-zero on failure
10
+ import { createInterface } from "node:readline";
11
+ import { handleOpMessage } from "./cob/state.js";
12
/**
 * Report a failure and terminate the helper.
 *
 * Writes one line to stderr, prefixed with the program name, using the
 * error's message (or the stringified value for non-Error throwables), then
 * exits the process with status 1.
 */
function fatal(err) {
    let msg;
    if (err instanceof Error) {
        msg = err.message;
    }
    else {
        msg = String(err);
    }
    const line = `rad-cob-experiment: ${msg}\n`;
    process.stderr.write(line);
    process.exit(1);
}
17
/**
 * Serve the external COB protocol over stdin/stdout.
 *
 * Reads stdin line by line until it closes. Lines that are empty after
 * trimming are skipped; every other line is parsed as a JSON OpMessage,
 * evaluated with handleOpMessage, and the resulting state is written to
 * stdout as one JSON line. Any error while parsing, evaluating or writing is
 * passed to fatal(), which ends the process with a non-zero status.
 */
async function main() {
    const input = createInterface({ input: process.stdin });
    for await (const rawLine of input) {
        const payload = rawLine.trim();
        if (payload !== "") {
            try {
                const nextState = handleOpMessage(JSON.parse(payload));
                process.stdout.write(JSON.stringify(nextState) + "\n");
            }
            catch (err) {
                fatal(err);
            }
        }
    }
}
// Failures outside the per-line handler (e.g. stream errors) also end up in fatal().
main().catch(fatal);
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env node
2
+ // rad-experiment CLI — thin wrapper around rad cob commands.
3
+ // Subcommands live in cli/commands/; shared helpers in cli/helpers.ts.
4
+ import { CliError } from "./cli/helpers.js";
5
+ import { cmdPublish } from "./cli/commands/publish.js";
6
+ import { cmdList } from "./cli/commands/list.js";
7
+ import { cmdShow } from "./cli/commands/show.js";
8
+ import { cmdReproduce } from "./cli/commands/reproduce.js";
9
/**
 * Dispatch table: maps each subcommand name accepted on the command line to
 * the handler function that implements it. The handler receives the
 * arguments that follow the subcommand name.
 */
const COMMANDS = {
    publish: cmdPublish,
    list: cmdList,
    show: cmdShow,
    reproduce: cmdReproduce,
};
15
/**
 * Print the top-level usage text to stderr and terminate the process with
 * exit status 1. Does not return.
 */
function usage() {
    console.error(`Usage: rad-experiment [--repo <repo_path>] <command> [options]

Commands:
publish Publish a new optimization experiment
list List all experiments
show Show experiment details
reproduce Add a reproduction to an experiment`);
    process.exit(1);
}
25
/**
 * CLI entry point: parse process.argv and dispatch to a subcommand handler.
 *
 * A `--repo <path>` / `-r <path>` option that appears before the subcommand
 * is treated as global: it is removed from the argument list and, unless the
 * subcommand's own arguments already contain `--repo` or `-r`, re-inserted
 * at the front of those arguments as `--repo <path>`.
 *
 * Calls usage() (which exits with status 1) when no arguments remain or when
 * the first remaining argument is not a known subcommand.
 */
function main() {
    // Own-property check only. A bare `in` test or index lookup would also
    // match names inherited from Object.prototype ("toString",
    // "constructor", "__proto__", ...), so such an argument would be treated
    // as a subcommand and a prototype member invoked as its handler.
    const isCommand = (name) => Object.prototype.hasOwnProperty.call(COMMANDS, name);
    const rawArgs = process.argv.slice(2);
    // Extract global --repo/-r before subcommand.
    let globalRepo;
    const subArgs = [];
    let foundCommand = false;
    for (let i = 0; i < rawArgs.length; i++) {
        const arg = rawArgs[i];
        if (!foundCommand && (arg === "--repo" || arg === "-r")) {
            // Consume the option's value as well.
            globalRepo = rawArgs[++i];
            continue;
        }
        if (!foundCommand && isCommand(arg)) {
            foundCommand = true;
        }
        subArgs.push(arg);
    }
    if (subArgs.length === 0)
        usage();
    const command = subArgs.shift();
    const handler = isCommand(command) ? COMMANDS[command] : undefined;
    if (!handler)
        usage();
    // An explicit --repo/-r given after the subcommand wins over the global one.
    if (globalRepo && !subArgs.includes("--repo") && !subArgs.includes("-r")) {
        subArgs.unshift("--repo", globalRepo);
    }
    handler(subArgs);
}
54
// Run the CLI. Every failure is reported on stderr as a single
// `error: <detail>` line and ends the process with exit status 1.
try {
    main();
}
catch (err) {
    let detail;
    if (err instanceof CliError) {
        // Validation failures raised through die().
        detail = err.message;
    }
    else if (err instanceof Error && "status" in err) {
        // execFileSync errors: child process failures from the rad CLI.
        // Prefer the child's own stderr output when it produced any.
        const execErr = err;
        const childStderr = execErr.stderr?.toString().trim();
        detail = childStderr || err.message;
    }
    else {
        // Unexpected errors.
        detail = err instanceof Error ? err.message : String(err);
    }
    process.stderr.write(`error: ${detail}\n`);
    process.exit(1);
}
@@ -0,0 +1,102 @@
1
/**
 * Summary of one measured series (used for both `baseline` and `candidate`).
 * Field names are camelCase on the wire.
 *
 * NOTE(review): the `X1000` suffix presumably means the value is scaled by
 * 1000 and stored as an integer — confirm against the Rust definition.
 */
export interface Measurement {
    /** NOTE(review): presumably the number of samples — confirm. */
    n: number;
    medianX1000: number;
    stdX1000: number;
    /** Optional raw samples; the cleanMeasurement tests expect an empty list to be dropped. */
    samplesX1000?: number[];
}
7
/** Outcome recorded with a reproduction of an experiment. */
export type Verdict = "confirmed" | "failed" | "inconclusive";
8
/**
 * One additional metric attached to an experiment (the element type of
 * `secondaryMetrics` / `secondary_metrics`). Field names are camelCase on
 * the wire.
 */
export interface MetricValue {
    name: string;
    unit: string;
    baseline: Measurement;
    candidate: Measurement;
    /** NOTE(review): presumably the percentage change scaled by 100 — confirm. */
    deltaPctX100: number;
    regressed: boolean;
}
16
/**
 * State entry appended to `Experiment.reproductions` when a `reproduce`
 * action is applied. Field names are camelCase.
 *
 * The state evaluator copies verdict, runner class, measurements and delta
 * from the action; `build_ok`, `tests_ok` and `notes` of the action are not
 * carried over.
 */
export interface Reproduction {
    verdict: Verdict;
    runnerClass: string;
    baseline: Measurement;
    candidate: Measurement;
    deltaPctX100: number;
    /** Author of the operation that carried the action. */
    author: Author;
    /** Timestamp of the operation that carried the action. */
    timestamp: number;
}
25
/** Identity of whoever authored an operation. */
export interface Author {
    /** The state evaluator always stores this with a `did:key:` prefix. */
    id: string;
}
28
/**
 * Evaluated state of an experiment COB, as produced by the state evaluator
 * from a `publish` action plus any later `reproduce` actions. Field names
 * are camelCase.
 */
export interface Experiment {
    /** Present only when the publish action carried a non-null description. */
    description?: string;
    base: string;
    oid: string;
    metricName: string;
    metricUnit: string;
    direction: string;
    runnerClass: string;
    /** Empty string when the publish action did not provide it. */
    os: string;
    /** Empty string when the publish action did not provide it. */
    cpu: string;
    baseline: Measurement;
    candidate: Measurement;
    deltaPctX100: number;
    buildOk: boolean;
    testsOk: boolean;
    sanitizersOk: boolean;
    agentSystem: string;
    agentModel: string;
    /** Present only when the publish action carried at least one secondary metric. */
    secondaryMetrics?: MetricValue[];
    /** One entry per applied `reproduce` action, in application order. */
    reproductions: Reproduction[];
    /** Author of the root operation. */
    author: Author;
    /** Timestamp of the root operation. */
    createdAt: number;
}
51
/**
 * Wire format of the `publish` action that creates an experiment.
 * Action fields are snake_case (matching the Rust serde output); the nested
 * Measurement / MetricValue objects keep their camelCase fields.
 */
export interface PublishAction {
    type: "publish";
    description?: string;
    base: string;
    oid: string;
    metric_name: string;
    metric_unit: string;
    direction: string;
    runner_class: string;
    os?: string;
    cpu?: string;
    baseline: Measurement;
    candidate: Measurement;
    delta_pct_x100: number;
    build_ok: boolean;
    tests_ok: boolean;
    sanitizers_ok: boolean;
    agent_system: string;
    agent_model: string;
    secondary_metrics?: MetricValue[];
}
72
/**
 * Wire format of the `reproduce` action that adds a reproduction to an
 * existing experiment. Action fields are snake_case (matching the Rust serde
 * output); the nested Measurement objects keep their camelCase fields.
 */
export interface ReproduceAction {
    type: "reproduce";
    verdict: Verdict;
    runner_class: string;
    baseline: Measurement;
    candidate: Measurement;
    delta_pct_x100: number;
    build_ok: boolean;
    tests_ok: boolean;
    notes?: string;
}
83
/** Any action an operation can carry, discriminated by its `type` field. */
export type Action = PublishAction | ReproduceAction;
84
/** Manifest attached to every operation (see `Op.manifest`). */
export interface Manifest {
    typeName: string;
    version: number;
}
88
/**
 * One operation as delivered by the external COB protocol. The state
 * evaluator reads only `actions`, `author` and `timestamp`.
 */
export interface Op {
    id: string;
    /** Actions carried by this operation, applied in order. */
    actions: Action[];
    /** Raw author key; the state evaluator adds a `did:key:` prefix if it is missing. */
    author: string;
    timestamp: number;
    parents: string[];
    related: string[];
    identity: string | null;
    manifest: Manifest;
}
98
/** One request line of the external COB protocol: `{ value, op, concurrent }`. */
export interface OpMessage {
    /**
     * State before `op` is applied. The handler treats null/undefined or an
     * object without keys as "no state yet" and builds the state from `op`.
     */
    value: Record<string, unknown>;
    /** The operation to apply. */
    op: Op;
    /** Further operations applied after `op`, in order. */
    concurrent: Op[];
}
package/dist/types.js ADDED
@@ -0,0 +1,9 @@
1
+ // Types matching the Rust serde serialization exactly.
2
+ //
3
+ // IMPORTANT naming conventions (matching Rust serde):
4
+ // - Action enum fields: snake_case (enum-level rename_all only affects variant names)
5
+ // - Measurement struct fields: camelCase (has its own rename_all = "camelCase")
6
+ // - MetricValue struct fields: camelCase (has its own rename_all = "camelCase")
7
+ // - Experiment state fields: camelCase (has its own rename_all = "camelCase")
8
+ // - Reproduction state fields: camelCase (has its own rename_all = "camelCase")
9
+ export {};
package/package.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "rad-experiment",
3
+ "version": "0.1.0",
4
+ "description": "Radicle COB type for AI-generated optimization experiments (TypeScript)",
5
+ "type": "module",
6
+ "bin": {
7
+ "rad-experiment": "./dist/rad-experiment.js",
8
+ "rad-cob-experiment": "./dist/rad-cob-experiment.js"
9
+ },
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "prepublishOnly": "tsc",
13
+ "test": "tsc --noEmit && npx tsx --test src/__tests__/format.test.ts src/__tests__/actions.test.ts src/__tests__/state.test.ts src/__tests__/helpers.test.ts src/__tests__/serialization.test.ts src/__tests__/cob-protocol.test.ts src/__tests__/integration.test.ts",
14
+ "test:unit": "npx tsx --test src/__tests__/format.test.ts src/__tests__/actions.test.ts src/__tests__/state.test.ts src/__tests__/helpers.test.ts src/__tests__/serialization.test.ts",
15
+ "test:protocol": "npx tsx --test src/__tests__/cob-protocol.test.ts",
16
+ "test:integration": "npx tsx --test src/__tests__/integration.test.ts"
17
+ },
18
+ "devDependencies": {
19
+ "@types/node": "^18.0.0",
20
+ "tsx": "^4.21.0",
21
+ "typescript": "^5.4.0"
22
+ },
23
+ "license": "MIT OR Apache-2.0"
24
+ }
@@ -0,0 +1,122 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+
4
+ import { cleanMeasurement, buildPublishAction, buildReproduceAction } from "../cob/actions.js";
5
+ import { makeMeasurement, makeMetricValue } from "./fixtures.js";
6
+
7
// cleanMeasurement must drop an empty or missing samples list and keep a
// populated one.
describe("cleanMeasurement", () => {
  it("strips empty samplesX1000", () => {
    const cleaned = cleanMeasurement({
      n: 5,
      medianX1000: 100,
      stdX1000: 10,
      samplesX1000: [],
    });
    assert.equal("samplesX1000" in cleaned, false);
  });

  it("keeps non-empty samplesX1000", () => {
    const samples = [100, 200];
    const cleaned = cleanMeasurement({
      n: 5,
      medianX1000: 100,
      stdX1000: 10,
      samplesX1000: samples,
    });
    assert.deepEqual(cleaned.samplesX1000, [100, 200]);
  });

  it("handles undefined samplesX1000", () => {
    const cleaned = cleanMeasurement({
      n: 5,
      medianX1000: 100,
      stdX1000: 10,
    });
    assert.equal("samplesX1000" in cleaned, false);
  });
});
23
+
24
// buildPublishAction must emit the snake_case wire format, omit optional
// fields that are empty, normalise the delta to a non-negative value and
// list base/oid as parents.
describe("buildPublishAction", () => {
  const defaults = {
    base: "abc",
    oid: "abc",
    metricName: "wall_time",
    metricUnit: "ms",
    direction: "lower_is_better",
    runnerClass: "arm64",
    os: "",
    cpu: "",
    baseline: makeMeasurement(),
    candidate: makeMeasurement(),
    deltaPctX100: 100,
    buildOk: true,
    testsOk: true,
    sanitizersOk: false,
    agentSystem: "claude-code",
    agentModel: "claude-opus-4-6",
    secondaryMetrics: [],
  };

  /** View a built action as a plain record so wire-level keys can be inspected. */
  const asRecord = (action: unknown) => action as Record<string, unknown>;

  it("sets type to publish", () => {
    const action = asRecord(buildPublishAction(defaults));
    assert.equal(action.type, "publish");
  });

  it("uses snake_case field names", () => {
    const action = asRecord(buildPublishAction(defaults));
    for (const key of ["metric_name", "runner_class", "delta_pct_x100", "build_ok", "agent_system"]) {
      assert.ok(key in action);
    }
    // camelCase versions should NOT be present
    for (const key of ["metricName", "runnerClass"]) {
      assert.ok(!(key in action));
    }
  });

  it("omits description when undefined", () => {
    const action = asRecord(buildPublishAction(defaults));
    assert.equal("description" in action, false);
  });

  it("includes description when provided", () => {
    const action = asRecord(buildPublishAction({ ...defaults, description: "test" }));
    assert.equal(action.description, "test");
  });

  it("omits os/cpu when empty string", () => {
    const action = asRecord(buildPublishAction(defaults));
    assert.equal("os" in action, false);
    assert.equal("cpu" in action, false);
  });

  it("includes os/cpu when set", () => {
    const action = asRecord(buildPublishAction({ ...defaults, os: "linux", cpu: "x86" }));
    assert.equal(action.os, "linux");
    assert.equal(action.cpu, "x86");
  });

  it("omits empty secondary_metrics", () => {
    const action = asRecord(buildPublishAction(defaults));
    assert.equal("secondary_metrics" in action, false);
  });

  it("includes non-empty secondary_metrics", () => {
    const action = asRecord(
      buildPublishAction({ ...defaults, secondaryMetrics: [makeMetricValue()] }),
    );
    assert.ok("secondary_metrics" in action);
  });

  it("takes Math.abs of negative delta", () => {
    const action = asRecord(buildPublishAction({ ...defaults, deltaPctX100: -500 }));
    assert.equal(action.delta_pct_x100, 500);
  });

  it("includes parents field for GC safety", () => {
    const action = asRecord(buildPublishAction({ ...defaults, base: "aaa", oid: "bbb" }));
    assert.deepEqual(action.parents, ["aaa", "bbb"]);
  });
});
94
+
95
// buildReproduceAction must tag the action as `reproduce`, omit notes unless
// given, and normalise the delta to a non-negative value.
describe("buildReproduceAction", () => {
  const defaults = {
    verdict: "confirmed" as const,
    runnerClass: "amd64",
    baseline: makeMeasurement(),
    candidate: makeMeasurement(),
    deltaPctX100: 100,
    buildOk: true,
    testsOk: true,
  };

  /** View a built action as a plain record so wire-level keys can be inspected. */
  const asRecord = (action: unknown) => action as Record<string, unknown>;

  it("sets type to reproduce", () => {
    const action = asRecord(buildReproduceAction(defaults));
    assert.equal(action.type, "reproduce");
  });

  it("omits notes when undefined", () => {
    const action = asRecord(buildReproduceAction(defaults));
    assert.equal("notes" in action, false);
  });

  it("includes notes when provided", () => {
    const action = asRecord(buildReproduceAction({ ...defaults, notes: "test" }));
    assert.equal(action.notes, "test");
  });

  it("takes Math.abs of negative delta", () => {
    const action = asRecord(buildReproduceAction({ ...defaults, deltaPctX100: -300 }));
    assert.equal(action.delta_pct_x100, 300);
  });
});
@@ -0,0 +1,138 @@
1
+ // Tests for the rad-cob-experiment binary as a black-box process.
2
+ // Spawns the helper, pipes JSON Lines to stdin, verifies stdout output.
3
+
4
+ import { describe, it } from "node:test";
5
+ import assert from "node:assert/strict";
6
+ import { execFileSync } from "node:child_process";
7
+ import { fileURLToPath } from "node:url";
8
+ import { dirname, join } from "node:path";
9
+
10
+ import type { Experiment, OpMessage } from "../types.js";
11
+ import {
12
+ makeOp,
13
+ makeRawPublishAction,
14
+ makeRawReproduceAction,
15
+ } from "./fixtures.js";
16
+
17
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// TypeScript source of the COB helper, one directory above this test file.
const HELPER = join(__dirname, "..", "rad-cob-experiment.ts");
// tsx binary from node_modules/.bin two directories above this test file;
// it is used to run the helper's TypeScript source.
const TSX = join(__dirname, "..", "..", "node_modules", ".bin", "tsx");
20
+
21
/**
 * Run the COB helper as a child process.
 *
 * The given lines are joined with newlines (plus a trailing newline) and fed
 * to the helper's stdin. Returns the helper's non-empty stdout lines. Throws
 * (via execFileSync) if the helper exits non-zero or exceeds the 5 s timeout.
 */
function runHelper(inputLines: string[]): string[] {
  const stdin = inputLines.join("\n") + "\n";
  const options = { input: stdin, encoding: "utf-8" as const, timeout: 5000 };
  const output = execFileSync(TSX, [HELPER], options);

  const produced: string[] = [];
  for (const line of output.trim().split("\n")) {
    if (line.length > 0) {
      produced.push(line);
    }
  }
  return produced;
}
31
+
32
/**
 * Build an OpMessage for the helper: an empty prior state, a default op from
 * makeOp() and no concurrent ops, with any given overrides applied on top.
 */
function makeOpMessage(overrides?: Partial<OpMessage>): OpMessage {
  const base: OpMessage = { value: {}, op: makeOp(), concurrent: [] };
  return { ...base, ...overrides };
}
40
+
41
// A message with empty prior state and a publish op must yield a fresh
// experiment state on exactly one output line.
describe("cob-protocol: single publish (root creation)", () => {
  it("returns valid Experiment state", () => {
    const output = runHelper([JSON.stringify(makeOpMessage())]);
    assert.equal(output.length, 1);

    const state: Experiment = JSON.parse(output[0]);
    assert.equal(state.metricName, "wall_time");
    assert.equal(state.deltaPctX100, 2378);
    assert.equal(state.runnerClass, "arm64");
    assert.deepEqual(state.reproductions, []);
    assert.ok(state.author.id.startsWith("did:key:"));
  });
});
55
+
56
// Feeding the state returned for a publish back in as `value`, together with
// a reproduce op, must add one reproduction to that state.
describe("cob-protocol: multi-message sequence (publish then reproduce)", () => {
  it("accumulates reproductions across messages", () => {
    // Message 1: create experiment
    const created = runHelper([JSON.stringify(makeOpMessage())]);
    const afterPublish: Experiment = JSON.parse(created[0]);
    assert.equal(afterPublish.reproductions.length, 0);

    // Message 2: add reproduction (value = output of message 1)
    const reproduceOp = makeOp({ actions: [makeRawReproduceAction()] });
    const followUp = makeOpMessage({
      value: afterPublish as unknown as Record<string, unknown>,
      op: reproduceOp,
    });
    const updated = runHelper([JSON.stringify(followUp)]);
    const afterReproduce: Experiment = JSON.parse(updated[0]);
    assert.equal(afterReproduce.reproductions.length, 1);

    const reproduction = afterReproduce.reproductions[0];
    assert.equal(reproduction.verdict, "confirmed");
    assert.equal(reproduction.runnerClass, "amd64");
  });
});
76
+
77
// Ops listed under `concurrent` must be applied in addition to the main op.
describe("cob-protocol: concurrent ops", () => {
  it("applies concurrent reproductions", () => {
    const confirmedOp = makeOp({ actions: [makeRawReproduceAction({ verdict: "confirmed" })] });
    const failedOp = makeOp({ actions: [makeRawReproduceAction({ verdict: "failed" })] });
    const message = makeOpMessage({ concurrent: [confirmedOp, failedOp] });

    const output = runHelper([JSON.stringify(message)]);
    const state: Experiment = JSON.parse(output[0]);
    assert.equal(state.reproductions.length, 2);
  });
});
90
+
91
// Two messages sent in one session must produce two independent output
// lines, in input order.
describe("cob-protocol: multiple JSON Lines in one session", () => {
  it("processes each line independently", () => {
    const first = makeOpMessage();
    const second = makeOpMessage({
      op: makeOp({ actions: [makeRawPublishAction({ description: "second" })] }),
    });

    const output = runHelper([first, second].map((message) => JSON.stringify(message)));
    assert.equal(output.length, 2);

    const firstState: Experiment = JSON.parse(output[0]);
    const secondState: Experiment = JSON.parse(output[1]);
    assert.equal(firstState.description, "SIMD vectorization in parser loop");
    assert.equal(secondState.description, "second");
  });
});
106
+
107
// Blank input lines must be ignored rather than answered or rejected.
describe("cob-protocol: empty lines are skipped", () => {
  it("only produces output for valid JSON Lines", () => {
    const payload = JSON.stringify(makeOpMessage());
    const output = runHelper(["", payload, "", ""]);
    assert.equal(output.length, 1);
  });
});
114
+
115
// Invalid input must make the helper exit non-zero with a diagnostic on stderr.
describe("cob-protocol: error handling", () => {
  /**
   * Build an assert.throws validator that accepts an execFileSync failure
   * whose exit status is non-zero and whose stderr contains `needle`.
   */
  const failsWith = (needle: string) => (err: unknown) => {
    const failure = err as { status: number; stderr: Buffer };
    return failure.status !== 0 && failure.stderr.toString().includes(needle);
  };

  it("exits non-zero on invalid JSON", () => {
    assert.throws(() => runHelper(["not valid json"]), failsWith("rad-cob-experiment:"));
  });

  it("exits non-zero when first action is not publish", () => {
    const reproduceOnly = makeOpMessage({
      op: makeOp({ actions: [makeRawReproduceAction()] }),
    });
    assert.throws(() => runHelper([JSON.stringify(reproduceOnly)]), failsWith("publish"));
  });
});