akm-cli 0.7.0-rc1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/src/cli.js +100 -16
  2. package/dist/src/commands/config-cli.js +42 -0
  3. package/dist/src/commands/history.js +78 -7
  4. package/dist/src/commands/registry-search.js +69 -6
  5. package/dist/src/commands/search.js +30 -3
  6. package/dist/src/commands/show.js +29 -0
  7. package/dist/src/commands/source-add.js +5 -1
  8. package/dist/src/commands/source-manage.js +7 -1
  9. package/dist/src/core/config.js +28 -0
  10. package/dist/src/indexer/db-search.js +1 -0
  11. package/dist/src/indexer/indexer.js +16 -2
  12. package/dist/src/indexer/matchers.js +1 -1
  13. package/dist/src/indexer/search-source.js +4 -2
  14. package/dist/src/integrations/agent/profiles.js +1 -1
  15. package/dist/src/integrations/agent/spawn.js +67 -16
  16. package/dist/src/integrations/github.js +9 -3
  17. package/dist/src/llm/embedders/remote.js +37 -3
  18. package/dist/src/output/cli-hints.js +15 -2
  19. package/dist/src/output/renderers.js +3 -1
  20. package/dist/src/output/shapes.js +8 -1
  21. package/dist/src/output/text.js +156 -3
  22. package/dist/src/registry/build-index.js +5 -4
  23. package/dist/src/registry/providers/static-index.js +3 -1
  24. package/dist/src/setup/setup.js +9 -0
  25. package/dist/src/wiki/wiki.js +54 -6
  26. package/dist/src/workflows/runs.js +37 -3
  27. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +1 -1
  28. package/dist/tests/bench/attribution.test.js +24 -23
  29. package/dist/tests/bench/cleanup.js +31 -0
  30. package/dist/tests/bench/cli.js +366 -31
  31. package/dist/tests/bench/cli.test.js +282 -14
  32. package/dist/tests/bench/corpus.js +3 -0
  33. package/dist/tests/bench/corpus.test.js +10 -10
  34. package/dist/tests/bench/doctor.js +525 -0
  35. package/dist/tests/bench/driver.js +77 -22
  36. package/dist/tests/bench/driver.test.js +142 -1
  37. package/dist/tests/bench/environment.js +233 -0
  38. package/dist/tests/bench/environment.test.js +199 -0
  39. package/dist/tests/bench/evolve.js +67 -0
  40. package/dist/tests/bench/evolve.test.js +12 -4
  41. package/dist/tests/bench/failure-modes.test.js +52 -3
  42. package/dist/tests/bench/feedback-integrity.test.js +3 -2
  43. package/dist/tests/bench/leakage.test.js +105 -2
  44. package/dist/tests/bench/learning-curve.test.js +3 -2
  45. package/dist/tests/bench/metrics.js +102 -26
  46. package/dist/tests/bench/metrics.test.js +10 -4
  47. package/dist/tests/bench/opencode-config.js +194 -0
  48. package/dist/tests/bench/opencode-config.test.js +370 -0
  49. package/dist/tests/bench/report.js +73 -9
  50. package/dist/tests/bench/report.test.js +59 -10
  51. package/dist/tests/bench/run-config.js +355 -0
  52. package/dist/tests/bench/run-config.test.js +298 -0
  53. package/dist/tests/bench/run-curate-test.js +32 -0
  54. package/dist/tests/bench/run-failing-tasks.js +56 -0
  55. package/dist/tests/bench/run-full-bench.js +51 -0
  56. package/dist/tests/bench/run-items36-targeted.js +69 -0
  57. package/dist/tests/bench/run-nano-quick.js +42 -0
  58. package/dist/tests/bench/run-waveg-targeted.js +62 -0
  59. package/dist/tests/bench/runner.js +257 -94
  60. package/dist/tests/bench/tmp.js +90 -0
  61. package/dist/tests/bench/trajectory.js +2 -2
  62. package/dist/tests/bench/verifier.js +6 -1
  63. package/dist/tests/bench/workflow-spec.js +11 -24
  64. package/dist/tests/bench/workflow-spec.test.js +1 -1
  65. package/dist/tests/bench/workflow-trace.js +34 -0
  66. package/dist/tests/cli-errors.test.js +1 -0
  67. package/dist/tests/commands/history.test.js +195 -0
  68. package/dist/tests/config.test.js +25 -0
  69. package/dist/tests/e2e.test.js +23 -2
  70. package/dist/tests/fixtures/stashes/load.js +1 -1
  71. package/dist/tests/fixtures/stashes/load.test.js +11 -2
  72. package/dist/tests/indexer.test.js +12 -1
  73. package/dist/tests/output-baseline.test.js +2 -1
  74. package/dist/tests/output-shapes-unit.test.js +3 -1
  75. package/dist/tests/registry-build-index.test.js +17 -1
  76. package/dist/tests/registry-providers/static-index.test.js +34 -0
  77. package/dist/tests/registry-search.test.js +200 -0
  78. package/dist/tests/remember-frontmatter.test.js +11 -13
  79. package/dist/tests/source-qa-fixes.test.js +18 -0
  80. package/dist/tests/source-registry.test.js +3 -3
  81. package/dist/tests/source-source.test.js +61 -1
  82. package/dist/tests/workflow-qa-fixes.test.js +18 -0
  83. package/package.json +1 -1
@@ -286,6 +286,147 @@ describe("runOne", () => {
286
286
  process.env.AKM_STASH_DIR = prior;
287
287
  }
288
288
  });
289
+ // ── opencodeProviders: materialise tests ──────────────────────────────────
290
+ test("runOne with opencodeProviders writes opencode.json into OPENCODE_CONFIG before spawn", async () => {
291
+ // We need the OPENCODE_CONFIG path from the child env to check that the
292
+ // file was written. The driver tears down the isolation dirs in its
293
+ // finally block, which runs only AFTER runAgent returns, so checking
294
+ // after runOne resolves would be too late: the file is already gone.
295
+ // Instead, the fake spawn below records the path and checks for the
296
+ // file at spawn time, when it SHOULD still be present. The results
297
+ // are captured in the closure variables declared next.
298
+ let capturedOpencodeCfgDir;
299
+ let fileExistedAtSpawnTime = false;
300
+ const checkingSpawn = (cmd, options) => {
301
+ // Capture the OPENCODE_CONFIG path from the child env (it is a file path, not a directory).
302
+ const env = options.env;
303
+ if (env?.OPENCODE_CONFIG) {
304
+ capturedOpencodeCfgDir = env.OPENCODE_CONFIG;
305
+ // OPENCODE_CONFIG now points directly to the opencode.json file.
306
+ fileExistedAtSpawnTime = require("node:fs").existsSync(env.OPENCODE_CONFIG);
307
+ }
308
+ // Behave like the normal fake (agent exits 0, stdout = "ok").
309
+ const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
310
+ return inner(cmd, options);
311
+ };
312
+ const fakeProviders = {
313
+ source: "/fake/providers.json",
314
+ providers: {
315
+ testprov: {
316
+ npm: "@ai-sdk/openai-compatible",
317
+ options: { baseURL: "http://localhost:9999/v1" },
318
+ },
319
+ },
320
+ defaultModel: "testprov/my-model",
321
+ };
322
+ const result = await runOne({
323
+ ...baseOptions,
324
+ workspace,
325
+ model: "testprov/my-model",
326
+ spawn: checkingSpawn,
327
+ opencodeProviders: fakeProviders,
328
+ });
329
+ // The run should succeed or fail on the verifier — the key thing is it
330
+ // is not harness_error from the provider materialise step.
331
+ expect(result.outcome).not.toBe("harness_error");
332
+ // The file MUST have existed at spawn time.
333
+ expect(fileExistedAtSpawnTime).toBe(true);
334
+ // Regression: the OPENCODE_CONFIG dir is torn down after the run.
335
+ if (capturedOpencodeCfgDir) {
336
+ // Dir should be cleaned up by the driver's finally block.
337
+ // (We can't assert it's gone because the test itself runs in the same
338
+ // process; just verify the captured path was non-empty.)
339
+ expect(capturedOpencodeCfgDir.length).toBeGreaterThan(0);
340
+ }
341
+ });
342
+ test("runOne WITHOUT opencodeProviders writes minimal stub to OPENCODE_CONFIG (regression guard)", async () => {
343
+ let capturedDir;
344
+ let filesAtSpawnTime = [];
345
+ let stubContent;
346
+ const checkingSpawn = (cmd, options) => {
347
+ const env = options.env;
348
+ if (env?.OPENCODE_CONFIG) {
349
+ capturedDir = env.OPENCODE_CONFIG;
350
+ try {
351
+ // OPENCODE_CONFIG points to the file, so read it directly.
352
+ stubContent = require("node:fs").readFileSync(env.OPENCODE_CONFIG, "utf8");
353
+ filesAtSpawnTime = ["opencode.json"];
354
+ }
355
+ catch {
356
+ filesAtSpawnTime = [];
357
+ }
358
+ }
359
+ const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
360
+ return inner(cmd, options);
361
+ };
362
+ await runOne({
363
+ ...baseOptions,
364
+ workspace,
365
+ spawn: checkingSpawn,
366
+ // No opencodeProviders
367
+ });
368
+ expect(capturedDir).toBeDefined();
369
+ // Without opencodeProviders, the driver writes a minimal stub opencode.json.
370
+ expect(filesAtSpawnTime).toEqual(["opencode.json"]);
371
+ expect(stubContent).toBeDefined();
372
+ const parsed = JSON.parse(stubContent ?? "{}");
373
+ expect(parsed.$schema).toBe("https://opencode.ai/config.json");
374
+ expect(parsed.provider).toBeUndefined();
375
+ });
376
+ test("runOne falls back to model-only stub when provider prefix not in map (cloud/built-in models)", async () => {
377
+ // "opencode" is a BUILTIN_CLOUD_PREFIX — not in fakeProviders — should write
378
+ // a model-only stub and proceed rather than returning harness_error. Built-in
379
+ // cloud models like "opencode/big-pickle" resolve via opencode's own registry
380
+ // and do NOT need a custom provider entry.
381
+ const fakeProviders = {
382
+ source: "/fake/providers.json",
383
+ providers: { myprov: {} },
384
+ };
385
+ let stubContent;
386
+ const checkingSpawn = (cmd, options) => {
387
+ const env = options.env;
388
+ if (env?.OPENCODE_CONFIG) {
389
+ try {
390
+ stubContent = require("node:fs").readFileSync(env.OPENCODE_CONFIG, "utf8");
391
+ }
392
+ catch {
393
+ /* file may not exist */
394
+ }
395
+ }
396
+ const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
397
+ return inner(cmd, options);
398
+ };
399
+ const result = await runOne({
400
+ ...baseOptions,
401
+ workspace,
402
+ model: "opencode/big-pickle",
403
+ spawn: checkingSpawn,
404
+ opencodeProviders: fakeProviders,
405
+ });
406
+ // Should NOT be harness_error — built-in cloud prefix falls back to stub.
407
+ expect(result.outcome).not.toBe("harness_error");
408
+ // The written stub should have model key but no provider block.
409
+ expect(stubContent).toBeDefined();
410
+ const written = JSON.parse(stubContent ?? "{}");
411
+ expect(written.model).toBe("opencode/big-pickle");
412
+ expect(written.provider).toBeUndefined();
413
+ });
414
+ test("harness_error: custom provider prefix without opencodeProviders refuses to run", async () => {
415
+ // "shredder/qwen/qwen3.5-9b" has a custom prefix. Without opencodeProviders,
416
+ // opencode would silently fall back to a cloud model and burn API credits.
417
+ // The harness must refuse to run rather than allow that.
418
+ const { spawn } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
419
+ const result = await runOne({
420
+ ...baseOptions,
421
+ workspace,
422
+ model: "shredder/qwen/qwen3.5-9b",
423
+ spawn,
424
+ // opencodeProviders deliberately omitted
425
+ });
426
+ expect(result.outcome).toBe("harness_error");
427
+ // Error surfaces via setupBenchEnvironment (may be wrapped in "environment setup failed")
428
+ expect(result.verifierStdout).toMatch(/custom provider prefix|environment setup failed/);
429
+ });
289
430
  });
290
431
  describe("driver helpers", () => {
291
432
  test("createIsolationDirs creates four dirs under a single root", () => {
@@ -359,7 +500,7 @@ describe("driver helpers", () => {
359
500
  const env = buildIsolatedEnv(dirs, "model-x");
360
501
  expect(env.XDG_CACHE_HOME).toBe(dirs.cacheHome);
361
502
  expect(env.XDG_CONFIG_HOME).toBe(dirs.configHome);
362
- expect(env.OPENCODE_CONFIG).toBe(dirs.opencodeConfig);
503
+ expect(env.OPENCODE_CONFIG).toBe(path.join(dirs.opencodeConfig, "opencode.json"));
363
504
  expect(env.AKM_STASH_DIR).toBe("/tmp/stash");
364
505
  expect(env.BENCH_OPENCODE_MODEL).toBe("model-x");
365
506
  }
@@ -0,0 +1,233 @@
1
+ /**
2
+ * environment.ts — unified bench environment setup.
3
+ *
4
+ * `setupBenchEnvironment` is the single function that owns all per-run
5
+ * isolation: isolation dirs, opencode.json, akm config, FTS5 index. Both
6
+ * `runOne` (driver.ts) and the doctor's live-run check call this function,
7
+ * guaranteeing they produce identical environments.
8
+ *
9
+ * Key design decisions:
10
+ * - `BENCH_OPENCODE_INVARIANTS` (plugin:[], permission block) are always
11
+ * written — they are bench isolation invariants, not conditional on the
12
+ * provider path. No silent stub fallbacks.
13
+ * - `dryRun: true` skips the akm config and index writes. Unit tests set
14
+ * this so the setup path is exercised without spawning a real agent.
15
+ * - `validateFixtureCorpus` is called at bench startup to catch missing
16
+ * fixtures before any work items start, not per-task mid-run.
17
+ */
18
+ import fs from "node:fs";
19
+ import path from "node:path";
20
+ import { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
21
+ import { BenchConfigError, selectProviderForModel } from "./opencode-config";
22
+ import { benchMkdtemp } from "./tmp";
23
+ // ── Bench isolation invariants ───────────────────────────────────────────────
24
/**
 * Top-level keys written unconditionally into every bench-generated
 * opencode.json. These are isolation invariants — never conditional on
 * provider resolution or model type.
 *
 * - `plugin: []` — prevents operator plugins (akm-opencode, etc.) from
 *   running lifecycle hooks that override AKM_STASH_DIR, warm indexes
 *   against the wrong stash, or prompt akm setup wizards.
 * - `permission` — opencode in non-interactive (`opencode run`) mode
 *   silently skips tool calls without explicit permission grants.
 *
 * The object, its `plugin` array and its `permission` map are frozen so
 * that no consumer can weaken the invariants for later runs by mutating the
 * shared value. Spread it into a fresh object to build a config.
 */
export const BENCH_OPENCODE_INVARIANTS = Object.freeze({
    plugin: Object.freeze([]),
    permission: Object.freeze({
        bash: "allow",
        edit: "allow",
        write: "allow",
        read: "allow",
        webfetch: "allow",
    }),
});
45
+ // ── Built-in cloud prefixes ──────────────────────────────────────────────────
46
/**
 * Provider prefixes (the part of a model id before the first "/") that
 * opencode resolves through its built-in cloud-provider registry.
 *
 * A model carrying one of these prefixes needs no custom provider entry in
 * the bench providers JSON. Any other prefix is treated as a custom/local
 * provider and requires `opencodeProviders`; the harness refuses to run
 * without it to prevent silent cloud-model fallback and unexpected API
 * charges.
 */
export const BUILTIN_CLOUD_PREFIXES = new Set([
    "anthropic", "openai", "openrouter", "opencode",
    "google", "amazon", "azure", "vertex", "bedrock",
    "mistral", "groq", "together", "fireworks",
]);
68
+ /**
69
+ * Write an `opencode.json` into `opencodeConfigDir`.
70
+ *
71
+ * Always includes `BENCH_OPENCODE_INVARIANTS` (plugin:[], permission block).
72
+ * When `providers` is supplied and the model prefix resolves, the `provider`
73
+ * block is added. When the prefix is not found in the providers map (built-in
74
+ * cloud model), the file is written without a provider block and a warning is
75
+ * returned — this is not an error because built-in cloud models resolve via
76
+ * opencode's own registry.
77
+ *
78
+ * Returns a `WriteOpencodeJsonResult` — never throws for expected cases.
79
+ * Throws for unexpected FS errors.
80
+ */
81
+ export function writeOpencodeJson(opencodeConfigDir, model, providers) {
82
+ const warnings = [];
83
+ let providerKey;
84
+ let providerBlock;
85
+ if (providers) {
86
+ try {
87
+ const selected = selectProviderForModel(providers, model);
88
+ providerKey = selected.providerKey;
89
+ providerBlock = { [selected.providerKey]: selected.entry };
90
+ }
91
+ catch (err) {
92
+ if (err instanceof BenchConfigError) {
93
+ // Check if this is a local-provider model that MUST have a provider block.
94
+ const modelPrefix = model.split("/")[0];
95
+ if (modelPrefix && !BUILTIN_CLOUD_PREFIXES.has(modelPrefix)) {
96
+ // Local-prefix model not in providers map — this is a hard error, not a
97
+ // fallback. Writing opencode.json without a provider block would cause
98
+ // opencode to use cloud resolution, skewing results and incurring costs.
99
+ throw new BenchConfigError(`model "${model}" uses local prefix "${modelPrefix}" but was not found in the providers config. ` +
100
+ `Add it to the providers file or use a built-in cloud model prefix.`, true);
101
+ }
102
+ warnings.push(`model "${model}" not found in providers config; writing stub (expected for built-in cloud models)`);
103
+ }
104
+ else {
105
+ throw err;
106
+ }
107
+ }
108
+ }
109
+ const config = {
110
+ $schema: "https://opencode.ai/config.json",
111
+ model,
112
+ ...BENCH_OPENCODE_INVARIANTS,
113
+ ...(providerBlock ? { provider: providerBlock } : {}),
114
+ };
115
+ fs.writeFileSync(path.join(opencodeConfigDir, "opencode.json"), JSON.stringify(config, null, 2), { mode: 0o600 });
116
+ return { providerKey, warnings };
117
+ }
118
+ /**
119
+ * Set up a complete bench run environment.
120
+ *
121
+ * 1. Creates isolation dirs (XDG_CACHE_HOME, XDG_CONFIG_HOME, OPENCODE_CONFIG).
122
+ * 2. Writes opencode.json with BENCH_OPENCODE_INVARIANTS + optional provider.
123
+ * 3. Writes $XDG_CONFIG_HOME/akm/config.json so the akm CLI and any plugin
124
+ * find the correct stash via `akm config get stashDir`.
125
+ * 4. Copies the pre-built FTS5 index into XDG_CACHE_HOME, or re-indexes as
126
+ * fallback if no pre-built cache is available.
127
+ *
128
+ * Throws `BenchConfigError` for model prefix / provider mismatches.
129
+ */
130
+ export function setupBenchEnvironment(params) {
131
+ const { model, arm, stashDir: rawStashDir, indexCacheHome, providers, dryRun = false, warnings = [] } = params;
132
+ // Synthetic arm must never carry a stash.
133
+ const stashDir = arm === "synthetic" ? undefined : rawStashDir;
134
+ // Safety: refuse to run local-provider models without a providers config.
135
+ const modelParts = model.split("/");
136
+ if (modelParts.length >= 2 && !BUILTIN_CLOUD_PREFIXES.has(modelParts[0]) && !providers) {
137
+ throw new BenchConfigError(`model "${model}" uses custom provider prefix "${modelParts[0]}" — supply opencodeProviders to avoid silent fallback to a cloud model`, false);
138
+ }
139
+ const dirs = createIsolationDirs(stashDir);
140
+ const env = buildIsolatedEnv(dirs, model);
141
+ // Synthetic arm must not carry AKM_STASH_DIR even if createIsolationDirs
142
+ // somehow set it (recurrence guard for the #243 fixup pattern).
143
+ if (arm === "synthetic") {
144
+ delete env.AKM_STASH_DIR;
145
+ }
146
+ // Write opencode.json with invariants + optional provider block.
147
+ const result = writeOpencodeJson(dirs.opencodeConfig, model, providers);
148
+ for (const w of result.warnings)
149
+ warnings.push(w);
150
+ // Wire akm config and index only when a real stash is on disk.
151
+ const stashOnDisk = stashDir ? fs.existsSync(stashDir) : false;
152
+ if (stashDir && stashOnDisk && !dryRun) {
153
+ // akm config: so `akm config get stashDir` returns the fixture path
154
+ // and the akm-opencode plugin (if somehow re-enabled) injects the right
155
+ // AKM_STASH_DIR into the bash-tool env via its shell.env hook.
156
+ const akmConfigDir = path.join(dirs.configHome, "akm");
157
+ fs.mkdirSync(akmConfigDir, { recursive: true });
158
+ fs.writeFileSync(path.join(akmConfigDir, "config.json"), JSON.stringify({ stashDir }), { mode: 0o600 });
159
+ // FTS5 index: fast-path copy from pre-built cache; slow-path re-index.
160
+ const destAkmDir = path.join(dirs.cacheHome, "akm");
161
+ fs.mkdirSync(destAkmDir, { recursive: true });
162
+ if (indexCacheHome) {
163
+ const srcAkmDir = path.join(indexCacheHome, "akm");
164
+ try {
165
+ for (const entry of fs.readdirSync(srcAkmDir)) {
166
+ fs.copyFileSync(path.join(srcAkmDir, entry), path.join(destAkmDir, entry));
167
+ }
168
+ }
169
+ catch (err) {
170
+ warnings.push(`index copy failed, falling back to re-index: ${err.message}`);
171
+ _runAkmIndex(stashDir, env);
172
+ }
173
+ }
174
+ else {
175
+ _runAkmIndex(stashDir, env);
176
+ }
177
+ }
178
+ return {
179
+ dirs,
180
+ env,
181
+ teardown() {
182
+ try {
183
+ fs.rmSync(dirs.root, { recursive: true, force: true });
184
+ }
185
+ catch {
186
+ /* swallow */
187
+ }
188
+ },
189
+ };
190
+ }
191
/**
 * Run `akm index --full` synchronously with `stashDir` as the working
 * directory, using the sanitized base environment overlaid with `env`.
 *
 * NOTE: the spawn result is discarded, so a failed index run is not
 * reported to the caller.
 */
function _runAkmIndex(stashDir, env) {
    const cliEntry = path.resolve(__dirname, "..", "..", "src", "cli.ts");
    // Keys in `env` override the sanitized base values.
    const childEnv = { ...buildSanitizedEnvSource(), ...env };
    const indexCommand = ["bun", "run", cliEntry, "index", "--full"];
    Bun.spawnSync({
        cmd: indexCommand,
        cwd: stashDir,
        env: childEnv,
        stdout: "pipe",
        stderr: "pipe",
    });
}
201
+ // ── validateFixtureCorpus ────────────────────────────────────────────────────
202
+ const FIXTURES_ROOT = path.resolve(__dirname, "..", "fixtures", "stashes");
203
/**
 * Check which fixtures referenced by `tasks` exist on disk, where "exists"
 * means `<FIXTURES_ROOT>/<stash>/MANIFEST.json` is present.
 *
 * Intended to be called at bench startup, before any work items are
 * created, so missing fixtures surface as named failures up front rather
 * than as per-seed `harness_error` results at run time.
 *
 * @param tasks iterable of `{ id, stash }` records
 * @returns `{ valid, missing }` — `valid` is a Set of fixture names that
 *   have a manifest; `missing` is a Map from each absent fixture name to
 *   the ids of the tasks that reference it
 */
export function validateFixtureCorpus(tasks) {
    // Group task ids under the fixture they reference (one check per fixture).
    const taskIdsByFixture = new Map();
    for (const { stash, id } of tasks) {
        const ids = taskIdsByFixture.get(stash);
        if (ids) {
            ids.push(id);
        }
        else {
            taskIdsByFixture.set(stash, [id]);
        }
    }
    const valid = new Set();
    const missing = new Map();
    taskIdsByFixture.forEach((taskIds, fixture) => {
        const hasManifest = fs.existsSync(path.join(FIXTURES_ROOT, fixture, "MANIFEST.json"));
        if (hasManifest) {
            valid.add(fixture);
            return;
        }
        missing.set(fixture, taskIds);
    });
    return { valid, missing };
}
231
+ // Re-export from driver for consumers that previously imported from there.
232
+ export { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
233
+ export { benchMkdtemp };
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Tests for environment.ts — writeOpencodeJson, validateFixtureCorpus,
3
+ * BENCH_OPENCODE_INVARIANTS, and setupBenchEnvironment (dryRun mode).
4
+ */
5
+ import { afterAll, beforeAll, describe, expect, test } from "bun:test";
6
+ import fs from "node:fs";
7
+ import path from "node:path";
8
+ import { BENCH_OPENCODE_INVARIANTS, BUILTIN_CLOUD_PREFIXES, setupBenchEnvironment, validateFixtureCorpus, writeOpencodeJson, } from "./environment";
9
+ import { benchMkdtemp } from "./tmp";
10
+ // ── writeOpencodeJson ────────────────────────────────────────────────────────
11
+ describe("writeOpencodeJson", () => {
12
+ let tmp;
13
+ beforeAll(() => {
14
+ tmp = benchMkdtemp("bench-env-test-");
15
+ });
16
+ afterAll(() => {
17
+ fs.rmSync(tmp, { recursive: true, force: true });
18
+ });
19
+ test("always writes plugin:[] and permission block (isolation invariants)", () => {
20
+ const dir = path.join(tmp, "invariants");
21
+ fs.mkdirSync(dir, { recursive: true });
22
+ writeOpencodeJson(dir, "anthropic/claude-opus-4-7");
23
+ const config = JSON.parse(fs.readFileSync(path.join(dir, "opencode.json"), "utf8"));
24
+ expect(config.plugin).toEqual([]);
25
+ expect(config.permission?.bash).toBe("allow");
26
+ expect(config.permission?.edit).toBe("allow");
27
+ expect(config.permission?.write).toBe("allow");
28
+ });
29
+ test("writes provider block when model resolves in providers map", () => {
30
+ const dir = path.join(tmp, "with-provider");
31
+ fs.mkdirSync(dir, { recursive: true });
32
+ const providers = {
33
+ source: "/fake/providers.json",
34
+ providers: { myprov: { npm: "@ai-sdk/openai-compatible", name: "My Provider" } },
35
+ };
36
+ const result = writeOpencodeJson(dir, "myprov/my-model", providers);
37
+ expect(result.providerKey).toBe("myprov");
38
+ expect(result.warnings).toHaveLength(0);
39
+ const config = JSON.parse(fs.readFileSync(path.join(dir, "opencode.json"), "utf8"));
40
+ expect(config.provider?.myprov).toBeDefined();
41
+ expect(config.model).toBe("myprov/my-model");
42
+ });
43
+ test("writes stub (no provider block) and returns warning for built-in cloud model not in providers map", () => {
44
+ const dir = path.join(tmp, "cloud-stub");
45
+ fs.mkdirSync(dir, { recursive: true });
46
+ const providers = {
47
+ source: "/fake/providers.json",
48
+ providers: { otherprov: {} },
49
+ };
50
+ const result = writeOpencodeJson(dir, "opencode/big-pickle", providers);
51
+ expect(result.providerKey).toBeUndefined();
52
+ expect(result.warnings.length).toBeGreaterThan(0);
53
+ const config = JSON.parse(fs.readFileSync(path.join(dir, "opencode.json"), "utf8"));
54
+ expect(config.provider).toBeUndefined();
55
+ // Invariants still present.
56
+ expect(config.plugin).toEqual([]);
57
+ });
58
test("throws BenchConfigError for local-prefix model not found in providers map", () => {
    const dir = path.join(tmp, "local-prefix-missing");
    fs.mkdirSync(dir, { recursive: true });
    const providers = {
        source: "/fake/providers.json",
        providers: { otherprov: {} },
    };
    // "shredder" is not in BUILTIN_CLOUD_PREFIXES and not in the providers map.
    expect(() => writeOpencodeJson(dir, "shredder/qwen3.5-9b", providers)).toThrow(/local prefix/);
    // The function must throw BEFORE writing anything: a stub without a
    // provider block would be a silent cloud-fallback config. The directory
    // was created empty above, so the file must still be absent.
    expect(fs.existsSync(path.join(dir, "opencode.json"))).toBe(false);
});
70
+ test("writes provider block for local-prefix model that IS found in providers map", () => {
71
+ const dir = path.join(tmp, "local-prefix-found");
72
+ fs.mkdirSync(dir, { recursive: true });
73
+ const providers = {
74
+ source: "/fake/providers.json",
75
+ providers: { shredder: { npm: "@ai-sdk/openai-compatible", name: "Shredder" } },
76
+ };
77
+ const result = writeOpencodeJson(dir, "shredder/qwen3.5-9b", providers);
78
+ expect(result.providerKey).toBe("shredder");
79
+ expect(result.warnings).toHaveLength(0);
80
+ const config = JSON.parse(fs.readFileSync(path.join(dir, "opencode.json"), "utf8"));
81
+ expect(config.provider?.shredder).toBeDefined();
82
+ expect(config.model).toBe("shredder/qwen3.5-9b");
83
+ });
84
+ test("mode 0o600 (not world-readable)", () => {
85
+ const dir = path.join(tmp, "mode-check");
86
+ fs.mkdirSync(dir, { recursive: true });
87
+ writeOpencodeJson(dir, "anthropic/claude-opus-4-7");
88
+ const stat = fs.statSync(path.join(dir, "opencode.json"));
89
+ expect(stat.mode & 0o777).toBe(0o600);
90
+ });
91
+ });
92
+ // ── BENCH_OPENCODE_INVARIANTS ────────────────────────────────────────────────
93
+ describe("BENCH_OPENCODE_INVARIANTS", () => {
94
+ test("plugin is an empty readonly array", () => {
95
+ expect(BENCH_OPENCODE_INVARIANTS.plugin).toEqual([]);
96
+ expect(Array.isArray(BENCH_OPENCODE_INVARIANTS.plugin)).toBe(true);
97
+ });
98
+ test("permission.bash is 'allow'", () => {
99
+ expect(BENCH_OPENCODE_INVARIANTS.permission.bash).toBe("allow");
100
+ });
101
+ });
102
+ // ── BUILTIN_CLOUD_PREFIXES ───────────────────────────────────────────────────
103
+ describe("BUILTIN_CLOUD_PREFIXES", () => {
104
+ test("includes anthropic, openai, opencode", () => {
105
+ expect(BUILTIN_CLOUD_PREFIXES.has("anthropic")).toBe(true);
106
+ expect(BUILTIN_CLOUD_PREFIXES.has("openai")).toBe(true);
107
+ expect(BUILTIN_CLOUD_PREFIXES.has("opencode")).toBe(true);
108
+ });
109
+ test("does not include custom provider prefixes like 'shredder' or 'don'", () => {
110
+ expect(BUILTIN_CLOUD_PREFIXES.has("shredder")).toBe(false);
111
+ expect(BUILTIN_CLOUD_PREFIXES.has("don")).toBe(false);
112
+ });
113
+ });
114
+ // ── validateFixtureCorpus ────────────────────────────────────────────────────
115
+ describe("validateFixtureCorpus", () => {
116
+ test("returns known fixtures as valid", () => {
117
+ const tasks = [{ id: "az-cli/foo", stash: "az-cli" }];
118
+ const { valid, missing } = validateFixtureCorpus(tasks);
119
+ expect(valid.has("az-cli")).toBe(true);
120
+ expect(missing.size).toBe(0);
121
+ });
122
+ test("returns nonexistent fixture as missing with its task IDs", () => {
123
+ const tasks = [
124
+ { id: "ghost/task-1", stash: "ghost-fixture" },
125
+ { id: "ghost/task-2", stash: "ghost-fixture" },
126
+ ];
127
+ const { valid, missing } = validateFixtureCorpus(tasks);
128
+ expect(valid.has("ghost-fixture")).toBe(false);
129
+ expect(missing.has("ghost-fixture")).toBe(true);
130
+ expect(missing.get("ghost-fixture")).toEqual(["ghost/task-1", "ghost/task-2"]);
131
+ });
132
+ test("handles empty task list", () => {
133
+ const { valid, missing } = validateFixtureCorpus([]);
134
+ expect(valid.size).toBe(0);
135
+ expect(missing.size).toBe(0);
136
+ });
137
+ test("deduplicates fixture names across tasks", () => {
138
+ const tasks = [
139
+ { id: "az-cli/a", stash: "az-cli" },
140
+ { id: "az-cli/b", stash: "az-cli" },
141
+ { id: "az-cli/c", stash: "az-cli" },
142
+ ];
143
+ const { valid } = validateFixtureCorpus(tasks);
144
+ expect(valid.size).toBe(1);
145
+ });
146
+ });
147
+ // ── setupBenchEnvironment (dryRun) ───────────────────────────────────────────
148
+ describe("setupBenchEnvironment dryRun", () => {
149
+ test("creates isolation dirs and writes opencode.json with invariants", () => {
150
+ const env = setupBenchEnvironment({
151
+ model: "anthropic/claude-opus-4-7",
152
+ arm: "akm",
153
+ dryRun: true,
154
+ });
155
+ try {
156
+ expect(fs.existsSync(env.dirs.cacheHome)).toBe(true);
157
+ expect(fs.existsSync(env.dirs.configHome)).toBe(true);
158
+ expect(fs.existsSync(env.dirs.opencodeConfig)).toBe(true);
159
+ const config = JSON.parse(fs.readFileSync(path.join(env.dirs.opencodeConfig, "opencode.json"), "utf8"));
160
+ expect(config.plugin).toEqual([]);
161
+ expect(config.permission?.bash).toBe("allow");
162
+ }
163
+ finally {
164
+ env.teardown();
165
+ }
166
+ });
167
+ test("throws for custom provider prefix without providers config", () => {
168
+ expect(() => setupBenchEnvironment({
169
+ model: "shredder/qwen/qwen3.5-9b",
170
+ arm: "akm",
171
+ dryRun: true,
172
+ })).toThrow(/custom provider prefix/);
173
+ });
174
+ test("synthetic arm never sets AKM_STASH_DIR", () => {
175
+ const env = setupBenchEnvironment({
176
+ model: "anthropic/claude-opus-4-7",
177
+ arm: "synthetic",
178
+ stashDir: "/some/stash",
179
+ dryRun: true,
180
+ });
181
+ try {
182
+ expect(env.env.AKM_STASH_DIR).toBeUndefined();
183
+ }
184
+ finally {
185
+ env.teardown();
186
+ }
187
+ });
188
+ test("teardown removes the isolation dirs", () => {
189
+ const env = setupBenchEnvironment({
190
+ model: "anthropic/claude-opus-4-7",
191
+ arm: "akm",
192
+ dryRun: true,
193
+ });
194
+ const { root } = env.dirs;
195
+ expect(fs.existsSync(root)).toBe(true);
196
+ env.teardown();
197
+ expect(fs.existsSync(root)).toBe(false);
198
+ });
199
+ });