akm-cli 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -8
- package/dist/tests/add-website-source.test.js +0 -119
- package/dist/tests/agent/agent-config-loader.test.js +0 -70
- package/dist/tests/agent/agent-config.test.js +0 -221
- package/dist/tests/agent/agent-detect.test.js +0 -100
- package/dist/tests/agent/agent-spawn.test.js +0 -234
- package/dist/tests/agent-output.test.js +0 -186
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
- package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
- package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
- package/dist/tests/asset-ref.test.js +0 -192
- package/dist/tests/asset-registry.test.js +0 -103
- package/dist/tests/asset-spec.test.js +0 -241
- package/dist/tests/bench/attribution.test.js +0 -996
- package/dist/tests/bench/cleanup-sigint.test.js +0 -83
- package/dist/tests/bench/cleanup.js +0 -234
- package/dist/tests/bench/cleanup.test.js +0 -166
- package/dist/tests/bench/cli.js +0 -1018
- package/dist/tests/bench/cli.test.js +0 -445
- package/dist/tests/bench/compare.test.js +0 -556
- package/dist/tests/bench/corpus.js +0 -317
- package/dist/tests/bench/corpus.test.js +0 -258
- package/dist/tests/bench/doctor.js +0 -525
- package/dist/tests/bench/driver.js +0 -401
- package/dist/tests/bench/driver.test.js +0 -584
- package/dist/tests/bench/environment.js +0 -233
- package/dist/tests/bench/environment.test.js +0 -199
- package/dist/tests/bench/evolve-metrics.js +0 -179
- package/dist/tests/bench/evolve-metrics.test.js +0 -187
- package/dist/tests/bench/evolve.js +0 -647
- package/dist/tests/bench/evolve.test.js +0 -624
- package/dist/tests/bench/failure-modes.test.js +0 -349
- package/dist/tests/bench/feedback-integrity.test.js +0 -457
- package/dist/tests/bench/leakage.test.js +0 -228
- package/dist/tests/bench/learning-curve.test.js +0 -134
- package/dist/tests/bench/metrics.js +0 -2395
- package/dist/tests/bench/metrics.test.js +0 -1150
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
- package/dist/tests/bench/opencode-config.js +0 -194
- package/dist/tests/bench/opencode-config.test.js +0 -370
- package/dist/tests/bench/report.js +0 -1885
- package/dist/tests/bench/report.test.js +0 -1038
- package/dist/tests/bench/run-config.js +0 -355
- package/dist/tests/bench/run-config.test.js +0 -298
- package/dist/tests/bench/run-curate-test.js +0 -32
- package/dist/tests/bench/run-failing-tasks.js +0 -56
- package/dist/tests/bench/run-full-bench.js +0 -51
- package/dist/tests/bench/run-items36-targeted.js +0 -69
- package/dist/tests/bench/run-nano-quick.js +0 -42
- package/dist/tests/bench/run-waveg-targeted.js +0 -62
- package/dist/tests/bench/runner.js +0 -699
- package/dist/tests/bench/runner.test.js +0 -958
- package/dist/tests/bench/search-bridge.test.js +0 -331
- package/dist/tests/bench/tmp.js +0 -131
- package/dist/tests/bench/trajectory.js +0 -116
- package/dist/tests/bench/trajectory.test.js +0 -127
- package/dist/tests/bench/verifier.js +0 -114
- package/dist/tests/bench/verifier.test.js +0 -118
- package/dist/tests/bench/workflow-evaluator.js +0 -557
- package/dist/tests/bench/workflow-evaluator.test.js +0 -421
- package/dist/tests/bench/workflow-spec.js +0 -345
- package/dist/tests/bench/workflow-spec.test.js +0 -363
- package/dist/tests/bench/workflow-trace.js +0 -472
- package/dist/tests/bench/workflow-trace.test.js +0 -254
- package/dist/tests/benchmark-search-quality.js +0 -536
- package/dist/tests/benchmark-suite.js +0 -1441
- package/dist/tests/capture-cli.test.js +0 -112
- package/dist/tests/cli-errors.test.js +0 -204
- package/dist/tests/commands/events.test.js +0 -370
- package/dist/tests/commands/history.test.js +0 -418
- package/dist/tests/commands/import.test.js +0 -103
- package/dist/tests/commands/proposal-cli.test.js +0 -209
- package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
- package/dist/tests/commands/remember.test.js +0 -97
- package/dist/tests/commands/scope-flags.test.js +0 -300
- package/dist/tests/commands/search.test.js +0 -537
- package/dist/tests/commands/show-indexer-parity.test.js +0 -117
- package/dist/tests/commands/show.test.js +0 -294
- package/dist/tests/common.test.js +0 -266
- package/dist/tests/completions.test.js +0 -142
- package/dist/tests/config-cli.test.js +0 -193
- package/dist/tests/config-llm-features.test.js +0 -139
- package/dist/tests/config.test.js +0 -569
- package/dist/tests/contracts/migration-baseline.test.js +0 -43
- package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
- package/dist/tests/contracts/spec-helpers.js +0 -46
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
- package/dist/tests/core/write-source.test.js +0 -366
- package/dist/tests/curate-command.test.js +0 -87
- package/dist/tests/db-scoring.test.js +0 -201
- package/dist/tests/db.test.js +0 -654
- package/dist/tests/distill-cli-flag.test.js +0 -208
- package/dist/tests/distill.test.js +0 -515
- package/dist/tests/docker-install.test.js +0 -120
- package/dist/tests/e2e.test.js +0 -1419
- package/dist/tests/embedder.test.js +0 -340
- package/dist/tests/embedding-model-config.test.js +0 -379
- package/dist/tests/feedback-command.test.js +0 -172
- package/dist/tests/file-context.test.js +0 -552
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
- package/dist/tests/fixtures/stashes/load.js +0 -166
- package/dist/tests/fixtures/stashes/load.test.js +0 -97
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
- package/dist/tests/frontmatter.test.js +0 -190
- package/dist/tests/fts-field-weighting.test.js +0 -254
- package/dist/tests/fuzzy-search.test.js +0 -230
- package/dist/tests/git-provider-clone.test.js +0 -45
- package/dist/tests/github.test.js +0 -161
- package/dist/tests/graph-boost-ranking.test.js +0 -305
- package/dist/tests/graph-extraction.test.js +0 -282
- package/dist/tests/helpers/usage-events.js +0 -8
- package/dist/tests/index-pass-llm.test.js +0 -161
- package/dist/tests/indexer.test.js +0 -570
- package/dist/tests/info-command.test.js +0 -166
- package/dist/tests/init.test.js +0 -69
- package/dist/tests/install-script.test.js +0 -246
- package/dist/tests/integration/agent-real-profile.test.js +0 -94
- package/dist/tests/issue-36-repro.test.js +0 -304
- package/dist/tests/issues-191-194.test.js +0 -160
- package/dist/tests/lesson-lint.test.js +0 -111
- package/dist/tests/llm-client.test.js +0 -115
- package/dist/tests/llm-feature-gate.test.js +0 -151
- package/dist/tests/llm.test.js +0 -139
- package/dist/tests/lockfile.test.js +0 -216
- package/dist/tests/manifest.test.js +0 -205
- package/dist/tests/markdown.test.js +0 -126
- package/dist/tests/matchers-unit.test.js +0 -189
- package/dist/tests/memory-inference.test.js +0 -299
- package/dist/tests/merge-scoring.test.js +0 -136
- package/dist/tests/metadata.test.js +0 -313
- package/dist/tests/migration-help.test.js +0 -89
- package/dist/tests/origin-resolve.test.js +0 -124
- package/dist/tests/output-baseline.test.js +0 -218
- package/dist/tests/output-shapes-unit.test.js +0 -478
- package/dist/tests/parallel-search.test.js +0 -272
- package/dist/tests/parameter-metadata.test.js +0 -365
- package/dist/tests/paths.test.js +0 -177
- package/dist/tests/progressive-disclosure.test.js +0 -280
- package/dist/tests/proposals.test.js +0 -279
- package/dist/tests/proposed-quality.test.js +0 -271
- package/dist/tests/provider-registry.test.js +0 -32
- package/dist/tests/ranking-regression.test.js +0 -548
- package/dist/tests/reflect-propose.test.js +0 -455
- package/dist/tests/registry-build-index.test.js +0 -394
- package/dist/tests/registry-cli.test.js +0 -290
- package/dist/tests/registry-index-v2.test.js +0 -430
- package/dist/tests/registry-install.test.js +0 -728
- package/dist/tests/registry-providers/parity.test.js +0 -189
- package/dist/tests/registry-providers/skills-sh.test.js +0 -309
- package/dist/tests/registry-providers/static-index.test.js +0 -238
- package/dist/tests/registry-resolve.test.js +0 -126
- package/dist/tests/registry-search.test.js +0 -923
- package/dist/tests/remember-frontmatter.test.js +0 -378
- package/dist/tests/remember-unit.test.js +0 -123
- package/dist/tests/ripgrep-install.test.js +0 -251
- package/dist/tests/ripgrep-resolve.test.js +0 -108
- package/dist/tests/ripgrep.test.js +0 -163
- package/dist/tests/save-command.test.js +0 -94
- package/dist/tests/save-trust-qa-fixes.test.js +0 -270
- package/dist/tests/scoring-pipeline.test.js +0 -648
- package/dist/tests/search-include-proposed-cli.test.js +0 -118
- package/dist/tests/self-update.test.js +0 -442
- package/dist/tests/semantic-search-e2e.test.js +0 -512
- package/dist/tests/semantic-status.test.js +0 -471
- package/dist/tests/setup-run.integration.js +0 -877
- package/dist/tests/setup-wizard.test.js +0 -198
- package/dist/tests/setup.test.js +0 -131
- package/dist/tests/source-add.test.js +0 -11
- package/dist/tests/source-clone.test.js +0 -254
- package/dist/tests/source-manage.test.js +0 -366
- package/dist/tests/source-providers/filesystem.test.js +0 -82
- package/dist/tests/source-providers/git.test.js +0 -252
- package/dist/tests/source-providers/website.test.js +0 -128
- package/dist/tests/source-qa-fixes.test.js +0 -286
- package/dist/tests/source-registry.test.js +0 -350
- package/dist/tests/source-resolve.test.js +0 -100
- package/dist/tests/source-source.test.js +0 -281
- package/dist/tests/source.test.js +0 -533
- package/dist/tests/tar-utils-scan.test.js +0 -73
- package/dist/tests/toggle-components.test.js +0 -73
- package/dist/tests/usage-telemetry.test.js +0 -265
- package/dist/tests/utility-scoring.test.js +0 -558
- package/dist/tests/vault-load-error.test.js +0 -78
- package/dist/tests/vault-qa-fixes.test.js +0 -194
- package/dist/tests/vault.test.js +0 -429
- package/dist/tests/vector-search.test.js +0 -608
- package/dist/tests/walker.test.js +0 -252
- package/dist/tests/wave2-cluster-bc.test.js +0 -228
- package/dist/tests/wave2-cluster-d.test.js +0 -180
- package/dist/tests/wave2-cluster-e.test.js +0 -179
- package/dist/tests/wiki-qa-fixes.test.js +0 -270
- package/dist/tests/wiki.test.js +0 -529
- package/dist/tests/workflow-cli.test.js +0 -271
- package/dist/tests/workflow-markdown.test.js +0 -171
- package/dist/tests/workflow-path-escape.test.js +0 -132
- package/dist/tests/workflow-qa-fixes.test.js +0 -395
- package/dist/tests/workflows/indexer-rejection.test.js +0 -213
- /package/dist/{src/cli.js → cli.js} +0 -0
- /package/dist/{src/commands → commands}/completions.js +0 -0
- /package/dist/{src/commands → commands}/config-cli.js +0 -0
- /package/dist/{src/commands → commands}/curate.js +0 -0
- /package/dist/{src/commands → commands}/distill.js +0 -0
- /package/dist/{src/commands → commands}/events.js +0 -0
- /package/dist/{src/commands → commands}/history.js +0 -0
- /package/dist/{src/commands → commands}/info.js +0 -0
- /package/dist/{src/commands → commands}/init.js +0 -0
- /package/dist/{src/commands → commands}/install-audit.js +0 -0
- /package/dist/{src/commands → commands}/installed-stashes.js +0 -0
- /package/dist/{src/commands → commands}/migration-help.js +0 -0
- /package/dist/{src/commands → commands}/proposal.js +0 -0
- /package/dist/{src/commands → commands}/propose.js +0 -0
- /package/dist/{src/commands → commands}/reflect.js +0 -0
- /package/dist/{src/commands → commands}/registry-search.js +0 -0
- /package/dist/{src/commands → commands}/remember.js +0 -0
- /package/dist/{src/commands → commands}/search.js +0 -0
- /package/dist/{src/commands → commands}/self-update.js +0 -0
- /package/dist/{src/commands → commands}/show.js +0 -0
- /package/dist/{src/commands → commands}/source-add.js +0 -0
- /package/dist/{src/commands → commands}/source-clone.js +0 -0
- /package/dist/{src/commands → commands}/source-manage.js +0 -0
- /package/dist/{src/commands → commands}/vault.js +0 -0
- /package/dist/{src/core → core}/asset-ref.js +0 -0
- /package/dist/{src/core → core}/asset-registry.js +0 -0
- /package/dist/{src/core → core}/asset-spec.js +0 -0
- /package/dist/{src/core → core}/common.js +0 -0
- /package/dist/{src/core → core}/config.js +0 -0
- /package/dist/{src/core → core}/errors.js +0 -0
- /package/dist/{src/core → core}/events.js +0 -0
- /package/dist/{src/core → core}/frontmatter.js +0 -0
- /package/dist/{src/core → core}/lesson-lint.js +0 -0
- /package/dist/{src/core → core}/markdown.js +0 -0
- /package/dist/{src/core → core}/paths.js +0 -0
- /package/dist/{src/core → core}/proposals.js +0 -0
- /package/dist/{src/core → core}/warn.js +0 -0
- /package/dist/{src/core → core}/write-source.js +0 -0
- /package/dist/{src/indexer → indexer}/db-search.js +0 -0
- /package/dist/{src/indexer → indexer}/db.js +0 -0
- /package/dist/{src/indexer → indexer}/file-context.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-extraction.js +0 -0
- /package/dist/{src/indexer → indexer}/indexer.js +0 -0
- /package/dist/{src/indexer → indexer}/manifest.js +0 -0
- /package/dist/{src/indexer → indexer}/matchers.js +0 -0
- /package/dist/{src/indexer → indexer}/memory-inference.js +0 -0
- /package/dist/{src/indexer → indexer}/metadata.js +0 -0
- /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
- /package/dist/{src/indexer → indexer}/search-source.js +0 -0
- /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
- /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
- /package/dist/{src/indexer → indexer}/walker.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
- /package/dist/{src/integrations → integrations}/github.js +0 -0
- /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
- /package/dist/{src/llm → llm}/client.js +0 -0
- /package/dist/{src/llm → llm}/embedder.js +0 -0
- /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
- /package/dist/{src/llm → llm}/embedders/local.js +0 -0
- /package/dist/{src/llm → llm}/embedders/remote.js +0 -0
- /package/dist/{src/llm → llm}/embedders/types.js +0 -0
- /package/dist/{src/llm → llm}/feature-gate.js +0 -0
- /package/dist/{src/llm → llm}/graph-extract.js +0 -0
- /package/dist/{src/llm → llm}/index-passes.js +0 -0
- /package/dist/{src/llm → llm}/memory-infer.js +0 -0
- /package/dist/{src/llm → llm}/metadata-enhance.js +0 -0
- /package/dist/{src/output → output}/cli-hints.js +0 -0
- /package/dist/{src/output → output}/context.js +0 -0
- /package/dist/{src/output → output}/renderers.js +0 -0
- /package/dist/{src/output → output}/shapes.js +0 -0
- /package/dist/{src/output → output}/text.js +0 -0
- /package/dist/{src/registry → registry}/build-index.js +0 -0
- /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
- /package/dist/{src/registry → registry}/factory.js +0 -0
- /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
- /package/dist/{src/registry → registry}/providers/index.js +0 -0
- /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
- /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
- /package/dist/{src/registry → registry}/providers/types.js +0 -0
- /package/dist/{src/registry → registry}/resolve.js +0 -0
- /package/dist/{src/registry → registry}/types.js +0 -0
- /package/dist/{src/setup → setup}/detect.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
- /package/dist/{src/setup → setup}/setup.js +0 -0
- /package/dist/{src/setup → setup}/steps.js +0 -0
- /package/dist/{src/sources → sources}/include.js +0 -0
- /package/dist/{src/sources → sources}/provider-factory.js +0 -0
- /package/dist/{src/sources → sources}/provider.js +0 -0
- /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
- /package/dist/{src/sources → sources}/providers/git.js +0 -0
- /package/dist/{src/sources → sources}/providers/index.js +0 -0
- /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
- /package/dist/{src/sources → sources}/providers/npm.js +0 -0
- /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/website.js +0 -0
- /package/dist/{src/sources → sources}/resolve.js +0 -0
- /package/dist/{src/sources → sources}/types.js +0 -0
- /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
- /package/dist/{src/version.js → version.js} +0 -0
- /package/dist/{src/wiki → wiki}/wiki.js +0 -0
- /package/dist/{src/workflows → workflows}/authoring.js +0 -0
- /package/dist/{src/workflows → workflows}/cli.js +0 -0
- /package/dist/{src/workflows → workflows}/db.js +0 -0
- /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
- /package/dist/{src/workflows → workflows}/parser.js +0 -0
- /package/dist/{src/workflows → workflows}/renderer.js +0 -0
- /package/dist/{src/workflows → workflows}/runs.js +0 -0
- /package/dist/{src/workflows → workflows}/schema.js +0 -0
- /package/dist/{src/workflows → workflows}/validator.js +0 -0
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unit tests for the trajectory parser.
|
|
3
|
-
*/
|
|
4
|
-
import { describe, expect, test } from "bun:test";
|
|
5
|
-
import { computeTrajectory, VERIFIER_STDOUT_SCAN_CAP } from "./trajectory";
|
|
6
|
-
function fakeRun(overrides = {}) {
|
|
7
|
-
return {
|
|
8
|
-
schemaVersion: 1,
|
|
9
|
-
taskId: "x",
|
|
10
|
-
arm: "akm",
|
|
11
|
-
seed: 0,
|
|
12
|
-
model: "m",
|
|
13
|
-
outcome: "pass",
|
|
14
|
-
tokens: { input: 0, output: 0 },
|
|
15
|
-
wallclockMs: 0,
|
|
16
|
-
trajectory: { correctAssetLoaded: null, feedbackRecorded: null },
|
|
17
|
-
events: [],
|
|
18
|
-
verifierStdout: "",
|
|
19
|
-
verifierExitCode: 0,
|
|
20
|
-
assetsLoaded: [],
|
|
21
|
-
...overrides,
|
|
22
|
-
};
|
|
23
|
-
}
|
|
24
|
-
function feedbackEvent() {
|
|
25
|
-
return {
|
|
26
|
-
schemaVersion: 1,
|
|
27
|
-
id: 0,
|
|
28
|
-
ts: "2026-04-27T00:00:00.000Z",
|
|
29
|
-
eventType: "feedback",
|
|
30
|
-
ref: "skill:foo",
|
|
31
|
-
};
|
|
32
|
-
}
|
|
33
|
-
describe("computeTrajectory.correctAssetLoaded", () => {
|
|
34
|
-
test("null when goldRef is missing on the task", () => {
|
|
35
|
-
const traj = computeTrajectory({}, fakeRun({ verifierStdout: "akm show skill:irrelevant" }));
|
|
36
|
-
expect(traj.correctAssetLoaded).toBeNull();
|
|
37
|
-
});
|
|
38
|
-
test("true when verifierStdout contains `akm show <goldRef>`", () => {
|
|
39
|
-
const traj = computeTrajectory({ goldRef: "skill:docker-homelab" }, fakeRun({
|
|
40
|
-
verifierStdout: "tool: akm show skill:docker-homelab\nresult: ok\n",
|
|
41
|
-
}));
|
|
42
|
-
expect(traj.correctAssetLoaded).toBe(true);
|
|
43
|
-
});
|
|
44
|
-
test("true when tool-call JSON form contains the ref", () => {
|
|
45
|
-
const traj = computeTrajectory({ goldRef: "skill:docker-homelab" }, fakeRun({
|
|
46
|
-
verifierStdout: '{"command":"akm","args":["show","skill:docker-homelab"]}',
|
|
47
|
-
}));
|
|
48
|
-
expect(traj.correctAssetLoaded).toBe(true);
|
|
49
|
-
});
|
|
50
|
-
test("false when verifierStdout shows a different ref", () => {
|
|
51
|
-
const traj = computeTrajectory({ goldRef: "skill:docker-homelab" }, fakeRun({ verifierStdout: "akm show skill:az-cli\n" }));
|
|
52
|
-
expect(traj.correctAssetLoaded).toBe(false);
|
|
53
|
-
});
|
|
54
|
-
test("false on empty trace", () => {
|
|
55
|
-
const traj = computeTrajectory({ goldRef: "skill:docker-homelab" }, fakeRun({ verifierStdout: "" }));
|
|
56
|
-
expect(traj.correctAssetLoaded).toBe(false);
|
|
57
|
-
});
|
|
58
|
-
test("true when an event metadata.ref carries the goldRef", () => {
|
|
59
|
-
const event = {
|
|
60
|
-
schemaVersion: 1,
|
|
61
|
-
id: 1,
|
|
62
|
-
ts: "2026-04-27T00:00:00.000Z",
|
|
63
|
-
eventType: "tool_call",
|
|
64
|
-
metadata: { ref: "skill:docker-homelab" },
|
|
65
|
-
};
|
|
66
|
-
const traj = computeTrajectory({ goldRef: "skill:docker-homelab" }, fakeRun({ events: [event] }));
|
|
67
|
-
expect(traj.correctAssetLoaded).toBe(true);
|
|
68
|
-
});
|
|
69
|
-
});
|
|
70
|
-
describe("computeTrajectory.feedbackRecorded", () => {
|
|
71
|
-
test("true when events stream contains a `feedback` event", () => {
|
|
72
|
-
const traj = computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ events: [feedbackEvent()] }));
|
|
73
|
-
expect(traj.feedbackRecorded).toBe(true);
|
|
74
|
-
});
|
|
75
|
-
test("false when events stream is empty", () => {
|
|
76
|
-
const traj = computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ events: [] }));
|
|
77
|
-
expect(traj.feedbackRecorded).toBe(false);
|
|
78
|
-
});
|
|
79
|
-
test("false when events contain other types but no `feedback`", () => {
|
|
80
|
-
const event = {
|
|
81
|
-
schemaVersion: 1,
|
|
82
|
-
id: 0,
|
|
83
|
-
ts: "2026-04-27T00:00:00.000Z",
|
|
84
|
-
eventType: "remember",
|
|
85
|
-
ref: "memory:alpha",
|
|
86
|
-
};
|
|
87
|
-
const traj = computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ events: [event] }));
|
|
88
|
-
expect(traj.feedbackRecorded).toBe(false);
|
|
89
|
-
});
|
|
90
|
-
});
|
|
91
|
-
describe("computeTrajectory verifierStdout cap", () => {
|
|
92
|
-
test("trajectory still computes from the prefix when stdout exceeds the cap, and a warning is recorded", () => {
|
|
93
|
-
// Construct a stdout: prefix has the canonical `akm show` invocation;
|
|
94
|
-
// the rest is GBs-of-junk simulated as a long filler past the cap.
|
|
95
|
-
const ref = "skill:docker-homelab";
|
|
96
|
-
const prefix = `tool: akm show ${ref}\n`;
|
|
97
|
-
const fillerSize = VERIFIER_STDOUT_SCAN_CAP + 1024;
|
|
98
|
-
// Use repeated 'a' so total length comfortably exceeds the cap.
|
|
99
|
-
const filler = "a".repeat(fillerSize);
|
|
100
|
-
const verifierStdout = prefix + filler;
|
|
101
|
-
expect(verifierStdout.length).toBeGreaterThan(VERIFIER_STDOUT_SCAN_CAP);
|
|
102
|
-
const warnings = [];
|
|
103
|
-
const traj = computeTrajectory({ goldRef: ref }, fakeRun({ verifierStdout }), { warnings });
|
|
104
|
-
expect(traj.correctAssetLoaded).toBe(true);
|
|
105
|
-
expect(warnings.length).toBe(1);
|
|
106
|
-
expect(warnings[0]).toContain("verifierStdout truncated");
|
|
107
|
-
expect(warnings[0]).toContain(String(VERIFIER_STDOUT_SCAN_CAP));
|
|
108
|
-
});
|
|
109
|
-
test("no warning when stdout is within the cap", () => {
|
|
110
|
-
const warnings = [];
|
|
111
|
-
computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ verifierStdout: "akm show skill:foo\n" }), { warnings });
|
|
112
|
-
expect(warnings).toEqual([]);
|
|
113
|
-
});
|
|
114
|
-
test("match found in the prefix even though tail mentions the ref past the cap", () => {
|
|
115
|
-
// Prefix has only filler; the gold ref appears only AFTER the cap.
|
|
116
|
-
// The scan should miss it (correctly — the agent's effective behaviour
|
|
117
|
-
// within the budgeted prefix did not include the show call).
|
|
118
|
-
const ref = "skill:never-loaded";
|
|
119
|
-
const filler = "x".repeat(VERIFIER_STDOUT_SCAN_CAP);
|
|
120
|
-
const verifierStdout = `${filler}akm show ${ref}\n`;
|
|
121
|
-
const warnings = [];
|
|
122
|
-
const traj = computeTrajectory({ goldRef: ref }, fakeRun({ verifierStdout }), { warnings });
|
|
123
|
-
expect(traj.correctAssetLoaded).toBe(false);
|
|
124
|
-
expect(warnings.length).toBe(1);
|
|
125
|
-
expect(warnings[0]).toContain("verifierStdout truncated");
|
|
126
|
-
});
|
|
127
|
-
});
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* akm-bench verifier dispatcher (spec §5.3).
|
|
3
|
-
*
|
|
4
|
-
* • `script` — spawn `<taskDir>/verify.sh` with cwd = workspace.
|
|
5
|
-
* • `pytest` — spawn `pytest -q --tb=line` with cwd = workspace.
|
|
6
|
-
* • `regex` — match `expected_match` against `agentStdout`.
|
|
7
|
-
*
|
|
8
|
-
* No LLM-as-judge anywhere. Static dispatch only.
|
|
9
|
-
*
|
|
10
|
-
* Missing runtime (e.g. `pytest` not on PATH) returns exit code 127 with a
|
|
11
|
-
* clear stdout message. The driver maps that to `outcome: "harness_error"`,
|
|
12
|
-
* NOT `fail` — a missing tool is not an agent failure.
|
|
13
|
-
*/
|
|
14
|
-
import fs from "node:fs";
|
|
15
|
-
import path from "node:path";
|
|
16
|
-
function resolveSpawn(config) {
|
|
17
|
-
if (config?.spawn)
|
|
18
|
-
return config.spawn;
|
|
19
|
-
const bun = globalThis.Bun;
|
|
20
|
-
if (!bun?.spawn)
|
|
21
|
-
throw new Error("Bun.spawn unavailable; pass config.spawn");
|
|
22
|
-
return bun.spawn.bind(bun);
|
|
23
|
-
}
|
|
24
|
-
async function readStream(stream) {
|
|
25
|
-
if (!stream)
|
|
26
|
-
return "";
|
|
27
|
-
try {
|
|
28
|
-
return await new Response(stream).text();
|
|
29
|
-
}
|
|
30
|
-
catch {
|
|
31
|
-
return "";
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
async function runProcess(cmd, cwd, spawn) {
|
|
35
|
-
let proc;
|
|
36
|
-
try {
|
|
37
|
-
proc = spawn(cmd, {
|
|
38
|
-
cwd,
|
|
39
|
-
stdout: "pipe",
|
|
40
|
-
stderr: "pipe",
|
|
41
|
-
});
|
|
42
|
-
}
|
|
43
|
-
catch (err) {
|
|
44
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
45
|
-
// ENOENT (binary missing) maps to 127 — the conventional "command not found".
|
|
46
|
-
return {
|
|
47
|
-
exitCode: 127,
|
|
48
|
-
stdout: `verifier failed to spawn: ${message}`,
|
|
49
|
-
};
|
|
50
|
-
}
|
|
51
|
-
const stdoutPromise = readStream(proc.stdout ?? null);
|
|
52
|
-
const stderrPromise = readStream(proc.stderr ?? null);
|
|
53
|
-
let exitCode;
|
|
54
|
-
try {
|
|
55
|
-
exitCode = await proc.exited;
|
|
56
|
-
}
|
|
57
|
-
catch (err) {
|
|
58
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
59
|
-
return {
|
|
60
|
-
exitCode: 127,
|
|
61
|
-
stdout: `verifier exited with error: ${message}`,
|
|
62
|
-
};
|
|
63
|
-
}
|
|
64
|
-
const [stdout, stderr] = await Promise.all([stdoutPromise, stderrPromise]);
|
|
65
|
-
// Combine stdout+stderr so the operator sees the full verifier output.
|
|
66
|
-
const combined = stderr ? `${stdout}\n--- stderr ---\n${stderr}` : stdout;
|
|
67
|
-
return { exitCode, stdout: combined };
|
|
68
|
-
}
|
|
69
|
-
/**
|
|
70
|
-
* Dispatch a verifier run. Each branch maps a `task.yaml` `verifier:` field
|
|
71
|
-
* onto a deterministic check.
|
|
72
|
-
*/
|
|
73
|
-
export async function runVerifier(taskDir, workspace, kind, config) {
|
|
74
|
-
if (kind === "script") {
|
|
75
|
-
const script = path.join(taskDir, "verify.sh");
|
|
76
|
-
if (!fs.existsSync(script)) {
|
|
77
|
-
return { exitCode: 127, stdout: `verify.sh not found at ${script}` };
|
|
78
|
-
}
|
|
79
|
-
return runProcess(["bash", script], workspace, resolveSpawn(config));
|
|
80
|
-
}
|
|
81
|
-
if (kind === "pytest") {
|
|
82
|
-
// Test files live at <taskDir>/tests/, not inside the workspace copy.
|
|
83
|
-
// Pass the absolute path so pytest discovers them while running with
|
|
84
|
-
// cwd=workspace (which lets relative paths like pathlib.Path("file.yml") work).
|
|
85
|
-
const testsDir = path.join(taskDir, "tests");
|
|
86
|
-
const testArgs = fs.existsSync(testsDir) ? [testsDir] : [];
|
|
87
|
-
return runProcess(["pytest", "-q", "--tb=line", ...testArgs], workspace, resolveSpawn(config));
|
|
88
|
-
}
|
|
89
|
-
if (kind === "regex") {
|
|
90
|
-
const pattern = config?.expectedMatch;
|
|
91
|
-
const input = config?.agentStdout ?? "";
|
|
92
|
-
if (!pattern) {
|
|
93
|
-
return {
|
|
94
|
-
exitCode: 127,
|
|
95
|
-
stdout: 'regex verifier requires "expected_match" in task.yaml',
|
|
96
|
-
};
|
|
97
|
-
}
|
|
98
|
-
let regex;
|
|
99
|
-
try {
|
|
100
|
-
regex = new RegExp(pattern);
|
|
101
|
-
}
|
|
102
|
-
catch (err) {
|
|
103
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
104
|
-
return { exitCode: 127, stdout: `invalid regex: ${message}` };
|
|
105
|
-
}
|
|
106
|
-
const matched = regex.test(input);
|
|
107
|
-
return {
|
|
108
|
-
exitCode: matched ? 0 : 1,
|
|
109
|
-
stdout: matched ? `regex match: ${pattern}` : `regex did not match: ${pattern}`,
|
|
110
|
-
};
|
|
111
|
-
}
|
|
112
|
-
// Compiler should refuse to land an unknown kind; runtime guard is belt-and-braces.
|
|
113
|
-
return { exitCode: 127, stdout: `unknown verifier kind: ${String(kind)}` };
|
|
114
|
-
}
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unit tests for the verifier dispatcher. Covers each of the three
|
|
3
|
-
* verifier kinds plus the missing-pytest graceful-127 path.
|
|
4
|
-
*/
|
|
5
|
-
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
6
|
-
import fs from "node:fs";
|
|
7
|
-
import path from "node:path";
|
|
8
|
-
import { benchMkdtemp } from "./tmp";
|
|
9
|
-
import { runVerifier } from "./verifier";
|
|
10
|
-
let scratch;
|
|
11
|
-
beforeAll(() => {
|
|
12
|
-
scratch = benchMkdtemp("bench-verifier-test-");
|
|
13
|
-
});
|
|
14
|
-
afterAll(() => {
|
|
15
|
-
fs.rmSync(scratch, { recursive: true, force: true });
|
|
16
|
-
});
|
|
17
|
-
function fakeSpawn(exitCode, stdout = "", stderr = "", throwSync) {
|
|
18
|
-
return (_cmd, _options) => {
|
|
19
|
-
if (throwSync)
|
|
20
|
-
throw throwSync;
|
|
21
|
-
const proc = {
|
|
22
|
-
exitCode,
|
|
23
|
-
exited: Promise.resolve(exitCode),
|
|
24
|
-
stdout: stdout
|
|
25
|
-
? new ReadableStream({
|
|
26
|
-
start(controller) {
|
|
27
|
-
controller.enqueue(new TextEncoder().encode(stdout));
|
|
28
|
-
controller.close();
|
|
29
|
-
},
|
|
30
|
-
})
|
|
31
|
-
: null,
|
|
32
|
-
stderr: stderr
|
|
33
|
-
? new ReadableStream({
|
|
34
|
-
start(controller) {
|
|
35
|
-
controller.enqueue(new TextEncoder().encode(stderr));
|
|
36
|
-
controller.close();
|
|
37
|
-
},
|
|
38
|
-
})
|
|
39
|
-
: null,
|
|
40
|
-
stdin: null,
|
|
41
|
-
kill() {
|
|
42
|
-
/* noop */
|
|
43
|
-
},
|
|
44
|
-
};
|
|
45
|
-
return proc;
|
|
46
|
-
};
|
|
47
|
-
}
|
|
48
|
-
describe("runVerifier — script", () => {
|
|
49
|
-
test("returns exit 0 when verify.sh succeeds", async () => {
|
|
50
|
-
const taskDir = path.join(scratch, "script-pass");
|
|
51
|
-
fs.mkdirSync(taskDir);
|
|
52
|
-
fs.writeFileSync(path.join(taskDir, "verify.sh"), "");
|
|
53
|
-
const workspace = fs.mkdtempSync(path.join(scratch, "ws-"));
|
|
54
|
-
const result = await runVerifier(taskDir, workspace, "script", {
|
|
55
|
-
spawn: fakeSpawn(0, "ok"),
|
|
56
|
-
});
|
|
57
|
-
expect(result.exitCode).toBe(0);
|
|
58
|
-
expect(result.stdout).toContain("ok");
|
|
59
|
-
});
|
|
60
|
-
test("returns 127 when verify.sh is missing", async () => {
|
|
61
|
-
const taskDir = path.join(scratch, "script-missing");
|
|
62
|
-
fs.mkdirSync(taskDir);
|
|
63
|
-
const workspace = fs.mkdtempSync(path.join(scratch, "ws-"));
|
|
64
|
-
const result = await runVerifier(taskDir, workspace, "script", {
|
|
65
|
-
spawn: fakeSpawn(0),
|
|
66
|
-
});
|
|
67
|
-
expect(result.exitCode).toBe(127);
|
|
68
|
-
expect(result.stdout).toContain("verify.sh not found");
|
|
69
|
-
});
|
|
70
|
-
});
|
|
71
|
-
describe("runVerifier — regex", () => {
|
|
72
|
-
test("returns 0 when expected_match matches agent stdout", async () => {
|
|
73
|
-
const result = await runVerifier(scratch, scratch, "regex", {
|
|
74
|
-
agentStdout: "the agent printed: hello world",
|
|
75
|
-
expectedMatch: "hello",
|
|
76
|
-
});
|
|
77
|
-
expect(result.exitCode).toBe(0);
|
|
78
|
-
});
|
|
79
|
-
test("returns 1 when expected_match does not match", async () => {
|
|
80
|
-
const result = await runVerifier(scratch, scratch, "regex", {
|
|
81
|
-
agentStdout: "different output",
|
|
82
|
-
expectedMatch: "hello",
|
|
83
|
-
});
|
|
84
|
-
expect(result.exitCode).toBe(1);
|
|
85
|
-
});
|
|
86
|
-
test("returns 127 when expected_match missing", async () => {
|
|
87
|
-
const result = await runVerifier(scratch, scratch, "regex", {
|
|
88
|
-
agentStdout: "anything",
|
|
89
|
-
});
|
|
90
|
-
expect(result.exitCode).toBe(127);
|
|
91
|
-
expect(result.stdout).toContain("expected_match");
|
|
92
|
-
});
|
|
93
|
-
test("returns 127 on invalid regex pattern", async () => {
|
|
94
|
-
const result = await runVerifier(scratch, scratch, "regex", {
|
|
95
|
-
agentStdout: "x",
|
|
96
|
-
expectedMatch: "(",
|
|
97
|
-
});
|
|
98
|
-
expect(result.exitCode).toBe(127);
|
|
99
|
-
expect(result.stdout).toContain("invalid regex");
|
|
100
|
-
});
|
|
101
|
-
});
|
|
102
|
-
describe("runVerifier — pytest", () => {
|
|
103
|
-
test("returns 127 with a clear message when pytest is missing", async () => {
|
|
104
|
-
const result = await runVerifier(scratch, scratch, "pytest", {
|
|
105
|
-
// Simulate ENOENT: spawn throws when bin not on PATH.
|
|
106
|
-
spawn: fakeSpawn(0, "", "", new Error("ENOENT: pytest not found")),
|
|
107
|
-
});
|
|
108
|
-
expect(result.exitCode).toBe(127);
|
|
109
|
-
expect(result.stdout).toContain("ENOENT");
|
|
110
|
-
});
|
|
111
|
-
test("returns the pytest exit code when present", async () => {
|
|
112
|
-
const result = await runVerifier(scratch, scratch, "pytest", {
|
|
113
|
-
spawn: fakeSpawn(0, "1 passed in 0.05s"),
|
|
114
|
-
});
|
|
115
|
-
expect(result.exitCode).toBe(0);
|
|
116
|
-
expect(result.stdout).toContain("passed");
|
|
117
|
-
});
|
|
118
|
-
});
|