akm-cli 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -8
- package/dist/tests/add-website-source.test.js +0 -119
- package/dist/tests/agent/agent-config-loader.test.js +0 -70
- package/dist/tests/agent/agent-config.test.js +0 -221
- package/dist/tests/agent/agent-detect.test.js +0 -100
- package/dist/tests/agent/agent-spawn.test.js +0 -234
- package/dist/tests/agent-output.test.js +0 -186
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
- package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
- package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
- package/dist/tests/asset-ref.test.js +0 -192
- package/dist/tests/asset-registry.test.js +0 -103
- package/dist/tests/asset-spec.test.js +0 -241
- package/dist/tests/bench/attribution.test.js +0 -996
- package/dist/tests/bench/cleanup-sigint.test.js +0 -83
- package/dist/tests/bench/cleanup.js +0 -234
- package/dist/tests/bench/cleanup.test.js +0 -166
- package/dist/tests/bench/cli.js +0 -1018
- package/dist/tests/bench/cli.test.js +0 -445
- package/dist/tests/bench/compare.test.js +0 -556
- package/dist/tests/bench/corpus.js +0 -317
- package/dist/tests/bench/corpus.test.js +0 -258
- package/dist/tests/bench/doctor.js +0 -525
- package/dist/tests/bench/driver.js +0 -401
- package/dist/tests/bench/driver.test.js +0 -584
- package/dist/tests/bench/environment.js +0 -233
- package/dist/tests/bench/environment.test.js +0 -199
- package/dist/tests/bench/evolve-metrics.js +0 -179
- package/dist/tests/bench/evolve-metrics.test.js +0 -187
- package/dist/tests/bench/evolve.js +0 -647
- package/dist/tests/bench/evolve.test.js +0 -624
- package/dist/tests/bench/failure-modes.test.js +0 -349
- package/dist/tests/bench/feedback-integrity.test.js +0 -457
- package/dist/tests/bench/leakage.test.js +0 -228
- package/dist/tests/bench/learning-curve.test.js +0 -134
- package/dist/tests/bench/metrics.js +0 -2395
- package/dist/tests/bench/metrics.test.js +0 -1150
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
- package/dist/tests/bench/opencode-config.js +0 -194
- package/dist/tests/bench/opencode-config.test.js +0 -370
- package/dist/tests/bench/report.js +0 -1885
- package/dist/tests/bench/report.test.js +0 -1038
- package/dist/tests/bench/run-config.js +0 -355
- package/dist/tests/bench/run-config.test.js +0 -298
- package/dist/tests/bench/run-curate-test.js +0 -32
- package/dist/tests/bench/run-failing-tasks.js +0 -56
- package/dist/tests/bench/run-full-bench.js +0 -51
- package/dist/tests/bench/run-items36-targeted.js +0 -69
- package/dist/tests/bench/run-nano-quick.js +0 -42
- package/dist/tests/bench/run-waveg-targeted.js +0 -62
- package/dist/tests/bench/runner.js +0 -699
- package/dist/tests/bench/runner.test.js +0 -958
- package/dist/tests/bench/search-bridge.test.js +0 -331
- package/dist/tests/bench/tmp.js +0 -131
- package/dist/tests/bench/trajectory.js +0 -116
- package/dist/tests/bench/trajectory.test.js +0 -127
- package/dist/tests/bench/verifier.js +0 -114
- package/dist/tests/bench/verifier.test.js +0 -118
- package/dist/tests/bench/workflow-evaluator.js +0 -557
- package/dist/tests/bench/workflow-evaluator.test.js +0 -421
- package/dist/tests/bench/workflow-spec.js +0 -345
- package/dist/tests/bench/workflow-spec.test.js +0 -363
- package/dist/tests/bench/workflow-trace.js +0 -472
- package/dist/tests/bench/workflow-trace.test.js +0 -254
- package/dist/tests/benchmark-search-quality.js +0 -536
- package/dist/tests/benchmark-suite.js +0 -1441
- package/dist/tests/capture-cli.test.js +0 -112
- package/dist/tests/cli-errors.test.js +0 -204
- package/dist/tests/commands/events.test.js +0 -370
- package/dist/tests/commands/history.test.js +0 -418
- package/dist/tests/commands/import.test.js +0 -103
- package/dist/tests/commands/proposal-cli.test.js +0 -209
- package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
- package/dist/tests/commands/remember.test.js +0 -97
- package/dist/tests/commands/scope-flags.test.js +0 -300
- package/dist/tests/commands/search.test.js +0 -537
- package/dist/tests/commands/show-indexer-parity.test.js +0 -117
- package/dist/tests/commands/show.test.js +0 -294
- package/dist/tests/common.test.js +0 -266
- package/dist/tests/completions.test.js +0 -142
- package/dist/tests/config-cli.test.js +0 -193
- package/dist/tests/config-llm-features.test.js +0 -139
- package/dist/tests/config.test.js +0 -569
- package/dist/tests/contracts/migration-baseline.test.js +0 -43
- package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
- package/dist/tests/contracts/spec-helpers.js +0 -46
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
- package/dist/tests/core/write-source.test.js +0 -366
- package/dist/tests/curate-command.test.js +0 -87
- package/dist/tests/db-scoring.test.js +0 -201
- package/dist/tests/db.test.js +0 -654
- package/dist/tests/distill-cli-flag.test.js +0 -208
- package/dist/tests/distill.test.js +0 -515
- package/dist/tests/docker-install.test.js +0 -120
- package/dist/tests/e2e.test.js +0 -1419
- package/dist/tests/embedder.test.js +0 -340
- package/dist/tests/embedding-model-config.test.js +0 -379
- package/dist/tests/feedback-command.test.js +0 -172
- package/dist/tests/file-context.test.js +0 -552
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
- package/dist/tests/fixtures/stashes/load.js +0 -166
- package/dist/tests/fixtures/stashes/load.test.js +0 -97
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
- package/dist/tests/frontmatter.test.js +0 -190
- package/dist/tests/fts-field-weighting.test.js +0 -254
- package/dist/tests/fuzzy-search.test.js +0 -230
- package/dist/tests/git-provider-clone.test.js +0 -45
- package/dist/tests/github.test.js +0 -161
- package/dist/tests/graph-boost-ranking.test.js +0 -305
- package/dist/tests/graph-extraction.test.js +0 -282
- package/dist/tests/helpers/usage-events.js +0 -8
- package/dist/tests/index-pass-llm.test.js +0 -161
- package/dist/tests/indexer.test.js +0 -570
- package/dist/tests/info-command.test.js +0 -166
- package/dist/tests/init.test.js +0 -69
- package/dist/tests/install-script.test.js +0 -246
- package/dist/tests/integration/agent-real-profile.test.js +0 -94
- package/dist/tests/issue-36-repro.test.js +0 -304
- package/dist/tests/issues-191-194.test.js +0 -160
- package/dist/tests/lesson-lint.test.js +0 -111
- package/dist/tests/llm-client.test.js +0 -115
- package/dist/tests/llm-feature-gate.test.js +0 -151
- package/dist/tests/llm.test.js +0 -139
- package/dist/tests/lockfile.test.js +0 -216
- package/dist/tests/manifest.test.js +0 -205
- package/dist/tests/markdown.test.js +0 -126
- package/dist/tests/matchers-unit.test.js +0 -189
- package/dist/tests/memory-inference.test.js +0 -299
- package/dist/tests/merge-scoring.test.js +0 -136
- package/dist/tests/metadata.test.js +0 -313
- package/dist/tests/migration-help.test.js +0 -89
- package/dist/tests/origin-resolve.test.js +0 -124
- package/dist/tests/output-baseline.test.js +0 -218
- package/dist/tests/output-shapes-unit.test.js +0 -478
- package/dist/tests/parallel-search.test.js +0 -272
- package/dist/tests/parameter-metadata.test.js +0 -365
- package/dist/tests/paths.test.js +0 -177
- package/dist/tests/progressive-disclosure.test.js +0 -280
- package/dist/tests/proposals.test.js +0 -279
- package/dist/tests/proposed-quality.test.js +0 -271
- package/dist/tests/provider-registry.test.js +0 -32
- package/dist/tests/ranking-regression.test.js +0 -548
- package/dist/tests/reflect-propose.test.js +0 -455
- package/dist/tests/registry-build-index.test.js +0 -394
- package/dist/tests/registry-cli.test.js +0 -290
- package/dist/tests/registry-index-v2.test.js +0 -430
- package/dist/tests/registry-install.test.js +0 -728
- package/dist/tests/registry-providers/parity.test.js +0 -189
- package/dist/tests/registry-providers/skills-sh.test.js +0 -309
- package/dist/tests/registry-providers/static-index.test.js +0 -238
- package/dist/tests/registry-resolve.test.js +0 -126
- package/dist/tests/registry-search.test.js +0 -923
- package/dist/tests/remember-frontmatter.test.js +0 -378
- package/dist/tests/remember-unit.test.js +0 -123
- package/dist/tests/ripgrep-install.test.js +0 -251
- package/dist/tests/ripgrep-resolve.test.js +0 -108
- package/dist/tests/ripgrep.test.js +0 -163
- package/dist/tests/save-command.test.js +0 -94
- package/dist/tests/save-trust-qa-fixes.test.js +0 -270
- package/dist/tests/scoring-pipeline.test.js +0 -648
- package/dist/tests/search-include-proposed-cli.test.js +0 -118
- package/dist/tests/self-update.test.js +0 -442
- package/dist/tests/semantic-search-e2e.test.js +0 -512
- package/dist/tests/semantic-status.test.js +0 -471
- package/dist/tests/setup-run.integration.js +0 -877
- package/dist/tests/setup-wizard.test.js +0 -198
- package/dist/tests/setup.test.js +0 -131
- package/dist/tests/source-add.test.js +0 -11
- package/dist/tests/source-clone.test.js +0 -254
- package/dist/tests/source-manage.test.js +0 -366
- package/dist/tests/source-providers/filesystem.test.js +0 -82
- package/dist/tests/source-providers/git.test.js +0 -252
- package/dist/tests/source-providers/website.test.js +0 -128
- package/dist/tests/source-qa-fixes.test.js +0 -286
- package/dist/tests/source-registry.test.js +0 -350
- package/dist/tests/source-resolve.test.js +0 -100
- package/dist/tests/source-source.test.js +0 -281
- package/dist/tests/source.test.js +0 -533
- package/dist/tests/tar-utils-scan.test.js +0 -73
- package/dist/tests/toggle-components.test.js +0 -73
- package/dist/tests/usage-telemetry.test.js +0 -265
- package/dist/tests/utility-scoring.test.js +0 -558
- package/dist/tests/vault-load-error.test.js +0 -78
- package/dist/tests/vault-qa-fixes.test.js +0 -194
- package/dist/tests/vault.test.js +0 -429
- package/dist/tests/vector-search.test.js +0 -608
- package/dist/tests/walker.test.js +0 -252
- package/dist/tests/wave2-cluster-bc.test.js +0 -228
- package/dist/tests/wave2-cluster-d.test.js +0 -180
- package/dist/tests/wave2-cluster-e.test.js +0 -179
- package/dist/tests/wiki-qa-fixes.test.js +0 -270
- package/dist/tests/wiki.test.js +0 -529
- package/dist/tests/workflow-cli.test.js +0 -271
- package/dist/tests/workflow-markdown.test.js +0 -171
- package/dist/tests/workflow-path-escape.test.js +0 -132
- package/dist/tests/workflow-qa-fixes.test.js +0 -395
- package/dist/tests/workflows/indexer-rejection.test.js +0 -213
- /package/dist/{src/cli.js → cli.js} +0 -0
- /package/dist/{src/commands → commands}/completions.js +0 -0
- /package/dist/{src/commands → commands}/config-cli.js +0 -0
- /package/dist/{src/commands → commands}/curate.js +0 -0
- /package/dist/{src/commands → commands}/distill.js +0 -0
- /package/dist/{src/commands → commands}/events.js +0 -0
- /package/dist/{src/commands → commands}/history.js +0 -0
- /package/dist/{src/commands → commands}/info.js +0 -0
- /package/dist/{src/commands → commands}/init.js +0 -0
- /package/dist/{src/commands → commands}/install-audit.js +0 -0
- /package/dist/{src/commands → commands}/installed-stashes.js +0 -0
- /package/dist/{src/commands → commands}/migration-help.js +0 -0
- /package/dist/{src/commands → commands}/proposal.js +0 -0
- /package/dist/{src/commands → commands}/propose.js +0 -0
- /package/dist/{src/commands → commands}/reflect.js +0 -0
- /package/dist/{src/commands → commands}/registry-search.js +0 -0
- /package/dist/{src/commands → commands}/remember.js +0 -0
- /package/dist/{src/commands → commands}/search.js +0 -0
- /package/dist/{src/commands → commands}/self-update.js +0 -0
- /package/dist/{src/commands → commands}/show.js +0 -0
- /package/dist/{src/commands → commands}/source-add.js +0 -0
- /package/dist/{src/commands → commands}/source-clone.js +0 -0
- /package/dist/{src/commands → commands}/source-manage.js +0 -0
- /package/dist/{src/commands → commands}/vault.js +0 -0
- /package/dist/{src/core → core}/asset-ref.js +0 -0
- /package/dist/{src/core → core}/asset-registry.js +0 -0
- /package/dist/{src/core → core}/asset-spec.js +0 -0
- /package/dist/{src/core → core}/common.js +0 -0
- /package/dist/{src/core → core}/config.js +0 -0
- /package/dist/{src/core → core}/errors.js +0 -0
- /package/dist/{src/core → core}/events.js +0 -0
- /package/dist/{src/core → core}/frontmatter.js +0 -0
- /package/dist/{src/core → core}/lesson-lint.js +0 -0
- /package/dist/{src/core → core}/markdown.js +0 -0
- /package/dist/{src/core → core}/paths.js +0 -0
- /package/dist/{src/core → core}/proposals.js +0 -0
- /package/dist/{src/core → core}/warn.js +0 -0
- /package/dist/{src/core → core}/write-source.js +0 -0
- /package/dist/{src/indexer → indexer}/db-search.js +0 -0
- /package/dist/{src/indexer → indexer}/db.js +0 -0
- /package/dist/{src/indexer → indexer}/file-context.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-extraction.js +0 -0
- /package/dist/{src/indexer → indexer}/indexer.js +0 -0
- /package/dist/{src/indexer → indexer}/manifest.js +0 -0
- /package/dist/{src/indexer → indexer}/matchers.js +0 -0
- /package/dist/{src/indexer → indexer}/memory-inference.js +0 -0
- /package/dist/{src/indexer → indexer}/metadata.js +0 -0
- /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
- /package/dist/{src/indexer → indexer}/search-source.js +0 -0
- /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
- /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
- /package/dist/{src/indexer → indexer}/walker.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
- /package/dist/{src/integrations → integrations}/github.js +0 -0
- /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
- /package/dist/{src/llm → llm}/client.js +0 -0
- /package/dist/{src/llm → llm}/embedder.js +0 -0
- /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
- /package/dist/{src/llm → llm}/embedders/local.js +0 -0
- /package/dist/{src/llm → llm}/embedders/remote.js +0 -0
- /package/dist/{src/llm → llm}/embedders/types.js +0 -0
- /package/dist/{src/llm → llm}/feature-gate.js +0 -0
- /package/dist/{src/llm → llm}/graph-extract.js +0 -0
- /package/dist/{src/llm → llm}/index-passes.js +0 -0
- /package/dist/{src/llm → llm}/memory-infer.js +0 -0
- /package/dist/{src/llm → llm}/metadata-enhance.js +0 -0
- /package/dist/{src/output → output}/cli-hints.js +0 -0
- /package/dist/{src/output → output}/context.js +0 -0
- /package/dist/{src/output → output}/renderers.js +0 -0
- /package/dist/{src/output → output}/shapes.js +0 -0
- /package/dist/{src/output → output}/text.js +0 -0
- /package/dist/{src/registry → registry}/build-index.js +0 -0
- /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
- /package/dist/{src/registry → registry}/factory.js +0 -0
- /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
- /package/dist/{src/registry → registry}/providers/index.js +0 -0
- /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
- /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
- /package/dist/{src/registry → registry}/providers/types.js +0 -0
- /package/dist/{src/registry → registry}/resolve.js +0 -0
- /package/dist/{src/registry → registry}/types.js +0 -0
- /package/dist/{src/setup → setup}/detect.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
- /package/dist/{src/setup → setup}/setup.js +0 -0
- /package/dist/{src/setup → setup}/steps.js +0 -0
- /package/dist/{src/sources → sources}/include.js +0 -0
- /package/dist/{src/sources → sources}/provider-factory.js +0 -0
- /package/dist/{src/sources → sources}/provider.js +0 -0
- /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
- /package/dist/{src/sources → sources}/providers/git.js +0 -0
- /package/dist/{src/sources → sources}/providers/index.js +0 -0
- /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
- /package/dist/{src/sources → sources}/providers/npm.js +0 -0
- /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/website.js +0 -0
- /package/dist/{src/sources → sources}/resolve.js +0 -0
- /package/dist/{src/sources → sources}/types.js +0 -0
- /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
- /package/dist/{src/version.js → version.js} +0 -0
- /package/dist/{src/wiki → wiki}/wiki.js +0 -0
- /package/dist/{src/workflows → workflows}/authoring.js +0 -0
- /package/dist/{src/workflows → workflows}/cli.js +0 -0
- /package/dist/{src/workflows → workflows}/db.js +0 -0
- /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
- /package/dist/{src/workflows → workflows}/parser.js +0 -0
- /package/dist/{src/workflows → workflows}/renderer.js +0 -0
- /package/dist/{src/workflows → workflows}/runs.js +0 -0
- /package/dist/{src/workflows → workflows}/schema.js +0 -0
- /package/dist/{src/workflows → workflows}/validator.js +0 -0
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* OBSOLETE: superseded by `bun run tests/bench/cli.ts tests/bench/configs/curate-test.json`.
|
|
3
|
-
* Kept for backward compatibility; will be removed in the standalone-bench-repo extraction.
|
|
4
|
-
*
|
|
5
|
-
* Test akm curate as first command on configure-scaling.
|
|
6
|
-
* Usage: bun run tests/bench/run-curate-test.ts
|
|
7
|
-
*/
|
|
8
|
-
import fs from "node:fs";
|
|
9
|
-
import path from "node:path";
|
|
10
|
-
import { loadTask } from "./corpus";
|
|
11
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
12
|
-
import { runUtility } from "./runner";
|
|
13
|
-
process.stderr.write("[obsolete] run-curate-test.ts → see tests/bench/configs/curate-test.json (`bun run tests/bench/cli.ts tests/bench/configs/curate-test.json`)\n");
|
|
14
|
-
const tasks = [loadTask("inkwell/configure-scaling")];
|
|
15
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
16
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
17
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
18
|
-
process.stderr.write(`Running configure-scaling × 5 seeds (curate as first cmd)\nModel: ${providers.defaultModel}\n\n`);
|
|
19
|
-
const report = await runUtility({
|
|
20
|
-
tasks,
|
|
21
|
-
arms: ["akm"],
|
|
22
|
-
model: providers.defaultModel,
|
|
23
|
-
seedsPerArm: 5,
|
|
24
|
-
budgetTokens: 25000,
|
|
25
|
-
budgetWallMs: 360000,
|
|
26
|
-
parallel: 3,
|
|
27
|
-
opencodeProviders: providers,
|
|
28
|
-
});
|
|
29
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
30
|
-
const t = report.tasks?.[0];
|
|
31
|
-
const rate = t?.akm?.passRate ?? 0;
|
|
32
|
-
process.stderr.write(`\nconfigure-scaling: ${(rate * 100).toFixed(0)}% (baseline 80%)\n`);
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* OBSOLETE: superseded by `bun run tests/bench/cli.ts tests/bench/configs/failing-tasks.json`.
|
|
3
|
-
* Kept for backward compatibility; will be removed in the standalone-bench-repo extraction.
|
|
4
|
-
*
|
|
5
|
-
* Targeted retest of failing/partial tasks after stash improvements.
|
|
6
|
-
* Usage: bun run tests/bench/run-failing-tasks.ts
|
|
7
|
-
*/
|
|
8
|
-
import fs from "node:fs";
|
|
9
|
-
import path from "node:path";
|
|
10
|
-
import { loadTask } from "./corpus";
|
|
11
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
12
|
-
import { runUtility } from "./runner";
|
|
13
|
-
process.stderr.write("[obsolete] run-failing-tasks.ts → see tests/bench/configs/failing-tasks.json (`bun run tests/bench/cli.ts tests/bench/configs/failing-tasks.json`)\n");
|
|
14
|
-
const TASK_IDS = [
|
|
15
|
-
"drillbit/backup-policy",
|
|
16
|
-
"drillbit/canary-enable",
|
|
17
|
-
"inkwell/add-healthcheck",
|
|
18
|
-
"inkwell/configure-scaling",
|
|
19
|
-
"opencode/select-correct-skill",
|
|
20
|
-
];
|
|
21
|
-
const tasks = TASK_IDS.map((id) => loadTask(id));
|
|
22
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
23
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
24
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
25
|
-
process.stderr.write(`Running ${tasks.length} tasks × 5 seeds (akm only)\nModel: ${providers.defaultModel}\n\n`);
|
|
26
|
-
const report = await runUtility({
|
|
27
|
-
tasks,
|
|
28
|
-
arms: ["akm"],
|
|
29
|
-
model: providers.defaultModel,
|
|
30
|
-
seedsPerArm: 5,
|
|
31
|
-
budgetTokens: 25000,
|
|
32
|
-
budgetWallMs: 360000,
|
|
33
|
-
parallel: 3,
|
|
34
|
-
opencodeProviders: providers,
|
|
35
|
-
});
|
|
36
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
37
|
-
const agg = report.aggregateAkm;
|
|
38
|
-
process.stderr.write(`\n=== RESULTS vs BASELINE ===\n`);
|
|
39
|
-
// Qwen 9B baseline for comparison
|
|
40
|
-
const BASELINE = {
|
|
41
|
-
"drillbit/backup-policy": 1.0,
|
|
42
|
-
"drillbit/canary-enable": 1.0,
|
|
43
|
-
"inkwell/add-healthcheck": 0.8,
|
|
44
|
-
"inkwell/configure-scaling": 0.8,
|
|
45
|
-
"opencode/select-correct-skill": 1.0,
|
|
46
|
-
};
|
|
47
|
-
for (const t of report.tasks ?? []) {
|
|
48
|
-
const rate = t.akm?.passRate ?? 0;
|
|
49
|
-
const base = BASELINE[t.id] ?? 0;
|
|
50
|
-
const delta = rate - base;
|
|
51
|
-
const arrow = delta > 0 ? "↑" : delta < 0 ? "↓" : "=";
|
|
52
|
-
const bar = "█".repeat(Math.round(rate * 5)) + "░".repeat(5 - Math.round(rate * 5));
|
|
53
|
-
const deltaStr = delta !== 0 ? ` (${arrow}${Math.abs(delta * 100).toFixed(0)}pp)` : "";
|
|
54
|
-
process.stderr.write(`${t.id.padEnd(48)} ${(rate * 100).toFixed(0).padStart(3)}% ${bar}${deltaStr}\n`);
|
|
55
|
-
}
|
|
56
|
-
process.stderr.write(`\nOverall: ${((agg?.passRate ?? 0) * 100).toFixed(1)}%\n`);
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* OBSOLETE: superseded by `bun run tests/bench/cli.ts tests/bench/configs/full.json`.
|
|
3
|
-
* Kept for backward compatibility; will be removed in the standalone-bench-repo extraction.
|
|
4
|
-
*
|
|
5
|
-
* Full benchmark run — all tasks, 5 seeds, akm arm only.
|
|
6
|
-
* Usage: bun run tests/bench/run-full-bench.ts
|
|
7
|
-
*/
|
|
8
|
-
import fs from "node:fs";
|
|
9
|
-
import path from "node:path";
|
|
10
|
-
import { listTasks } from "./corpus";
|
|
11
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
12
|
-
import { runUtility } from "./runner";
|
|
13
|
-
process.stderr.write("[obsolete] run-full-bench.ts → see tests/bench/configs/full.json (`bun run tests/bench/cli.ts tests/bench/configs/full.json`)\n");
|
|
14
|
-
const tasks = listTasks();
|
|
15
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
16
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
17
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
18
|
-
process.stderr.write(`Running ${tasks.length} tasks × 5 seeds (akm only)\nModel: ${providers.defaultModel}\n\n`);
|
|
19
|
-
const report = await runUtility({
|
|
20
|
-
tasks,
|
|
21
|
-
arms: ["akm"],
|
|
22
|
-
model: providers.defaultModel,
|
|
23
|
-
seedsPerArm: 5,
|
|
24
|
-
budgetTokens: 25000,
|
|
25
|
-
budgetWallMs: 360000,
|
|
26
|
-
parallel: 3,
|
|
27
|
-
opencodeProviders: providers,
|
|
28
|
-
});
|
|
29
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
30
|
-
const BASELINE = {
|
|
31
|
-
"drillbit/backup-policy": 1.0,
|
|
32
|
-
"drillbit/canary-enable": 1.0,
|
|
33
|
-
"inkwell/add-healthcheck": 0.8,
|
|
34
|
-
"inkwell/configure-scaling": 0.8,
|
|
35
|
-
"opencode/select-correct-skill": 1.0,
|
|
36
|
-
};
|
|
37
|
-
process.stderr.write(`\n=== RESULTS vs BASELINE ===\n`);
|
|
38
|
-
for (const t of report.tasks ?? []) {
|
|
39
|
-
const rate = t.akm?.passRate ?? 0;
|
|
40
|
-
const base = BASELINE[t.id] ?? null;
|
|
41
|
-
const bar = "█".repeat(Math.round(rate * 5)) + "░".repeat(5 - Math.round(rate * 5));
|
|
42
|
-
const deltaStr = base !== null
|
|
43
|
-
? (() => {
|
|
44
|
-
const d = rate - base;
|
|
45
|
-
const arrow = d > 0 ? "↑" : d < 0 ? "↓" : "=";
|
|
46
|
-
return d !== 0 ? ` (${arrow}${Math.abs(d * 100).toFixed(0)}pp)` : " (=)";
|
|
47
|
-
})()
|
|
48
|
-
: "";
|
|
49
|
-
process.stderr.write(`${t.id.padEnd(52)} ${(rate * 100).toFixed(0).padStart(3)}% ${bar}${deltaStr}\n`);
|
|
50
|
-
}
|
|
51
|
-
process.stderr.write(`\nOverall: ${((report.aggregateAkm?.passRate ?? 0) * 100).toFixed(1)}%\n`);
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Items 3-6 targeted bench — tasks most directly affected by fixture stash
|
|
3
|
-
* content additions and skill frontmatter strip (commit 92196c7).
|
|
4
|
-
* Usage: bun run tests/bench/run-items36-targeted.ts
|
|
5
|
-
*/
|
|
6
|
-
import fs from "node:fs";
|
|
7
|
-
import path from "node:path";
|
|
8
|
-
import { loadTask } from "./corpus";
|
|
9
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
10
|
-
import { runUtility } from "./runner";
|
|
11
|
-
const TARGET_TASKS = [
|
|
12
|
-
// item 4: env_file section moved to top of compose-conventions.md
|
|
13
|
-
"docker-homelab/env-from-file",
|
|
14
|
-
// item 3: az-storage-lifecycle knowledge added to az-cli stash
|
|
15
|
-
"workflow-compliance/repeated-fail-storage-lifecycle-a",
|
|
16
|
-
// item 5: memory assets (compound-tag-filter, null-value-trap)
|
|
17
|
-
"az-cli/query-by-tag",
|
|
18
|
-
// item 5: memory asset (healthcheck-test-cmd)
|
|
19
|
-
"inkwell/add-healthcheck-train",
|
|
20
|
-
// item 6: skill frontmatter strip — previously low-scoring tasks
|
|
21
|
-
"docker-homelab/restart-policy",
|
|
22
|
-
"docker-homelab/redis-healthcheck",
|
|
23
|
-
"docker-homelab/named-volume",
|
|
24
|
-
"az-cli/storage-account-create",
|
|
25
|
-
"inkwell/configure-scaling",
|
|
26
|
-
];
|
|
27
|
-
const tasks = TARGET_TASKS.map((id) => loadTask(id));
|
|
28
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
29
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
30
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
31
|
-
process.stderr.write(`Items 3-6 targeted bench: ${tasks.length} tasks × 3 seeds\nModel: ${providers.defaultModel}\n\n`);
|
|
32
|
-
const report = await runUtility({
|
|
33
|
-
tasks,
|
|
34
|
-
arms: ["akm"],
|
|
35
|
-
model: providers.defaultModel,
|
|
36
|
-
seedsPerArm: 3,
|
|
37
|
-
budgetTokens: 25000,
|
|
38
|
-
budgetWallMs: 360000,
|
|
39
|
-
parallel: 3,
|
|
40
|
-
opencodeProviders: providers,
|
|
41
|
-
});
|
|
42
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
43
|
-
// Wave G baselines from 2026-05-03 targeted run
|
|
44
|
-
const BASELINE = {
|
|
45
|
-
"docker-homelab/env-from-file": 0.0,
|
|
46
|
-
"workflow-compliance/repeated-fail-storage-lifecycle-a": 0.0,
|
|
47
|
-
"az-cli/query-by-tag": 0.4,
|
|
48
|
-
"inkwell/add-healthcheck-train": 0.67,
|
|
49
|
-
"docker-homelab/restart-policy": 0.33,
|
|
50
|
-
"docker-homelab/redis-healthcheck": 0.33,
|
|
51
|
-
"docker-homelab/named-volume": 0.33,
|
|
52
|
-
"az-cli/storage-account-create": 1.0,
|
|
53
|
-
"inkwell/configure-scaling": 0.6,
|
|
54
|
-
};
|
|
55
|
-
process.stderr.write(`\n=== RESULTS vs Wave G BASELINE ===\n`);
|
|
56
|
-
for (const t of report.tasks ?? []) {
|
|
57
|
-
const rate = t.akm?.passRate ?? 0;
|
|
58
|
-
const base = BASELINE[t.id] ?? null;
|
|
59
|
-
const bar = "█".repeat(Math.round(rate * 5)) + "░".repeat(5 - Math.round(rate * 5));
|
|
60
|
-
const deltaStr = base !== null
|
|
61
|
-
? (() => {
|
|
62
|
-
const d = rate - base;
|
|
63
|
-
const arrow = d > 0 ? "↑" : d < 0 ? "↓" : "=";
|
|
64
|
-
return d !== 0 ? ` (${arrow}${Math.abs(d * 100).toFixed(0)}pp)` : " (=)";
|
|
65
|
-
})()
|
|
66
|
-
: "";
|
|
67
|
-
process.stderr.write(`${t.id.padEnd(52)} ${(rate * 100).toFixed(0).padStart(3)}% ${bar}${deltaStr}\n`);
|
|
68
|
-
}
|
|
69
|
-
process.stderr.write(`\nOverall: ${((report.aggregateAkm?.passRate ?? 0) * 100).toFixed(1)}%\n`);
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* OBSOLETE: superseded by `bun run tests/bench/cli.ts tests/bench/configs/nano-quick.json`.
|
|
3
|
-
* Kept for backward compatibility; will be removed in the standalone-bench-repo extraction.
|
|
4
|
-
*
|
|
5
|
-
* Quick 5-task × 2-seed run for Nemotron Nano evaluation.
|
|
6
|
-
* Usage: bun run tests/bench/run-nano-quick.ts
|
|
7
|
-
*/
|
|
8
|
-
import fs from "node:fs";
|
|
9
|
-
import path from "node:path";
|
|
10
|
-
import { loadTask } from "./corpus";
|
|
11
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
12
|
-
import { runUtility } from "./runner";
|
|
13
|
-
process.stderr.write("[obsolete] run-nano-quick.ts → see tests/bench/configs/nano-quick.json (`bun run tests/bench/cli.ts tests/bench/configs/nano-quick.json`)\n");
|
|
14
|
-
const TASK_IDS = [
|
|
15
|
-
"drillbit/backup-policy",
|
|
16
|
-
"drillbit/canary-enable",
|
|
17
|
-
"inkwell/add-healthcheck",
|
|
18
|
-
"inkwell/configure-scaling",
|
|
19
|
-
"opencode/select-correct-skill",
|
|
20
|
-
];
|
|
21
|
-
const tasks = TASK_IDS.map((id) => loadTask(id));
|
|
22
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
23
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
24
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
25
|
-
process.stderr.write(`Running ${tasks.length} tasks × 2 seeds\nModel: ${providers.defaultModel}\n\n`);
|
|
26
|
-
const report = await runUtility({
|
|
27
|
-
tasks,
|
|
28
|
-
arms: ["akm"],
|
|
29
|
-
model: providers.defaultModel,
|
|
30
|
-
seedsPerArm: 2,
|
|
31
|
-
budgetTokens: 25000,
|
|
32
|
-
budgetWallMs: 360000,
|
|
33
|
-
parallel: 2,
|
|
34
|
-
opencodeProviders: providers,
|
|
35
|
-
});
|
|
36
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
37
|
-
for (const t of report.tasks ?? []) {
|
|
38
|
-
const rate = t.akm?.passRate ?? 0;
|
|
39
|
-
const bar = "█".repeat(Math.round(rate * 5)) + "░".repeat(5 - Math.round(rate * 5));
|
|
40
|
-
process.stderr.write(`${t.id.padEnd(48)} ${(rate * 100).toFixed(0).padStart(3)}% ${bar}\n`);
|
|
41
|
-
}
|
|
42
|
-
process.stderr.write(`\nOverall: ${((report.aggregateAkm?.passRate ?? 0) * 100).toFixed(1)}%\n`);
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Wave G targeted bench — 9 previously-failing tasks, 3 seeds.
|
|
3
|
-
* Usage: bun run tests/bench/run-waveg-targeted.ts
|
|
4
|
-
*/
|
|
5
|
-
import fs from "node:fs";
|
|
6
|
-
import path from "node:path";
|
|
7
|
-
import { loadTask } from "./corpus";
|
|
8
|
-
import { loadOpencodeProviders } from "./opencode-config";
|
|
9
|
-
import { runUtility } from "./runner";
|
|
10
|
-
const TARGET_TASKS = [
|
|
11
|
-
"inkwell/configure-scaling",
|
|
12
|
-
"inkwell/add-healthcheck-train",
|
|
13
|
-
"inkwell/full-config",
|
|
14
|
-
"az-cli/storage-account-create",
|
|
15
|
-
"docker-homelab/bridge-network",
|
|
16
|
-
"docker-homelab/compose-version-upgrade",
|
|
17
|
-
"docker-homelab/env-from-file",
|
|
18
|
-
"workflow-compliance/feedback-trap-az-tag-list",
|
|
19
|
-
"workflow-compliance/repeated-fail-storage-lifecycle-a",
|
|
20
|
-
];
|
|
21
|
-
const tasks = TARGET_TASKS.map((id) => loadTask(id));
|
|
22
|
-
const LOCAL = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json");
|
|
23
|
-
const DEFAULT = path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json");
|
|
24
|
-
const providers = loadOpencodeProviders(fs.existsSync(LOCAL) ? LOCAL : DEFAULT);
|
|
25
|
-
process.stderr.write(`Wave G targeted bench: ${tasks.length} tasks × 3 seeds\nModel: ${providers.defaultModel}\n\n`);
|
|
26
|
-
const report = await runUtility({
|
|
27
|
-
tasks,
|
|
28
|
-
arms: ["akm"],
|
|
29
|
-
model: providers.defaultModel,
|
|
30
|
-
seedsPerArm: 3,
|
|
31
|
-
budgetTokens: 25000,
|
|
32
|
-
budgetWallMs: 360000,
|
|
33
|
-
parallel: 3,
|
|
34
|
-
opencodeProviders: providers,
|
|
35
|
-
});
|
|
36
|
-
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
|
37
|
-
const BASELINE = {
|
|
38
|
-
"inkwell/configure-scaling": 0.6,
|
|
39
|
-
"inkwell/add-healthcheck-train": 0.4,
|
|
40
|
-
"inkwell/full-config": 0.0,
|
|
41
|
-
"az-cli/storage-account-create": 0.4,
|
|
42
|
-
"docker-homelab/bridge-network": 0.2,
|
|
43
|
-
"docker-homelab/compose-version-upgrade": 0.4,
|
|
44
|
-
"docker-homelab/env-from-file": 0.0,
|
|
45
|
-
"workflow-compliance/feedback-trap-az-tag-list": 0.2,
|
|
46
|
-
"workflow-compliance/repeated-fail-storage-lifecycle-a": 0.0,
|
|
47
|
-
};
|
|
48
|
-
process.stderr.write(`\n=== RESULTS vs 2026-05-03 BASELINE ===\n`);
|
|
49
|
-
for (const t of report.tasks ?? []) {
|
|
50
|
-
const rate = t.akm?.passRate ?? 0;
|
|
51
|
-
const base = BASELINE[t.id] ?? null;
|
|
52
|
-
const bar = "█".repeat(Math.round(rate * 5)) + "░".repeat(5 - Math.round(rate * 5));
|
|
53
|
-
const deltaStr = base !== null
|
|
54
|
-
? (() => {
|
|
55
|
-
const d = rate - base;
|
|
56
|
-
const arrow = d > 0 ? "↑" : d < 0 ? "↓" : "=";
|
|
57
|
-
return d !== 0 ? ` (${arrow}${Math.abs(d * 100).toFixed(0)}pp)` : " (=)";
|
|
58
|
-
})()
|
|
59
|
-
: "";
|
|
60
|
-
process.stderr.write(`${t.id.padEnd(52)} ${(rate * 100).toFixed(0).padStart(3)}% ${bar}${deltaStr}\n`);
|
|
61
|
-
}
|
|
62
|
-
process.stderr.write(`\nOverall: ${((report.aggregateAkm?.passRate ?? 0) * 100).toFixed(1)}%\n`);
|