akm-cli 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -8
- package/dist/tests/add-website-source.test.js +0 -119
- package/dist/tests/agent/agent-config-loader.test.js +0 -70
- package/dist/tests/agent/agent-config.test.js +0 -221
- package/dist/tests/agent/agent-detect.test.js +0 -100
- package/dist/tests/agent/agent-spawn.test.js +0 -234
- package/dist/tests/agent-output.test.js +0 -186
- package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
- package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
- package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
- package/dist/tests/asset-ref.test.js +0 -192
- package/dist/tests/asset-registry.test.js +0 -103
- package/dist/tests/asset-spec.test.js +0 -241
- package/dist/tests/bench/attribution.test.js +0 -996
- package/dist/tests/bench/cleanup-sigint.test.js +0 -83
- package/dist/tests/bench/cleanup.js +0 -234
- package/dist/tests/bench/cleanup.test.js +0 -166
- package/dist/tests/bench/cli.js +0 -1018
- package/dist/tests/bench/cli.test.js +0 -445
- package/dist/tests/bench/compare.test.js +0 -556
- package/dist/tests/bench/corpus.js +0 -317
- package/dist/tests/bench/corpus.test.js +0 -258
- package/dist/tests/bench/doctor.js +0 -525
- package/dist/tests/bench/driver.js +0 -401
- package/dist/tests/bench/driver.test.js +0 -584
- package/dist/tests/bench/environment.js +0 -233
- package/dist/tests/bench/environment.test.js +0 -199
- package/dist/tests/bench/evolve-metrics.js +0 -179
- package/dist/tests/bench/evolve-metrics.test.js +0 -187
- package/dist/tests/bench/evolve.js +0 -647
- package/dist/tests/bench/evolve.test.js +0 -624
- package/dist/tests/bench/failure-modes.test.js +0 -349
- package/dist/tests/bench/feedback-integrity.test.js +0 -457
- package/dist/tests/bench/leakage.test.js +0 -228
- package/dist/tests/bench/learning-curve.test.js +0 -134
- package/dist/tests/bench/metrics.js +0 -2395
- package/dist/tests/bench/metrics.test.js +0 -1150
- package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
- package/dist/tests/bench/opencode-config.js +0 -194
- package/dist/tests/bench/opencode-config.test.js +0 -370
- package/dist/tests/bench/report.js +0 -1885
- package/dist/tests/bench/report.test.js +0 -1038
- package/dist/tests/bench/run-config.js +0 -355
- package/dist/tests/bench/run-config.test.js +0 -298
- package/dist/tests/bench/run-curate-test.js +0 -32
- package/dist/tests/bench/run-failing-tasks.js +0 -56
- package/dist/tests/bench/run-full-bench.js +0 -51
- package/dist/tests/bench/run-items36-targeted.js +0 -69
- package/dist/tests/bench/run-nano-quick.js +0 -42
- package/dist/tests/bench/run-waveg-targeted.js +0 -62
- package/dist/tests/bench/runner.js +0 -699
- package/dist/tests/bench/runner.test.js +0 -958
- package/dist/tests/bench/search-bridge.test.js +0 -331
- package/dist/tests/bench/tmp.js +0 -131
- package/dist/tests/bench/trajectory.js +0 -116
- package/dist/tests/bench/trajectory.test.js +0 -127
- package/dist/tests/bench/verifier.js +0 -114
- package/dist/tests/bench/verifier.test.js +0 -118
- package/dist/tests/bench/workflow-evaluator.js +0 -557
- package/dist/tests/bench/workflow-evaluator.test.js +0 -421
- package/dist/tests/bench/workflow-spec.js +0 -345
- package/dist/tests/bench/workflow-spec.test.js +0 -363
- package/dist/tests/bench/workflow-trace.js +0 -472
- package/dist/tests/bench/workflow-trace.test.js +0 -254
- package/dist/tests/benchmark-search-quality.js +0 -536
- package/dist/tests/benchmark-suite.js +0 -1441
- package/dist/tests/capture-cli.test.js +0 -112
- package/dist/tests/cli-errors.test.js +0 -204
- package/dist/tests/commands/events.test.js +0 -370
- package/dist/tests/commands/history.test.js +0 -418
- package/dist/tests/commands/import.test.js +0 -103
- package/dist/tests/commands/proposal-cli.test.js +0 -209
- package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
- package/dist/tests/commands/remember.test.js +0 -97
- package/dist/tests/commands/scope-flags.test.js +0 -300
- package/dist/tests/commands/search.test.js +0 -537
- package/dist/tests/commands/show-indexer-parity.test.js +0 -117
- package/dist/tests/commands/show.test.js +0 -294
- package/dist/tests/common.test.js +0 -266
- package/dist/tests/completions.test.js +0 -142
- package/dist/tests/config-cli.test.js +0 -193
- package/dist/tests/config-llm-features.test.js +0 -139
- package/dist/tests/config.test.js +0 -569
- package/dist/tests/contracts/migration-baseline.test.js +0 -43
- package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
- package/dist/tests/contracts/spec-helpers.js +0 -46
- package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
- package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
- package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
- package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
- package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
- package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
- package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
- package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
- package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
- package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
- package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
- package/dist/tests/core/write-source.test.js +0 -366
- package/dist/tests/curate-command.test.js +0 -87
- package/dist/tests/db-scoring.test.js +0 -201
- package/dist/tests/db.test.js +0 -654
- package/dist/tests/distill-cli-flag.test.js +0 -208
- package/dist/tests/distill.test.js +0 -515
- package/dist/tests/docker-install.test.js +0 -120
- package/dist/tests/e2e.test.js +0 -1419
- package/dist/tests/embedder.test.js +0 -340
- package/dist/tests/embedding-model-config.test.js +0 -379
- package/dist/tests/feedback-command.test.js +0 -172
- package/dist/tests/file-context.test.js +0 -552
- package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
- package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
- package/dist/tests/fixtures/stashes/load.js +0 -166
- package/dist/tests/fixtures/stashes/load.test.js +0 -97
- package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
- package/dist/tests/frontmatter.test.js +0 -190
- package/dist/tests/fts-field-weighting.test.js +0 -254
- package/dist/tests/fuzzy-search.test.js +0 -230
- package/dist/tests/git-provider-clone.test.js +0 -45
- package/dist/tests/github.test.js +0 -161
- package/dist/tests/graph-boost-ranking.test.js +0 -305
- package/dist/tests/graph-extraction.test.js +0 -282
- package/dist/tests/helpers/usage-events.js +0 -8
- package/dist/tests/index-pass-llm.test.js +0 -161
- package/dist/tests/indexer.test.js +0 -570
- package/dist/tests/info-command.test.js +0 -166
- package/dist/tests/init.test.js +0 -69
- package/dist/tests/install-script.test.js +0 -246
- package/dist/tests/integration/agent-real-profile.test.js +0 -94
- package/dist/tests/issue-36-repro.test.js +0 -304
- package/dist/tests/issues-191-194.test.js +0 -160
- package/dist/tests/lesson-lint.test.js +0 -111
- package/dist/tests/llm-client.test.js +0 -115
- package/dist/tests/llm-feature-gate.test.js +0 -151
- package/dist/tests/llm.test.js +0 -139
- package/dist/tests/lockfile.test.js +0 -216
- package/dist/tests/manifest.test.js +0 -205
- package/dist/tests/markdown.test.js +0 -126
- package/dist/tests/matchers-unit.test.js +0 -189
- package/dist/tests/memory-inference.test.js +0 -299
- package/dist/tests/merge-scoring.test.js +0 -136
- package/dist/tests/metadata.test.js +0 -313
- package/dist/tests/migration-help.test.js +0 -89
- package/dist/tests/origin-resolve.test.js +0 -124
- package/dist/tests/output-baseline.test.js +0 -218
- package/dist/tests/output-shapes-unit.test.js +0 -478
- package/dist/tests/parallel-search.test.js +0 -272
- package/dist/tests/parameter-metadata.test.js +0 -365
- package/dist/tests/paths.test.js +0 -177
- package/dist/tests/progressive-disclosure.test.js +0 -280
- package/dist/tests/proposals.test.js +0 -279
- package/dist/tests/proposed-quality.test.js +0 -271
- package/dist/tests/provider-registry.test.js +0 -32
- package/dist/tests/ranking-regression.test.js +0 -548
- package/dist/tests/reflect-propose.test.js +0 -455
- package/dist/tests/registry-build-index.test.js +0 -394
- package/dist/tests/registry-cli.test.js +0 -290
- package/dist/tests/registry-index-v2.test.js +0 -430
- package/dist/tests/registry-install.test.js +0 -728
- package/dist/tests/registry-providers/parity.test.js +0 -189
- package/dist/tests/registry-providers/skills-sh.test.js +0 -309
- package/dist/tests/registry-providers/static-index.test.js +0 -238
- package/dist/tests/registry-resolve.test.js +0 -126
- package/dist/tests/registry-search.test.js +0 -923
- package/dist/tests/remember-frontmatter.test.js +0 -378
- package/dist/tests/remember-unit.test.js +0 -123
- package/dist/tests/ripgrep-install.test.js +0 -251
- package/dist/tests/ripgrep-resolve.test.js +0 -108
- package/dist/tests/ripgrep.test.js +0 -163
- package/dist/tests/save-command.test.js +0 -94
- package/dist/tests/save-trust-qa-fixes.test.js +0 -270
- package/dist/tests/scoring-pipeline.test.js +0 -648
- package/dist/tests/search-include-proposed-cli.test.js +0 -118
- package/dist/tests/self-update.test.js +0 -442
- package/dist/tests/semantic-search-e2e.test.js +0 -512
- package/dist/tests/semantic-status.test.js +0 -471
- package/dist/tests/setup-run.integration.js +0 -877
- package/dist/tests/setup-wizard.test.js +0 -198
- package/dist/tests/setup.test.js +0 -131
- package/dist/tests/source-add.test.js +0 -11
- package/dist/tests/source-clone.test.js +0 -254
- package/dist/tests/source-manage.test.js +0 -366
- package/dist/tests/source-providers/filesystem.test.js +0 -82
- package/dist/tests/source-providers/git.test.js +0 -252
- package/dist/tests/source-providers/website.test.js +0 -128
- package/dist/tests/source-qa-fixes.test.js +0 -286
- package/dist/tests/source-registry.test.js +0 -350
- package/dist/tests/source-resolve.test.js +0 -100
- package/dist/tests/source-source.test.js +0 -281
- package/dist/tests/source.test.js +0 -533
- package/dist/tests/tar-utils-scan.test.js +0 -73
- package/dist/tests/toggle-components.test.js +0 -73
- package/dist/tests/usage-telemetry.test.js +0 -265
- package/dist/tests/utility-scoring.test.js +0 -558
- package/dist/tests/vault-load-error.test.js +0 -78
- package/dist/tests/vault-qa-fixes.test.js +0 -194
- package/dist/tests/vault.test.js +0 -429
- package/dist/tests/vector-search.test.js +0 -608
- package/dist/tests/walker.test.js +0 -252
- package/dist/tests/wave2-cluster-bc.test.js +0 -228
- package/dist/tests/wave2-cluster-d.test.js +0 -180
- package/dist/tests/wave2-cluster-e.test.js +0 -179
- package/dist/tests/wiki-qa-fixes.test.js +0 -270
- package/dist/tests/wiki.test.js +0 -529
- package/dist/tests/workflow-cli.test.js +0 -271
- package/dist/tests/workflow-markdown.test.js +0 -171
- package/dist/tests/workflow-path-escape.test.js +0 -132
- package/dist/tests/workflow-qa-fixes.test.js +0 -395
- package/dist/tests/workflows/indexer-rejection.test.js +0 -213
- /package/dist/{src/cli.js → cli.js} +0 -0
- /package/dist/{src/commands → commands}/completions.js +0 -0
- /package/dist/{src/commands → commands}/config-cli.js +0 -0
- /package/dist/{src/commands → commands}/curate.js +0 -0
- /package/dist/{src/commands → commands}/distill.js +0 -0
- /package/dist/{src/commands → commands}/events.js +0 -0
- /package/dist/{src/commands → commands}/history.js +0 -0
- /package/dist/{src/commands → commands}/info.js +0 -0
- /package/dist/{src/commands → commands}/init.js +0 -0
- /package/dist/{src/commands → commands}/install-audit.js +0 -0
- /package/dist/{src/commands → commands}/installed-stashes.js +0 -0
- /package/dist/{src/commands → commands}/migration-help.js +0 -0
- /package/dist/{src/commands → commands}/proposal.js +0 -0
- /package/dist/{src/commands → commands}/propose.js +0 -0
- /package/dist/{src/commands → commands}/reflect.js +0 -0
- /package/dist/{src/commands → commands}/registry-search.js +0 -0
- /package/dist/{src/commands → commands}/remember.js +0 -0
- /package/dist/{src/commands → commands}/search.js +0 -0
- /package/dist/{src/commands → commands}/self-update.js +0 -0
- /package/dist/{src/commands → commands}/show.js +0 -0
- /package/dist/{src/commands → commands}/source-add.js +0 -0
- /package/dist/{src/commands → commands}/source-clone.js +0 -0
- /package/dist/{src/commands → commands}/source-manage.js +0 -0
- /package/dist/{src/commands → commands}/vault.js +0 -0
- /package/dist/{src/core → core}/asset-ref.js +0 -0
- /package/dist/{src/core → core}/asset-registry.js +0 -0
- /package/dist/{src/core → core}/asset-spec.js +0 -0
- /package/dist/{src/core → core}/common.js +0 -0
- /package/dist/{src/core → core}/config.js +0 -0
- /package/dist/{src/core → core}/errors.js +0 -0
- /package/dist/{src/core → core}/events.js +0 -0
- /package/dist/{src/core → core}/frontmatter.js +0 -0
- /package/dist/{src/core → core}/lesson-lint.js +0 -0
- /package/dist/{src/core → core}/markdown.js +0 -0
- /package/dist/{src/core → core}/paths.js +0 -0
- /package/dist/{src/core → core}/proposals.js +0 -0
- /package/dist/{src/core → core}/warn.js +0 -0
- /package/dist/{src/core → core}/write-source.js +0 -0
- /package/dist/{src/indexer → indexer}/db-search.js +0 -0
- /package/dist/{src/indexer → indexer}/db.js +0 -0
- /package/dist/{src/indexer → indexer}/file-context.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
- /package/dist/{src/indexer → indexer}/graph-extraction.js +0 -0
- /package/dist/{src/indexer → indexer}/indexer.js +0 -0
- /package/dist/{src/indexer → indexer}/manifest.js +0 -0
- /package/dist/{src/indexer → indexer}/matchers.js +0 -0
- /package/dist/{src/indexer → indexer}/memory-inference.js +0 -0
- /package/dist/{src/indexer → indexer}/metadata.js +0 -0
- /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
- /package/dist/{src/indexer → indexer}/search-source.js +0 -0
- /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
- /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
- /package/dist/{src/indexer → indexer}/walker.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
- /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
- /package/dist/{src/integrations → integrations}/github.js +0 -0
- /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
- /package/dist/{src/llm → llm}/client.js +0 -0
- /package/dist/{src/llm → llm}/embedder.js +0 -0
- /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
- /package/dist/{src/llm → llm}/embedders/local.js +0 -0
- /package/dist/{src/llm → llm}/embedders/remote.js +0 -0
- /package/dist/{src/llm → llm}/embedders/types.js +0 -0
- /package/dist/{src/llm → llm}/feature-gate.js +0 -0
- /package/dist/{src/llm → llm}/graph-extract.js +0 -0
- /package/dist/{src/llm → llm}/index-passes.js +0 -0
- /package/dist/{src/llm → llm}/memory-infer.js +0 -0
- /package/dist/{src/llm → llm}/metadata-enhance.js +0 -0
- /package/dist/{src/output → output}/cli-hints.js +0 -0
- /package/dist/{src/output → output}/context.js +0 -0
- /package/dist/{src/output → output}/renderers.js +0 -0
- /package/dist/{src/output → output}/shapes.js +0 -0
- /package/dist/{src/output → output}/text.js +0 -0
- /package/dist/{src/registry → registry}/build-index.js +0 -0
- /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
- /package/dist/{src/registry → registry}/factory.js +0 -0
- /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
- /package/dist/{src/registry → registry}/providers/index.js +0 -0
- /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
- /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
- /package/dist/{src/registry → registry}/providers/types.js +0 -0
- /package/dist/{src/registry → registry}/resolve.js +0 -0
- /package/dist/{src/registry → registry}/types.js +0 -0
- /package/dist/{src/setup → setup}/detect.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
- /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
- /package/dist/{src/setup → setup}/setup.js +0 -0
- /package/dist/{src/setup → setup}/steps.js +0 -0
- /package/dist/{src/sources → sources}/include.js +0 -0
- /package/dist/{src/sources → sources}/provider-factory.js +0 -0
- /package/dist/{src/sources → sources}/provider.js +0 -0
- /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
- /package/dist/{src/sources → sources}/providers/git.js +0 -0
- /package/dist/{src/sources → sources}/providers/index.js +0 -0
- /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
- /package/dist/{src/sources → sources}/providers/npm.js +0 -0
- /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
- /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
- /package/dist/{src/sources → sources}/providers/website.js +0 -0
- /package/dist/{src/sources → sources}/resolve.js +0 -0
- /package/dist/{src/sources → sources}/types.js +0 -0
- /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
- /package/dist/{src/version.js → version.js} +0 -0
- /package/dist/{src/wiki → wiki}/wiki.js +0 -0
- /package/dist/{src/workflows → workflows}/authoring.js +0 -0
- /package/dist/{src/workflows → workflows}/cli.js +0 -0
- /package/dist/{src/workflows → workflows}/db.js +0 -0
- /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
- /package/dist/{src/workflows → workflows}/parser.js +0 -0
- /package/dist/{src/workflows → workflows}/renderer.js +0 -0
- /package/dist/{src/workflows → workflows}/runs.js +0 -0
- /package/dist/{src/workflows → workflows}/schema.js +0 -0
- /package/dist/{src/workflows → workflows}/validator.js +0 -0
|
@@ -1,1441 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
/**
|
|
3
|
-
* Comprehensive benchmark suite for akm search system.
|
|
4
|
-
*
|
|
5
|
-
* Standalone script (NOT a bun:test suite) that covers:
|
|
6
|
-
* 1. Search Quality (MRR, Recall@5, Recall@10)
|
|
7
|
-
* 2. Search Performance (latency in ms)
|
|
8
|
-
* 3. Indexing Performance (time in ms)
|
|
9
|
-
* 4. Token Efficiency (byte savings %)
|
|
10
|
-
* 5. Utility Scoring (M-2)
|
|
11
|
-
* 6. Feature Correctness
|
|
12
|
-
*
|
|
13
|
-
* Usage:
|
|
14
|
-
* bun run tests/benchmark-suite.ts
|
|
15
|
-
* bun run tests/benchmark-suite.ts --json # machine-readable output only
|
|
16
|
-
*/
|
|
17
|
-
import fs from "node:fs";
|
|
18
|
-
import os from "node:os";
|
|
19
|
-
import path from "node:path";
|
|
20
|
-
import { assembleInfo } from "../src/commands/info";
|
|
21
|
-
import { akmSearch } from "../src/commands/search";
|
|
22
|
-
import { saveConfig } from "../src/core/config";
|
|
23
|
-
import { getDbPath } from "../src/core/paths";
|
|
24
|
-
import { closeDatabase, openDatabase, rebuildFts, upsertUtilityScore } from "../src/indexer/db";
|
|
25
|
-
import { recomputeUtilityScores } from "../src/indexer/indexer";
|
|
26
|
-
import { buildSearchFields } from "../src/indexer/search-fields";
|
|
27
|
-
import { insertUsageEvent } from "../src/indexer/usage-events";
|
|
28
|
-
import { recordUsageEvent } from "./helpers/usage-events";
|
|
29
|
-
// ── CLI flags ────────────────────────────────────────────────────────────────
|
|
30
|
-
const jsonOnly = process.argv.includes("--json");
|
|
31
|
-
function log(msg) {
|
|
32
|
-
if (!jsonOnly)
|
|
33
|
-
process.stderr.write(msg);
|
|
34
|
-
}
|
|
35
|
-
// ── Environment isolation ────────────────────────────────────────────────────
|
|
36
|
-
const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "akm-benchsuite-"));
|
|
37
|
-
const testCacheDir = path.join(tmpRoot, "cache");
|
|
38
|
-
const testConfigDir = path.join(tmpRoot, "config");
|
|
39
|
-
fs.mkdirSync(testCacheDir, { recursive: true });
|
|
40
|
-
fs.mkdirSync(testConfigDir, { recursive: true });
|
|
41
|
-
const origXdgCache = process.env.XDG_CACHE_HOME;
|
|
42
|
-
const origXdgConfig = process.env.XDG_CONFIG_HOME;
|
|
43
|
-
const origStashDir = process.env.AKM_STASH_DIR;
|
|
44
|
-
process.env.XDG_CACHE_HOME = testCacheDir;
|
|
45
|
-
process.env.XDG_CONFIG_HOME = testConfigDir;
|
|
46
|
-
function cleanup() {
|
|
47
|
-
if (origXdgCache === undefined)
|
|
48
|
-
delete process.env.XDG_CACHE_HOME;
|
|
49
|
-
else
|
|
50
|
-
process.env.XDG_CACHE_HOME = origXdgCache;
|
|
51
|
-
if (origXdgConfig === undefined)
|
|
52
|
-
delete process.env.XDG_CONFIG_HOME;
|
|
53
|
-
else
|
|
54
|
-
process.env.XDG_CONFIG_HOME = origXdgConfig;
|
|
55
|
-
if (origStashDir === undefined)
|
|
56
|
-
delete process.env.AKM_STASH_DIR;
|
|
57
|
-
else
|
|
58
|
-
process.env.AKM_STASH_DIR = origStashDir;
|
|
59
|
-
fs.rmSync(tmpRoot, { recursive: true, force: true });
|
|
60
|
-
}
|
|
61
|
-
// ── Asset definitions (30+ assets) ───────────────────────────────────────────
|
|
62
|
-
const ASSETS = [
|
|
63
|
-
// ── 5 Skills (varying metadata quality) ──
|
|
64
|
-
{
|
|
65
|
-
dir: "skills/k8s-deploy",
|
|
66
|
-
filename: "SKILL.md",
|
|
67
|
-
fileContent: "# Kubernetes Deployment\n\nDeploy applications to Kubernetes clusters using kubectl.\n",
|
|
68
|
-
stashEntry: {
|
|
69
|
-
name: "k8s-deploy",
|
|
70
|
-
type: "skill",
|
|
71
|
-
description: "Deploy applications to Kubernetes clusters",
|
|
72
|
-
tags: ["kubernetes", "deploy", "k8s", "containers"],
|
|
73
|
-
searchHints: ["deploy to kubernetes", "kubectl apply", "container orchestration"],
|
|
74
|
-
aliases: ["kube-deploy"],
|
|
75
|
-
filename: "SKILL.md",
|
|
76
|
-
quality: "curated",
|
|
77
|
-
confidence: 0.95,
|
|
78
|
-
},
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
dir: "skills/code-review",
|
|
82
|
-
filename: "SKILL.md",
|
|
83
|
-
fileContent: "# Code Review\n\nReview pull requests for code quality and best practices.\n",
|
|
84
|
-
stashEntry: {
|
|
85
|
-
name: "code-review",
|
|
86
|
-
type: "skill",
|
|
87
|
-
description: "Review code for quality issues and best practices",
|
|
88
|
-
tags: ["review", "quality", "pull-request"],
|
|
89
|
-
searchHints: ["review pull request", "check code quality"],
|
|
90
|
-
filename: "SKILL.md",
|
|
91
|
-
quality: "curated",
|
|
92
|
-
confidence: 0.9,
|
|
93
|
-
},
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
dir: "skills/api-design",
|
|
97
|
-
filename: "SKILL.md",
|
|
98
|
-
fileContent: "# API Design\n\nDesign RESTful APIs following best practices.\n",
|
|
99
|
-
stashEntry: {
|
|
100
|
-
name: "api-design",
|
|
101
|
-
type: "skill",
|
|
102
|
-
description: "Design RESTful APIs with OpenAPI specifications",
|
|
103
|
-
tags: ["api", "rest", "openapi", "design"],
|
|
104
|
-
searchHints: ["design a REST API", "create API specification"],
|
|
105
|
-
filename: "SKILL.md",
|
|
106
|
-
quality: "curated",
|
|
107
|
-
confidence: 0.9,
|
|
108
|
-
},
|
|
109
|
-
},
|
|
110
|
-
{
|
|
111
|
-
dir: "skills/refactor",
|
|
112
|
-
filename: "SKILL.md",
|
|
113
|
-
fileContent: "# Code Refactoring\n\nRefactor code to improve readability and performance.\n",
|
|
114
|
-
stashEntry: {
|
|
115
|
-
name: "refactor",
|
|
116
|
-
type: "skill",
|
|
117
|
-
description: "Refactor code to improve structure and maintainability",
|
|
118
|
-
tags: ["refactor", "clean-code", "maintenance"],
|
|
119
|
-
searchHints: ["improve code structure", "clean up codebase"],
|
|
120
|
-
filename: "SKILL.md",
|
|
121
|
-
// Sparse metadata — no quality or confidence
|
|
122
|
-
},
|
|
123
|
-
},
|
|
124
|
-
{
|
|
125
|
-
dir: "skills/security-audit",
|
|
126
|
-
filename: "SKILL.md",
|
|
127
|
-
fileContent: "# Security Audit\n\nAudit applications for security vulnerabilities.\n",
|
|
128
|
-
stashEntry: {
|
|
129
|
-
name: "security-audit",
|
|
130
|
-
type: "skill",
|
|
131
|
-
description: "Audit code and infrastructure for security vulnerabilities",
|
|
132
|
-
tags: ["security", "audit", "vulnerability", "pentest"],
|
|
133
|
-
searchHints: ["find security vulnerabilities", "security scan"],
|
|
134
|
-
filename: "SKILL.md",
|
|
135
|
-
quality: "generated",
|
|
136
|
-
confidence: 0.6,
|
|
137
|
-
},
|
|
138
|
-
},
|
|
139
|
-
// ── 5 Commands with $ARGUMENTS parameters ──
|
|
140
|
-
{
|
|
141
|
-
dir: "commands",
|
|
142
|
-
filename: "test-runner.md",
|
|
143
|
-
fileContent: "---\ndescription: Run test suites across the project\nparams:\n suite: Test suite to run\n---\n# Test Runner\n\nRun $ARGUMENTS tests.\n",
|
|
144
|
-
stashEntry: {
|
|
145
|
-
name: "test-runner",
|
|
146
|
-
type: "command",
|
|
147
|
-
description: "Run test suites across the project",
|
|
148
|
-
tags: ["test", "testing", "ci", "runner"],
|
|
149
|
-
searchHints: ["run tests", "execute test suite"],
|
|
150
|
-
filename: "test-runner.md",
|
|
151
|
-
parameters: [{ name: "ARGUMENTS", description: "test suite path or pattern" }],
|
|
152
|
-
},
|
|
153
|
-
},
|
|
154
|
-
{
|
|
155
|
-
dir: "commands",
|
|
156
|
-
filename: "lint-check.md",
|
|
157
|
-
fileContent: "---\ndescription: Run linting checks on the codebase\n---\n# Lint Check\n\nRun lint on $ARGUMENTS.\n",
|
|
158
|
-
stashEntry: {
|
|
159
|
-
name: "lint-check",
|
|
160
|
-
type: "command",
|
|
161
|
-
description: "Run linting checks on the codebase",
|
|
162
|
-
tags: ["lint", "eslint", "code-quality"],
|
|
163
|
-
searchHints: ["lint code", "check for style issues"],
|
|
164
|
-
filename: "lint-check.md",
|
|
165
|
-
parameters: [{ name: "ARGUMENTS", description: "files to lint" }],
|
|
166
|
-
},
|
|
167
|
-
},
|
|
168
|
-
{
|
|
169
|
-
dir: "commands",
|
|
170
|
-
filename: "git-summary.md",
|
|
171
|
-
fileContent: "---\ndescription: Summarize recent git changes\n---\n# Git Summary\n\nSummarize $ARGUMENTS git log.\n",
|
|
172
|
-
stashEntry: {
|
|
173
|
-
name: "git-summary",
|
|
174
|
-
type: "command",
|
|
175
|
-
description: "Summarize recent git changes and commit history",
|
|
176
|
-
tags: ["git", "summary", "changelog"],
|
|
177
|
-
searchHints: ["summarize git commits", "show recent changes"],
|
|
178
|
-
filename: "git-summary.md",
|
|
179
|
-
parameters: [{ name: "ARGUMENTS", description: "branch or date range" }],
|
|
180
|
-
},
|
|
181
|
-
},
|
|
182
|
-
{
|
|
183
|
-
dir: "commands",
|
|
184
|
-
filename: "deploy-status.md",
|
|
185
|
-
fileContent: "---\ndescription: Check deployment status\n---\n# Deploy Status\n\nCheck $ARGUMENTS deployment status.\n",
|
|
186
|
-
stashEntry: {
|
|
187
|
-
name: "deploy-status",
|
|
188
|
-
type: "command",
|
|
189
|
-
description: "Check the current deployment status of services",
|
|
190
|
-
tags: ["deploy", "status", "monitoring"],
|
|
191
|
-
searchHints: ["check deployment", "is service deployed"],
|
|
192
|
-
filename: "deploy-status.md",
|
|
193
|
-
parameters: [{ name: "ARGUMENTS", description: "service name" }],
|
|
194
|
-
},
|
|
195
|
-
},
|
|
196
|
-
{
|
|
197
|
-
dir: "commands",
|
|
198
|
-
filename: "docker-build.md",
|
|
199
|
-
fileContent: "---\ndescription: Build Docker images from Dockerfile\nparams:\n image: Docker image name and tag\n context: Build context directory\n---\n# Docker Build\n\nBuild docker image $1 from $2.\n",
|
|
200
|
-
stashEntry: {
|
|
201
|
-
name: "docker-build",
|
|
202
|
-
type: "command",
|
|
203
|
-
description: "Build Docker images from Dockerfile",
|
|
204
|
-
tags: ["docker", "build", "image", "containers"],
|
|
205
|
-
searchHints: ["build docker image", "create container image"],
|
|
206
|
-
filename: "docker-build.md",
|
|
207
|
-
parameters: [
|
|
208
|
-
{ name: "image", description: "Docker image name and tag" },
|
|
209
|
-
{ name: "context", description: "Build context directory" },
|
|
210
|
-
],
|
|
211
|
-
intent: { when: "Need to build a container image", input: "Dockerfile path", output: "Built image" },
|
|
212
|
-
},
|
|
213
|
-
},
|
|
214
|
-
// ── 5 Scripts with @param JSDoc ──
|
|
215
|
-
{
|
|
216
|
-
dir: "scripts/pg-backup",
|
|
217
|
-
filename: "pg-backup.sh",
|
|
218
|
-
fileContent: '#!/bin/bash\n# @param {string} database - PostgreSQL database name\n# @param {string} output - Output file path for the dump\n# Backup PostgreSQL database\npg_dump "$1" > "$2"\n',
|
|
219
|
-
stashEntry: {
|
|
220
|
-
name: "pg-backup",
|
|
221
|
-
type: "script",
|
|
222
|
-
description: "Backup PostgreSQL database to a SQL dump file",
|
|
223
|
-
tags: ["database", "backup", "postgresql", "postgres"],
|
|
224
|
-
searchHints: ["backup database", "export postgres data", "pg_dump"],
|
|
225
|
-
filename: "pg-backup.sh",
|
|
226
|
-
parameters: [
|
|
227
|
-
{ name: "database", type: "string", description: "PostgreSQL database name" },
|
|
228
|
-
{ name: "output", type: "string", description: "Output file path for the dump" },
|
|
229
|
-
],
|
|
230
|
-
},
|
|
231
|
-
},
|
|
232
|
-
{
|
|
233
|
-
dir: "scripts/docker-clean",
|
|
234
|
-
filename: "docker-clean.sh",
|
|
235
|
-
fileContent: "#!/bin/bash\n# @param {string} filter - Optional image filter pattern\n# Clean up Docker resources\ndocker system prune -af\n",
|
|
236
|
-
stashEntry: {
|
|
237
|
-
name: "docker-clean",
|
|
238
|
-
type: "script",
|
|
239
|
-
description: "Clean up unused Docker images, containers, and volumes",
|
|
240
|
-
tags: ["docker", "cleanup", "containers"],
|
|
241
|
-
searchHints: ["clean docker", "remove unused images"],
|
|
242
|
-
filename: "docker-clean.sh",
|
|
243
|
-
parameters: [{ name: "filter", type: "string", description: "Optional image filter pattern" }],
|
|
244
|
-
},
|
|
245
|
-
},
|
|
246
|
-
{
|
|
247
|
-
dir: "scripts/ssl-renew",
|
|
248
|
-
filename: "ssl-renew.sh",
|
|
249
|
-
fileContent: "#!/bin/bash\n# @param {string} domain - Domain name for certificate renewal\n# Renew SSL certificates\ncertbot renew --domain $1\n",
|
|
250
|
-
stashEntry: {
|
|
251
|
-
name: "ssl-renew",
|
|
252
|
-
type: "script",
|
|
253
|
-
description: "Renew SSL/TLS certificates using certbot",
|
|
254
|
-
tags: ["ssl", "tls", "certificate", "certbot"],
|
|
255
|
-
searchHints: ["renew certificates", "ssl renewal"],
|
|
256
|
-
filename: "ssl-renew.sh",
|
|
257
|
-
parameters: [{ name: "domain", type: "string", description: "Domain name for certificate renewal" }],
|
|
258
|
-
},
|
|
259
|
-
},
|
|
260
|
-
{
|
|
261
|
-
dir: "scripts/log-rotate",
|
|
262
|
-
filename: "log-rotate.sh",
|
|
263
|
-
fileContent: "#!/bin/bash\n# @param {number} days - Number of days to keep logs\n# Rotate application logs\nlogrotate /etc/logrotate.conf\n",
|
|
264
|
-
stashEntry: {
|
|
265
|
-
name: "log-rotate",
|
|
266
|
-
type: "script",
|
|
267
|
-
description: "Rotate and compress application log files",
|
|
268
|
-
tags: ["logs", "rotation", "maintenance"],
|
|
269
|
-
searchHints: ["rotate logs", "compress old logs"],
|
|
270
|
-
filename: "log-rotate.sh",
|
|
271
|
-
parameters: [{ name: "days", type: "number", description: "Number of days to keep logs" }],
|
|
272
|
-
},
|
|
273
|
-
},
|
|
274
|
-
{
|
|
275
|
-
dir: "scripts/env-setup",
|
|
276
|
-
filename: "env-setup.sh",
|
|
277
|
-
fileContent: "#!/bin/bash\n# @param {string} environment - Target environment (dev, staging, prod)\n# Set up development environment\nnpm install && cp .env.example .env\n",
|
|
278
|
-
stashEntry: {
|
|
279
|
-
name: "env-setup",
|
|
280
|
-
type: "script",
|
|
281
|
-
description: "Set up local development environment with dependencies",
|
|
282
|
-
tags: ["setup", "environment", "development", "onboarding"],
|
|
283
|
-
searchHints: ["set up dev environment", "install dependencies"],
|
|
284
|
-
filename: "env-setup.sh",
|
|
285
|
-
parameters: [{ name: "environment", type: "string", description: "Target environment (dev, staging, prod)" }],
|
|
286
|
-
},
|
|
287
|
-
},
|
|
288
|
-
// ── 5 Knowledge docs (some with deep TOC, some minimal) ──
|
|
289
|
-
{
|
|
290
|
-
dir: "knowledge",
|
|
291
|
-
filename: "architecture-guide.md",
|
|
292
|
-
fileContent: "---\ndescription: System architecture overview\n---\n# Architecture Guide\n\n## Microservices\n\nOverview of service boundaries.\n\n## Data Flow\n\nHow data moves through the system.\n\n## Database Schema\n\nRelational model overview.\n\n## API Gateway\n\nRouting and authentication.\n",
|
|
293
|
-
stashEntry: {
|
|
294
|
-
name: "architecture-guide",
|
|
295
|
-
type: "knowledge",
|
|
296
|
-
description: "System architecture overview and design decisions",
|
|
297
|
-
tags: ["architecture", "design", "microservices"],
|
|
298
|
-
searchHints: ["system architecture", "how the system works"],
|
|
299
|
-
filename: "architecture-guide.md",
|
|
300
|
-
},
|
|
301
|
-
},
|
|
302
|
-
{
|
|
303
|
-
dir: "knowledge",
|
|
304
|
-
filename: "runbook-incidents.md",
|
|
305
|
-
fileContent: "---\ndescription: Incident response runbook\n---\n# Incident Runbook\n\n## Severity Levels\n\n## Escalation\n\n## Post-mortem\n",
|
|
306
|
-
stashEntry: {
|
|
307
|
-
name: "runbook-incidents",
|
|
308
|
-
type: "knowledge",
|
|
309
|
-
description: "Incident response procedures and escalation paths",
|
|
310
|
-
tags: ["incident", "runbook", "on-call", "ops"],
|
|
311
|
-
searchHints: ["handle incident", "escalation procedure"],
|
|
312
|
-
filename: "runbook-incidents.md",
|
|
313
|
-
},
|
|
314
|
-
},
|
|
315
|
-
{
|
|
316
|
-
dir: "knowledge",
|
|
317
|
-
filename: "coding-standards.md",
|
|
318
|
-
fileContent: "---\ndescription: Team coding standards\n---\n# Coding Standards\n\n## Naming Conventions\n\n## Error Handling\n\n## Testing Requirements\n",
|
|
319
|
-
stashEntry: {
|
|
320
|
-
name: "coding-standards",
|
|
321
|
-
type: "knowledge",
|
|
322
|
-
description: "Team coding standards and conventions",
|
|
323
|
-
tags: ["standards", "conventions", "style-guide"],
|
|
324
|
-
searchHints: ["coding style", "naming conventions"],
|
|
325
|
-
filename: "coding-standards.md",
|
|
326
|
-
},
|
|
327
|
-
},
|
|
328
|
-
{
|
|
329
|
-
dir: "knowledge",
|
|
330
|
-
filename: "onboarding.md",
|
|
331
|
-
fileContent: "---\ndescription: New team member onboarding guide\n---\n# Onboarding Guide\n\n## First Day\n\n## Access Setup\n\n## Development Environment\n\n## Team Norms\n\n## Resources\n",
|
|
332
|
-
stashEntry: {
|
|
333
|
-
name: "onboarding",
|
|
334
|
-
type: "knowledge",
|
|
335
|
-
description: "New team member onboarding guide with checklists",
|
|
336
|
-
tags: ["onboarding", "new-hire", "team"],
|
|
337
|
-
searchHints: ["new team member", "getting started"],
|
|
338
|
-
filename: "onboarding.md",
|
|
339
|
-
},
|
|
340
|
-
},
|
|
341
|
-
{
|
|
342
|
-
dir: "knowledge",
|
|
343
|
-
filename: "troubleshooting.md",
|
|
344
|
-
fileContent: "---\ndescription: Common troubleshooting steps\n---\n# Troubleshooting\n\nBasic debugging tips.\n",
|
|
345
|
-
stashEntry: {
|
|
346
|
-
name: "troubleshooting",
|
|
347
|
-
type: "knowledge",
|
|
348
|
-
description: "Common troubleshooting steps for production issues",
|
|
349
|
-
tags: ["troubleshooting", "debugging", "production"],
|
|
350
|
-
searchHints: ["debug production issue", "common errors"],
|
|
351
|
-
filename: "troubleshooting.md",
|
|
352
|
-
},
|
|
353
|
-
},
|
|
354
|
-
// ── 5 Agents ──
|
|
355
|
-
{
|
|
356
|
-
dir: "agents",
|
|
357
|
-
filename: "devops-engineer.md",
|
|
358
|
-
fileContent: "---\ndescription: DevOps engineering agent\n---\nYou are a DevOps engineer specializing in CI/CD pipelines and infrastructure automation.\n",
|
|
359
|
-
stashEntry: {
|
|
360
|
-
name: "devops-engineer",
|
|
361
|
-
type: "agent",
|
|
362
|
-
description: "DevOps engineering agent for CI/CD and infrastructure",
|
|
363
|
-
tags: ["devops", "ci-cd", "infrastructure", "automation"],
|
|
364
|
-
searchHints: ["automate infrastructure", "CI/CD pipeline"],
|
|
365
|
-
filename: "devops-engineer.md",
|
|
366
|
-
},
|
|
367
|
-
},
|
|
368
|
-
{
|
|
369
|
-
dir: "agents",
|
|
370
|
-
filename: "data-analyst.md",
|
|
371
|
-
fileContent: "---\ndescription: Data analysis agent\n---\nYou are a data analyst who helps explore datasets and generate insights.\n",
|
|
372
|
-
stashEntry: {
|
|
373
|
-
name: "data-analyst",
|
|
374
|
-
type: "agent",
|
|
375
|
-
description: "Data analysis agent for exploring datasets and generating insights",
|
|
376
|
-
tags: ["data", "analysis", "statistics", "insights"],
|
|
377
|
-
searchHints: ["analyze data", "generate reports"],
|
|
378
|
-
filename: "data-analyst.md",
|
|
379
|
-
},
|
|
380
|
-
},
|
|
381
|
-
{
|
|
382
|
-
dir: "agents",
|
|
383
|
-
filename: "technical-writer.md",
|
|
384
|
-
fileContent: "---\ndescription: Technical writing agent\n---\nYou are a technical writer who creates clear documentation.\n",
|
|
385
|
-
stashEntry: {
|
|
386
|
-
name: "technical-writer",
|
|
387
|
-
type: "agent",
|
|
388
|
-
description: "Technical writing agent for creating documentation",
|
|
389
|
-
tags: ["documentation", "writing", "technical"],
|
|
390
|
-
searchHints: ["write documentation", "create technical docs"],
|
|
391
|
-
filename: "technical-writer.md",
|
|
392
|
-
},
|
|
393
|
-
},
|
|
394
|
-
{
|
|
395
|
-
dir: "agents",
|
|
396
|
-
filename: "frontend-dev.md",
|
|
397
|
-
fileContent: "---\ndescription: Frontend development agent\n---\nYou are a frontend developer specializing in React and TypeScript.\n",
|
|
398
|
-
stashEntry: {
|
|
399
|
-
name: "frontend-dev",
|
|
400
|
-
type: "agent",
|
|
401
|
-
description: "Frontend development agent specializing in React and TypeScript",
|
|
402
|
-
tags: ["frontend", "react", "typescript", "ui"],
|
|
403
|
-
searchHints: ["build React component", "frontend development"],
|
|
404
|
-
filename: "frontend-dev.md",
|
|
405
|
-
},
|
|
406
|
-
},
|
|
407
|
-
{
|
|
408
|
-
dir: "agents",
|
|
409
|
-
filename: "dba-specialist.md",
|
|
410
|
-
fileContent: "---\ndescription: Database administration specialist\n---\nYou are a DBA specialist who optimizes queries and manages schemas.\n",
|
|
411
|
-
stashEntry: {
|
|
412
|
-
name: "dba-specialist",
|
|
413
|
-
type: "agent",
|
|
414
|
-
description: "Database administration specialist for query optimization",
|
|
415
|
-
tags: ["database", "sql", "optimization", "dba"],
|
|
416
|
-
searchHints: ["optimize database query", "schema management"],
|
|
417
|
-
filename: "dba-specialist.md",
|
|
418
|
-
},
|
|
419
|
-
},
|
|
420
|
-
// ── 5 Assets with overlapping terms in different fields (field weighting tests) ──
|
|
421
|
-
{
|
|
422
|
-
dir: "skills/deploy-helper",
|
|
423
|
-
filename: "SKILL.md",
|
|
424
|
-
fileContent: "# Deploy Helper\n\nHelps with deployment workflows.\n",
|
|
425
|
-
stashEntry: {
|
|
426
|
-
name: "deploy-helper",
|
|
427
|
-
type: "skill",
|
|
428
|
-
description: "Assists with deployment workflow automation and rollbacks",
|
|
429
|
-
tags: ["workflow", "automation", "rollback"],
|
|
430
|
-
searchHints: ["automate deployment workflow"],
|
|
431
|
-
filename: "SKILL.md",
|
|
432
|
-
// Name contains "deploy" -- should rank higher for "deploy" than
|
|
433
|
-
// assets that only have "deploy" in description or tags
|
|
434
|
-
},
|
|
435
|
-
},
|
|
436
|
-
{
|
|
437
|
-
dir: "knowledge",
|
|
438
|
-
filename: "deploy-checklist.md",
|
|
439
|
-
fileContent: "---\ndescription: Pre-deployment checklist for production releases\n---\n# Pre-deployment Checklist\n\n## Steps\n\n1. Run tests\n2. Review changes\n",
|
|
440
|
-
stashEntry: {
|
|
441
|
-
name: "deploy-checklist",
|
|
442
|
-
type: "knowledge",
|
|
443
|
-
description: "Pre-deployment checklist for production releases",
|
|
444
|
-
tags: ["checklist", "production", "release"],
|
|
445
|
-
filename: "deploy-checklist.md",
|
|
446
|
-
// Name also contains "deploy" in name field
|
|
447
|
-
},
|
|
448
|
-
},
|
|
449
|
-
{
|
|
450
|
-
dir: "scripts/metrics-collector",
|
|
451
|
-
filename: "metrics-collector.sh",
|
|
452
|
-
fileContent: "#!/bin/bash\n# Collect deployment metrics from monitoring API\ncurl http://metrics.internal/deploy\n",
|
|
453
|
-
stashEntry: {
|
|
454
|
-
name: "metrics-collector",
|
|
455
|
-
type: "script",
|
|
456
|
-
description: "Collect deployment metrics from monitoring infrastructure",
|
|
457
|
-
tags: ["metrics", "monitoring", "deploy"],
|
|
458
|
-
searchHints: ["collect metrics"],
|
|
459
|
-
filename: "metrics-collector.sh",
|
|
460
|
-
// "deploy" only in tags and description, NOT in name
|
|
461
|
-
},
|
|
462
|
-
},
|
|
463
|
-
{
|
|
464
|
-
dir: "commands",
|
|
465
|
-
filename: "health-check.md",
|
|
466
|
-
fileContent: "---\ndescription: Run health checks against deployed services\n---\n# Health Check\n\nCheck service health after deployment.\n",
|
|
467
|
-
stashEntry: {
|
|
468
|
-
name: "health-check",
|
|
469
|
-
type: "command",
|
|
470
|
-
description: "Run health checks against deployed services",
|
|
471
|
-
tags: ["health", "monitoring", "services"],
|
|
472
|
-
searchHints: ["check service health", "verify deployment"],
|
|
473
|
-
filename: "health-check.md",
|
|
474
|
-
// "deploy" only in description and hints, NOT in name or tags
|
|
475
|
-
},
|
|
476
|
-
},
|
|
477
|
-
{
|
|
478
|
-
dir: "knowledge",
|
|
479
|
-
filename: "monitoring-guide.md",
|
|
480
|
-
fileContent: "---\ndescription: Guide to monitoring deployed applications\n---\n# Monitoring Guide\n\n## Alerting\n\n## Dashboards\n\n## Incident Response\n",
|
|
481
|
-
stashEntry: {
|
|
482
|
-
name: "monitoring-guide",
|
|
483
|
-
type: "knowledge",
|
|
484
|
-
description: "Guide to monitoring deployed applications and setting up alerts",
|
|
485
|
-
tags: ["monitoring", "alerting", "dashboards", "observability"],
|
|
486
|
-
filename: "monitoring-guide.md",
|
|
487
|
-
// "deploy" only in description content
|
|
488
|
-
},
|
|
489
|
-
},
|
|
490
|
-
];
|
|
491
|
-
// ── Stash creation ───────────────────────────────────────────────────────────
|
|
492
|
-
/**
 * Materialise the benchmark fixture stash on disk.
 *
 * Creates `<tmpRoot>/stash` with the five top-level asset directories, writes
 * every asset file from ASSETS, and writes one `.stash.json` per asset
 * directory listing the stash entries of the assets stored there (in ASSETS
 * order).
 *
 * Each `.stash.json` is built in memory and written exactly once, so the
 * function is idempotent: running it again against the same tmpRoot does not
 * append duplicate entries, and a stale or malformed `.stash.json` left on
 * disk cannot break it.
 *
 * @returns {string} Absolute path of the created stash directory.
 */
function createBenchmarkStash() {
    const stashDir = path.join(tmpRoot, "stash");
    for (const sub of ["skills", "commands", "agents", "knowledge", "scripts"]) {
        fs.mkdirSync(path.join(stashDir, sub), { recursive: true });
    }
    // Several assets can share one directory (e.g. "knowledge"), so collect
    // their entries per directory before writing any metadata file.
    const entriesByDir = new Map();
    for (const asset of ASSETS) {
        const dirPath = path.join(stashDir, asset.dir);
        fs.mkdirSync(dirPath, { recursive: true });
        fs.writeFileSync(path.join(dirPath, asset.filename), asset.fileContent);
        const entries = entriesByDir.get(dirPath) ?? [];
        entries.push(asset.stashEntry);
        entriesByDir.set(dirPath, entries);
    }
    for (const [dirPath, entries] of entriesByDir) {
        fs.writeFileSync(path.join(dirPath, ".stash.json"), JSON.stringify({ entries }, null, 2));
    }
    return stashDir;
}
|
|
512
|
-
// ── Git helpers ──────────────────────────────────────────────────────────────
|
|
513
|
-
/**
 * Report the git branch and short commit hash of the checkout containing
 * this file.
 *
 * Each value falls back to "unknown" when git cannot be spawned, exits with a
 * non-zero status (for example when the directory is not a git repository),
 * or prints nothing. A failed git invocation returns a result object rather
 * than throwing, so the exit code has to be checked explicitly.
 *
 * @returns {{ branch: string, commit: string }}
 */
function gitInfo() {
    const revParse = (flag) => {
        try {
            const proc = Bun.spawnSync(["git", "rev-parse", flag, "HEAD"], {
                cwd: import.meta.dir,
            });
            if (proc.exitCode !== 0) {
                return "unknown";
            }
            // `||` is intentional: an empty string is exactly the case to replace.
            return proc.stdout.toString().trim() || "unknown";
        }
        catch {
            // Best-effort metadata: a missing git binary must not abort the run.
            return "unknown";
        }
    };
    return { branch: revParse("--abbrev-ref"), commit: revParse("--short") };
}
|
|
531
|
-
// ── Timing utility ───────────────────────────────────────────────────────────
|
|
532
|
-
/**
 * Run a synchronous callback once and measure how long it took.
 *
 * @param {() => void} fn - Work to time; its return value is discarded.
 * @returns {number} Elapsed wall-clock milliseconds, rounded to two decimals.
 */
function timeMs(fn) {
    const startedAt = performance.now();
    fn();
    const elapsed = performance.now() - startedAt;
    return Math.round(elapsed * 100) / 100;
}
|
|
537
|
-
/**
 * Await an asynchronous callback once and measure how long it took to settle.
 *
 * @param {() => Promise<unknown>} fn - Work to time; its resolved value is discarded.
 * @returns {Promise<number>} Elapsed wall-clock milliseconds, rounded to two decimals.
 */
async function timeMsAsync(fn) {
    const startedAt = performance.now();
    await fn();
    const elapsed = performance.now() - startedAt;
    return Math.round(elapsed * 100) / 100;
}
|
|
542
|
-
// Search-quality probes. Each row is
//   [id, query, expectedName, expectedType, aspect]
// and is expanded into an object with those keys, in that order.
const QUALITY_QUERIES = [
    // Exact keyword matches
    ["sq-01", "kubernetes", "k8s-deploy", "skill", "exact-keyword-tag"],
    ["sq-02", "database backup", "pg-backup", "script", "exact-keyword-desc-tag"],
    ["sq-03", "test runner", "test-runner", "command", "exact-keyword-name"],
    ["sq-04", "security audit", "security-audit", "skill", "exact-keyword-name"],
    // Partial/prefix matches (S-1 fuzzy search)
    ["sq-05", "kube", "k8s-deploy", "skill", "prefix-alias"],
    ["sq-06", "cert", "ssl-renew", "script", "prefix-tag"],
    // Multi-word queries
    ["sq-07", "ci cd pipeline", "devops-engineer", "agent", "multi-word-tags"],
    ["sq-08", "code quality review", "code-review", "skill", "multi-word-desc"],
    // Natural language intent queries
    ["sq-09", "renew ssl certificate", "ssl-renew", "script", "natural-language"],
    ["sq-10", "deploy to kubernetes", "k8s-deploy", "skill", "natural-language-hint"],
    ["sq-11", "analyze data", "data-analyst", "agent", "natural-language-hint"],
    // Cross-field matches (name match > description match).
    // k8s-deploy is a skill with "deploy" in tags/aliases; deploy-helper has it in name.
    // Both are plausible top results, but only one expected name can be recorded per
    // query, and k8s-deploy is the one listed here.
    ["sq-12", "deploy", "k8s-deploy", "skill", "field-weighting-name-vs-desc"],
    // Parameter-based discovery (I-2)
    ["sq-13", "docker image", "docker-build", "command", "parameter-discovery"],
    // Tag match specificity.
    // docker-build is a command with "docker" in name+tags; it is expected to rank
    // above docker-clean (script) due to type boost (command > script).
    ["sq-14", "docker", "docker-build", "command", "tag-match"],
    // Description match
    ["sq-15", "incident response", "runbook-incidents", "knowledge", "desc-match"],
].map(([id, query, expectedName, expectedType, aspect]) => ({ id, query, expectedName, expectedType, aspect }));
|
|
655
|
-
/**
 * Scenario: search quality.
 *
 * Runs every QUALITY_QUERIES probe against the stash (limit 20), ignores hits
 * of type "registry", and records the 1-based rank at which the expected
 * asset name appears. A probe passes when that rank is 5 or better.
 *
 * @param {string} _stashDir - Unused; kept for a uniform scenario signature.
 * @returns {Promise<{ mrr: number, recall_at_5: number, recall_at_10: number, cases: object[] }>}
 *   Mean reciprocal rank and recall@5/@10 (each rounded to four decimals),
 *   plus one case record per probe.
 */
async function benchmarkSearchQuality(_stashDir) {
    log(" Running search quality benchmarks...\n");
    const round4 = (value) => Math.round(value * 10000) / 10000;
    const cases = [];
    let reciprocalRankSum = 0;
    let foundInTop5 = 0;
    let foundInTop10 = 0;
    for (const probe of QUALITY_QUERIES) {
        const response = await akmSearch({ query: probe.query, source: "stash", limit: 20 });
        const localHits = response.hits.filter((hit) => hit.type !== "registry");
        const position = localHits.findIndex((hit) => hit.name === probe.expectedName);
        const found = position !== -1;
        const rank = found ? position + 1 : null;
        const withinTop5 = found && rank <= 5;
        if (found) {
            reciprocalRankSum += 1 / rank;
        }
        if (withinTop5) {
            foundInTop5 += 1;
        }
        if (found && rank <= 10) {
            foundInTop10 += 1;
        }
        cases.push({
            id: probe.id,
            scenario: "search_quality",
            description: `${probe.aspect}: "${probe.query}" -> ${probe.expectedName}`,
            passed: withinTop5,
            // -1 marks a probe whose expected asset was not returned at all.
            metric: found ? rank : -1,
            unit: "rank",
            details: found ? `Rank ${rank}` : "MISS (not in results)",
        });
    }
    const probeCount = QUALITY_QUERIES.length;
    return {
        mrr: round4(reciprocalRankSum / probeCount),
        recall_at_5: round4(foundInTop5 / probeCount),
        recall_at_10: round4(foundInTop10 / probeCount),
        cases,
    };
}
|
|
689
|
-
// ── Scenario 2: Search Performance ───────────────────────────────────────────
|
|
690
|
-
/**
 * Scenario: search latency.
 *
 * Times four stash searches, one after another, and compares each against a
 * fixed millisecond budget:
 *   sp-01  first run of a query text                 (< 500 ms)
 *   sp-02  the same query text repeated              (< 200 ms)
 *   sp-03  a different multi-word query              (< 200 ms)
 *   sp-04  empty query with limit 100                (< 500 ms)
 *
 * @param {string} _stashDir - Unused; kept for a uniform scenario signature.
 * @returns {Promise<{ cold_ms: number, warm_ms: number, fts_only_ms: number, large_result_ms: number, cases: object[] }>}
 */
async function benchmarkSearchPerformance(_stashDir) {
    log(" Running search performance benchmarks...\n");
    const repeatedQuery = "infrastructure automation pipeline";
    const probes = [
        // "Cold" only in the sense that this query text has not been timed yet;
        // the index itself is already warm from the quality scenario.
        { id: "sp-01", description: "Cold search (first query with this text)", query: repeatedQuery, limit: 20, budgetMs: 500 },
        // Same text again -- FTS cache warm.
        { id: "sp-02", description: "Warm search (repeated query)", query: repeatedQuery, limit: 20, budgetMs: 200 },
        // FTS-only search (semantic search disabled in config).
        { id: "sp-03", description: "FTS-only search (no embeddings)", query: "deploy kubernetes containers", limit: 20, budgetMs: 200 },
        // Large result set (empty query returns all entries).
        { id: "sp-04", description: "Large result set (all assets)", query: "", limit: 100, budgetMs: 500 },
    ];
    const cases = [];
    const timings = [];
    for (const probe of probes) {
        const elapsedMs = await timeMsAsync(async () => {
            await akmSearch({ query: probe.query, source: "stash", limit: probe.limit });
        });
        timings.push(elapsedMs);
        cases.push({
            id: probe.id,
            scenario: "search_performance",
            description: probe.description,
            passed: elapsedMs < probe.budgetMs,
            metric: elapsedMs,
            unit: "ms",
        });
    }
    const [coldMs, warmMs, ftsMs, largeMs] = timings;
    return {
        cold_ms: coldMs,
        warm_ms: warmMs,
        fts_only_ms: ftsMs,
        large_result_ms: largeMs,
        cases,
    };
}
|
|
750
|
-
// ── Scenario 3: Indexing Performance ─────────────────────────────────────────
|
|
751
|
-
/**
 * Scenario: indexing latency.
 *
 * Times, in order: a full index of the stash, an incremental index right
 * after it, an FTS rebuild, and a utility-score recompute. The last two run
 * directly against the index database, which is closed again even if one of
 * them throws.
 *
 * @param {string} stashDir - Stash directory to index.
 * @returns {Promise<{ full_ms: number, incremental_ms: number, fts_rebuild_ms: number, recompute_utility_ms: number, cases: object[] }>}
 */
async function benchmarkIndexingPerformance(stashDir) {
    log(" Running indexing performance benchmarks...\n");
    const cases = [];
    // Builds one case record; `extra` carries optional trailing fields so the
    // key order stays id, scenario, description, passed, metric, unit[, details].
    const record = (id, description, passed, metric, extra = {}) => {
        cases.push({ id, scenario: "indexing_performance", description, passed, metric, unit: "ms", ...extra });
    };
    // Import akmIndex locally to avoid any caching issues
    const indexer = await import("../src/indexer/indexer.js");
    const runIndex = (full) => timeMsAsync(async () => {
        await indexer.akmIndex({ stashDir, full });
    });
    // Full index (fresh rebuild)
    const fullMs = await runIndex(true);
    record("ip-01", "Fresh full index (empty DB)", fullMs < 5000, fullMs);
    // Incremental index (nothing changed): judged relative to the full run
    // rather than against a fixed budget.
    const incrMs = await runIndex(false);
    record("ip-02", "Incremental index (no changes)", incrMs < fullMs, incrMs, {
        details: `Should be faster than full (${fullMs}ms)`,
    });
    let ftsMs = 0;
    let utilMs = 0;
    const db = openDatabase(getDbPath());
    try {
        // FTS rebuild time
        ftsMs = timeMs(() => {
            rebuildFts(db);
        });
        record("ip-03", "FTS rebuild time", ftsMs < 500, ftsMs);
        // recomputeUtilityScores time
        utilMs = timeMs(() => {
            recomputeUtilityScores(db);
        });
        record("ip-04", "recomputeUtilityScores time", utilMs < 200, utilMs);
    }
    finally {
        closeDatabase(db);
    }
    return {
        full_ms: fullMs,
        incremental_ms: incrMs,
        fts_rebuild_ms: ftsMs,
        recompute_utility_ms: utilMs,
        cases,
    };
}
|
|
822
|
-
// ── Scenario 4: Token Efficiency ─────────────────────────────────────────────
|
|
823
|
-
/**
 * Scenario: output size (a proxy for token cost).
 *
 * Serialises one "deploy" search result in several shapes and compares byte
 * sizes:
 *   te-01  full result vs. hits reduced to name/type/description/ref  (> 10 % saved)
 *   te-02  manifest bytes per entry                                   (< 200 B)
 *   te-03  hits vs. hits reduced to the --for-agent field set         (> 10 % saved)
 *   te-04  hits as a JSON array vs. newline-joined JSONL              (report only)
 *
 * The reduced shapes are built here by hand; the CLI's own --detail /
 * --for-agent paths are not invoked.
 *
 * @param {string} stashDir - Stash directory whose manifest is measured.
 * @returns {Promise<{ summary_savings_pct: number, manifest_bytes_per_asset: number, for_agent_savings_pct: number, jsonl_savings_pct: number, cases: object[] }>}
 */
async function benchmarkTokenEfficiency(stashDir) {
    log(" Running token efficiency benchmarks...\n");
    const byteSize = (text) => Buffer.byteLength(text);
    const savingsPct = (baseline, reduced) => Math.round(((baseline - reduced) / baseline) * 100);
    const cases = [];

    // te-01: summary vs full search output.
    const fullResult = await akmSearch({ query: "deploy", source: "stash", limit: 10 });
    const fullBytes = byteSize(JSON.stringify(fullResult));
    const summaryBytes = byteSize(JSON.stringify({
        ...fullResult,
        hits: fullResult.hits.map((hit) => ({
            name: hit.name,
            type: hit.type,
            description: hit.description,
            ref: hit.ref,
        })),
    }));
    const summarySavingsPct = savingsPct(fullBytes, summaryBytes);
    cases.push({
        id: "te-01",
        scenario: "token_efficiency",
        description: "Summary vs full search output savings",
        passed: summarySavingsPct > 10,
        metric: summarySavingsPct,
        unit: "%",
        details: `Full: ${fullBytes}B, Summary: ${summaryBytes}B`,
    });

    // te-02: manifest output size per N assets.
    const { akmManifest } = await import("../src/indexer/manifest.js");
    const manifest = await akmManifest({ stashDir });
    const manifestBytes = byteSize(JSON.stringify(manifest));
    const assetCount = manifest.entries.length;
    // Guard the division: an empty manifest reports 0 bytes/asset.
    const bytesPerAsset = assetCount > 0 ? Math.round(manifestBytes / assetCount) : 0;
    cases.push({
        id: "te-02",
        scenario: "token_efficiency",
        description: "Manifest bytes per asset",
        passed: bytesPerAsset < 200,
        metric: bytesPerAsset,
        unit: "bytes/asset",
        details: `Total: ${manifestBytes}B for ${assetCount} assets`,
    });

    // te-03: --for-agent output size vs normal (for-agent strips paths, editHints, etc.).
    const normalHits = fullResult.hits;
    const jsonBytes = byteSize(JSON.stringify(normalHits));
    const forAgentBytes = byteSize(JSON.stringify(normalHits.map((hit) => ({
        type: hit.type,
        name: hit.name,
        ref: hit.ref,
        description: hit.description,
        action: hit.action,
        score: hit.score,
    }))));
    const forAgentSavings = savingsPct(jsonBytes, forAgentBytes);
    cases.push({
        id: "te-03",
        scenario: "token_efficiency",
        description: "--for-agent output size savings vs normal",
        passed: forAgentSavings > 10,
        metric: forAgentSavings,
        unit: "%",
    });

    // te-04: --format jsonl size vs json.
    const jsonlBytes = byteSize(normalHits.map((hit) => JSON.stringify(hit)).join("\n"));
    const jsonlSavingsPct = savingsPct(jsonBytes, jsonlBytes);
    cases.push({
        id: "te-04",
        scenario: "token_efficiency",
        description: "JSONL vs JSON format size",
        // JSONL typically has slightly less overhead (no outer brackets + commas)
        // but can be slightly larger too, so this case only reports.
        passed: true,
        metric: jsonlSavingsPct,
        unit: "%",
        details: `JSON: ${jsonBytes}B, JSONL: ${jsonlBytes}B`,
    });

    return {
        summary_savings_pct: summarySavingsPct,
        manifest_bytes_per_asset: bytesPerAsset,
        for_agent_savings_pct: forAgentSavings,
        jsonl_savings_pct: jsonlSavingsPct,
        cases,
    };
}
|
|
919
|
-
// ── Scenario 5: Utility Scoring ──────────────────────────────────────────────
|
|
920
|
-
async function benchmarkUtilityScoring(_stashDir) {
|
|
921
|
-
log(" Running utility scoring benchmarks...\n");
|
|
922
|
-
const cases = [];
|
|
923
|
-
const dbPath = getDbPath();
|
|
924
|
-
// Test 1: Fresh index with no usage data — all scores should be baseline (no utility boost)
|
|
925
|
-
{
|
|
926
|
-
const result = await akmSearch({ query: "deploy", source: "stash", limit: 20 });
|
|
927
|
-
const localHits = result.hits.filter((h) => h.type !== "registry");
|
|
928
|
-
const hasUtilityBoost = localHits.some((h) => h.whyMatched?.includes("usage history boost"));
|
|
929
|
-
cases.push({
|
|
930
|
-
id: "us-01",
|
|
931
|
-
scenario: "utility_scoring",
|
|
932
|
-
description: "Fresh index has no utility boosts",
|
|
933
|
-
passed: !hasUtilityBoost,
|
|
934
|
-
metric: hasUtilityBoost ? 1 : 0,
|
|
935
|
-
unit: "boosted_count",
|
|
936
|
-
});
|
|
937
|
-
}
|
|
938
|
-
// Test 2: After simulated usage events, boosted entry ranks higher
|
|
939
|
-
let boostApplied = false;
|
|
940
|
-
{
|
|
941
|
-
const db = openDatabase(dbPath);
|
|
942
|
-
try {
|
|
943
|
-
// Find two entries that match the same query
|
|
944
|
-
const entries = db
|
|
945
|
-
.prepare("SELECT id, entry_key FROM entries WHERE entry_key LIKE '%deploy%' LIMIT 2")
|
|
946
|
-
.all();
|
|
947
|
-
if (entries.length >= 2) {
|
|
948
|
-
const boostedId = entries[0].id;
|
|
949
|
-
const _baselineId = entries[1].id;
|
|
950
|
-
// Record usage events for the boosted entry
|
|
951
|
-
for (let i = 0; i < 10; i++) {
|
|
952
|
-
recordUsageEvent(db, { eventType: "show", entryId: boostedId, timestamp: new Date().toISOString() });
|
|
953
|
-
recordUsageEvent(db, { eventType: "search", entryId: boostedId, timestamp: new Date().toISOString() });
|
|
954
|
-
}
|
|
955
|
-
// Recompute utility scores
|
|
956
|
-
recomputeUtilityScores(db);
|
|
957
|
-
// Verify the boosted entry now has a non-zero utility score
|
|
958
|
-
const score = db.prepare("SELECT utility FROM utility_scores WHERE entry_id = ?").get(boostedId);
|
|
959
|
-
boostApplied = (score?.utility ?? 0) > 0;
|
|
960
|
-
}
|
|
961
|
-
}
|
|
962
|
-
finally {
|
|
963
|
-
closeDatabase(db);
|
|
964
|
-
}
|
|
965
|
-
cases.push({
|
|
966
|
-
id: "us-02",
|
|
967
|
-
scenario: "utility_scoring",
|
|
968
|
-
description: "Usage events generate positive utility score",
|
|
969
|
-
passed: boostApplied,
|
|
970
|
-
});
|
|
971
|
-
}
|
|
972
|
-
// Test 3: Recency decay — old events contribute less
|
|
973
|
-
let decayWorks = false;
|
|
974
|
-
{
|
|
975
|
-
const db = openDatabase(dbPath);
|
|
976
|
-
try {
|
|
977
|
-
const entries = db.prepare("SELECT id FROM entries LIMIT 2").all();
|
|
978
|
-
if (entries.length >= 2) {
|
|
979
|
-
const recentId = entries[0].id;
|
|
980
|
-
const oldId = entries[1].id;
|
|
981
|
-
// Clear existing usage events and utility scores
|
|
982
|
-
db.exec("DELETE FROM usage_events");
|
|
983
|
-
db.exec("DELETE FROM utility_scores");
|
|
984
|
-
// Recent usage for entry 0
|
|
985
|
-
recordUsageEvent(db, { eventType: "show", entryId: recentId, timestamp: new Date().toISOString() });
|
|
986
|
-
recordUsageEvent(db, { eventType: "search", entryId: recentId, timestamp: new Date().toISOString() });
|
|
987
|
-
// Old usage for entry 1 (60 days ago)
|
|
988
|
-
const oldDate = new Date();
|
|
989
|
-
oldDate.setDate(oldDate.getDate() - 60);
|
|
990
|
-
recordUsageEvent(db, { eventType: "show", entryId: oldId, timestamp: oldDate.toISOString() });
|
|
991
|
-
recordUsageEvent(db, { eventType: "search", entryId: oldId, timestamp: oldDate.toISOString() });
|
|
992
|
-
recomputeUtilityScores(db);
|
|
993
|
-
const recentScore = db
|
|
994
|
-
.prepare("SELECT utility, last_used_at FROM utility_scores WHERE entry_id = ?")
|
|
995
|
-
.get(recentId);
|
|
996
|
-
const oldScore = db.prepare("SELECT utility, last_used_at FROM utility_scores WHERE entry_id = ?").get(oldId);
|
|
997
|
-
// Both should have the same utility score from recompute (based on select_rate),
|
|
998
|
-
// but the recency decay is applied at search time, not at recompute time.
|
|
999
|
-
// So we need to verify that the last_used_at timestamps differ.
|
|
1000
|
-
if (recentScore && oldScore) {
|
|
1001
|
-
const recentTs = new Date(recentScore.last_used_at).getTime();
|
|
1002
|
-
const oldTs = new Date(oldScore.last_used_at).getTime();
|
|
1003
|
-
decayWorks = recentTs > oldTs;
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
1006
|
-
}
|
|
1007
|
-
finally {
|
|
1008
|
-
closeDatabase(db);
|
|
1009
|
-
}
|
|
1010
|
-
cases.push({
|
|
1011
|
-
id: "us-03",
|
|
1012
|
-
scenario: "utility_scoring",
|
|
1013
|
-
description: "Recency decay: recent last_used_at vs old",
|
|
1014
|
-
passed: decayWorks,
|
|
1015
|
-
});
|
|
1016
|
-
}
|
|
1017
|
-
// Test 4: Utility cap — extreme utility doesn't over-boost (cap at 1.5x)
|
|
1018
|
-
let capWorks = false;
|
|
1019
|
-
{
|
|
1020
|
-
const db = openDatabase(dbPath);
|
|
1021
|
-
try {
|
|
1022
|
-
const entries = db.prepare("SELECT id FROM entries LIMIT 2").all();
|
|
1023
|
-
if (entries.length >= 2) {
|
|
1024
|
-
// Give extreme utility to first entry
|
|
1025
|
-
upsertUtilityScore(db, entries[0].id, {
|
|
1026
|
-
utility: 100.0, // Extreme
|
|
1027
|
-
showCount: 10000,
|
|
1028
|
-
searchCount: 10000,
|
|
1029
|
-
selectRate: 1.0,
|
|
1030
|
-
lastUsedAt: new Date().toISOString(),
|
|
1031
|
-
});
|
|
1032
|
-
// Give zero utility to second entry
|
|
1033
|
-
upsertUtilityScore(db, entries[1].id, {
|
|
1034
|
-
utility: 0,
|
|
1035
|
-
showCount: 0,
|
|
1036
|
-
searchCount: 0,
|
|
1037
|
-
selectRate: 0,
|
|
1038
|
-
});
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
finally {
|
|
1042
|
-
closeDatabase(db);
|
|
1043
|
-
}
|
|
1044
|
-
// Search and check scores
|
|
1045
|
-
const result = await akmSearch({ query: "deploy", source: "stash", limit: 20 });
|
|
1046
|
-
const localHits = result.hits.filter((h) => h.type !== "registry");
|
|
1047
|
-
if (localHits.length >= 2) {
|
|
1048
|
-
const maxScore = localHits[0].score ?? 0;
|
|
1049
|
-
const minScore = localHits[localHits.length - 1].score ?? 0;
|
|
1050
|
-
// The ratio should be bounded (due to 1.5x cap)
|
|
1051
|
-
const ratio = minScore > 0 ? maxScore / minScore : 0;
|
|
1052
|
-
// Even with extreme utility, the max boost factor is 1.5x applied to base score.
|
|
1053
|
-
// With different base FTS scores the ratio can exceed 1.5, but
|
|
1054
|
-
// for same-content entries it should be <= ~1.55
|
|
1055
|
-
capWorks = ratio < 10; // Very generous bound; just verify no extreme blowup
|
|
1056
|
-
}
|
|
1057
|
-
cases.push({
|
|
1058
|
-
id: "us-04",
|
|
1059
|
-
scenario: "utility_scoring",
|
|
1060
|
-
description: "Utility cap prevents extreme score inflation",
|
|
1061
|
-
passed: capWorks,
|
|
1062
|
-
});
|
|
1063
|
-
}
|
|
1064
|
-
// Clean up utility data for other tests
|
|
1065
|
-
{
|
|
1066
|
-
const db = openDatabase(dbPath);
|
|
1067
|
-
try {
|
|
1068
|
-
db.exec("DELETE FROM usage_events");
|
|
1069
|
-
db.exec("DELETE FROM utility_scores");
|
|
1070
|
-
}
|
|
1071
|
-
finally {
|
|
1072
|
-
closeDatabase(db);
|
|
1073
|
-
}
|
|
1074
|
-
}
|
|
1075
|
-
return {
|
|
1076
|
-
baseline_no_usage: !!cases[0].passed, // pass means no boost = correct
|
|
1077
|
-
boost_applied: boostApplied,
|
|
1078
|
-
decay_works: decayWorks,
|
|
1079
|
-
cap_works: capWorks,
|
|
1080
|
-
cases,
|
|
1081
|
-
};
|
|
1082
|
-
}
|
|
1083
|
-
// ── Scenario 6: Feature Correctness ──────────────────────────────────────────
|
|
1084
|
-
async function benchmarkFeatureCorrectness(_stashDir) {
|
|
1085
|
-
log(" Running feature correctness benchmarks...\n");
|
|
1086
|
-
const cases = [];
|
|
1087
|
-
// Test 1: Fuzzy/prefix fallback triggers only when exact match returns 0
|
|
1088
|
-
let fuzzyWorks = false;
|
|
1089
|
-
{
|
|
1090
|
-
// "certb" has no exact FTS match but prefix "certb*" should match "certbot" (tag of ssl-renew)
|
|
1091
|
-
const exactResult = await akmSearch({ query: "certb", source: "stash", limit: 10 });
|
|
1092
|
-
const exactHits = exactResult.hits.filter((h) => h.type !== "registry");
|
|
1093
|
-
// FTS5 porter stemmer + prefix fallback should find ssl-renew via "certbot" tag
|
|
1094
|
-
fuzzyWorks = exactHits.some((h) => h.name === "ssl-renew");
|
|
1095
|
-
cases.push({
|
|
1096
|
-
id: "fc-01",
|
|
1097
|
-
scenario: "feature_correctness",
|
|
1098
|
-
description: "Fuzzy/prefix fallback finds 'ssl-renew' for query 'certb'",
|
|
1099
|
-
passed: fuzzyWorks,
|
|
1100
|
-
details: fuzzyWorks ? "Found via prefix expansion" : `Got: ${exactHits.map((h) => h.name).join(", ") || "none"}`,
|
|
1101
|
-
});
|
|
1102
|
-
}
|
|
1103
|
-
// Test 2: Field weighting — name match ranks higher than description match
|
|
1104
|
-
let fieldWeightingCorrect = false;
|
|
1105
|
-
{
|
|
1106
|
-
// Query "deploy" — assets with "deploy" in their name should rank above
|
|
1107
|
-
// those that only have "deploy" in description/tags
|
|
1108
|
-
const result = await akmSearch({ query: "deploy", source: "stash", limit: 20 });
|
|
1109
|
-
const hits = result.hits.filter((h) => h.type !== "registry");
|
|
1110
|
-
// Assets with "deploy" in name or aliases: k8s-deploy, deploy-helper, deploy-status, deploy-checklist
|
|
1111
|
-
const nameMatchAssets = ["k8s-deploy", "deploy-helper", "deploy-status", "deploy-checklist"];
|
|
1112
|
-
// Assets with "deploy" NOT in name but in desc/tags: metrics-collector, health-check, monitoring-guide
|
|
1113
|
-
const nonNameMatchAssets = ["metrics-collector", "health-check", "monitoring-guide"];
|
|
1114
|
-
if (hits.length > 0) {
|
|
1115
|
-
const nameRanks = nameMatchAssets.map((n) => hits.findIndex((h) => h.name === n)).filter((i) => i >= 0);
|
|
1116
|
-
const nonNameRanks = nonNameMatchAssets.map((n) => hits.findIndex((h) => h.name === n)).filter((i) => i >= 0);
|
|
1117
|
-
if (nameRanks.length > 0 && nonNameRanks.length > 0) {
|
|
1118
|
-
const avgNameRank = nameRanks.reduce((s, r) => s + r, 0) / nameRanks.length;
|
|
1119
|
-
const avgNonNameRank = nonNameRanks.reduce((s, r) => s + r, 0) / nonNameRanks.length;
|
|
1120
|
-
// Name matches should on average rank higher (lower index) than non-name matches
|
|
1121
|
-
fieldWeightingCorrect = avgNameRank < avgNonNameRank;
|
|
1122
|
-
}
|
|
1123
|
-
}
|
|
1124
|
-
cases.push({
|
|
1125
|
-
id: "fc-02",
|
|
1126
|
-
scenario: "feature_correctness",
|
|
1127
|
-
description: "Field weighting: name match ranks higher than desc-only match",
|
|
1128
|
-
passed: fieldWeightingCorrect,
|
|
1129
|
-
details: `Top 5: ${hits
|
|
1130
|
-
.slice(0, 5)
|
|
1131
|
-
.map((h) => h.name)
|
|
1132
|
-
.join(", ")}`,
|
|
1133
|
-
});
|
|
1134
|
-
}
|
|
1135
|
-
// Test 3: Parameter extraction — commands with $ARGUMENTS detected
|
|
1136
|
-
let paramExtraction = false;
|
|
1137
|
-
{
|
|
1138
|
-
const { extractCommandParameters, extractScriptParameters } = await import("../src/indexer/metadata.js");
|
|
1139
|
-
const cmdTemplate = "Run $ARGUMENTS tests and report results.\n$1 is the target directory.";
|
|
1140
|
-
const cmdParams = extractCommandParameters(cmdTemplate);
|
|
1141
|
-
const hasArguments = cmdParams?.some((p) => p.name === "ARGUMENTS") ?? false;
|
|
1142
|
-
const hasDollar1 = cmdParams?.some((p) => p.name === "$1") ?? false;
|
|
1143
|
-
const scriptContent = '#!/bin/bash\n# @param {string} host - Target hostname\n# @param {number} port - Port number\nssh "$1" -p "$2"\n';
|
|
1144
|
-
const scriptParams = extractScriptParameters("/tmp/test.sh", scriptContent);
|
|
1145
|
-
const hasHost = scriptParams?.some((p) => p.name === "host") ?? false;
|
|
1146
|
-
const hasPort = scriptParams?.some((p) => p.name === "port") ?? false;
|
|
1147
|
-
paramExtraction = hasArguments && hasDollar1 && hasHost && hasPort;
|
|
1148
|
-
cases.push({
|
|
1149
|
-
id: "fc-03",
|
|
1150
|
-
scenario: "feature_correctness",
|
|
1151
|
-
description: "Parameter extraction: $ARGUMENTS, $1, and @param",
|
|
1152
|
-
passed: paramExtraction,
|
|
1153
|
-
details: `CMD: ARGUMENTS=${hasArguments}, $1=${hasDollar1}; Script: host=${hasHost}, port=${hasPort}`,
|
|
1154
|
-
});
|
|
1155
|
-
}
|
|
1156
|
-
// Test 4: akm info returns valid capability advertisement
|
|
1157
|
-
let infoValid = false;
|
|
1158
|
-
{
|
|
1159
|
-
const info = assembleInfo();
|
|
1160
|
-
infoValid =
|
|
1161
|
-
info.schemaVersion === 1 &&
|
|
1162
|
-
typeof info.version === "string" &&
|
|
1163
|
-
Array.isArray(info.assetTypes) &&
|
|
1164
|
-
info.assetTypes.length > 0 &&
|
|
1165
|
-
Array.isArray(info.searchModes) &&
|
|
1166
|
-
info.searchModes.includes("fts") &&
|
|
1167
|
-
typeof info.indexStats.entryCount === "number";
|
|
1168
|
-
cases.push({
|
|
1169
|
-
id: "fc-04",
|
|
1170
|
-
scenario: "feature_correctness",
|
|
1171
|
-
description: "akm info returns valid capability advertisement",
|
|
1172
|
-
passed: infoValid,
|
|
1173
|
-
details: `version=${info.version}, types=${info.assetTypes.length}, modes=${info.searchModes.join(",")}`,
|
|
1174
|
-
});
|
|
1175
|
-
}
|
|
1176
|
-
// Test 5: Feedback/usage events record correctly
|
|
1177
|
-
let feedbackRecords = false;
|
|
1178
|
-
{
|
|
1179
|
-
const dbPath = getDbPath();
|
|
1180
|
-
const db = openDatabase(dbPath);
|
|
1181
|
-
try {
|
|
1182
|
-
const countBefore = db.prepare("SELECT COUNT(*) AS cnt FROM usage_events").get().cnt;
|
|
1183
|
-
insertUsageEvent(db, {
|
|
1184
|
-
event_type: "feedback",
|
|
1185
|
-
entry_ref: "skill:test-feedback",
|
|
1186
|
-
signal: "positive",
|
|
1187
|
-
metadata: JSON.stringify({ source: "benchmark" }),
|
|
1188
|
-
});
|
|
1189
|
-
const countAfter = db.prepare("SELECT COUNT(*) AS cnt FROM usage_events").get().cnt;
|
|
1190
|
-
feedbackRecords = countAfter === countBefore + 1;
|
|
1191
|
-
// Verify the event was written correctly
|
|
1192
|
-
const lastEvent = db
|
|
1193
|
-
.prepare("SELECT event_type, entry_ref, signal FROM usage_events ORDER BY id DESC LIMIT 1")
|
|
1194
|
-
.get();
|
|
1195
|
-
feedbackRecords =
|
|
1196
|
-
feedbackRecords &&
|
|
1197
|
-
lastEvent?.event_type === "feedback" &&
|
|
1198
|
-
lastEvent?.entry_ref === "skill:test-feedback" &&
|
|
1199
|
-
lastEvent?.signal === "positive";
|
|
1200
|
-
}
|
|
1201
|
-
finally {
|
|
1202
|
-
closeDatabase(db);
|
|
1203
|
-
}
|
|
1204
|
-
cases.push({
|
|
1205
|
-
id: "fc-05",
|
|
1206
|
-
scenario: "feature_correctness",
|
|
1207
|
-
description: "Feedback events are recorded correctly in usage_events",
|
|
1208
|
-
passed: feedbackRecords,
|
|
1209
|
-
});
|
|
1210
|
-
}
|
|
1211
|
-
// Test 6: buildSearchFields produces per-field text
|
|
1212
|
-
{
|
|
1213
|
-
const entry = {
|
|
1214
|
-
name: "test-entry",
|
|
1215
|
-
type: "skill",
|
|
1216
|
-
description: "A test skill",
|
|
1217
|
-
tags: ["alpha", "beta"],
|
|
1218
|
-
searchHints: ["hint one"],
|
|
1219
|
-
aliases: ["test alt"],
|
|
1220
|
-
};
|
|
1221
|
-
const fields = buildSearchFields(entry);
|
|
1222
|
-
const nameOk = fields.name.includes("test") && fields.name.includes("entry");
|
|
1223
|
-
const descOk = fields.description.includes("test skill");
|
|
1224
|
-
const tagsOk = fields.tags.includes("alpha") && fields.tags.includes("beta");
|
|
1225
|
-
const hintsOk = fields.hints.includes("hint one");
|
|
1226
|
-
const allFieldsPresent = nameOk && descOk && tagsOk && hintsOk;
|
|
1227
|
-
cases.push({
|
|
1228
|
-
id: "fc-06",
|
|
1229
|
-
scenario: "feature_correctness",
|
|
1230
|
-
description: "buildSearchFields produces correct per-field text",
|
|
1231
|
-
passed: allFieldsPresent,
|
|
1232
|
-
details: `name=${nameOk}, desc=${descOk}, tags=${tagsOk}, hints=${hintsOk}`,
|
|
1233
|
-
});
|
|
1234
|
-
}
|
|
1235
|
-
// Test 7: sanitizeFtsQuery handles special characters safely
|
|
1236
|
-
{
|
|
1237
|
-
const { sanitizeFtsQuery } = await import("../src/indexer/db.js");
|
|
1238
|
-
const dangerous = 'code-review "OR 1=1" NEAR(test,5)';
|
|
1239
|
-
const sanitized = sanitizeFtsQuery(dangerous);
|
|
1240
|
-
const noQuotes = !sanitized.includes('"');
|
|
1241
|
-
const noParens = !sanitized.includes("(") && !sanitized.includes(")");
|
|
1242
|
-
const noNear = !sanitized.includes("NEAR");
|
|
1243
|
-
const safe = noQuotes && noParens && noNear && sanitized.length > 0;
|
|
1244
|
-
cases.push({
|
|
1245
|
-
id: "fc-07",
|
|
1246
|
-
scenario: "feature_correctness",
|
|
1247
|
-
description: "sanitizeFtsQuery neutralizes dangerous FTS5 syntax",
|
|
1248
|
-
passed: safe,
|
|
1249
|
-
details: `Input: "${dangerous}" -> "${sanitized}"`,
|
|
1250
|
-
});
|
|
1251
|
-
}
|
|
1252
|
-
// Test 8: Empty query returns all entries
|
|
1253
|
-
{
|
|
1254
|
-
const result = await akmSearch({ query: "", source: "stash", limit: 100 });
|
|
1255
|
-
const localHits = result.hits.filter((h) => h.type !== "registry");
|
|
1256
|
-
// Should return all or most of the 35 assets
|
|
1257
|
-
const allEntriesReturned = localHits.length >= 25;
|
|
1258
|
-
cases.push({
|
|
1259
|
-
id: "fc-08",
|
|
1260
|
-
scenario: "feature_correctness",
|
|
1261
|
-
description: "Empty query returns all assets",
|
|
1262
|
-
passed: allEntriesReturned,
|
|
1263
|
-
metric: localHits.length,
|
|
1264
|
-
unit: "assets",
|
|
1265
|
-
});
|
|
1266
|
-
}
|
|
1267
|
-
// Test 9: Type filtering works
|
|
1268
|
-
{
|
|
1269
|
-
const result = await akmSearch({ query: "", type: "skill", source: "stash", limit: 50 });
|
|
1270
|
-
const localHits = result.hits.filter((h) => h.type !== "registry");
|
|
1271
|
-
const allSkills = localHits.every((h) => h.type === "skill");
|
|
1272
|
-
const hasMultiple = localHits.length >= 3;
|
|
1273
|
-
cases.push({
|
|
1274
|
-
id: "fc-09",
|
|
1275
|
-
scenario: "feature_correctness",
|
|
1276
|
-
description: "Type filtering returns only matching types",
|
|
1277
|
-
passed: allSkills && hasMultiple,
|
|
1278
|
-
metric: localHits.length,
|
|
1279
|
-
unit: "skills",
|
|
1280
|
-
details: allSkills ? "All results are skills" : "Mixed types found",
|
|
1281
|
-
});
|
|
1282
|
-
}
|
|
1283
|
-
// Test 10: Deterministic tiebreaker — same query returns same order
|
|
1284
|
-
{
|
|
1285
|
-
const r1 = await akmSearch({ query: "deploy", source: "stash", limit: 20 });
|
|
1286
|
-
const r2 = await akmSearch({ query: "deploy", source: "stash", limit: 20 });
|
|
1287
|
-
const h1 = r1.hits.filter((h) => h.type !== "registry").map((h) => h.name);
|
|
1288
|
-
const h2 = r2.hits.filter((h) => h.type !== "registry").map((h) => h.name);
|
|
1289
|
-
const deterministic = JSON.stringify(h1) === JSON.stringify(h2);
|
|
1290
|
-
cases.push({
|
|
1291
|
-
id: "fc-10",
|
|
1292
|
-
scenario: "feature_correctness",
|
|
1293
|
-
description: "Search results are deterministic (same order for same query)",
|
|
1294
|
-
passed: deterministic,
|
|
1295
|
-
});
|
|
1296
|
-
}
|
|
1297
|
-
return {
|
|
1298
|
-
fuzzy_works: fuzzyWorks,
|
|
1299
|
-
field_weighting_correct: fieldWeightingCorrect,
|
|
1300
|
-
parameter_extraction: paramExtraction,
|
|
1301
|
-
info_valid: infoValid,
|
|
1302
|
-
feedback_records: feedbackRecords,
|
|
1303
|
-
cases,
|
|
1304
|
-
};
|
|
1305
|
-
}
|
|
1306
|
-
// ── Main benchmark orchestrator ──────────────────────────────────────────────
|
|
1307
|
-
async function runBenchmarkSuite() {
|
|
1308
|
-
const { branch, commit } = gitInfo();
|
|
1309
|
-
log("=== akm Comprehensive Benchmark Suite ===\n\n");
|
|
1310
|
-
// 1. Create stash and index
|
|
1311
|
-
log("Setting up benchmark stash...\n");
|
|
1312
|
-
const stashDir = createBenchmarkStash();
|
|
1313
|
-
process.env.AKM_STASH_DIR = stashDir;
|
|
1314
|
-
saveConfig({ semanticSearchMode: "off", registries: [] });
|
|
1315
|
-
const { akmIndex } = await import("../src/indexer/indexer.js");
|
|
1316
|
-
const indexResult = await akmIndex({ stashDir, full: true });
|
|
1317
|
-
log(` Indexed ${indexResult.totalEntries} entries in ${indexResult.timing?.totalMs ?? "?"}ms\n\n`);
|
|
1318
|
-
// 2. Run all scenarios
|
|
1319
|
-
const searchQuality = await benchmarkSearchQuality(stashDir);
|
|
1320
|
-
const searchPerf = await benchmarkSearchPerformance(stashDir);
|
|
1321
|
-
const indexPerf = await benchmarkIndexingPerformance(stashDir);
|
|
1322
|
-
const tokenEff = await benchmarkTokenEfficiency(stashDir);
|
|
1323
|
-
const utilScoring = await benchmarkUtilityScoring(stashDir);
|
|
1324
|
-
const featureCorr = await benchmarkFeatureCorrectness(stashDir);
|
|
1325
|
-
// 3. Aggregate results
|
|
1326
|
-
const allCases = [
|
|
1327
|
-
...searchQuality.cases,
|
|
1328
|
-
...searchPerf.cases,
|
|
1329
|
-
...indexPerf.cases,
|
|
1330
|
-
...tokenEff.cases,
|
|
1331
|
-
...utilScoring.cases,
|
|
1332
|
-
...featureCorr.cases,
|
|
1333
|
-
];
|
|
1334
|
-
const totalCases = allCases.length;
|
|
1335
|
-
const passedCount = allCases.filter((c) => c.passed).length;
|
|
1336
|
-
const failedCount = totalCases - passedCount;
|
|
1337
|
-
const output = {
|
|
1338
|
-
branch,
|
|
1339
|
-
commit,
|
|
1340
|
-
timestamp: new Date().toISOString(),
|
|
1341
|
-
asset_count: ASSETS.length,
|
|
1342
|
-
scenarios: {
|
|
1343
|
-
search_quality: {
|
|
1344
|
-
mrr: searchQuality.mrr,
|
|
1345
|
-
recall_at_5: searchQuality.recall_at_5,
|
|
1346
|
-
recall_at_10: searchQuality.recall_at_10,
|
|
1347
|
-
cases: searchQuality.cases,
|
|
1348
|
-
},
|
|
1349
|
-
search_performance: {
|
|
1350
|
-
cold_ms: searchPerf.cold_ms,
|
|
1351
|
-
warm_ms: searchPerf.warm_ms,
|
|
1352
|
-
fts_only_ms: searchPerf.fts_only_ms,
|
|
1353
|
-
large_result_ms: searchPerf.large_result_ms,
|
|
1354
|
-
cases: searchPerf.cases,
|
|
1355
|
-
},
|
|
1356
|
-
indexing_performance: {
|
|
1357
|
-
full_ms: indexPerf.full_ms,
|
|
1358
|
-
incremental_ms: indexPerf.incremental_ms,
|
|
1359
|
-
fts_rebuild_ms: indexPerf.fts_rebuild_ms,
|
|
1360
|
-
recompute_utility_ms: indexPerf.recompute_utility_ms,
|
|
1361
|
-
cases: indexPerf.cases,
|
|
1362
|
-
},
|
|
1363
|
-
token_efficiency: {
|
|
1364
|
-
summary_savings_pct: tokenEff.summary_savings_pct,
|
|
1365
|
-
manifest_bytes_per_asset: tokenEff.manifest_bytes_per_asset,
|
|
1366
|
-
for_agent_savings_pct: tokenEff.for_agent_savings_pct,
|
|
1367
|
-
jsonl_savings_pct: tokenEff.jsonl_savings_pct,
|
|
1368
|
-
cases: tokenEff.cases,
|
|
1369
|
-
},
|
|
1370
|
-
utility_scoring: {
|
|
1371
|
-
baseline_no_usage: utilScoring.baseline_no_usage,
|
|
1372
|
-
boost_applied: utilScoring.boost_applied,
|
|
1373
|
-
decay_works: utilScoring.decay_works,
|
|
1374
|
-
cap_works: utilScoring.cap_works,
|
|
1375
|
-
cases: utilScoring.cases,
|
|
1376
|
-
},
|
|
1377
|
-
feature_correctness: {
|
|
1378
|
-
fuzzy_works: featureCorr.fuzzy_works,
|
|
1379
|
-
field_weighting_correct: featureCorr.field_weighting_correct,
|
|
1380
|
-
parameter_extraction: featureCorr.parameter_extraction,
|
|
1381
|
-
info_valid: featureCorr.info_valid,
|
|
1382
|
-
feedback_records: featureCorr.feedback_records,
|
|
1383
|
-
cases: featureCorr.cases,
|
|
1384
|
-
},
|
|
1385
|
-
},
|
|
1386
|
-
summary: {
|
|
1387
|
-
total_cases: totalCases,
|
|
1388
|
-
passed: passedCount,
|
|
1389
|
-
failed: failedCount,
|
|
1390
|
-
},
|
|
1391
|
-
};
|
|
1392
|
-
// 4. Output JSON
|
|
1393
|
-
console.log(JSON.stringify(output, null, 2));
|
|
1394
|
-
// 5. Human-readable summary
|
|
1395
|
-
if (!jsonOnly) {
|
|
1396
|
-
process.stderr.write("\n=== Benchmark Summary ===\n");
|
|
1397
|
-
process.stderr.write(`Branch: ${branch} (${commit})\n`);
|
|
1398
|
-
process.stderr.write(`Assets: ${ASSETS.length}\n\n`);
|
|
1399
|
-
process.stderr.write(`Search Quality:\n`);
|
|
1400
|
-
process.stderr.write(` MRR: ${searchQuality.mrr}\n`);
|
|
1401
|
-
process.stderr.write(` Recall@5: ${searchQuality.recall_at_5}\n`);
|
|
1402
|
-
process.stderr.write(` Recall@10: ${searchQuality.recall_at_10}\n\n`);
|
|
1403
|
-
process.stderr.write(`Search Performance:\n`);
|
|
1404
|
-
process.stderr.write(` Cold: ${searchPerf.cold_ms}ms\n`);
|
|
1405
|
-
process.stderr.write(` Warm: ${searchPerf.warm_ms}ms\n`);
|
|
1406
|
-
process.stderr.write(` FTS-only: ${searchPerf.fts_only_ms}ms\n\n`);
|
|
1407
|
-
process.stderr.write(`Indexing Performance:\n`);
|
|
1408
|
-
process.stderr.write(` Full: ${indexPerf.full_ms}ms\n`);
|
|
1409
|
-
process.stderr.write(` Incr: ${indexPerf.incremental_ms}ms\n`);
|
|
1410
|
-
process.stderr.write(` FTS rebuild: ${indexPerf.fts_rebuild_ms}ms\n\n`);
|
|
1411
|
-
process.stderr.write(`Token Efficiency:\n`);
|
|
1412
|
-
process.stderr.write(` Summary savings: ${tokenEff.summary_savings_pct}%\n`);
|
|
1413
|
-
process.stderr.write(` Manifest: ${tokenEff.manifest_bytes_per_asset} bytes/asset\n\n`);
|
|
1414
|
-
process.stderr.write(`Utility Scoring:\n`);
|
|
1415
|
-
process.stderr.write(` Baseline: ${utilScoring.baseline_no_usage ? "PASS" : "FAIL"}\n`);
|
|
1416
|
-
process.stderr.write(` Boost: ${utilScoring.boost_applied ? "PASS" : "FAIL"}\n`);
|
|
1417
|
-
process.stderr.write(` Decay: ${utilScoring.decay_works ? "PASS" : "FAIL"}\n`);
|
|
1418
|
-
process.stderr.write(` Cap: ${utilScoring.cap_works ? "PASS" : "FAIL"}\n\n`);
|
|
1419
|
-
process.stderr.write(`Feature Correctness:\n`);
|
|
1420
|
-
process.stderr.write(` Fuzzy: ${featureCorr.fuzzy_works ? "PASS" : "FAIL"}\n`);
|
|
1421
|
-
process.stderr.write(` Weighting: ${featureCorr.field_weighting_correct ? "PASS" : "FAIL"}\n`);
|
|
1422
|
-
process.stderr.write(` Params: ${featureCorr.parameter_extraction ? "PASS" : "FAIL"}\n`);
|
|
1423
|
-
process.stderr.write(` Info: ${featureCorr.info_valid ? "PASS" : "FAIL"}\n`);
|
|
1424
|
-
process.stderr.write(` Feedback: ${featureCorr.feedback_records ? "PASS" : "FAIL"}\n\n`);
|
|
1425
|
-
process.stderr.write(`Total: ${passedCount}/${totalCases} passed, ${failedCount} failed\n`);
|
|
1426
|
-
if (failedCount > 0) {
|
|
1427
|
-
process.stderr.write("\nFailed cases:\n");
|
|
1428
|
-
for (const c of allCases.filter((c) => !c.passed)) {
|
|
1429
|
-
process.stderr.write(` [FAIL] ${c.id}: ${c.description}${c.details ? ` — ${c.details}` : ""}${c.metric !== undefined ? ` (${c.metric}${c.unit ? ` ${c.unit}` : ""})` : ""}\n`);
|
|
1430
|
-
}
|
|
1431
|
-
}
|
|
1432
|
-
}
|
|
1433
|
-
return output;
|
|
1434
|
-
}
|
|
1435
|
-
// ── Entry point ──────────────────────────────────────────────────────────────
|
|
1436
|
-
try {
|
|
1437
|
-
await runBenchmarkSuite();
|
|
1438
|
-
}
|
|
1439
|
-
finally {
|
|
1440
|
-
cleanup();
|
|
1441
|
-
}
|