claude-memory-layer 1.0.27 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +7 -0
- package/AGENTS.md +11 -0
- package/README.md +374 -49
- package/benchmarks/replay/anonymized-real-sessions.json +48 -0
- package/dist/cli/index.js +10097 -6003
- package/dist/cli/index.js.map +4 -4
- package/dist/core/index.js +9745 -5587
- package/dist/core/index.js.map +4 -4
- package/dist/hooks/post-tool-use.js +6545 -5270
- package/dist/hooks/post-tool-use.js.map +4 -4
- package/dist/hooks/semantic-daemon.js +6646 -5354
- package/dist/hooks/semantic-daemon.js.map +4 -4
- package/dist/hooks/session-end.js +6618 -5347
- package/dist/hooks/session-end.js.map +4 -4
- package/dist/hooks/session-start.js +6619 -5354
- package/dist/hooks/session-start.js.map +4 -4
- package/dist/hooks/stop.js +6614 -5325
- package/dist/hooks/stop.js.map +4 -4
- package/dist/hooks/user-prompt-submit.js +6702 -5356
- package/dist/hooks/user-prompt-submit.js.map +4 -4
- package/dist/index.js +13537 -0
- package/dist/index.js.map +7 -0
- package/dist/mcp/index.js +20770 -0
- package/dist/mcp/index.js.map +7 -0
- package/dist/server/api/index.js +6632 -5319
- package/dist/server/api/index.js.map +4 -4
- package/dist/server/index.js +6667 -5340
- package/dist/server/index.js.map +4 -4
- package/dist/services/memory-service.js +6568 -5350
- package/dist/services/memory-service.js.map +4 -4
- package/dist/ui/assets/js/bootstrap.js +244 -0
- package/dist/ui/assets/js/chat.js +373 -0
- package/dist/ui/assets/js/disclosure.js +232 -0
- package/dist/ui/assets/js/modals.js +298 -0
- package/dist/ui/assets/js/overview.js +655 -0
- package/dist/ui/assets/js/state.js +72 -0
- package/dist/ui/assets/js/views.js +468 -0
- package/dist/ui/index.html +43 -1
- package/dist/ui/index.ts +3 -0
- package/dist/ui/style.css +222 -0
- package/docs/ARCHITECTURE_COMPARISON_AND_RECOMMENDATIONS.md +627 -0
- package/docs/HERMES_MEMORY_INGESTION_ANALYSIS.md +440 -0
- package/docs/MEMORY_USEFULNESS_AUDIT.md +371 -0
- package/docs/MEMORY_USEFULNESS_AUDIT_RAW.json +80 -0
- package/docs/MEMSEARCH_PROJECT_STRUCTURE_ANALYSIS.md +333 -0
- package/docs/PRODUCT_VALIDATION_MATRIX.md +82 -0
- package/docs/PROJECT_STRUCTURE_ANALYSIS.md +421 -0
- package/docs/REFACTORING_MILESTONES_AND_ISSUES.md +501 -0
- package/docs/REFACTORING_PLAN_THIN_CORE.md +414 -0
- package/docs/REFERENCE_PROJECT_ANALYSES.md +25 -0
- package/docs/SUPERLOCALMEMORY_PROJECT_STRUCTURE_ANALYSIS.md +452 -0
- package/docs/TARGET_ARCHITECTURE_AND_FOLDER_STRUCTURE.md +446 -0
- package/docs/architecture/comparison-index.md +47 -0
- package/docs/reports/codex-real-data-validation-20260505T040447Z.md +46 -0
- package/package.json +12 -5
- package/scripts/build.ts +25 -8
- package/scripts/generate-session-qrels.ts +126 -0
- package/scripts/postinstall-embedding-backend.cjs +142 -0
- package/scripts/replay-retrieval-benchmark.ts +69 -0
- package/specs/thin-core-refactor/context.md +275 -0
- package/specs/thin-core-refactor/plan.md +536 -0
- package/specs/thin-core-refactor/spec.md +465 -0
- package/src/adapters/claude/capture/index.ts +3 -0
- package/src/adapters/claude/context/index.ts +3 -0
- package/src/adapters/claude/hooks/index.ts +21 -0
- package/src/adapters/claude/hooks/post-tool-use.ts +239 -0
- package/src/adapters/claude/hooks/prompt-injection-policy.ts +104 -0
- package/src/adapters/claude/hooks/semantic-daemon-client.ts +209 -0
- package/src/adapters/claude/hooks/semantic-daemon.ts +283 -0
- package/src/adapters/claude/hooks/session-end.ts +59 -0
- package/src/adapters/claude/hooks/session-start.ts +73 -0
- package/src/adapters/claude/hooks/stop.ts +128 -0
- package/src/adapters/claude/hooks/user-prompt-submit.ts +361 -0
- package/src/adapters/claude/index.ts +4 -0
- package/src/adapters/claude/transcript/index.ts +4 -0
- package/src/adapters/claude/transcript/transcript-reader.ts +57 -0
- package/src/adapters/claude/transcript/turn-reconstructor.ts +65 -0
- package/src/apps/cli/claude-settings-hooks.ts +138 -0
- package/src/apps/cli/codex-import-runner.ts +125 -0
- package/src/apps/cli/codex-validation-output.ts +95 -0
- package/src/apps/cli/hermes-import-runner.ts +130 -0
- package/src/apps/cli/hermes-validation-output.ts +91 -0
- package/src/apps/cli/index.ts +1731 -0
- package/src/apps/cli/mcp-install.ts +106 -0
- package/src/apps/cli/retrieval-disclosure-output.ts +196 -0
- package/src/apps/dashboard/assets/js/bootstrap.js +244 -0
- package/src/apps/dashboard/assets/js/chat.js +373 -0
- package/src/apps/dashboard/assets/js/disclosure.js +232 -0
- package/src/apps/dashboard/assets/js/modals.js +298 -0
- package/src/apps/dashboard/assets/js/overview.js +655 -0
- package/src/apps/dashboard/assets/js/state.js +72 -0
- package/src/apps/dashboard/assets/js/views.js +468 -0
- package/src/{ui → apps/dashboard}/index.html +43 -1
- package/src/apps/dashboard/index.ts +3 -0
- package/src/{ui → apps/dashboard}/style.css +222 -0
- package/src/apps/index.ts +5 -0
- package/src/apps/server/api/chat.ts +244 -0
- package/src/apps/server/api/citations.ts +105 -0
- package/src/apps/server/api/events.ts +137 -0
- package/src/apps/server/api/health.ts +53 -0
- package/src/apps/server/api/index.ts +26 -0
- package/src/apps/server/api/projects.ts +74 -0
- package/src/apps/server/api/search.ts +184 -0
- package/src/apps/server/api/sessions.ts +115 -0
- package/src/apps/server/api/stats.ts +723 -0
- package/src/apps/server/api/turns.ts +143 -0
- package/src/apps/server/api/utils.ts +65 -0
- package/src/apps/server/index.ts +111 -0
- package/src/cli/index.ts +2 -1311
- package/src/cli/retrieval-disclosure-output.ts +2 -0
- package/src/compat/index.ts +5 -0
- package/src/core/derive/fact-deriver.ts +170 -0
- package/src/core/derive/index.ts +2 -0
- package/src/core/derive/summary-deriver.ts +76 -0
- package/src/core/embedder.ts +4 -152
- package/src/core/engine/embedding-maintenance-service.ts +187 -0
- package/src/core/engine/endless-memory-services.ts +4 -0
- package/src/core/engine/index.ts +19 -0
- package/src/core/engine/memory-engine-services.ts +170 -0
- package/src/core/engine/memory-ingest-service.ts +317 -0
- package/src/core/engine/memory-query-service.ts +173 -0
- package/src/core/engine/memory-runtime-service.ts +162 -0
- package/src/core/engine/memory-service-composition.ts +231 -0
- package/src/core/engine/retrieval-analytics-service.ts +181 -0
- package/src/core/engine/retrieval-disclosure-service.ts +420 -0
- package/src/core/engine/retrieval-orchestrator.ts +377 -0
- package/src/core/engine/retrieval-services.ts +176 -0
- package/src/core/engine/shared-memory-services.ts +4 -0
- package/src/core/entity-repo.ts +1 -3
- package/src/core/event-store.ts +3 -3
- package/src/core/evidence-aligner.ts +2 -2
- package/src/core/external-market-context.ts +582 -0
- package/src/core/graduation.ts +2 -3
- package/src/core/index.ts +21 -0
- package/src/core/matcher.ts +2 -4
- package/src/core/model/memory-fact.ts +30 -0
- package/src/core/model/memory-rule.ts +14 -0
- package/src/core/model/memory-summary.ts +21 -0
- package/src/core/model/raw-event.ts +28 -0
- package/src/core/model/retrieval-result.ts +35 -0
- package/src/core/privacy/filter.ts +21 -10
- package/src/core/product-validation-matrix.ts +314 -0
- package/src/core/progressive-retriever.ts +1 -2
- package/src/core/registry/project-path.ts +54 -0
- package/src/core/registry/session-registry.ts +69 -0
- package/src/core/replay-evaluator.ts +625 -0
- package/src/core/retrieval-benchmark.ts +117 -0
- package/src/core/retrieval-quality.ts +109 -0
- package/src/core/retriever.ts +53 -15
- package/src/core/session-qrels.ts +360 -0
- package/src/core/shared-event-store.ts +1 -1
- package/src/core/sqlite-event-store.ts +35 -11
- package/src/core/task/blocker-resolver.ts +2 -2
- package/src/core/task/task-resolver.ts +0 -1
- package/src/core/vector-outbox.ts +1 -10
- package/src/core/vector-worker.ts +1 -1
- package/src/extensions/endless-memory/endless-memory-services.ts +350 -0
- package/src/extensions/endless-memory/index.ts +1 -0
- package/src/extensions/index.ts +5 -0
- package/src/extensions/mcp/handlers.ts +960 -0
- package/src/extensions/mcp/index.ts +48 -0
- package/src/extensions/mcp/tools.ts +252 -0
- package/src/extensions/shared-memory/index.ts +1 -0
- package/src/extensions/shared-memory/shared-memory-services.ts +211 -0
- package/src/extensions/vector/embedder.ts +197 -0
- package/src/extensions/vector/index.ts +1 -0
- package/src/hooks/post-tool-use.ts +3 -236
- package/src/hooks/semantic-daemon-client.ts +1 -208
- package/src/hooks/semantic-daemon.ts +6 -271
- package/src/hooks/session-end.ts +4 -79
- package/src/hooks/session-start.ts +4 -73
- package/src/hooks/stop.ts +3 -173
- package/src/hooks/user-prompt-submit.ts +3 -338
- package/src/index.ts +13 -0
- package/src/mcp/handlers.ts +2 -212
- package/src/mcp/index.ts +3 -46
- package/src/mcp/tools.ts +2 -78
- package/src/server/api/chat.ts +2 -244
- package/src/server/api/citations.ts +2 -105
- package/src/server/api/events.ts +2 -137
- package/src/server/api/health.ts +2 -53
- package/src/server/api/index.ts +2 -26
- package/src/server/api/projects.ts +2 -74
- package/src/server/api/search.ts +2 -102
- package/src/server/api/sessions.ts +2 -115
- package/src/server/api/stats.ts +2 -724
- package/src/server/api/turns.ts +2 -143
- package/src/server/api/utils.ts +2 -46
- package/src/server/index.ts +2 -100
- package/src/services/bootstrap-organizer.ts +46 -26
- package/src/services/codex-session-history-importer.ts +521 -29
- package/src/services/hermes-session-history-importer.ts +733 -0
- package/src/services/memory-service-config.ts +36 -0
- package/src/services/memory-service-registry.ts +150 -0
- package/src/services/memory-service.ts +211 -1325
- package/src/services/session-history-importer.ts +58 -14
- package/tests/README.md +23 -0
- package/tests/adapters/claude/claude-semantic-daemon-adapter.test.ts +54 -0
- package/tests/adapters/claude/claude-transcript-reconstructor.test.ts +98 -0
- package/tests/adapters/claude-hook-prompt-injection-policy.test.ts +99 -0
- package/tests/apps/app-layer-boundary.test.ts +48 -0
- package/tests/apps/claude-settings-hooks.test.ts +107 -0
- package/tests/apps/cli-disclosure-output.test.ts +212 -0
- package/tests/apps/codex-import-runner.test.ts +99 -0
- package/tests/apps/codex-validation-output.test.ts +100 -0
- package/tests/apps/hermes-import-runner.test.ts +99 -0
- package/tests/apps/mcp-install-command.test.ts +59 -0
- package/tests/apps/package-build-entrypoints.test.ts +30 -0
- package/tests/apps/postinstall-embedding-backend.test.ts +167 -0
- package/tests/apps/search-api-disclosure.test.ts +162 -0
- package/tests/apps/stats-api-lightweight.test.ts +67 -0
- package/tests/apps/ui-disclosure-output.test.ts +140 -0
- package/tests/{bootstrap-organizer.test.ts → core/bootstrap-organizer.test.ts} +1 -1
- package/tests/{canonical-key.test.ts → core/canonical-key.test.ts} +1 -1
- package/tests/core/codex-session-history-importer-validation.test.ts +185 -0
- package/tests/{consolidation-worker.test.ts → core/consolidation-worker.test.ts} +2 -2
- package/tests/core/embedding-maintenance-service.test.ts +282 -0
- package/tests/{evidence-aligner.test.ts → core/evidence-aligner.test.ts} +1 -1
- package/tests/core/external-market-context.test.ts +209 -0
- package/tests/core/fact-deriver.test.ts +79 -0
- package/tests/core/hermes-session-history-importer-validation.test.ts +609 -0
- package/tests/{ingest-interceptor.test.ts → core/ingest-interceptor.test.ts} +1 -1
- package/tests/{markdown-mirror.test.ts → core/markdown-mirror.test.ts} +2 -2
- package/tests/{matcher.test.ts → core/matcher.test.ts} +1 -1
- package/tests/{md-mirror.test.ts → core/md-mirror.test.ts} +2 -2
- package/tests/core/memory-engine-services.test.ts +240 -0
- package/tests/core/memory-ingest-service.test.ts +296 -0
- package/tests/core/memory-query-service.test.ts +129 -0
- package/tests/core/memory-runtime-service.test.ts +201 -0
- package/tests/core/memory-service-composition.test.ts +192 -0
- package/tests/core/memory-service-config.test.ts +41 -0
- package/tests/core/memory-service-facade.test.ts +30 -0
- package/tests/core/memory-service-registry.test.ts +206 -0
- package/tests/core/product-validation-matrix.test.ts +61 -0
- package/tests/core/project-registry.test.ts +78 -0
- package/tests/core/replay-evaluator.test.ts +181 -0
- package/tests/core/retrieval-analytics-service.test.ts +210 -0
- package/tests/core/retrieval-benchmark.test.ts +93 -0
- package/tests/core/retrieval-disclosure-service.test.ts +264 -0
- package/tests/core/retrieval-orchestrator.test.ts +403 -0
- package/tests/core/retrieval-quality.test.ts +31 -0
- package/tests/core/retrieval-services.test.ts +185 -0
- package/tests/{retriever-fallback-chain.test.ts → core/retriever-fallback-chain.test.ts} +3 -3
- package/tests/{retriever-strategy-scope.test.ts → core/retriever-strategy-scope.test.ts} +70 -3
- package/tests/{retriever.memu-adoption.test.ts → core/retriever.memu-adoption.test.ts} +3 -3
- package/tests/core/session-history-importer-filter.test.ts +78 -0
- package/tests/core/session-qrels.test.ts +250 -0
- package/tests/{sqlite-event-store-replication.test.ts → core/sqlite-event-store-replication.test.ts} +36 -1
- package/tests/core/summary-deriver.test.ts +66 -0
- package/tests/extensions/embedder-warning-suppression.test.ts +53 -0
- package/tests/extensions/endless-memory-extension-boundary.test.ts +17 -0
- package/tests/extensions/endless-memory-services.test.ts +325 -0
- package/tests/extensions/mcp-context-tools.test.ts +905 -0
- package/tests/extensions/mcp-extension-boundary.test.ts +21 -0
- package/tests/extensions/mcp-package-build.test.ts +22 -0
- package/tests/extensions/mcp-project-aware-tools.test.ts +102 -0
- package/tests/extensions/shared-memory-extension-boundary.test.ts +24 -0
- package/tests/extensions/shared-memory-services.test.ts +309 -0
- package/tests/extensions/vector-extension-boundary.test.ts +21 -0
- package/.claude/settings.local.json +0 -25
- package/.npm-cache/_cacache/content-v2/sha512/04/76/c098f88dfe584a2b80870bff7421b05d17d3d9ee1027f77772332a22d3f93a9a57101a2855107f6ad82077a818bba912b2bc317f2361b5ddb09ad284d9ce +0 -0
- package/.npm-cache/_cacache/content-v2/sha512/60/25/d2ecd39cfc7cab58351162814be77f935c6d6491c10c3745d456da7ddb2117ffd90c10e53fe3c0f1ed16b403307841543634504398b16ee4e6b6dd8e0c45 +0 -0
- package/.npm-cache/_cacache/index-v5/2b/9a/7f8f40206ed8a2e0a84efaa953ccaed1f5d001e14b931083f2e7a0738007 +0 -2
- package/.npm-cache/_cacache/index-v5/2e/d9/fcfa5c6a6abdc2a3644ab84a95936047298c465a2f47ee03db8f7fe1e946 +0 -3
- package/.npm-cache/_cacache/index-v5/a9/42/e519633356d12d3d2f19da66a8301016d496c8f5c3e0554124aaa62dc043 +0 -2
- package/.npm-cache/_logs/2026-02-26T12_04_52_729Z-debug-0.log +0 -256
- package/.npm-cache/_logs/2026-02-26T12_05_36_835Z-debug-0.log +0 -18
- package/.npm-cache/_logs/2026-02-26T12_05_45_982Z-debug-0.log +0 -32
- package/.npm-cache/_logs/2026-02-26T12_05_48_515Z-debug-0.log +0 -260
- package/.npm-cache/_logs/2026-02-26T12_05_53_567Z-debug-0.log +0 -69
- package/.npm-cache/_update-notifier-last-checked +0 -0
- package/bootstrap-kb/decisions/decisions.md +0 -244
- package/bootstrap-kb/glossary/glossary.md +0 -46
- package/bootstrap-kb/modules/.claude-plugin.md +0 -22
- package/bootstrap-kb/modules/agents.md.md +0 -15
- package/bootstrap-kb/modules/claude.md.md +0 -15
- package/bootstrap-kb/modules/context.md.md +0 -15
- package/bootstrap-kb/modules/docs.md +0 -18
- package/bootstrap-kb/modules/handoff.md.md +0 -15
- package/bootstrap-kb/modules/package-lock.json.md +0 -15
- package/bootstrap-kb/modules/package.json.md +0 -15
- package/bootstrap-kb/modules/plan.md.md +0 -15
- package/bootstrap-kb/modules/readme.md.md +0 -15
- package/bootstrap-kb/modules/scripts.md +0 -26
- package/bootstrap-kb/modules/spec.md.md +0 -15
- package/bootstrap-kb/modules/specs.md +0 -20
- package/bootstrap-kb/modules/src.md +0 -51
- package/bootstrap-kb/modules/tests.md +0 -42
- package/bootstrap-kb/modules/tsconfig.json.md +0 -15
- package/bootstrap-kb/modules/vitest.config.ts.md +0 -15
- package/bootstrap-kb/overview/overview.md +0 -40
- package/bootstrap-kb/sources/manifest.json +0 -950
- package/bootstrap-kb/sources/manifest.md +0 -227
- package/bootstrap-kb/timeline/timeline.md +0 -57
- package/claude-memory-layer-1.0.14.tgz +0 -0
- package/d.sh +0 -3
- package/deploy.sh +0 -3
- package/dist/ui/app.js +0 -2101
- package/memory/.claude-plugin/commands/2026-02-25.md +0 -263
- package/memory/_index.md +0 -419
- package/memory/agent_response/uncategorized/2026-02-26.md +0 -176
- package/memory/agent_response/uncategorized/2026-03-03.md +0 -14
- package/memory/agent_response/uncategorized/2026-03-04.md +0 -1421
- package/memory/agent_response/uncategorized/2026-03-05.md +0 -157
- package/memory/default/uncategorized/2026-02-25.md +0 -4839
- package/memory/session_summary/uncategorized/2026-02-26.md +0 -13
- package/memory/session_summary/uncategorized/2026-03-03.md +0 -5
- package/memory/session_summary/uncategorized/2026-03-04.md +0 -50
- package/memory/specs/20260207-dashboard-upgrade/2026-02-25.md +0 -142
- package/memory/specs/citations-system/2026-02-25.md +0 -1121
- package/memory/specs/endless-mode/2026-02-25.md +0 -1392
- package/memory/specs/entity-edge-model/2026-02-25.md +0 -1263
- package/memory/specs/evidence-aligner-v2/2026-02-25.md +0 -1028
- package/memory/specs/mcp-desktop-integration/2026-02-25.md +0 -1334
- package/memory/specs/post-tool-use-hook/2026-02-25.md +0 -1164
- package/memory/specs/private-tags/2026-02-25.md +0 -1057
- package/memory/specs/progressive-disclosure/2026-02-25.md +0 -1436
- package/memory/specs/task-entity-system/2026-02-25.md +0 -924
- package/memory/specs/vector-outbox-v2/2026-02-25.md +0 -1510
- package/memory/specs/web-viewer-ui/2026-02-25.md +0 -1709
- package/memory/tool_observation/uncategorized/2026-02-26.md +0 -209
- package/memory/tool_observation/uncategorized/2026-03-03.md +0 -21
- package/memory/tool_observation/uncategorized/2026-03-04.md +0 -1033
- package/memory/tool_observation/uncategorized/2026-03-05.md +0 -33
- package/memory/user_prompt/uncategorized/2026-02-26.md +0 -25
- package/memory/user_prompt/uncategorized/2026-03-04.md +0 -634
- package/memory/user_prompt/uncategorized/2026-03-05.md +0 -6
- package/specs/optional-duckdb/context.md +0 -77
- package/specs/optional-duckdb/plan.md +0 -142
- package/specs/optional-duckdb/spec.md +0 -35
- package/src/ui/app.js +0 -2101
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
getProductValidationMatrixSummary,
|
|
5
|
+
productValidationMatrix,
|
|
6
|
+
renderProductValidationMatrixMarkdown
|
|
7
|
+
} from '../../src/core/product-validation-matrix.js';
|
|
8
|
+
|
|
9
|
+
const requiredSurfaces = [
|
|
10
|
+
'claude.adapter.import',
|
|
11
|
+
'claude.adapter.search',
|
|
12
|
+
'claude.adapter.disclosure',
|
|
13
|
+
'codex.adapter.scan',
|
|
14
|
+
'codex.adapter.import',
|
|
15
|
+
'codex.adapter.replay',
|
|
16
|
+
'hermes.adapter.scan',
|
|
17
|
+
'hermes.adapter.import',
|
|
18
|
+
'hermes.adapter.replay',
|
|
19
|
+
'mcp.context.pack',
|
|
20
|
+
'mcp.project.timeline',
|
|
21
|
+
'mcp.source.ref',
|
|
22
|
+
'cli.api.reporting',
|
|
23
|
+
'safety.dryRun'
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
describe('product validation matrix', () => {
|
|
27
|
+
it('covers the product-level validation surfaces with requirements and evidence', () => {
|
|
28
|
+
const surfaceIds = new Set(productValidationMatrix.map((surface) => surface.id));
|
|
29
|
+
|
|
30
|
+
for (const id of requiredSurfaces) {
|
|
31
|
+
expect(surfaceIds.has(id), `missing surface ${id}`).toBe(true);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
for (const surface of productValidationMatrix) {
|
|
35
|
+
expect(surface.title).toBeTruthy();
|
|
36
|
+
expect(surface.requirements.length, `${surface.id} requirements`).toBeGreaterThan(0);
|
|
37
|
+
expect(surface.evidence.length, `${surface.id} evidence`).toBeGreaterThan(0);
|
|
38
|
+
expect(['ready', 'covered', 'partial', 'planned']).toContain(surface.status);
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('summarizes and renders a stable reporting-friendly matrix', () => {
|
|
43
|
+
const summary = getProductValidationMatrixSummary(productValidationMatrix);
|
|
44
|
+
expect(summary.totalSurfaces).toBeGreaterThanOrEqual(requiredSurfaces.length);
|
|
45
|
+
expect(summary.surfacesByArea.codex).toBeGreaterThanOrEqual(3);
|
|
46
|
+
expect(summary.surfacesByArea.hermes).toBeGreaterThanOrEqual(3);
|
|
47
|
+
expect(summary.surfacesByArea.mcp).toBeGreaterThanOrEqual(3);
|
|
48
|
+
expect(summary.surfacesByArea.claude).toBeGreaterThanOrEqual(3);
|
|
49
|
+
expect(summary.evidenceCount).toBeGreaterThanOrEqual(requiredSurfaces.length);
|
|
50
|
+
|
|
51
|
+
const markdown = renderProductValidationMatrixMarkdown(productValidationMatrix);
|
|
52
|
+
expect(markdown).toContain('# Product Validation Matrix');
|
|
53
|
+
expect(markdown).toContain('Codex adapter replay');
|
|
54
|
+
expect(markdown).toContain('Hermes adapter replay');
|
|
55
|
+
expect(markdown).toContain('MCP context pack');
|
|
56
|
+
expect(markdown).toContain('MCP source reference');
|
|
57
|
+
expect(markdown).toContain('Safety / dry-run');
|
|
58
|
+
expect(markdown).toContain('tests/core/codex-session-history-importer-validation.test.ts');
|
|
59
|
+
expect(markdown).toContain('tests/core/hermes-session-history-importer-validation.test.ts');
|
|
60
|
+
});
|
|
61
|
+
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import * as fs from 'node:fs/promises';
|
|
3
|
+
import * as os from 'node:os';
|
|
4
|
+
import * as path from 'node:path';
|
|
5
|
+
import {
|
|
6
|
+
getProjectStoragePath,
|
|
7
|
+
hashProjectPath,
|
|
8
|
+
normalizeProjectPath,
|
|
9
|
+
resolveProjectStoragePath
|
|
10
|
+
} from '../../src/core/registry/project-path.js';
|
|
11
|
+
|
|
12
|
+
describe('project-path registry utilities', () => {
|
|
13
|
+
it('normalizes paths and generates stable hashes', async () => {
|
|
14
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), 'cml-project-path-'));
|
|
15
|
+
const projectDir = path.join(root, 'project');
|
|
16
|
+
await fs.mkdir(projectDir, { recursive: true });
|
|
17
|
+
|
|
18
|
+
const hashA = hashProjectPath(projectDir);
|
|
19
|
+
const hashB = hashProjectPath(projectDir + '/');
|
|
20
|
+
|
|
21
|
+
const normalized = normalizeProjectPath(projectDir + '/');
|
|
22
|
+
|
|
23
|
+
expect(normalized.endsWith('/project')).toBe(true);
|
|
24
|
+
expect(hashA).toBe(hashB);
|
|
25
|
+
expect(hashA).toMatch(/^[a-f0-9]{8}$/);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it('resolves storage paths for both project paths and explicit hashes', async () => {
|
|
29
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), 'cml-storage-path-'));
|
|
30
|
+
const projectDir = path.join(root, 'project');
|
|
31
|
+
await fs.mkdir(projectDir, { recursive: true });
|
|
32
|
+
|
|
33
|
+
const projectHash = hashProjectPath(projectDir);
|
|
34
|
+
const storageFromPath = getProjectStoragePath(projectDir);
|
|
35
|
+
const storageFromResolverPath = resolveProjectStoragePath(projectDir);
|
|
36
|
+
const storageFromResolverHash = resolveProjectStoragePath(projectHash);
|
|
37
|
+
|
|
38
|
+
expect(storageFromPath).toBe(storageFromResolverPath);
|
|
39
|
+
expect(storageFromResolverHash).toContain(path.join('.claude-code', 'memory', 'projects', projectHash));
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
describe('session registry utilities', () => {
|
|
44
|
+
afterEach(() => {
|
|
45
|
+
vi.resetModules();
|
|
46
|
+
vi.doUnmock('os');
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('registers and loads project mapping from the isolated home directory', async () => {
|
|
50
|
+
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'cml-home-'));
|
|
51
|
+
const projectDir = path.join(tempHome, 'workspace', 'project');
|
|
52
|
+
await fs.mkdir(projectDir, { recursive: true });
|
|
53
|
+
|
|
54
|
+
vi.doMock('os', async () => {
|
|
55
|
+
const actual = await vi.importActual<typeof import('os')>('os');
|
|
56
|
+
return {
|
|
57
|
+
...actual,
|
|
58
|
+
homedir: () => tempHome
|
|
59
|
+
};
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const registryModule = await import('../../src/core/registry/session-registry.js');
|
|
63
|
+
|
|
64
|
+
registryModule.registerSession('session-123', projectDir);
|
|
65
|
+
const projectInfo = registryModule.getSessionProject('session-123');
|
|
66
|
+
|
|
67
|
+
expect(projectInfo).not.toBeNull();
|
|
68
|
+
expect(projectInfo?.projectPath.endsWith('/workspace/project')).toBe(true);
|
|
69
|
+
expect(projectInfo?.projectHash).toBe(hashProjectPath(projectDir));
|
|
70
|
+
|
|
71
|
+
const registryPath = path.join(tempHome, '.claude-code', 'memory', 'session-registry.json');
|
|
72
|
+
const saved = JSON.parse(await fs.readFile(registryPath, 'utf8')) as {
|
|
73
|
+
sessions: Record<string, { projectPath: string }>;
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
expect(saved.sessions['session-123']?.projectPath.endsWith('/workspace/project')).toBe(true);
|
|
77
|
+
});
|
|
78
|
+
});
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
evaluateReplayFixture,
|
|
5
|
+
formatReplayEvaluationMarkdown,
|
|
6
|
+
type ReplayRetrievalRunner
|
|
7
|
+
} from '../../src/core/replay-evaluator.js';
|
|
8
|
+
|
|
9
|
+
const fixture = {
|
|
10
|
+
name: 'private-real-session-qrels',
|
|
11
|
+
description: 'contains raw real session text that reports must not leak',
|
|
12
|
+
ks: [1, 3],
|
|
13
|
+
queries: [
|
|
14
|
+
{
|
|
15
|
+
queryId: 'q-secret-1',
|
|
16
|
+
query: 'SECRET vector search recall regression',
|
|
17
|
+
expectedIds: ['m-secret-1'],
|
|
18
|
+
expectedRelevance: { 'm-secret-1': 2 }
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
memories: [
|
|
22
|
+
{
|
|
23
|
+
id: 'm-secret-1',
|
|
24
|
+
content: 'SECRET vector search recall regression fix uses retriever pipeline replay'
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
id: 'm-noise',
|
|
28
|
+
content: 'unrelated dashboard layout memory'
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
describe('replay fixture evaluator', () => {
|
|
34
|
+
it('evaluates through the retriever pipeline runner and returns a sanitized report', async () => {
|
|
35
|
+
const calls: Array<{ query: string; queryId: string; topK: number }> = [];
|
|
36
|
+
const retrievalRunner: ReplayRetrievalRunner = async (query, input) => {
|
|
37
|
+
calls.push({ query, queryId: input.query.queryId, topK: input.topK });
|
|
38
|
+
return {
|
|
39
|
+
retrievedIds: ['m-secret-1', 'm-noise'],
|
|
40
|
+
candidateIds: ['m-secret-1', 'm-noise'],
|
|
41
|
+
confidence: 'high',
|
|
42
|
+
fallbackTrace: ['stage:primary:fast']
|
|
43
|
+
};
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
const report = await evaluateReplayFixture(fixture, {
|
|
47
|
+
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
48
|
+
retrievalRunner
|
|
49
|
+
});
|
|
50
|
+
const serialized = JSON.stringify(report);
|
|
51
|
+
|
|
52
|
+
expect(calls).toEqual([
|
|
53
|
+
{ query: 'SECRET vector search recall regression', queryId: 'q-secret-1', topK: 3 }
|
|
54
|
+
]);
|
|
55
|
+
expect(report).toMatchObject({
|
|
56
|
+
name: 'private-real-session-qrels',
|
|
57
|
+
evaluator: 'retriever-pipeline-v1',
|
|
58
|
+
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
59
|
+
fixtureStats: {
|
|
60
|
+
queryCount: 1,
|
|
61
|
+
memoryCount: 2,
|
|
62
|
+
ks: [1, 3]
|
|
63
|
+
},
|
|
64
|
+
summary: {
|
|
65
|
+
queryCount: 1,
|
|
66
|
+
precisionAtK: { 1: 1, 3: 1 / 3 },
|
|
67
|
+
recallAtK: { 1: 1, 3: 1 },
|
|
68
|
+
ndcgAtK: { 1: 1, 3: 1 },
|
|
69
|
+
hitAtK: { 1: 1, 3: 1 },
|
|
70
|
+
mrr: 1,
|
|
71
|
+
failedQueryCount: 0
|
|
72
|
+
}
|
|
73
|
+
});
|
|
74
|
+
expect(report.perQuery).toEqual([
|
|
75
|
+
{
|
|
76
|
+
queryId: 'q-secret-1',
|
|
77
|
+
retrievedIds: ['m-secret-1', 'm-noise'],
|
|
78
|
+
candidateIds: ['m-secret-1', 'm-noise'],
|
|
79
|
+
confidence: 'high',
|
|
80
|
+
fallbackTrace: ['stage:primary:fast'],
|
|
81
|
+
reciprocalRank: 1,
|
|
82
|
+
at: {
|
|
83
|
+
1: { precision: 1, recall: 1, hits: 1, ndcg: 1 },
|
|
84
|
+
3: { precision: 1 / 3, recall: 1, hits: 1, ndcg: 1 }
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
]);
|
|
88
|
+
expect(serialized).not.toContain('SECRET');
|
|
89
|
+
expect(serialized).not.toContain('vector search recall regression');
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it('uses the real in-memory Retriever/RetrievalOrchestrator pipeline by default', async () => {
|
|
93
|
+
const report = await evaluateReplayFixture(fixture, {
|
|
94
|
+
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
95
|
+
retrievalOptions: { strategy: 'fast' }
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
expect(report.evaluator).toBe('retriever-pipeline-v1');
|
|
99
|
+
expect(report.perQuery[0]).toMatchObject({
|
|
100
|
+
queryId: 'q-secret-1',
|
|
101
|
+
retrievedIds: expect.arrayContaining(['m-secret-1']),
|
|
102
|
+
candidateIds: expect.arrayContaining(['m-secret-1']),
|
|
103
|
+
fallbackTrace: expect.arrayContaining(['stage:primary:fast'])
|
|
104
|
+
});
|
|
105
|
+
expect(report.summary.hitAtK[1]).toBe(1);
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it('counts no-match qrels separately from positive retrieval misses', async () => {
|
|
109
|
+
const report = await evaluateReplayFixture({
|
|
110
|
+
name: 'negative-qrels-fixture',
|
|
111
|
+
ks: [1, 3],
|
|
112
|
+
queries: [
|
|
113
|
+
{
|
|
114
|
+
queryId: 'q-positive',
|
|
115
|
+
query: 'retriever pipeline replay answer',
|
|
116
|
+
expectation: 'match',
|
|
117
|
+
expectedIds: ['m-positive'],
|
|
118
|
+
expectedRelevance: { 'm-positive': 2 },
|
|
119
|
+
knownAnswer: 'Retriever pipeline replay answer should be found.'
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
queryId: 'q-command-artifact-no-match',
|
|
123
|
+
query: 'local-command-stdout command-name opus',
|
|
124
|
+
expectation: 'no_match',
|
|
125
|
+
expectedIds: [],
|
|
126
|
+
expectedRelevance: {},
|
|
127
|
+
forbiddenIds: ['m-positive']
|
|
128
|
+
}
|
|
129
|
+
],
|
|
130
|
+
memories: [
|
|
131
|
+
{
|
|
132
|
+
id: 'm-positive',
|
|
133
|
+
content: 'Retriever pipeline replay answer should be found.'
|
|
134
|
+
}
|
|
135
|
+
]
|
|
136
|
+
}, {
|
|
137
|
+
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
138
|
+
retrievalOptions: { strategy: 'auto' }
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
expect(report.summary).toMatchObject({
|
|
142
|
+
queryCount: 2,
|
|
143
|
+
positiveQueryCount: 1,
|
|
144
|
+
noMatchQueryCount: 1,
|
|
145
|
+
noMatchCorrect: 1,
|
|
146
|
+
noMatchAccuracy: 1,
|
|
147
|
+
failedQueryCount: 0,
|
|
148
|
+
precisionAtK: { 1: 1, 3: 1 / 3 },
|
|
149
|
+
recallAtK: { 1: 1, 3: 1 },
|
|
150
|
+
hitAtK: { 1: 1, 3: 1 }
|
|
151
|
+
});
|
|
152
|
+
expect(report.perQuery[1]).toMatchObject({
|
|
153
|
+
queryId: 'q-command-artifact-no-match',
|
|
154
|
+
expectation: 'no_match',
|
|
155
|
+
retrievedIds: [],
|
|
156
|
+
forbiddenHitIds: [],
|
|
157
|
+
noMatchSatisfied: true,
|
|
158
|
+
confidence: 'none'
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
it('formats markdown reports without raw query or memory content', async () => {
  // The same path is fed to the formatter and asserted on below, so the two
  // cannot drift apart.
  const qrelsPath = '.claude-memory/benchmarks/real-session-qrels.json';

  // Per-query rows are switched off for this report.
  const report = await evaluateReplayFixture(fixture, {
    generatedAt: '2026-05-05T00:00:00.000Z',
    includePerQuery: false
  });
  const markdown = formatReplayEvaluationMarkdown(report, { qrelsPath });

  // Title, metric labels and the qrels path must be rendered.
  // NOTE(review): 'private-real-session-qrels' is presumably the fixture name;
  // `fixture` is declared outside this view — confirm.
  const requiredFragments = [
    '# Retrieval Replay Benchmark Report',
    'private-real-session-qrels',
    'nDCG@1',
    'Hit@1',
    'MRR',
    qrelsPath
  ];
  for (const fragment of requiredFragments) {
    expect(markdown).toContain(fragment);
  }

  // NOTE(review): these look like raw query/memory strings carried by the
  // fixture; the test name says they must not leak into the report — confirm
  // against the fixture definition.
  const forbiddenFragments = ['SECRET', 'vector search recall regression'];
  for (const fragment of forbiddenFragments) {
    expect(markdown).not.toContain(fragment);
  }
});
|
|
181
|
+
});
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { afterEach, describe, expect, it } from 'vitest';
|
|
5
|
+
import { RetrievalAnalyticsService } from '../../src/core/engine/retrieval-analytics-service.js';
|
|
6
|
+
import type {
|
|
7
|
+
RetrievalAnalyticsStore,
|
|
8
|
+
RetrievalTrace
|
|
9
|
+
} from '../../src/core/engine/retrieval-analytics-service.js';
|
|
10
|
+
import type { MemoryEvent } from '../../src/core/types.js';
|
|
11
|
+
|
|
12
|
+
// Directories handed out by tempStoragePath(); drained after every test.
const tempDirs: string[] = [];

/**
 * Removes every temp directory registered during the test that just ran.
 * Entries are popped newest-first, so anything left behind by a throwing
 * rmSync stays in the list for the next cleanup pass.
 */
afterEach(() => {
  while (tempDirs.length !== 0) {
    const dir = tempDirs.pop();
    if (!dir) {
      continue;
    }
    // force: true keeps cleanup quiet when the directory is already gone.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
22
|
+
|
|
23
|
+
/**
 * Creates a fresh, uniquely named directory under the OS temp dir and
 * registers it in `tempDirs` so the afterEach hook can delete it.
 *
 * @returns Absolute path of the newly created directory.
 */
function tempStoragePath(): string {
  const prefix = join(tmpdir(), 'retrieval-analytics-service-');
  const created = mkdtempSync(prefix);
  tempDirs.push(created);
  return created;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Builds a minimal `user_prompt` memory event for session `s1`.
 *
 * @param id        Event id; also embedded in `canonicalKey` and `dedupeKey`.
 * @param content   Raw event content.
 * @param timestamp Event time; defaults to 2026-02-24T00:00:00.000Z.
 * @returns The event, typed so tests can attach the optional
 *          `access_count` / `last_accessed_at` fields afterwards.
 */
function event(
  id: string,
  content: string,
  timestamp = new Date('2026-02-24T00:00:00.000Z')
): MemoryEvent & { access_count?: number; last_accessed_at?: string } {
  const built: MemoryEvent & { access_count?: number; last_accessed_at?: string } = {
    id,
    sessionId: 's1',
    eventType: 'user_prompt',
    content,
    canonicalKey: `test/${id}`,
    dedupeKey: `s1:${id}`,
    timestamp,
    metadata: {}
  };
  return built;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Produces a stub RetrievalAnalyticsStore whose methods all resolve to
 * empty / zeroed results, with any entries in `overrides` taking precedence.
 *
 * @param overrides Store methods a test wants to replace.
 * @returns A complete store object: defaults first, overrides applied on top.
 */
function baseStore(overrides: Partial<RetrievalAnalyticsStore> = {}): RetrievalAnalyticsStore {
  // Factories rather than shared constants: each call hands back a new object.
  const zeroTraceStats = () => ({
    totalQueries: 0,
    avgCandidateCount: 0,
    avgSelectedCount: 0,
    selectionRate: 0
  });
  const zeroHelpfulnessStats = () => ({
    avgScore: 0,
    totalEvaluated: 0,
    totalRetrievals: 0,
    helpful: 0,
    neutral: 0,
    unhelpful: 0
  });

  const defaults: RetrievalAnalyticsStore = {
    getRetrievalTraceStats: async () => zeroTraceStats(),
    getRecentRetrievalTraces: async (_limit = 50) => [],
    getMostAccessed: async (_limit = 10) => [],
    evaluateSessionHelpfulness: async (_sessionId: string) => {},
    getUnevaluatedSessions: async (_currentSessionId: string, _limit = 5) => [],
    getHelpfulMemories: async (_limit = 10) => [],
    getHelpfulnessStats: async () => zeroHelpfulnessStats()
  };

  return { ...defaults, ...overrides };
}
|
|
70
|
+
|
|
71
|
+
describe('RetrievalAnalyticsService', () => {
|
|
72
|
+
it('maps most-accessed events to dashboard memories and extracts topics from content', async () => {
  let initCalls = 0;
  let limitSeen: number | undefined;

  // 205 filler characters followed by a markdown heading and a bold phrase:
  // the expectations below want a 200-character summary plus '...' and both
  // marked-up phrases reported as topics.
  const accessed = event(
    'e1',
    `${'x'.repeat(205)}\n## Thin Core Architecture\nUse **Retrieval Analytics** to keep MemoryService small.`
  );
  Object.assign(accessed, {
    access_count: 3,
    last_accessed_at: '2026-02-25T00:00:00.000Z'
  });
  // No access metadata at all: expected to map to accessCount 0 / lastAccessed null.
  const untouched = event('e2', 'plain content without explicit access metadata');

  const retrievalStore = baseStore({
    getMostAccessed: async (limit = 10) => {
      limitSeen = limit;
      return [accessed, untouched];
    }
  });
  const service = new RetrievalAnalyticsService({
    initialize: async () => { initCalls += 1; },
    retrievalStore
  });

  const memories = await service.getMostAccessedMemories(7);

  // This read path is expected not to trigger initialization, and to forward
  // the caller's limit to the store untouched.
  expect(initCalls).toBe(0);
  expect(limitSeen).toBe(7);
  expect(memories).toHaveLength(2);

  const [mappedAccessed, mappedUntouched] = memories;
  expect(mappedAccessed).toMatchObject({
    memoryId: 'e1',
    summary: `${'x'.repeat(200)}...`,
    topics: ['Thin Core Architecture', 'Retrieval Analytics'],
    accessCount: 3,
    lastAccessed: '2026-02-25T00:00:00.000Z',
    confidence: 1.0,
    createdAt: accessed.timestamp
  });
  expect(mappedUntouched).toMatchObject({
    memoryId: 'e2',
    accessCount: 0,
    lastAccessed: null,
    confidence: 1.0,
    createdAt: untouched.timestamp
  });
});
|
|
115
|
+
|
|
116
|
+
it('evaluates pending sessions best-effort and ignores individual failures', async () => {
  let initialized = 0;
  const evaluated: string[] = [];
  // Arguments the service passes to getUnevaluatedSessions are recorded here
  // and asserted after the call. The method under test deliberately swallows
  // some failures, so an expect() thrown from inside a store stub could be
  // absorbed or surface as an unrelated assertion failure; asserting outside
  // the stub also proves the stub was actually invoked.
  const pendingLookups: Array<{ currentSessionId: string; limit: number }> = [];

  const service = new RetrievalAnalyticsService({
    initialize: async () => { initialized += 1; },
    retrievalStore: baseStore({
      getUnevaluatedSessions: async (currentSessionId: string, limit = 5) => {
        pendingLookups.push({ currentSessionId, limit });
        return ['ok-1', 'fails', 'ok-2'];
      },
      evaluateSessionHelpfulness: async (sessionId: string) => {
        // The middle session fails; the ones around it must still be evaluated.
        if (sessionId === 'fails') {
          throw new Error('transient evaluation failure');
        }
        evaluated.push(sessionId);
      }
    })
  });

  await service.evaluatePendingSessions('current-session');

  expect(initialized).toBe(1);
  expect(pendingLookups).toEqual([{ currentSessionId: 'current-session', limit: 5 }]);
  expect(evaluated).toEqual(['ok-1', 'ok-2']);
});
|
|
142
|
+
|
|
143
|
+
it('delegates trace and helpfulness read-model methods after initialization', async () => {
  let initCalls = 0;
  const evaluatedSessions: string[] = [];

  // Canned store responses; the service is expected to hand them back unchanged.
  const expectedTraceStats = {
    totalQueries: 12,
    avgCandidateCount: 4,
    avgSelectedCount: 2,
    selectionRate: 0.5
  };
  const expectedTraces = [{
    traceId: 't1',
    sessionId: 's1',
    projectHash: 'project-hash',
    queryText: 'thin core',
    strategy: 'auto',
    candidateEventIds: ['e1', 'e2'],
    selectedEventIds: ['e1'],
    candidateDetails: [{ eventId: 'e1', score: 0.9 }],
    selectedDetails: [{ eventId: 'e1', score: 0.9 }],
    candidateCount: 2,
    selectedCount: 1,
    confidence: 'high',
    fallbackTrace: ['stage:primary:deep'],
    createdAt: new Date('2026-02-24T01:00:00.000Z')
  }] satisfies RetrievalTrace[];
  const expectedHelpfulMemories = [{
    eventId: 'e1',
    summary: 'helpful memory',
    helpfulnessScore: 0.8,
    accessCount: 4,
    evaluationCount: 2
  }];
  const expectedHelpfulnessStats = {
    avgScore: 0.75,
    totalEvaluated: 8,
    totalRetrievals: 10,
    helpful: 6,
    neutral: 1,
    unhelpful: 1
  };

  const retrievalStore = baseStore({
    getRetrievalTraceStats: async () => expectedTraceStats,
    getRecentRetrievalTraces: async (limit = 50) => {
      expect(limit).toBe(3);
      return expectedTraces;
    },
    evaluateSessionHelpfulness: async (sessionId: string) => { evaluatedSessions.push(sessionId); },
    getHelpfulMemories: async (limit = 10) => {
      expect(limit).toBe(2);
      return expectedHelpfulMemories;
    },
    getHelpfulnessStats: async () => expectedHelpfulnessStats
  });
  const service = new RetrievalAnalyticsService({
    initialize: async () => { initCalls += 1; },
    retrievalStore
  });

  // Five service calls in total; the final assertion expects one
  // initialize() per call.
  await expect(service.getRetrievalTraceStats()).resolves.toEqual(expectedTraceStats);
  await expect(service.getRecentRetrievalTraces(3)).resolves.toEqual(expectedTraces);
  await service.evaluateSessionHelpfulness('s1');
  await expect(service.getHelpfulMemories(2)).resolves.toEqual(expectedHelpfulMemories);
  await expect(service.getHelpfulnessStats()).resolves.toEqual(expectedHelpfulnessStats);

  expect(evaluatedSessions).toEqual(['s1']);
  expect(initCalls).toBe(5);
});
|
|
210
|
+
});
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import { computePrecisionRecallAtK, summarizeReplayMetrics } from '../../src/core/retrieval-benchmark.js';
|
|
4
|
+
|
|
5
|
+
describe('retrieval replay benchmark metrics', () => {
|
|
6
|
+
it('computes Precision@k and Recall@k for replay queries', () => {
  const queryMetrics = computePrecisionRecallAtK(
    [
      { queryId: 'q1', expectedIds: ['a', 'b'], retrievedIds: ['a', 'x', 'b'] },
      { queryId: 'q2', expectedIds: ['c'], retrievedIds: ['x', 'y', 'z'] }
    ],
    [1, 3]
  );
  const [partialHit, totalMiss] = queryMetrics;

  // q1: one of two expected ids at rank 1, both inside the top 3.
  expect(partialHit.at[1]).toMatchObject({ precision: 1, recall: 0.5, hits: 1 });
  expect(partialHit.at[3]).toMatchObject({ precision: 2 / 3, recall: 1, hits: 2 });
  expect(partialHit.at[1].ndcg).toBe(1);
  expect(partialHit.at[3].ndcg).toBeCloseTo(0.91972, 4);
  // q2: nothing expected is ever retrieved.
  expect(totalMiss.at[1]).toMatchObject({ precision: 0, recall: 0, hits: 0 });
  expect(totalMiss.at[3]).toMatchObject({ precision: 0, recall: 0, hits: 0 });

  // The expected aggregates are the mean of the two per-query values above.
  const aggregate = summarizeReplayMetrics(queryMetrics, [1, 3]);
  expect(aggregate).toMatchObject({
    queryCount: 2,
    precisionAtK: { 1: 0.5, 3: 1 / 3 },
    recallAtK: { 1: 0.25, 3: 0.5 }
  });
  expect(aggregate.ndcgAtK[1]).toBe(0.5);
  expect(aggregate.ndcgAtK[3]).toBeCloseTo(0.45986, 4);
});
|
|
31
|
+
|
|
32
|
+
it('computes graded nDCG@k from qrels relevance labels', () => {
  // The lower-graded id ('b', relevance 1) is retrieved ahead of the
  // higher-graded one ('a', relevance 3): precision and recall are perfect,
  // but the expected nDCG is below 1.
  const metricsByQuery = computePrecisionRecallAtK(
    [
      {
        queryId: 'q-graded',
        expectedIds: ['a', 'b'],
        expectedRelevance: { a: 3, b: 1 },
        retrievedIds: ['b', 'a', 'noise']
      }
    ],
    [2]
  );
  const [graded] = metricsByQuery;
  const atTwo = graded.at[2];

  expect(atTwo).toMatchObject({ precision: 1, recall: 1, hits: 2 });
  expect(atTwo.ndcg).toBeCloseTo(0.70981, 4);
});
|
|
48
|
+
|
|
49
|
+
it('deduplicates retrieved ids so replay metrics cannot over-count repeated hits', () => {
  // 'a' is returned three times but must count as a single hit.
  const queryMetrics = computePrecisionRecallAtK(
    [
      { queryId: 'q-duplicate', expectedIds: ['a'], retrievedIds: ['a', 'a', 'a', 'x'] }
    ],
    [1, 3]
  );

  const expectedRow = {
    queryId: 'q-duplicate',
    at: {
      1: { precision: 1, recall: 1, hits: 1, ndcg: 1 },
      3: { precision: 1 / 3, recall: 1, hits: 1, ndcg: 1 }
    }
  };
  expect(queryMetrics).toEqual([expectedRow]);

  const [deduplicated] = queryMetrics;
  expect(deduplicated.at[3].ndcg).toBe(1);
});
|
|
68
|
+
|
|
69
|
+
it('normalizes k values without losing zero-result replay rows', () => {
  // Fresh zeroed objects for the expected rows below.
  const zeroRow = () => ({ precision: 0, recall: 0, hits: 0, ndcg: 0 });
  const zeroPerK = () => ({ 0: 0, 1: 0, 3: 0 });

  // The requested cutoffs include a fractional, a repeated and a negative
  // value; the expected output is keyed by 0, 1 and 3 only.
  const queryMetrics = computePrecisionRecallAtK(
    [{ queryId: 'q-empty', expectedIds: ['a'], retrievedIds: [] }],
    [3.9, 1, 1, -2]
  );

  // The query retrieved nothing, yet its row must still be present.
  expect(queryMetrics).toEqual([
    {
      queryId: 'q-empty',
      at: {
        0: zeroRow(),
        1: zeroRow(),
        3: zeroRow()
      }
    }
  ]);

  const aggregate = summarizeReplayMetrics(queryMetrics, [3.9, 1, 1, -2]);
  expect(aggregate).toEqual({
    queryCount: 1,
    precisionAtK: zeroPerK(),
    recallAtK: zeroPerK(),
    ndcgAtK: zeroPerK()
  });
});
|
|
93
|
+
});
|