claude-memory-layer 1.0.30 → 1.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -5
- package/dist/cli/index.js +4 -3
- package/dist/cli/index.js.map +2 -2
- package/dist/core/index.js +3 -2
- package/dist/core/index.js.map +2 -2
- package/dist/hooks/post-tool-use.js +3 -2
- package/dist/hooks/post-tool-use.js.map +2 -2
- package/dist/hooks/semantic-daemon.js +3 -2
- package/dist/hooks/semantic-daemon.js.map +2 -2
- package/dist/hooks/session-end.js +3 -2
- package/dist/hooks/session-end.js.map +2 -2
- package/dist/hooks/session-start.js +3 -2
- package/dist/hooks/session-start.js.map +2 -2
- package/dist/hooks/stop.js +3 -2
- package/dist/hooks/stop.js.map +2 -2
- package/dist/hooks/user-prompt-submit.js +3 -2
- package/dist/hooks/user-prompt-submit.js.map +2 -2
- package/dist/index.js +3 -2
- package/dist/index.js.map +2 -2
- package/dist/mcp/index.js +3 -2
- package/dist/mcp/index.js.map +2 -2
- package/dist/server/api/index.js +3 -2
- package/dist/server/api/index.js.map +2 -2
- package/dist/server/index.js +3 -2
- package/dist/server/index.js.map +2 -2
- package/dist/services/memory-service.js +3 -2
- package/dist/services/memory-service.js.map +2 -2
- package/package.json +10 -3
- package/scripts/postinstall-embedding-backend.cjs +18 -16
- package/AGENTS.md +0 -71
- package/CLAUDE.md +0 -30
- package/HANDOFF.md +0 -92
- package/Memo.txt +0 -558
- package/benchmarks/replay/anonymized-real-sessions.json +0 -48
- package/config/kpi-thresholds.json +0 -7
- package/context.md +0 -636
- package/docs/ARCHITECTURE_COMPARISON_AND_RECOMMENDATIONS.md +0 -627
- package/docs/HERMES_MEMORY_INGESTION_ANALYSIS.md +0 -440
- package/docs/MCP_MEMORY_SERVICE_COMPARATIVE_REVIEW.md +0 -271
- package/docs/MEMORY_USEFULNESS_AUDIT.md +0 -371
- package/docs/MEMORY_USEFULNESS_AUDIT_RAW.json +0 -80
- package/docs/MEMSEARCH_PROJECT_STRUCTURE_ANALYSIS.md +0 -333
- package/docs/MEMU_ADOPTION.md +0 -40
- package/docs/OPERATIONS.md +0 -18
- package/docs/PRODUCT_VALIDATION_MATRIX.md +0 -82
- package/docs/PROJECT_STRUCTURE_ANALYSIS.md +0 -421
- package/docs/REFACTORING_MILESTONES_AND_ISSUES.md +0 -501
- package/docs/REFACTORING_PLAN_THIN_CORE.md +0 -414
- package/docs/REFERENCE_PROJECT_ANALYSES.md +0 -25
- package/docs/SUPERLOCALMEMORY_PROJECT_STRUCTURE_ANALYSIS.md +0 -452
- package/docs/TARGET_ARCHITECTURE_AND_FOLDER_STRUCTURE.md +0 -446
- package/docs/architecture/comparison-index.md +0 -47
- package/docs/reports/codex-real-data-validation-20260505T040447Z.md +0 -46
- package/plan.md +0 -1642
- package/scripts/build.ts +0 -159
- package/scripts/bump-patch-version.sh +0 -18
- package/scripts/delete-unknown-projects.js +0 -154
- package/scripts/fix-sync-gap.js +0 -32
- package/scripts/generate-session-qrels.ts +0 -126
- package/scripts/heartbeat-memory-orchestrator.sh +0 -28
- package/scripts/replay-retrieval-benchmark.ts +0 -69
- package/scripts/report-sync-gap.js +0 -26
- package/scripts/review-queue-auto-resolve.js +0 -21
- package/scripts/sync-gap-auto-heal.sh +0 -17
- package/spec.md +0 -624
- package/specs/20260207-dashboard-upgrade/context.md +0 -38
- package/specs/20260207-dashboard-upgrade/spec.md +0 -96
- package/specs/citations-system/context.md +0 -243
- package/specs/citations-system/plan.md +0 -495
- package/specs/citations-system/spec.md +0 -371
- package/specs/endless-mode/context.md +0 -305
- package/specs/endless-mode/plan.md +0 -620
- package/specs/endless-mode/spec.md +0 -455
- package/specs/entity-edge-model/context.md +0 -401
- package/specs/entity-edge-model/plan.md +0 -459
- package/specs/entity-edge-model/spec.md +0 -391
- package/specs/evidence-aligner-v2/context.md +0 -401
- package/specs/evidence-aligner-v2/plan.md +0 -303
- package/specs/evidence-aligner-v2/spec.md +0 -312
- package/specs/mcp-desktop-integration/context.md +0 -278
- package/specs/mcp-desktop-integration/plan.md +0 -550
- package/specs/mcp-desktop-integration/spec.md +0 -494
- package/specs/memory-utilization-improvements/context.md +0 -145
- package/specs/memory-utilization-improvements/plan.md +0 -361
- package/specs/memory-utilization-improvements/spec.md +0 -361
- package/specs/post-tool-use-hook/context.md +0 -319
- package/specs/post-tool-use-hook/plan.md +0 -469
- package/specs/post-tool-use-hook/spec.md +0 -364
- package/specs/private-tags/context.md +0 -288
- package/specs/private-tags/plan.md +0 -412
- package/specs/private-tags/spec.md +0 -345
- package/specs/progressive-disclosure/context.md +0 -346
- package/specs/progressive-disclosure/plan.md +0 -663
- package/specs/progressive-disclosure/spec.md +0 -415
- package/specs/selective-tool-observation/context.md +0 -100
- package/specs/selective-tool-observation/plan.md +0 -158
- package/specs/selective-tool-observation/spec.md +0 -127
- package/specs/task-entity-system/context.md +0 -297
- package/specs/task-entity-system/plan.md +0 -301
- package/specs/task-entity-system/spec.md +0 -314
- package/specs/thin-core-refactor/context.md +0 -275
- package/specs/thin-core-refactor/plan.md +0 -536
- package/specs/thin-core-refactor/spec.md +0 -465
- package/specs/vector-outbox-v2/context.md +0 -470
- package/specs/vector-outbox-v2/plan.md +0 -562
- package/specs/vector-outbox-v2/spec.md +0 -466
- package/specs/web-viewer-ui/context.md +0 -384
- package/specs/web-viewer-ui/plan.md +0 -797
- package/specs/web-viewer-ui/spec.md +0 -516
- package/src/adapters/claude/capture/index.ts +0 -3
- package/src/adapters/claude/context/index.ts +0 -3
- package/src/adapters/claude/hooks/index.ts +0 -21
- package/src/adapters/claude/hooks/post-tool-use.ts +0 -239
- package/src/adapters/claude/hooks/prompt-injection-policy.ts +0 -104
- package/src/adapters/claude/hooks/semantic-daemon-client.ts +0 -209
- package/src/adapters/claude/hooks/semantic-daemon.ts +0 -283
- package/src/adapters/claude/hooks/session-end.ts +0 -59
- package/src/adapters/claude/hooks/session-start.ts +0 -73
- package/src/adapters/claude/hooks/stop.ts +0 -128
- package/src/adapters/claude/hooks/user-prompt-submit.ts +0 -361
- package/src/adapters/claude/index.ts +0 -4
- package/src/adapters/claude/transcript/index.ts +0 -4
- package/src/adapters/claude/transcript/transcript-reader.ts +0 -57
- package/src/adapters/claude/transcript/turn-reconstructor.ts +0 -65
- package/src/apps/cli/claude-settings-hooks.ts +0 -138
- package/src/apps/cli/codex-import-runner.ts +0 -125
- package/src/apps/cli/codex-validation-output.ts +0 -95
- package/src/apps/cli/hermes-import-runner.ts +0 -130
- package/src/apps/cli/hermes-validation-output.ts +0 -91
- package/src/apps/cli/index.ts +0 -1735
- package/src/apps/cli/mcp-install.ts +0 -106
- package/src/apps/cli/retrieval-disclosure-output.ts +0 -196
- package/src/apps/dashboard/assets/js/bootstrap.js +0 -244
- package/src/apps/dashboard/assets/js/chat.js +0 -373
- package/src/apps/dashboard/assets/js/disclosure.js +0 -232
- package/src/apps/dashboard/assets/js/modals.js +0 -298
- package/src/apps/dashboard/assets/js/overview.js +0 -655
- package/src/apps/dashboard/assets/js/state.js +0 -72
- package/src/apps/dashboard/assets/js/views.js +0 -468
- package/src/apps/dashboard/index.html +0 -543
- package/src/apps/dashboard/index.ts +0 -3
- package/src/apps/dashboard/style.css +0 -1750
- package/src/apps/index.ts +0 -5
- package/src/apps/server/api/chat.ts +0 -244
- package/src/apps/server/api/citations.ts +0 -105
- package/src/apps/server/api/events.ts +0 -137
- package/src/apps/server/api/health.ts +0 -53
- package/src/apps/server/api/index.ts +0 -26
- package/src/apps/server/api/projects.ts +0 -74
- package/src/apps/server/api/search.ts +0 -184
- package/src/apps/server/api/sessions.ts +0 -115
- package/src/apps/server/api/stats.ts +0 -723
- package/src/apps/server/api/turns.ts +0 -143
- package/src/apps/server/api/utils.ts +0 -65
- package/src/apps/server/index.ts +0 -111
- package/src/cli/index.ts +0 -3
- package/src/cli/retrieval-disclosure-output.ts +0 -2
- package/src/compat/index.ts +0 -5
- package/src/core/canonical-key.ts +0 -186
- package/src/core/citation-generator.ts +0 -63
- package/src/core/consolidated-store.ts +0 -356
- package/src/core/consolidation-worker.ts +0 -493
- package/src/core/context-formatter.ts +0 -276
- package/src/core/continuity-manager.ts +0 -341
- package/src/core/db-wrapper.ts +0 -64
- package/src/core/derive/fact-deriver.ts +0 -170
- package/src/core/derive/index.ts +0 -2
- package/src/core/derive/summary-deriver.ts +0 -76
- package/src/core/edge-repo.ts +0 -333
- package/src/core/embedder.ts +0 -4
- package/src/core/engine/embedding-maintenance-service.ts +0 -187
- package/src/core/engine/endless-memory-services.ts +0 -4
- package/src/core/engine/index.ts +0 -19
- package/src/core/engine/memory-engine-services.ts +0 -170
- package/src/core/engine/memory-ingest-service.ts +0 -317
- package/src/core/engine/memory-query-service.ts +0 -173
- package/src/core/engine/memory-runtime-service.ts +0 -162
- package/src/core/engine/memory-service-composition.ts +0 -231
- package/src/core/engine/retrieval-analytics-service.ts +0 -181
- package/src/core/engine/retrieval-disclosure-service.ts +0 -420
- package/src/core/engine/retrieval-orchestrator.ts +0 -377
- package/src/core/engine/retrieval-services.ts +0 -176
- package/src/core/engine/shared-memory-services.ts +0 -4
- package/src/core/entity-repo.ts +0 -349
- package/src/core/event-store.ts +0 -779
- package/src/core/evidence-aligner.ts +0 -635
- package/src/core/external-market-context.ts +0 -582
- package/src/core/graduation-worker.ts +0 -171
- package/src/core/graduation.ts +0 -377
- package/src/core/index.ts +0 -64
- package/src/core/ingest-interceptor.ts +0 -80
- package/src/core/markdown-mirror.ts +0 -70
- package/src/core/matcher.ts +0 -208
- package/src/core/md-mirror.ts +0 -92
- package/src/core/metadata-extractor.ts +0 -203
- package/src/core/model/memory-fact.ts +0 -30
- package/src/core/model/memory-rule.ts +0 -14
- package/src/core/model/memory-summary.ts +0 -21
- package/src/core/model/raw-event.ts +0 -28
- package/src/core/model/retrieval-result.ts +0 -35
- package/src/core/mongo-sync-config.ts +0 -165
- package/src/core/mongo-sync-worker.ts +0 -381
- package/src/core/privacy/filter.ts +0 -190
- package/src/core/privacy/index.ts +0 -20
- package/src/core/privacy/tag-parser.ts +0 -145
- package/src/core/product-validation-matrix.ts +0 -314
- package/src/core/progressive-retriever.ts +0 -414
- package/src/core/registry/project-path.ts +0 -54
- package/src/core/registry/session-registry.ts +0 -69
- package/src/core/replay-evaluator.ts +0 -625
- package/src/core/retrieval-benchmark.ts +0 -117
- package/src/core/retrieval-quality.ts +0 -109
- package/src/core/retriever.ts +0 -800
- package/src/core/session-qrels.ts +0 -360
- package/src/core/shared-event-store.ts +0 -114
- package/src/core/shared-promoter.ts +0 -249
- package/src/core/shared-store.ts +0 -289
- package/src/core/shared-vector-store.ts +0 -203
- package/src/core/sqlite-event-store.ts +0 -1846
- package/src/core/sqlite-wrapper.ts +0 -116
- package/src/core/sync-worker.ts +0 -228
- package/src/core/tag-taxonomy.ts +0 -51
- package/src/core/task/blocker-resolver.ts +0 -333
- package/src/core/task/index.ts +0 -9
- package/src/core/task/task-matcher.ts +0 -240
- package/src/core/task/task-projector.ts +0 -358
- package/src/core/task/task-resolver.ts +0 -421
- package/src/core/turn-state.ts +0 -207
- package/src/core/types.ts +0 -952
- package/src/core/vector-outbox.ts +0 -299
- package/src/core/vector-store.ts +0 -231
- package/src/core/vector-worker.ts +0 -521
- package/src/core/working-set-store.ts +0 -257
- package/src/extensions/endless-memory/endless-memory-services.ts +0 -350
- package/src/extensions/endless-memory/index.ts +0 -1
- package/src/extensions/index.ts +0 -5
- package/src/extensions/mcp/handlers.ts +0 -960
- package/src/extensions/mcp/index.ts +0 -48
- package/src/extensions/mcp/tools.ts +0 -252
- package/src/extensions/shared-memory/index.ts +0 -1
- package/src/extensions/shared-memory/shared-memory-services.ts +0 -211
- package/src/extensions/vector/embedder.ts +0 -233
- package/src/extensions/vector/index.ts +0 -1
- package/src/hooks/post-tool-use.ts +0 -9
- package/src/hooks/semantic-daemon-client.ts +0 -1
- package/src/hooks/semantic-daemon.ts +0 -11
- package/src/hooks/session-end.ts +0 -9
- package/src/hooks/session-start.ts +0 -9
- package/src/hooks/stop.ts +0 -9
- package/src/hooks/user-prompt-submit.ts +0 -9
- package/src/index.ts +0 -13
- package/src/mcp/handlers.ts +0 -2
- package/src/mcp/index.ts +0 -4
- package/src/mcp/tools.ts +0 -2
- package/src/server/api/chat.ts +0 -2
- package/src/server/api/citations.ts +0 -2
- package/src/server/api/events.ts +0 -2
- package/src/server/api/health.ts +0 -2
- package/src/server/api/index.ts +0 -2
- package/src/server/api/projects.ts +0 -2
- package/src/server/api/search.ts +0 -2
- package/src/server/api/sessions.ts +0 -2
- package/src/server/api/stats.ts +0 -2
- package/src/server/api/turns.ts +0 -2
- package/src/server/api/utils.ts +0 -2
- package/src/server/index.ts +0 -2
- package/src/services/bootstrap-organizer.ts +0 -463
- package/src/services/codex-session-history-importer.ts +0 -966
- package/src/services/hermes-session-history-importer.ts +0 -733
- package/src/services/memory-service-config.ts +0 -36
- package/src/services/memory-service-registry.ts +0 -150
- package/src/services/memory-service.ts +0 -688
- package/src/services/session-history-importer.ts +0 -629
- package/tests/README.md +0 -23
- package/tests/adapters/claude/claude-semantic-daemon-adapter.test.ts +0 -54
- package/tests/adapters/claude/claude-transcript-reconstructor.test.ts +0 -98
- package/tests/adapters/claude-hook-prompt-injection-policy.test.ts +0 -99
- package/tests/apps/app-layer-boundary.test.ts +0 -48
- package/tests/apps/claude-settings-hooks.test.ts +0 -107
- package/tests/apps/cli-disclosure-output.test.ts +0 -212
- package/tests/apps/codex-import-runner.test.ts +0 -99
- package/tests/apps/codex-validation-output.test.ts +0 -100
- package/tests/apps/hermes-import-runner.test.ts +0 -99
- package/tests/apps/mcp-install-command.test.ts +0 -59
- package/tests/apps/package-build-entrypoints.test.ts +0 -30
- package/tests/apps/postinstall-embedding-backend.test.ts +0 -175
- package/tests/apps/search-api-disclosure.test.ts +0 -162
- package/tests/apps/stats-api-lightweight.test.ts +0 -67
- package/tests/apps/ui-disclosure-output.test.ts +0 -140
- package/tests/core/bootstrap-organizer.test.ts +0 -111
- package/tests/core/canonical-key.test.ts +0 -101
- package/tests/core/codex-session-history-importer-validation.test.ts +0 -185
- package/tests/core/consolidation-worker.test.ts +0 -75
- package/tests/core/embedding-maintenance-service.test.ts +0 -282
- package/tests/core/evidence-aligner.test.ts +0 -152
- package/tests/core/external-market-context.test.ts +0 -209
- package/tests/core/fact-deriver.test.ts +0 -79
- package/tests/core/hermes-session-history-importer-validation.test.ts +0 -609
- package/tests/core/ingest-interceptor.test.ts +0 -38
- package/tests/core/markdown-mirror.test.ts +0 -85
- package/tests/core/matcher.test.ts +0 -112
- package/tests/core/md-mirror.test.ts +0 -50
- package/tests/core/memory-engine-services.test.ts +0 -240
- package/tests/core/memory-ingest-service.test.ts +0 -296
- package/tests/core/memory-query-service.test.ts +0 -129
- package/tests/core/memory-runtime-service.test.ts +0 -201
- package/tests/core/memory-service-composition.test.ts +0 -192
- package/tests/core/memory-service-config.test.ts +0 -41
- package/tests/core/memory-service-facade.test.ts +0 -30
- package/tests/core/memory-service-registry.test.ts +0 -206
- package/tests/core/product-validation-matrix.test.ts +0 -61
- package/tests/core/project-registry.test.ts +0 -78
- package/tests/core/replay-evaluator.test.ts +0 -181
- package/tests/core/retrieval-analytics-service.test.ts +0 -210
- package/tests/core/retrieval-benchmark.test.ts +0 -93
- package/tests/core/retrieval-disclosure-service.test.ts +0 -264
- package/tests/core/retrieval-orchestrator.test.ts +0 -403
- package/tests/core/retrieval-quality.test.ts +0 -31
- package/tests/core/retrieval-services.test.ts +0 -185
- package/tests/core/retriever-fallback-chain.test.ts +0 -223
- package/tests/core/retriever-strategy-scope.test.ts +0 -164
- package/tests/core/retriever.memu-adoption.test.ts +0 -122
- package/tests/core/session-history-importer-filter.test.ts +0 -78
- package/tests/core/session-qrels.test.ts +0 -250
- package/tests/core/sqlite-event-store-replication.test.ts +0 -127
- package/tests/core/summary-deriver.test.ts +0 -66
- package/tests/extensions/embedder-warning-suppression.test.ts +0 -83
- package/tests/extensions/endless-memory-extension-boundary.test.ts +0 -17
- package/tests/extensions/endless-memory-services.test.ts +0 -325
- package/tests/extensions/mcp-context-tools.test.ts +0 -905
- package/tests/extensions/mcp-extension-boundary.test.ts +0 -21
- package/tests/extensions/mcp-package-build.test.ts +0 -22
- package/tests/extensions/mcp-project-aware-tools.test.ts +0 -102
- package/tests/extensions/shared-memory-extension-boundary.test.ts +0 -24
- package/tests/extensions/shared-memory-services.test.ts +0 -309
- package/tests/extensions/vector-extension-boundary.test.ts +0 -21
- package/tsconfig.json +0 -24
- package/vitest.config.ts +0 -15
|
@@ -1,181 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
2
|
-
|
|
3
|
-
import {
|
|
4
|
-
evaluateReplayFixture,
|
|
5
|
-
formatReplayEvaluationMarkdown,
|
|
6
|
-
type ReplayRetrievalRunner
|
|
7
|
-
} from '../../src/core/replay-evaluator.js';
|
|
8
|
-
|
|
9
|
-
const fixture = {
|
|
10
|
-
name: 'private-real-session-qrels',
|
|
11
|
-
description: 'contains raw real session text that reports must not leak',
|
|
12
|
-
ks: [1, 3],
|
|
13
|
-
queries: [
|
|
14
|
-
{
|
|
15
|
-
queryId: 'q-secret-1',
|
|
16
|
-
query: 'SECRET vector search recall regression',
|
|
17
|
-
expectedIds: ['m-secret-1'],
|
|
18
|
-
expectedRelevance: { 'm-secret-1': 2 }
|
|
19
|
-
}
|
|
20
|
-
],
|
|
21
|
-
memories: [
|
|
22
|
-
{
|
|
23
|
-
id: 'm-secret-1',
|
|
24
|
-
content: 'SECRET vector search recall regression fix uses retriever pipeline replay'
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
id: 'm-noise',
|
|
28
|
-
content: 'unrelated dashboard layout memory'
|
|
29
|
-
}
|
|
30
|
-
]
|
|
31
|
-
};
|
|
32
|
-
|
|
33
|
-
describe('replay fixture evaluator', () => {
|
|
34
|
-
it('evaluates through the retriever pipeline runner and returns a sanitized report', async () => {
|
|
35
|
-
const calls: Array<{ query: string; queryId: string; topK: number }> = [];
|
|
36
|
-
const retrievalRunner: ReplayRetrievalRunner = async (query, input) => {
|
|
37
|
-
calls.push({ query, queryId: input.query.queryId, topK: input.topK });
|
|
38
|
-
return {
|
|
39
|
-
retrievedIds: ['m-secret-1', 'm-noise'],
|
|
40
|
-
candidateIds: ['m-secret-1', 'm-noise'],
|
|
41
|
-
confidence: 'high',
|
|
42
|
-
fallbackTrace: ['stage:primary:fast']
|
|
43
|
-
};
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
const report = await evaluateReplayFixture(fixture, {
|
|
47
|
-
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
48
|
-
retrievalRunner
|
|
49
|
-
});
|
|
50
|
-
const serialized = JSON.stringify(report);
|
|
51
|
-
|
|
52
|
-
expect(calls).toEqual([
|
|
53
|
-
{ query: 'SECRET vector search recall regression', queryId: 'q-secret-1', topK: 3 }
|
|
54
|
-
]);
|
|
55
|
-
expect(report).toMatchObject({
|
|
56
|
-
name: 'private-real-session-qrels',
|
|
57
|
-
evaluator: 'retriever-pipeline-v1',
|
|
58
|
-
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
59
|
-
fixtureStats: {
|
|
60
|
-
queryCount: 1,
|
|
61
|
-
memoryCount: 2,
|
|
62
|
-
ks: [1, 3]
|
|
63
|
-
},
|
|
64
|
-
summary: {
|
|
65
|
-
queryCount: 1,
|
|
66
|
-
precisionAtK: { 1: 1, 3: 1 / 3 },
|
|
67
|
-
recallAtK: { 1: 1, 3: 1 },
|
|
68
|
-
ndcgAtK: { 1: 1, 3: 1 },
|
|
69
|
-
hitAtK: { 1: 1, 3: 1 },
|
|
70
|
-
mrr: 1,
|
|
71
|
-
failedQueryCount: 0
|
|
72
|
-
}
|
|
73
|
-
});
|
|
74
|
-
expect(report.perQuery).toEqual([
|
|
75
|
-
{
|
|
76
|
-
queryId: 'q-secret-1',
|
|
77
|
-
retrievedIds: ['m-secret-1', 'm-noise'],
|
|
78
|
-
candidateIds: ['m-secret-1', 'm-noise'],
|
|
79
|
-
confidence: 'high',
|
|
80
|
-
fallbackTrace: ['stage:primary:fast'],
|
|
81
|
-
reciprocalRank: 1,
|
|
82
|
-
at: {
|
|
83
|
-
1: { precision: 1, recall: 1, hits: 1, ndcg: 1 },
|
|
84
|
-
3: { precision: 1 / 3, recall: 1, hits: 1, ndcg: 1 }
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
]);
|
|
88
|
-
expect(serialized).not.toContain('SECRET');
|
|
89
|
-
expect(serialized).not.toContain('vector search recall regression');
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
it('uses the real in-memory Retriever/RetrievalOrchestrator pipeline by default', async () => {
|
|
93
|
-
const report = await evaluateReplayFixture(fixture, {
|
|
94
|
-
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
95
|
-
retrievalOptions: { strategy: 'fast' }
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
expect(report.evaluator).toBe('retriever-pipeline-v1');
|
|
99
|
-
expect(report.perQuery[0]).toMatchObject({
|
|
100
|
-
queryId: 'q-secret-1',
|
|
101
|
-
retrievedIds: expect.arrayContaining(['m-secret-1']),
|
|
102
|
-
candidateIds: expect.arrayContaining(['m-secret-1']),
|
|
103
|
-
fallbackTrace: expect.arrayContaining(['stage:primary:fast'])
|
|
104
|
-
});
|
|
105
|
-
expect(report.summary.hitAtK[1]).toBe(1);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
it('counts no-match qrels separately from positive retrieval misses', async () => {
|
|
109
|
-
const report = await evaluateReplayFixture({
|
|
110
|
-
name: 'negative-qrels-fixture',
|
|
111
|
-
ks: [1, 3],
|
|
112
|
-
queries: [
|
|
113
|
-
{
|
|
114
|
-
queryId: 'q-positive',
|
|
115
|
-
query: 'retriever pipeline replay answer',
|
|
116
|
-
expectation: 'match',
|
|
117
|
-
expectedIds: ['m-positive'],
|
|
118
|
-
expectedRelevance: { 'm-positive': 2 },
|
|
119
|
-
knownAnswer: 'Retriever pipeline replay answer should be found.'
|
|
120
|
-
},
|
|
121
|
-
{
|
|
122
|
-
queryId: 'q-command-artifact-no-match',
|
|
123
|
-
query: 'local-command-stdout command-name opus',
|
|
124
|
-
expectation: 'no_match',
|
|
125
|
-
expectedIds: [],
|
|
126
|
-
expectedRelevance: {},
|
|
127
|
-
forbiddenIds: ['m-positive']
|
|
128
|
-
}
|
|
129
|
-
],
|
|
130
|
-
memories: [
|
|
131
|
-
{
|
|
132
|
-
id: 'm-positive',
|
|
133
|
-
content: 'Retriever pipeline replay answer should be found.'
|
|
134
|
-
}
|
|
135
|
-
]
|
|
136
|
-
}, {
|
|
137
|
-
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
138
|
-
retrievalOptions: { strategy: 'auto' }
|
|
139
|
-
});
|
|
140
|
-
|
|
141
|
-
expect(report.summary).toMatchObject({
|
|
142
|
-
queryCount: 2,
|
|
143
|
-
positiveQueryCount: 1,
|
|
144
|
-
noMatchQueryCount: 1,
|
|
145
|
-
noMatchCorrect: 1,
|
|
146
|
-
noMatchAccuracy: 1,
|
|
147
|
-
failedQueryCount: 0,
|
|
148
|
-
precisionAtK: { 1: 1, 3: 1 / 3 },
|
|
149
|
-
recallAtK: { 1: 1, 3: 1 },
|
|
150
|
-
hitAtK: { 1: 1, 3: 1 }
|
|
151
|
-
});
|
|
152
|
-
expect(report.perQuery[1]).toMatchObject({
|
|
153
|
-
queryId: 'q-command-artifact-no-match',
|
|
154
|
-
expectation: 'no_match',
|
|
155
|
-
retrievedIds: [],
|
|
156
|
-
forbiddenHitIds: [],
|
|
157
|
-
noMatchSatisfied: true,
|
|
158
|
-
confidence: 'none'
|
|
159
|
-
});
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
it('formats markdown reports without raw query or memory content', async () => {
|
|
163
|
-
const report = await evaluateReplayFixture(fixture, {
|
|
164
|
-
generatedAt: '2026-05-05T00:00:00.000Z',
|
|
165
|
-
includePerQuery: false
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
const markdown = formatReplayEvaluationMarkdown(report, {
|
|
169
|
-
qrelsPath: '.claude-memory/benchmarks/real-session-qrels.json'
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
expect(markdown).toContain('# Retrieval Replay Benchmark Report');
|
|
173
|
-
expect(markdown).toContain('private-real-session-qrels');
|
|
174
|
-
expect(markdown).toContain('nDCG@1');
|
|
175
|
-
expect(markdown).toContain('Hit@1');
|
|
176
|
-
expect(markdown).toContain('MRR');
|
|
177
|
-
expect(markdown).toContain('.claude-memory/benchmarks/real-session-qrels.json');
|
|
178
|
-
expect(markdown).not.toContain('SECRET');
|
|
179
|
-
expect(markdown).not.toContain('vector search recall regression');
|
|
180
|
-
});
|
|
181
|
-
});
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
import { mkdtempSync, rmSync } from 'node:fs';
|
|
2
|
-
import { tmpdir } from 'node:os';
|
|
3
|
-
import { join } from 'node:path';
|
|
4
|
-
import { afterEach, describe, expect, it } from 'vitest';
|
|
5
|
-
import { RetrievalAnalyticsService } from '../../src/core/engine/retrieval-analytics-service.js';
|
|
6
|
-
import type {
|
|
7
|
-
RetrievalAnalyticsStore,
|
|
8
|
-
RetrievalTrace
|
|
9
|
-
} from '../../src/core/engine/retrieval-analytics-service.js';
|
|
10
|
-
import type { MemoryEvent } from '../../src/core/types.js';
|
|
11
|
-
|
|
12
|
-
const tempDirs: string[] = [];
|
|
13
|
-
|
|
14
|
-
afterEach(() => {
|
|
15
|
-
while (tempDirs.length > 0) {
|
|
16
|
-
const dir = tempDirs.pop();
|
|
17
|
-
if (dir) {
|
|
18
|
-
rmSync(dir, { recursive: true, force: true });
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
function tempStoragePath(): string {
|
|
24
|
-
const dir = mkdtempSync(join(tmpdir(), 'retrieval-analytics-service-'));
|
|
25
|
-
tempDirs.push(dir);
|
|
26
|
-
return dir;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
function event(
|
|
30
|
-
id: string,
|
|
31
|
-
content: string,
|
|
32
|
-
timestamp = new Date('2026-02-24T00:00:00.000Z')
|
|
33
|
-
): MemoryEvent & { access_count?: number; last_accessed_at?: string } {
|
|
34
|
-
return {
|
|
35
|
-
id,
|
|
36
|
-
sessionId: 's1',
|
|
37
|
-
eventType: 'user_prompt',
|
|
38
|
-
content,
|
|
39
|
-
canonicalKey: `test/${id}`,
|
|
40
|
-
dedupeKey: `s1:${id}`,
|
|
41
|
-
timestamp,
|
|
42
|
-
metadata: {}
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function baseStore(overrides: Partial<RetrievalAnalyticsStore> = {}): RetrievalAnalyticsStore {
|
|
47
|
-
return {
|
|
48
|
-
getRetrievalTraceStats: async () => ({
|
|
49
|
-
totalQueries: 0,
|
|
50
|
-
avgCandidateCount: 0,
|
|
51
|
-
avgSelectedCount: 0,
|
|
52
|
-
selectionRate: 0
|
|
53
|
-
}),
|
|
54
|
-
getRecentRetrievalTraces: async (_limit = 50) => [],
|
|
55
|
-
getMostAccessed: async (_limit = 10) => [],
|
|
56
|
-
evaluateSessionHelpfulness: async (_sessionId: string) => {},
|
|
57
|
-
getUnevaluatedSessions: async (_currentSessionId: string, _limit = 5) => [],
|
|
58
|
-
getHelpfulMemories: async (_limit = 10) => [],
|
|
59
|
-
getHelpfulnessStats: async () => ({
|
|
60
|
-
avgScore: 0,
|
|
61
|
-
totalEvaluated: 0,
|
|
62
|
-
totalRetrievals: 0,
|
|
63
|
-
helpful: 0,
|
|
64
|
-
neutral: 0,
|
|
65
|
-
unhelpful: 0
|
|
66
|
-
}),
|
|
67
|
-
...overrides
|
|
68
|
-
};
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
describe('RetrievalAnalyticsService', () => {
|
|
72
|
-
it('maps most-accessed events to dashboard memories and extracts topics from content', async () => {
|
|
73
|
-
let initialized = 0;
|
|
74
|
-
let requestedLimit: number | undefined;
|
|
75
|
-
const accessed = event(
|
|
76
|
-
'e1',
|
|
77
|
-
`${'x'.repeat(205)}\n## Thin Core Architecture\nUse **Retrieval Analytics** to keep MemoryService small.`
|
|
78
|
-
);
|
|
79
|
-
accessed.access_count = 3;
|
|
80
|
-
accessed.last_accessed_at = '2026-02-25T00:00:00.000Z';
|
|
81
|
-
const untouched = event('e2', 'plain content without explicit access metadata');
|
|
82
|
-
|
|
83
|
-
const service = new RetrievalAnalyticsService({
|
|
84
|
-
initialize: async () => { initialized += 1; },
|
|
85
|
-
retrievalStore: baseStore({
|
|
86
|
-
getMostAccessed: async (limit = 10) => {
|
|
87
|
-
requestedLimit = limit;
|
|
88
|
-
return [accessed, untouched];
|
|
89
|
-
}
|
|
90
|
-
})
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
const memories = await service.getMostAccessedMemories(7);
|
|
94
|
-
|
|
95
|
-
expect(initialized).toBe(0);
|
|
96
|
-
expect(requestedLimit).toBe(7);
|
|
97
|
-
expect(memories).toHaveLength(2);
|
|
98
|
-
expect(memories[0]).toMatchObject({
|
|
99
|
-
memoryId: 'e1',
|
|
100
|
-
summary: `${'x'.repeat(200)}...`,
|
|
101
|
-
topics: ['Thin Core Architecture', 'Retrieval Analytics'],
|
|
102
|
-
accessCount: 3,
|
|
103
|
-
lastAccessed: '2026-02-25T00:00:00.000Z',
|
|
104
|
-
confidence: 1.0,
|
|
105
|
-
createdAt: accessed.timestamp
|
|
106
|
-
});
|
|
107
|
-
expect(memories[1]).toMatchObject({
|
|
108
|
-
memoryId: 'e2',
|
|
109
|
-
accessCount: 0,
|
|
110
|
-
lastAccessed: null,
|
|
111
|
-
confidence: 1.0,
|
|
112
|
-
createdAt: untouched.timestamp
|
|
113
|
-
});
|
|
114
|
-
});
|
|
115
|
-
|
|
116
|
-
it('evaluates pending sessions best-effort and ignores individual failures', async () => {
|
|
117
|
-
let initialized = 0;
|
|
118
|
-
const evaluated: string[] = [];
|
|
119
|
-
|
|
120
|
-
const service = new RetrievalAnalyticsService({
|
|
121
|
-
initialize: async () => { initialized += 1; },
|
|
122
|
-
retrievalStore: baseStore({
|
|
123
|
-
getUnevaluatedSessions: async (currentSessionId: string, limit = 5) => {
|
|
124
|
-
expect(currentSessionId).toBe('current-session');
|
|
125
|
-
expect(limit).toBe(5);
|
|
126
|
-
return ['ok-1', 'fails', 'ok-2'];
|
|
127
|
-
},
|
|
128
|
-
evaluateSessionHelpfulness: async (sessionId: string) => {
|
|
129
|
-
if (sessionId === 'fails') {
|
|
130
|
-
throw new Error('transient evaluation failure');
|
|
131
|
-
}
|
|
132
|
-
evaluated.push(sessionId);
|
|
133
|
-
}
|
|
134
|
-
})
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
await service.evaluatePendingSessions('current-session');
|
|
138
|
-
|
|
139
|
-
expect(initialized).toBe(1);
|
|
140
|
-
expect(evaluated).toEqual(['ok-1', 'ok-2']);
|
|
141
|
-
});
|
|
142
|
-
|
|
143
|
-
it('delegates trace and helpfulness read-model methods after initialization', async () => {
|
|
144
|
-
let initialized = 0;
|
|
145
|
-
const traceStats = {
|
|
146
|
-
totalQueries: 12,
|
|
147
|
-
avgCandidateCount: 4,
|
|
148
|
-
avgSelectedCount: 2,
|
|
149
|
-
selectionRate: 0.5
|
|
150
|
-
};
|
|
151
|
-
const traceRows = [{
|
|
152
|
-
traceId: 't1',
|
|
153
|
-
sessionId: 's1',
|
|
154
|
-
projectHash: 'project-hash',
|
|
155
|
-
queryText: 'thin core',
|
|
156
|
-
strategy: 'auto',
|
|
157
|
-
candidateEventIds: ['e1', 'e2'],
|
|
158
|
-
selectedEventIds: ['e1'],
|
|
159
|
-
candidateDetails: [{ eventId: 'e1', score: 0.9 }],
|
|
160
|
-
selectedDetails: [{ eventId: 'e1', score: 0.9 }],
|
|
161
|
-
candidateCount: 2,
|
|
162
|
-
selectedCount: 1,
|
|
163
|
-
confidence: 'high',
|
|
164
|
-
fallbackTrace: ['stage:primary:deep'],
|
|
165
|
-
createdAt: new Date('2026-02-24T01:00:00.000Z')
|
|
166
|
-
}] satisfies RetrievalTrace[];
|
|
167
|
-
const helpfulMemories = [{
|
|
168
|
-
eventId: 'e1',
|
|
169
|
-
summary: 'helpful memory',
|
|
170
|
-
helpfulnessScore: 0.8,
|
|
171
|
-
accessCount: 4,
|
|
172
|
-
evaluationCount: 2
|
|
173
|
-
}];
|
|
174
|
-
const helpfulnessStats = {
|
|
175
|
-
avgScore: 0.75,
|
|
176
|
-
totalEvaluated: 8,
|
|
177
|
-
totalRetrievals: 10,
|
|
178
|
-
helpful: 6,
|
|
179
|
-
neutral: 1,
|
|
180
|
-
unhelpful: 1
|
|
181
|
-
};
|
|
182
|
-
const evaluated: string[] = [];
|
|
183
|
-
|
|
184
|
-
const service = new RetrievalAnalyticsService({
|
|
185
|
-
initialize: async () => { initialized += 1; },
|
|
186
|
-
retrievalStore: baseStore({
|
|
187
|
-
getRetrievalTraceStats: async () => traceStats,
|
|
188
|
-
getRecentRetrievalTraces: async (limit = 50) => {
|
|
189
|
-
expect(limit).toBe(3);
|
|
190
|
-
return traceRows;
|
|
191
|
-
},
|
|
192
|
-
evaluateSessionHelpfulness: async (sessionId: string) => { evaluated.push(sessionId); },
|
|
193
|
-
getHelpfulMemories: async (limit = 10) => {
|
|
194
|
-
expect(limit).toBe(2);
|
|
195
|
-
return helpfulMemories;
|
|
196
|
-
},
|
|
197
|
-
getHelpfulnessStats: async () => helpfulnessStats
|
|
198
|
-
})
|
|
199
|
-
});
|
|
200
|
-
|
|
201
|
-
await expect(service.getRetrievalTraceStats()).resolves.toEqual(traceStats);
|
|
202
|
-
await expect(service.getRecentRetrievalTraces(3)).resolves.toEqual(traceRows);
|
|
203
|
-
await service.evaluateSessionHelpfulness('s1');
|
|
204
|
-
await expect(service.getHelpfulMemories(2)).resolves.toEqual(helpfulMemories);
|
|
205
|
-
await expect(service.getHelpfulnessStats()).resolves.toEqual(helpfulnessStats);
|
|
206
|
-
|
|
207
|
-
expect(evaluated).toEqual(['s1']);
|
|
208
|
-
expect(initialized).toBe(5);
|
|
209
|
-
});
|
|
210
|
-
});
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
2
|
-
|
|
3
|
-
import { computePrecisionRecallAtK, summarizeReplayMetrics } from '../../src/core/retrieval-benchmark.js';
|
|
4
|
-
|
|
5
|
-
describe('retrieval replay benchmark metrics', () => {
|
|
6
|
-
it('computes Precision@k and Recall@k for replay queries', () => {
|
|
7
|
-
const queryMetrics = computePrecisionRecallAtK(
|
|
8
|
-
[
|
|
9
|
-
{ queryId: 'q1', expectedIds: ['a', 'b'], retrievedIds: ['a', 'x', 'b'] },
|
|
10
|
-
{ queryId: 'q2', expectedIds: ['c'], retrievedIds: ['x', 'y', 'z'] }
|
|
11
|
-
],
|
|
12
|
-
[1, 3]
|
|
13
|
-
);
|
|
14
|
-
|
|
15
|
-
expect(queryMetrics[0].at[1]).toMatchObject({ precision: 1, recall: 0.5, hits: 1 });
|
|
16
|
-
expect(queryMetrics[0].at[3]).toMatchObject({ precision: 2 / 3, recall: 1, hits: 2 });
|
|
17
|
-
expect(queryMetrics[0].at[1].ndcg).toBe(1);
|
|
18
|
-
expect(queryMetrics[0].at[3].ndcg).toBeCloseTo(0.91972, 4);
|
|
19
|
-
expect(queryMetrics[1].at[1]).toMatchObject({ precision: 0, recall: 0, hits: 0 });
|
|
20
|
-
expect(queryMetrics[1].at[3]).toMatchObject({ precision: 0, recall: 0, hits: 0 });
|
|
21
|
-
|
|
22
|
-
const summary = summarizeReplayMetrics(queryMetrics, [1, 3]);
|
|
23
|
-
expect(summary).toMatchObject({
|
|
24
|
-
queryCount: 2,
|
|
25
|
-
precisionAtK: { 1: 0.5, 3: 1 / 3 },
|
|
26
|
-
recallAtK: { 1: 0.25, 3: 0.5 }
|
|
27
|
-
});
|
|
28
|
-
expect(summary.ndcgAtK[1]).toBe(0.5);
|
|
29
|
-
expect(summary.ndcgAtK[3]).toBeCloseTo(0.45986, 4);
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
it('computes graded nDCG@k from qrels relevance labels', () => {
|
|
33
|
-
const [queryMetrics] = computePrecisionRecallAtK(
|
|
34
|
-
[
|
|
35
|
-
{
|
|
36
|
-
queryId: 'q-graded',
|
|
37
|
-
expectedIds: ['a', 'b'],
|
|
38
|
-
expectedRelevance: { a: 3, b: 1 },
|
|
39
|
-
retrievedIds: ['b', 'a', 'noise']
|
|
40
|
-
}
|
|
41
|
-
],
|
|
42
|
-
[2]
|
|
43
|
-
);
|
|
44
|
-
|
|
45
|
-
expect(queryMetrics.at[2]).toMatchObject({ precision: 1, recall: 1, hits: 2 });
|
|
46
|
-
expect(queryMetrics.at[2].ndcg).toBeCloseTo(0.70981, 4);
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
it('deduplicates retrieved ids so replay metrics cannot over-count repeated hits', () => {
|
|
50
|
-
const queryMetrics = computePrecisionRecallAtK(
|
|
51
|
-
[
|
|
52
|
-
{ queryId: 'q-duplicate', expectedIds: ['a'], retrievedIds: ['a', 'a', 'a', 'x'] }
|
|
53
|
-
],
|
|
54
|
-
[1, 3]
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
expect(queryMetrics).toEqual([
|
|
58
|
-
{
|
|
59
|
-
queryId: 'q-duplicate',
|
|
60
|
-
at: {
|
|
61
|
-
1: { precision: 1, recall: 1, hits: 1, ndcg: 1 },
|
|
62
|
-
3: { precision: 1 / 3, recall: 1, hits: 1, ndcg: 1 }
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
]);
|
|
66
|
-
expect(queryMetrics[0].at[3].ndcg).toBe(1);
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
it('normalizes k values without losing zero-result replay rows', () => {
|
|
70
|
-
const queryMetrics = computePrecisionRecallAtK(
|
|
71
|
-
[{ queryId: 'q-empty', expectedIds: ['a'], retrievedIds: [] }],
|
|
72
|
-
[3.9, 1, 1, -2]
|
|
73
|
-
);
|
|
74
|
-
|
|
75
|
-
expect(queryMetrics).toEqual([
|
|
76
|
-
{
|
|
77
|
-
queryId: 'q-empty',
|
|
78
|
-
at: {
|
|
79
|
-
0: { precision: 0, recall: 0, hits: 0, ndcg: 0 },
|
|
80
|
-
1: { precision: 0, recall: 0, hits: 0, ndcg: 0 },
|
|
81
|
-
3: { precision: 0, recall: 0, hits: 0, ndcg: 0 }
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
]);
|
|
85
|
-
|
|
86
|
-
expect(summarizeReplayMetrics(queryMetrics, [3.9, 1, 1, -2])).toEqual({
|
|
87
|
-
queryCount: 1,
|
|
88
|
-
precisionAtK: { 0: 0, 1: 0, 3: 0 },
|
|
89
|
-
recallAtK: { 0: 0, 1: 0, 3: 0 },
|
|
90
|
-
ndcgAtK: { 0: 0, 1: 0, 3: 0 }
|
|
91
|
-
});
|
|
92
|
-
});
|
|
93
|
-
});
|