claude-memory-layer 1.0.31 → 1.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (313)
  1. package/README.md +9 -2
  2. package/dist/cli/index.js +1 -1
  3. package/package.json +11 -2
  4. package/scripts/postinstall-embedding-backend.cjs +16 -12
  5. package/AGENTS.md +0 -71
  6. package/CLAUDE.md +0 -30
  7. package/HANDOFF.md +0 -92
  8. package/Memo.txt +0 -558
  9. package/benchmarks/replay/anonymized-real-sessions.json +0 -48
  10. package/config/kpi-thresholds.json +0 -7
  11. package/context.md +0 -636
  12. package/docs/ARCHITECTURE_COMPARISON_AND_RECOMMENDATIONS.md +0 -627
  13. package/docs/HERMES_MEMORY_INGESTION_ANALYSIS.md +0 -440
  14. package/docs/MCP_MEMORY_SERVICE_COMPARATIVE_REVIEW.md +0 -271
  15. package/docs/MEMORY_USEFULNESS_AUDIT.md +0 -371
  16. package/docs/MEMORY_USEFULNESS_AUDIT_RAW.json +0 -80
  17. package/docs/MEMSEARCH_PROJECT_STRUCTURE_ANALYSIS.md +0 -333
  18. package/docs/MEMU_ADOPTION.md +0 -40
  19. package/docs/OPERATIONS.md +0 -18
  20. package/docs/PRODUCT_VALIDATION_MATRIX.md +0 -82
  21. package/docs/PROJECT_STRUCTURE_ANALYSIS.md +0 -421
  22. package/docs/REFACTORING_MILESTONES_AND_ISSUES.md +0 -501
  23. package/docs/REFACTORING_PLAN_THIN_CORE.md +0 -414
  24. package/docs/REFERENCE_PROJECT_ANALYSES.md +0 -25
  25. package/docs/SUPERLOCALMEMORY_PROJECT_STRUCTURE_ANALYSIS.md +0 -452
  26. package/docs/TARGET_ARCHITECTURE_AND_FOLDER_STRUCTURE.md +0 -446
  27. package/docs/architecture/comparison-index.md +0 -47
  28. package/docs/reports/codex-real-data-validation-20260505T040447Z.md +0 -46
  29. package/plan.md +0 -1642
  30. package/scripts/build.ts +0 -159
  31. package/scripts/bump-patch-version.sh +0 -18
  32. package/scripts/delete-unknown-projects.js +0 -154
  33. package/scripts/fix-sync-gap.js +0 -32
  34. package/scripts/generate-session-qrels.ts +0 -126
  35. package/scripts/heartbeat-memory-orchestrator.sh +0 -28
  36. package/scripts/replay-retrieval-benchmark.ts +0 -69
  37. package/scripts/report-sync-gap.js +0 -26
  38. package/scripts/review-queue-auto-resolve.js +0 -21
  39. package/scripts/sync-gap-auto-heal.sh +0 -17
  40. package/spec.md +0 -624
  41. package/specs/20260207-dashboard-upgrade/context.md +0 -38
  42. package/specs/20260207-dashboard-upgrade/spec.md +0 -96
  43. package/specs/citations-system/context.md +0 -243
  44. package/specs/citations-system/plan.md +0 -495
  45. package/specs/citations-system/spec.md +0 -371
  46. package/specs/endless-mode/context.md +0 -305
  47. package/specs/endless-mode/plan.md +0 -620
  48. package/specs/endless-mode/spec.md +0 -455
  49. package/specs/entity-edge-model/context.md +0 -401
  50. package/specs/entity-edge-model/plan.md +0 -459
  51. package/specs/entity-edge-model/spec.md +0 -391
  52. package/specs/evidence-aligner-v2/context.md +0 -401
  53. package/specs/evidence-aligner-v2/plan.md +0 -303
  54. package/specs/evidence-aligner-v2/spec.md +0 -312
  55. package/specs/mcp-desktop-integration/context.md +0 -278
  56. package/specs/mcp-desktop-integration/plan.md +0 -550
  57. package/specs/mcp-desktop-integration/spec.md +0 -494
  58. package/specs/memory-utilization-improvements/context.md +0 -145
  59. package/specs/memory-utilization-improvements/plan.md +0 -361
  60. package/specs/memory-utilization-improvements/spec.md +0 -361
  61. package/specs/post-tool-use-hook/context.md +0 -319
  62. package/specs/post-tool-use-hook/plan.md +0 -469
  63. package/specs/post-tool-use-hook/spec.md +0 -364
  64. package/specs/private-tags/context.md +0 -288
  65. package/specs/private-tags/plan.md +0 -412
  66. package/specs/private-tags/spec.md +0 -345
  67. package/specs/progressive-disclosure/context.md +0 -346
  68. package/specs/progressive-disclosure/plan.md +0 -663
  69. package/specs/progressive-disclosure/spec.md +0 -415
  70. package/specs/selective-tool-observation/context.md +0 -100
  71. package/specs/selective-tool-observation/plan.md +0 -158
  72. package/specs/selective-tool-observation/spec.md +0 -127
  73. package/specs/task-entity-system/context.md +0 -297
  74. package/specs/task-entity-system/plan.md +0 -301
  75. package/specs/task-entity-system/spec.md +0 -314
  76. package/specs/thin-core-refactor/context.md +0 -275
  77. package/specs/thin-core-refactor/plan.md +0 -536
  78. package/specs/thin-core-refactor/spec.md +0 -465
  79. package/specs/vector-outbox-v2/context.md +0 -470
  80. package/specs/vector-outbox-v2/plan.md +0 -562
  81. package/specs/vector-outbox-v2/spec.md +0 -466
  82. package/specs/web-viewer-ui/context.md +0 -384
  83. package/specs/web-viewer-ui/plan.md +0 -797
  84. package/specs/web-viewer-ui/spec.md +0 -516
  85. package/src/adapters/claude/capture/index.ts +0 -3
  86. package/src/adapters/claude/context/index.ts +0 -3
  87. package/src/adapters/claude/hooks/index.ts +0 -21
  88. package/src/adapters/claude/hooks/post-tool-use.ts +0 -239
  89. package/src/adapters/claude/hooks/prompt-injection-policy.ts +0 -104
  90. package/src/adapters/claude/hooks/semantic-daemon-client.ts +0 -209
  91. package/src/adapters/claude/hooks/semantic-daemon.ts +0 -283
  92. package/src/adapters/claude/hooks/session-end.ts +0 -59
  93. package/src/adapters/claude/hooks/session-start.ts +0 -73
  94. package/src/adapters/claude/hooks/stop.ts +0 -128
  95. package/src/adapters/claude/hooks/user-prompt-submit.ts +0 -361
  96. package/src/adapters/claude/index.ts +0 -4
  97. package/src/adapters/claude/transcript/index.ts +0 -4
  98. package/src/adapters/claude/transcript/transcript-reader.ts +0 -57
  99. package/src/adapters/claude/transcript/turn-reconstructor.ts +0 -65
  100. package/src/apps/cli/claude-settings-hooks.ts +0 -138
  101. package/src/apps/cli/codex-import-runner.ts +0 -125
  102. package/src/apps/cli/codex-validation-output.ts +0 -95
  103. package/src/apps/cli/hermes-import-runner.ts +0 -130
  104. package/src/apps/cli/hermes-validation-output.ts +0 -91
  105. package/src/apps/cli/index.ts +0 -1735
  106. package/src/apps/cli/mcp-install.ts +0 -106
  107. package/src/apps/cli/retrieval-disclosure-output.ts +0 -196
  108. package/src/apps/dashboard/assets/js/bootstrap.js +0 -244
  109. package/src/apps/dashboard/assets/js/chat.js +0 -373
  110. package/src/apps/dashboard/assets/js/disclosure.js +0 -232
  111. package/src/apps/dashboard/assets/js/modals.js +0 -298
  112. package/src/apps/dashboard/assets/js/overview.js +0 -655
  113. package/src/apps/dashboard/assets/js/state.js +0 -72
  114. package/src/apps/dashboard/assets/js/views.js +0 -468
  115. package/src/apps/dashboard/index.html +0 -543
  116. package/src/apps/dashboard/index.ts +0 -3
  117. package/src/apps/dashboard/style.css +0 -1750
  118. package/src/apps/index.ts +0 -5
  119. package/src/apps/server/api/chat.ts +0 -244
  120. package/src/apps/server/api/citations.ts +0 -105
  121. package/src/apps/server/api/events.ts +0 -137
  122. package/src/apps/server/api/health.ts +0 -53
  123. package/src/apps/server/api/index.ts +0 -26
  124. package/src/apps/server/api/projects.ts +0 -74
  125. package/src/apps/server/api/search.ts +0 -184
  126. package/src/apps/server/api/sessions.ts +0 -115
  127. package/src/apps/server/api/stats.ts +0 -723
  128. package/src/apps/server/api/turns.ts +0 -143
  129. package/src/apps/server/api/utils.ts +0 -65
  130. package/src/apps/server/index.ts +0 -111
  131. package/src/cli/index.ts +0 -3
  132. package/src/cli/retrieval-disclosure-output.ts +0 -2
  133. package/src/compat/index.ts +0 -5
  134. package/src/core/canonical-key.ts +0 -186
  135. package/src/core/citation-generator.ts +0 -63
  136. package/src/core/consolidated-store.ts +0 -356
  137. package/src/core/consolidation-worker.ts +0 -493
  138. package/src/core/context-formatter.ts +0 -276
  139. package/src/core/continuity-manager.ts +0 -341
  140. package/src/core/db-wrapper.ts +0 -64
  141. package/src/core/derive/fact-deriver.ts +0 -170
  142. package/src/core/derive/index.ts +0 -2
  143. package/src/core/derive/summary-deriver.ts +0 -76
  144. package/src/core/edge-repo.ts +0 -333
  145. package/src/core/embedder.ts +0 -4
  146. package/src/core/engine/embedding-maintenance-service.ts +0 -187
  147. package/src/core/engine/endless-memory-services.ts +0 -4
  148. package/src/core/engine/index.ts +0 -19
  149. package/src/core/engine/memory-engine-services.ts +0 -170
  150. package/src/core/engine/memory-ingest-service.ts +0 -317
  151. package/src/core/engine/memory-query-service.ts +0 -173
  152. package/src/core/engine/memory-runtime-service.ts +0 -162
  153. package/src/core/engine/memory-service-composition.ts +0 -231
  154. package/src/core/engine/retrieval-analytics-service.ts +0 -181
  155. package/src/core/engine/retrieval-disclosure-service.ts +0 -420
  156. package/src/core/engine/retrieval-orchestrator.ts +0 -377
  157. package/src/core/engine/retrieval-services.ts +0 -176
  158. package/src/core/engine/shared-memory-services.ts +0 -4
  159. package/src/core/entity-repo.ts +0 -349
  160. package/src/core/event-store.ts +0 -779
  161. package/src/core/evidence-aligner.ts +0 -635
  162. package/src/core/external-market-context.ts +0 -582
  163. package/src/core/graduation-worker.ts +0 -171
  164. package/src/core/graduation.ts +0 -377
  165. package/src/core/index.ts +0 -64
  166. package/src/core/ingest-interceptor.ts +0 -80
  167. package/src/core/markdown-mirror.ts +0 -70
  168. package/src/core/matcher.ts +0 -208
  169. package/src/core/md-mirror.ts +0 -92
  170. package/src/core/metadata-extractor.ts +0 -203
  171. package/src/core/model/memory-fact.ts +0 -30
  172. package/src/core/model/memory-rule.ts +0 -14
  173. package/src/core/model/memory-summary.ts +0 -21
  174. package/src/core/model/raw-event.ts +0 -28
  175. package/src/core/model/retrieval-result.ts +0 -35
  176. package/src/core/mongo-sync-config.ts +0 -165
  177. package/src/core/mongo-sync-worker.ts +0 -381
  178. package/src/core/privacy/filter.ts +0 -190
  179. package/src/core/privacy/index.ts +0 -20
  180. package/src/core/privacy/tag-parser.ts +0 -145
  181. package/src/core/product-validation-matrix.ts +0 -314
  182. package/src/core/progressive-retriever.ts +0 -414
  183. package/src/core/registry/project-path.ts +0 -54
  184. package/src/core/registry/session-registry.ts +0 -69
  185. package/src/core/replay-evaluator.ts +0 -625
  186. package/src/core/retrieval-benchmark.ts +0 -117
  187. package/src/core/retrieval-quality.ts +0 -109
  188. package/src/core/retriever.ts +0 -800
  189. package/src/core/session-qrels.ts +0 -360
  190. package/src/core/shared-event-store.ts +0 -114
  191. package/src/core/shared-promoter.ts +0 -249
  192. package/src/core/shared-store.ts +0 -289
  193. package/src/core/shared-vector-store.ts +0 -203
  194. package/src/core/sqlite-event-store.ts +0 -1846
  195. package/src/core/sqlite-wrapper.ts +0 -116
  196. package/src/core/sync-worker.ts +0 -228
  197. package/src/core/tag-taxonomy.ts +0 -51
  198. package/src/core/task/blocker-resolver.ts +0 -333
  199. package/src/core/task/index.ts +0 -9
  200. package/src/core/task/task-matcher.ts +0 -240
  201. package/src/core/task/task-projector.ts +0 -358
  202. package/src/core/task/task-resolver.ts +0 -421
  203. package/src/core/turn-state.ts +0 -207
  204. package/src/core/types.ts +0 -952
  205. package/src/core/vector-outbox.ts +0 -299
  206. package/src/core/vector-store.ts +0 -231
  207. package/src/core/vector-worker.ts +0 -521
  208. package/src/core/working-set-store.ts +0 -257
  209. package/src/extensions/endless-memory/endless-memory-services.ts +0 -350
  210. package/src/extensions/endless-memory/index.ts +0 -1
  211. package/src/extensions/index.ts +0 -5
  212. package/src/extensions/mcp/handlers.ts +0 -960
  213. package/src/extensions/mcp/index.ts +0 -48
  214. package/src/extensions/mcp/tools.ts +0 -252
  215. package/src/extensions/shared-memory/index.ts +0 -1
  216. package/src/extensions/shared-memory/shared-memory-services.ts +0 -211
  217. package/src/extensions/vector/embedder.ts +0 -234
  218. package/src/extensions/vector/index.ts +0 -1
  219. package/src/hooks/post-tool-use.ts +0 -9
  220. package/src/hooks/semantic-daemon-client.ts +0 -1
  221. package/src/hooks/semantic-daemon.ts +0 -11
  222. package/src/hooks/session-end.ts +0 -9
  223. package/src/hooks/session-start.ts +0 -9
  224. package/src/hooks/stop.ts +0 -9
  225. package/src/hooks/user-prompt-submit.ts +0 -9
  226. package/src/index.ts +0 -13
  227. package/src/mcp/handlers.ts +0 -2
  228. package/src/mcp/index.ts +0 -4
  229. package/src/mcp/tools.ts +0 -2
  230. package/src/server/api/chat.ts +0 -2
  231. package/src/server/api/citations.ts +0 -2
  232. package/src/server/api/events.ts +0 -2
  233. package/src/server/api/health.ts +0 -2
  234. package/src/server/api/index.ts +0 -2
  235. package/src/server/api/projects.ts +0 -2
  236. package/src/server/api/search.ts +0 -2
  237. package/src/server/api/sessions.ts +0 -2
  238. package/src/server/api/stats.ts +0 -2
  239. package/src/server/api/turns.ts +0 -2
  240. package/src/server/api/utils.ts +0 -2
  241. package/src/server/index.ts +0 -2
  242. package/src/services/bootstrap-organizer.ts +0 -463
  243. package/src/services/codex-session-history-importer.ts +0 -966
  244. package/src/services/hermes-session-history-importer.ts +0 -733
  245. package/src/services/memory-service-config.ts +0 -36
  246. package/src/services/memory-service-registry.ts +0 -150
  247. package/src/services/memory-service.ts +0 -688
  248. package/src/services/session-history-importer.ts +0 -629
  249. package/tests/README.md +0 -23
  250. package/tests/adapters/claude/claude-semantic-daemon-adapter.test.ts +0 -54
  251. package/tests/adapters/claude/claude-transcript-reconstructor.test.ts +0 -98
  252. package/tests/adapters/claude-hook-prompt-injection-policy.test.ts +0 -99
  253. package/tests/apps/app-layer-boundary.test.ts +0 -48
  254. package/tests/apps/claude-settings-hooks.test.ts +0 -107
  255. package/tests/apps/cli-disclosure-output.test.ts +0 -212
  256. package/tests/apps/codex-import-runner.test.ts +0 -99
  257. package/tests/apps/codex-validation-output.test.ts +0 -100
  258. package/tests/apps/hermes-import-runner.test.ts +0 -99
  259. package/tests/apps/mcp-install-command.test.ts +0 -59
  260. package/tests/apps/package-build-entrypoints.test.ts +0 -30
  261. package/tests/apps/postinstall-embedding-backend.test.ts +0 -185
  262. package/tests/apps/search-api-disclosure.test.ts +0 -162
  263. package/tests/apps/stats-api-lightweight.test.ts +0 -67
  264. package/tests/apps/ui-disclosure-output.test.ts +0 -140
  265. package/tests/core/bootstrap-organizer.test.ts +0 -111
  266. package/tests/core/canonical-key.test.ts +0 -101
  267. package/tests/core/codex-session-history-importer-validation.test.ts +0 -185
  268. package/tests/core/consolidation-worker.test.ts +0 -75
  269. package/tests/core/embedding-maintenance-service.test.ts +0 -282
  270. package/tests/core/evidence-aligner.test.ts +0 -152
  271. package/tests/core/external-market-context.test.ts +0 -209
  272. package/tests/core/fact-deriver.test.ts +0 -79
  273. package/tests/core/hermes-session-history-importer-validation.test.ts +0 -609
  274. package/tests/core/ingest-interceptor.test.ts +0 -38
  275. package/tests/core/markdown-mirror.test.ts +0 -85
  276. package/tests/core/matcher.test.ts +0 -112
  277. package/tests/core/md-mirror.test.ts +0 -50
  278. package/tests/core/memory-engine-services.test.ts +0 -240
  279. package/tests/core/memory-ingest-service.test.ts +0 -296
  280. package/tests/core/memory-query-service.test.ts +0 -129
  281. package/tests/core/memory-runtime-service.test.ts +0 -201
  282. package/tests/core/memory-service-composition.test.ts +0 -192
  283. package/tests/core/memory-service-config.test.ts +0 -41
  284. package/tests/core/memory-service-facade.test.ts +0 -30
  285. package/tests/core/memory-service-registry.test.ts +0 -206
  286. package/tests/core/product-validation-matrix.test.ts +0 -61
  287. package/tests/core/project-registry.test.ts +0 -78
  288. package/tests/core/replay-evaluator.test.ts +0 -181
  289. package/tests/core/retrieval-analytics-service.test.ts +0 -210
  290. package/tests/core/retrieval-benchmark.test.ts +0 -93
  291. package/tests/core/retrieval-disclosure-service.test.ts +0 -264
  292. package/tests/core/retrieval-orchestrator.test.ts +0 -403
  293. package/tests/core/retrieval-quality.test.ts +0 -31
  294. package/tests/core/retrieval-services.test.ts +0 -185
  295. package/tests/core/retriever-fallback-chain.test.ts +0 -223
  296. package/tests/core/retriever-strategy-scope.test.ts +0 -164
  297. package/tests/core/retriever.memu-adoption.test.ts +0 -122
  298. package/tests/core/session-history-importer-filter.test.ts +0 -78
  299. package/tests/core/session-qrels.test.ts +0 -250
  300. package/tests/core/sqlite-event-store-replication.test.ts +0 -127
  301. package/tests/core/summary-deriver.test.ts +0 -66
  302. package/tests/extensions/embedder-warning-suppression.test.ts +0 -84
  303. package/tests/extensions/endless-memory-extension-boundary.test.ts +0 -17
  304. package/tests/extensions/endless-memory-services.test.ts +0 -325
  305. package/tests/extensions/mcp-context-tools.test.ts +0 -905
  306. package/tests/extensions/mcp-extension-boundary.test.ts +0 -21
  307. package/tests/extensions/mcp-package-build.test.ts +0 -22
  308. package/tests/extensions/mcp-project-aware-tools.test.ts +0 -102
  309. package/tests/extensions/shared-memory-extension-boundary.test.ts +0 -24
  310. package/tests/extensions/shared-memory-services.test.ts +0 -309
  311. package/tests/extensions/vector-extension-boundary.test.ts +0 -21
  312. package/tsconfig.json +0 -24
  313. package/vitest.config.ts +0 -15
@@ -1,625 +0,0 @@
1
- import {
2
- computePrecisionRecallAtK,
3
- summarizeReplayMetrics,
4
- type ReplayMetricsSummary,
5
- type ReplayQueryMetrics
6
- } from './retrieval-benchmark.js';
7
- import { createRetrievalServices, type RetrieveMemoriesOptions } from './engine/retrieval-services.js';
8
- import { Matcher } from './matcher.js';
9
- import type { Embedder } from './embedder.js';
10
- import type { MatchConfidence, MemoryEvent } from './types.js';
11
- import type { SearchResult, VectorStore } from './vector-store.js';
12
-
13
/** Outcome a replay query is expected to produce: a relevant hit, or no match at all. */
export type ReplayExpectation = 'match' | 'no_match';

/** One benchmark query together with its relevance judgments. */
export interface ReplayEvaluationQuery {
  /** Identifier used to label this query in metrics and reports. */
  queryId: string;
  /** Query text handed to the retrieval runner. */
  query: string;
  /** IDs of the memories judged relevant for this query. */
  expectedIds: string[];
  /**
   * Per-memory relevance values keyed by memory ID; forwarded to `computePrecisionRecallAtK`.
   * NOTE(review): presumably graded relevance for nDCG — confirm in retrieval-benchmark.
   */
  expectedRelevance?: Record<string, number>;
  /** Explicit expectation; the effective value is resolved by `getReplayExpectation`. */
  expectation?: ReplayExpectation;
  /** IDs that must not be retrieved; checked when the query is a `no_match` query. */
  forbiddenIds?: string[];
  /** NOTE(review): not read by the evaluation code in this part of the file — presumably a reference answer. */
  knownAnswer?: string;
}

/** One memory of the fixture corpus that queries are evaluated against. */
export interface ReplayEvaluationMemory {
  /** Memory ID; this is what `expectedIds` / `forbiddenIds` refer to. */
  id: string;
  content: string;
  sourceSessionId?: string;
  sourceTurnIndex?: number;
  timestamp?: string;
  eventType?: MemoryEvent['eventType'];
  canonicalKey?: string;
  metadata?: Record<string, unknown>;
}

/** Optional provenance information about a fixture. */
export interface ReplayEvaluationFixtureMetadata {
  /** Copied into the report's fixture stats when defined. */
  sourceFileCount?: number;
  /** Copied into the report's fixture stats when defined. */
  rawContentIncluded?: boolean;
  generatedAt?: string;
}

/** A complete replay benchmark: corpus, queries and the cutoffs to score at. */
export interface ReplayEvaluationFixture {
  name: string;
  description?: string;
  /** Cutoff values `k` at which precision/recall/nDCG/hit metrics are computed. */
  ks: number[];
  queries: ReplayEvaluationQuery[];
  memories: ReplayEvaluationMemory[];
  metadata?: ReplayEvaluationFixtureMetadata;
}

/** Context passed to a retrieval runner alongside the query text. */
export interface ReplayRetrievalRunInput {
  fixture: ReplayEvaluationFixture;
  query: ReplayEvaluationQuery;
  /** Number of results the evaluation keeps for this query. */
  topK: number;
  /** Retrieval options after the evaluator's defaults and caller overrides were merged. */
  retrievalOptions: Partial<RetrieveMemoriesOptions>;
}

/** What a retrieval runner reports back for a single query. */
export interface ReplayRetrievalRunResult {
  /** Retrieved memory IDs in the runner's order (the evaluator de-duplicates and truncates to `topK`). */
  retrievedIds: string[];
  /** Candidate IDs considered; the evaluator falls back to `retrievedIds` when omitted. */
  candidateIds?: string[];
  /** Match confidence; the evaluator treats a missing value as `'none'`. */
  confidence?: MatchConfidence;
  /** Fallback steps taken; the evaluator treats a missing value as an empty list. */
  fallbackTrace?: string[];
}

/** Function that executes one query against some retrieval backend. */
export type ReplayRetrievalRunner = (
  query: string,
  input: ReplayRetrievalRunInput
) => Promise<ReplayRetrievalRunResult>;

/** Tuning knobs for `evaluateReplayFixture`. */
export interface ReplayEvaluationOptions {
  /** Timestamp recorded in the report; defaults to the current time as an ISO string. */
  generatedAt?: string;
  /** When `false`, the report's `perQuery` list is left empty. */
  includePerQuery?: boolean;
  /** Evaluator label recorded in the report; defaults to `'retriever-pipeline-v1'`. */
  evaluator?: string;
  /** Requested result count; the effective value is never below the largest positive entry of `fixture.ks`. */
  topK?: number;
  /** Overrides merged on top of the evaluator's default retrieval options. */
  retrievalOptions?: Partial<RetrieveMemoriesOptions>;
  /** Custom runner; defaults to `createReplayRetrievalRunner(fixture)`. */
  retrievalRunner?: ReplayRetrievalRunner;
}

/** Size information about the evaluated fixture. */
export interface ReplayFixtureStats {
  queryCount: number;
  memoryCount: number;
  ks: number[];
  sourceFileCount?: number;
  rawContentIncluded?: boolean;
}

/** A query that did not meet its expectation. */
export interface ReplayFailedQuery {
  queryId: string;
  expectedIds: string[];
  retrievedIds: string[];
  expectation?: ReplayExpectation;
  /**
   * `'missing_expected'`: a `match` query with expected IDs whose reciprocal rank is 0.
   * `'unexpected_match'`: a `no_match` query whose no-match condition was not satisfied.
   */
  reason?: 'missing_expected' | 'unexpected_match';
}

/** Aggregate metrics over all queries of a fixture. */
export interface ReplayEvaluationSummary extends ReplayMetricsSummary {
  /** Number of queries whose resolved expectation is `'match'`. */
  positiveQueryCount: number;
  /** Number of queries whose resolved expectation is `'no_match'`. */
  noMatchQueryCount: number;
  /** Number of `no_match` queries whose no-match condition was satisfied. */
  noMatchCorrect: number;
  /** `noMatchCorrect / noMatchQueryCount`; 0 when there are no `no_match` queries. */
  noMatchAccuracy: number;
  /** Total number of forbidden IDs retrieved across `no_match` queries. */
  forbiddenHitCount: number;
  /** Per cutoff `k`: average over positive queries of "at least one hit at k" (1 or 0). */
  hitAtK: Record<number, number>;
  /** Average reciprocal rank over positive queries. */
  mrr: number;
  failedQueryCount: number;
  failedQueries: ReplayFailedQuery[];
}

/** Per-query metrics extended with the retrieval details they were computed from. */
export interface ReplayEvaluationQueryMetrics extends ReplayQueryMetrics {
  /** De-duplicated retrieved IDs, truncated to the effective `topK`. */
  retrievedIds: string[];
  /** De-duplicated candidate IDs. */
  candidateIds: string[];
  confidence: MatchConfidence;
  fallbackTrace: string[];
  /** Reciprocal rank for `match` queries; always 0 for other queries. */
  reciprocalRank: number;
  /** Set (to `'no_match'`) only for `no_match` queries. */
  expectation?: ReplayExpectation;
  /** Forbidden IDs that were retrieved; set only for `no_match` queries. */
  forbiddenHitIds?: string[];
  /** True when no forbidden ID was retrieved and confidence is `'none'`; set only for `no_match` queries. */
  noMatchSatisfied?: boolean;
}

/** Full result of evaluating one fixture. */
export interface ReplayEvaluationReport {
  name: string;
  description?: string;
  evaluator: string;
  generatedAt: string;
  fixtureStats: ReplayFixtureStats;
  summary: ReplayEvaluationSummary;
  /** Per-query metrics; empty when `includePerQuery` was `false`. */
  perQuery: ReplayEvaluationQueryMetrics[];
}

/** Options for rendering a report as Markdown. */
export interface ReplayEvaluationMarkdownOptions {
  /** When set, shown in the report header as the qrels location. */
  qrelsPath?: string;
}
131
-
132
/**
 * Runs every query of a replay fixture through a retrieval runner and scores the results.
 *
 * @param fixture Corpus, queries and cutoff values to evaluate.
 * @param options Optional overrides (runner, retrieval options, report labels).
 * @returns A report with fixture stats, aggregate summary and per-query metrics
 *          (the per-query list is empty when `options.includePerQuery` is `false`).
 */
export async function evaluateReplayFixture(
  fixture: ReplayEvaluationFixture,
  options: ReplayEvaluationOptions = {}
): Promise<ReplayEvaluationReport> {
  const topK = determineTopK(fixture, options.topK);
  // Caller overrides win over the defaults, but `topK` is always the computed cutoff.
  const retrievalOptions: Partial<RetrieveMemoriesOptions> = {
    strategy: 'auto',
    minScore: 0.1,
    includeShared: false,
    adaptiveRerank: false,
    ...options.retrievalOptions,
    topK
  };
  const runner = options.retrievalRunner ?? createReplayRetrievalRunner(fixture);

  // Execute one query and normalize the runner's answer (dedupe, truncate, fill defaults).
  const executeQuery = async (query: ReplayEvaluationQuery) => {
    const outcome = await runner(query.query, { fixture, query, topK, retrievalOptions });
    return {
      query,
      retrievedIds: uniqueIds(outcome.retrievedIds).slice(0, topK),
      candidateIds: uniqueIds(outcome.candidateIds ?? outcome.retrievedIds),
      confidence: outcome.confidence ?? 'none',
      fallbackTrace: outcome.fallbackTrace ?? []
    };
  };
  // All queries are started together; results keep the order of `fixture.queries`.
  const runs = await Promise.all(fixture.queries.map((query) => executeQuery(query)));

  const baseMetrics = computePrecisionRecallAtK(
    runs.map(({ query, retrievedIds }) => ({
      queryId: query.queryId,
      expectedIds: query.expectedIds,
      expectedRelevance: query.expectedRelevance,
      retrievedIds
    })),
    fixture.ks
  );

  // Metrics are paired with runs by position.
  const perQuery: ReplayEvaluationQueryMetrics[] = baseMetrics.map((metric, index) => {
    const run = runs[index];
    const expectation = getReplayExpectation(run.query);
    const entry: ReplayEvaluationQueryMetrics = {
      ...metric,
      retrievedIds: run.retrievedIds,
      candidateIds: run.candidateIds,
      confidence: run.confidence,
      fallbackTrace: run.fallbackTrace,
      // Reciprocal rank is only meaningful for queries that expect a match.
      reciprocalRank: expectation === 'match' ? reciprocalRank(run.retrievedIds, run.query.expectedIds) : 0
    };

    if (expectation !== 'no_match') {
      return entry;
    }

    // A no-match query passes only if nothing forbidden came back AND the matcher reported no confidence.
    const forbiddenHitIds = findForbiddenHitIds(run.retrievedIds, run.query.forbiddenIds ?? []);
    entry.expectation = 'no_match';
    entry.forbiddenHitIds = forbiddenHitIds;
    entry.noMatchSatisfied = forbiddenHitIds.length === 0 && run.confidence === 'none';
    return entry;
  });

  const fixtureStats: ReplayFixtureStats = {
    queryCount: fixture.queries.length,
    memoryCount: fixture.memories.length,
    ks: fixture.ks
  };
  // Optional metadata is only copied when present so absent keys stay absent in the report.
  const metadata = fixture.metadata;
  if (metadata?.sourceFileCount !== undefined) {
    fixtureStats.sourceFileCount = metadata.sourceFileCount;
  }
  if (metadata?.rawContentIncluded !== undefined) {
    fixtureStats.rawContentIncluded = metadata.rawContentIncluded;
  }

  const report: ReplayEvaluationReport = {
    name: fixture.name,
    evaluator: options.evaluator ?? 'retriever-pipeline-v1',
    generatedAt: options.generatedAt ?? new Date().toISOString(),
    fixtureStats,
    // The summary always uses the full per-query list, even when it is omitted from the report.
    summary: summarizeEvaluationMetrics(perQuery, fixture.queries, fixture.ks),
    perQuery: options.includePerQuery === false ? [] : perQuery
  };

  if (fixture.description !== undefined) {
    report.description = fixture.description;
  }

  return report;
}
224
-
225
/**
 * Builds the default retrieval runner for a fixture: the retrieval services are wired to
 * replay-specific event store, vector store and embedder instances created from the
 * fixture's memories, with no project hash and no shared store.
 *
 * @param fixture Fixture whose memories back the replay stores.
 * @returns A runner that retrieves memories for a query and reports IDs, confidence and fallback trace.
 */
export function createReplayRetrievalRunner(
  fixture: ReplayEvaluationFixture
): ReplayRetrievalRunner {
  type RetrievalServiceDeps = Parameters<typeof createRetrievalServices>[0];

  const eventStore = new ReplayEventStore(fixture.memories);
  const vectorStore = new ReplayVectorStore(eventStore.events);
  const embedder = new ReplayEmbedder();
  // The replay doubles are cast to the dependency types expected by the service factory.
  const services = createRetrievalServices({
    initialize: async () => undefined,
    eventStore: eventStore as unknown as RetrievalServiceDeps['eventStore'],
    vectorStore: vectorStore as unknown as VectorStore,
    embedder: embedder as unknown as Embedder,
    matcher: new Matcher(),
    getProjectHash: () => null,
    hasSharedStore: () => false
  });

  return async (query, input) => {
    // `topK` and `includeShared` are pinned here regardless of what the caller's options contain.
    const retrieval = await services.retrievalOrchestrator.retrieveMemories(query, {
      ...input.retrievalOptions,
      topK: input.topK,
      includeShared: false
    });

    // Prefer the candidate debug entries; fall back to the selected ones, then to nothing.
    const debugEntries = retrieval.candidateDebug ?? retrieval.selectedDebug ?? [];

    return {
      retrievedIds: retrieval.memories.map((memory) => memory.event.id),
      candidateIds: debugEntries.map((detail) => detail.eventId),
      confidence: retrieval.matchResult.confidence,
      fallbackTrace: retrieval.fallbackTrace ?? []
    };
  };
}
257
-
258
/**
 * Renders an evaluation report as a Markdown document: header bullets, a per-cutoff
 * summary table, a key-metrics table, and (when non-empty) failed-query and per-query tables.
 *
 * @param report Report produced by the replay evaluation.
 * @param options Rendering options; `qrelsPath` adds a "Qrels" header bullet.
 * @returns The Markdown text, lines joined with `\n` and ending with a trailing newline.
 */
export function formatReplayEvaluationMarkdown(
  report: ReplayEvaluationReport,
  options: ReplayEvaluationMarkdownOptions = {}
): string {
  const { summary, fixtureStats } = report;
  // Cutoffs come from the keys of the precision table, in ascending order.
  const cutoffs = sortedKValues(summary);
  const out: string[] = [];

  // --- Header ---
  out.push('# Retrieval Replay Benchmark Report', '');
  out.push(`- Fixture: ${escapeMarkdownCell(report.name)}`);
  if (report.description) {
    out.push(`- Description: ${escapeMarkdownCell(report.description)}`);
  }
  if (options.qrelsPath) {
    out.push(`- Qrels: \`${options.qrelsPath}\``);
  }
  out.push(
    `- Evaluator: \`${report.evaluator}\``,
    `- Generated at: ${report.generatedAt}`,
    `- Queries: ${fixtureStats.queryCount}`,
    `- Memories: ${fixtureStats.memoryCount}`
  );
  if (fixtureStats.sourceFileCount !== undefined) {
    out.push(`- Source files: ${fixtureStats.sourceFileCount}`);
  }
  if (fixtureStats.rawContentIncluded !== undefined) {
    out.push(`- Raw content in evaluated fixture: ${fixtureStats.rawContentIncluded ? 'yes' : 'no'}`);
  }

  // --- Summary table: one row per cutoff ---
  out.push(
    '',
    '## Summary',
    '',
    '| k | Precision@k | Recall@k | nDCG@k | Hit@k |',
    '|---:|---:|---:|---:|---:|'
  );
  for (const k of cutoffs) {
    out.push(
      `| ${k} | ${formatMetric(summary.precisionAtK[k] ?? 0)} | ${formatMetric(summary.recallAtK[k] ?? 0)} | ${formatMetric(summary.ndcgAtK[k] ?? 0)} | ${formatMetric(summary.hitAtK[k] ?? 0)} |`
    );
  }

  // --- Key metrics table ---
  out.push(
    '',
    '## Key metrics',
    '',
    '| Metric | Value |',
    '|---|---:|',
    `| Positive queries | ${summary.positiveQueryCount} |`,
    `| No-match queries | ${summary.noMatchQueryCount} |`,
    `| No-match accuracy | ${formatMetric(summary.noMatchAccuracy)} |`,
    `| Forbidden hits | ${summary.forbiddenHitCount} |`,
    `| MRR | ${formatMetric(summary.mrr)} |`,
    `| Failed queries | ${summary.failedQueryCount} |`
  );
  for (const k of cutoffs) {
    out.push(
      `| Precision@${k} | ${formatMetric(summary.precisionAtK[k] ?? 0)} |`,
      `| Recall@${k} | ${formatMetric(summary.recallAtK[k] ?? 0)} |`,
      `| nDCG@${k} | ${formatMetric(summary.ndcgAtK[k] ?? 0)} |`,
      `| Hit@${k} | ${formatMetric(summary.hitAtK[k] ?? 0)} |`
    );
  }

  // --- Failed queries (IDs only) ---
  if (summary.failedQueries.length > 0) {
    out.push('', '## Failed queries', '', '| queryId | expectedIds | retrievedIds |', '|---|---|---|');
    for (const failed of summary.failedQueries) {
      out.push(
        `| ${escapeMarkdownCell(failed.queryId)} | ${escapeMarkdownCell(failed.expectedIds.join(', '))} | ${escapeMarkdownCell(failed.retrievedIds.join(', '))} |`
      );
    }
  }

  // --- Per-query metrics: one row per (query, cutoff) pair ---
  if (report.perQuery.length > 0) {
    out.push(
      '',
      '## Per-query metrics',
      '',
      '| queryId | k | hits | Precision@k | Recall@k | nDCG@k | RR | confidence |',
      '|---|---:|---:|---:|---:|---:|---:|---|'
    );

    for (const query of report.perQuery) {
      const queryCutoffs = Object.keys(query.at).map(Number).sort((a, b) => a - b);
      for (const k of queryCutoffs) {
        const metric = query.at[k];
        if (metric) {
          out.push(
            `| ${escapeMarkdownCell(query.queryId)} | ${k} | ${metric.hits} | ${formatMetric(metric.precision)} | ${formatMetric(metric.recall)} | ${formatMetric(metric.ndcg)} | ${formatMetric(query.reciprocalRank)} | ${query.confidence} |`
          );
        }
      }
    }
  }

  out.push('', '> Report intentionally omits raw query and memory text.', '');
  return out.join('\n');
}
345
-
346
/**
 * Aggregates per-query metrics into the report summary.
 *
 * Precision/recall/nDCG aggregates, hit@k and MRR are computed over `match` queries only;
 * `no_match` queries contribute to the no-match accuracy and forbidden-hit counters.
 *
 * @param perQuery Per-query metrics, positionally aligned with `queries`.
 * @param queries The fixture queries the metrics were computed for.
 * @param ks Cutoff values to summarize at.
 * @returns The aggregate summary, including the list of failed queries
 *          (missing-expected failures first, then unexpected-match failures).
 */
function summarizeEvaluationMetrics(
  perQuery: ReplayEvaluationQueryMetrics[],
  queries: ReplayEvaluationQuery[],
  ks: number[]
): ReplayEvaluationSummary {
  const positiveMetrics: ReplayEvaluationQueryMetrics[] = [];
  const positiveFailures: ReplayFailedQuery[] = [];
  const noMatchFailures: ReplayFailedQuery[] = [];
  let noMatchQueryCount = 0;
  let noMatchCorrect = 0;
  let forbiddenHitCount = 0;

  // Single pass: split queries by expectation and collect failures for each kind.
  perQuery.forEach((metric, index) => {
    const query = queries[index];
    const expectation = getReplayExpectation(query);

    if (expectation === 'match') {
      positiveMetrics.push(metric);
      // A positive query fails when it has expected IDs but none of them was retrieved.
      if (query.expectedIds.length > 0 && metric.reciprocalRank === 0) {
        positiveFailures.push({
          queryId: query.queryId,
          expectedIds: [...query.expectedIds],
          retrievedIds: [...metric.retrievedIds],
          expectation: 'match',
          reason: 'missing_expected'
        });
      }
    } else if (expectation === 'no_match') {
      noMatchQueryCount += 1;
      forbiddenHitCount += metric.forbiddenHitIds?.length ?? 0;
      if (metric.noMatchSatisfied === true) {
        noMatchCorrect += 1;
      } else {
        noMatchFailures.push({
          queryId: query.queryId,
          expectedIds: [],
          retrievedIds: [...metric.retrievedIds],
          expectation: 'no_match',
          reason: 'unexpected_match'
        });
      }
    }
  });

  const base = summarizeReplayMetrics(positiveMetrics, ks);

  // hit@k: share of positive queries with at least one hit at cutoff k.
  const hitAtK: Record<number, number> = {};
  for (const k of normalizeKs(ks)) {
    hitAtK[k] = average(positiveMetrics.map((metric) => (metric.at[k]?.hits ?? 0) > 0 ? 1 : 0));
  }

  const failedQueries = [...positiveFailures, ...noMatchFailures];

  return {
    ...base,
    // Overrides the base count so it covers all queries, not just the positive ones.
    queryCount: perQuery.length,
    positiveQueryCount: positiveMetrics.length,
    noMatchQueryCount,
    noMatchCorrect,
    // Reported as 0 (not undefined/NaN) when the fixture has no no-match queries.
    noMatchAccuracy: noMatchQueryCount === 0 ? 0 : noMatchCorrect / noMatchQueryCount,
    forbiddenHitCount,
    hitAtK,
    mrr: average(positiveMetrics.map((metric) => metric.reciprocalRank)),
    failedQueryCount: failedQueries.length,
    failedQueries
  };
}
407
-
408
- function determineTopK(fixture: ReplayEvaluationFixture, optionTopK?: number): number {
409
- return Math.max(1, optionTopK ?? 0, ...fixture.ks.map((k) => Math.floor(k)).filter((k) => k > 0));
410
- }
411
-
412
- function sortedKValues(summary: ReplayMetricsSummary): number[] {
413
- return Object.keys(summary.precisionAtK).map(Number).sort((a, b) => a - b);
414
- }
415
-
416
- function normalizeKs(ks: number[]): number[] {
417
- const seen = new Set<number>();
418
- const normalized: number[] = [];
419
- for (const rawK of ks) {
420
- const k = Math.max(0, Math.floor(rawK));
421
- if (seen.has(k)) continue;
422
- seen.add(k);
423
- normalized.push(k);
424
- }
425
- return normalized.sort((a, b) => a - b);
426
- }
427
-
428
- function uniqueIds(ids: string[]): string[] {
429
- const seen = new Set<string>();
430
- const unique: string[] = [];
431
- for (const id of ids) {
432
- if (seen.has(id)) continue;
433
- seen.add(id);
434
- unique.push(id);
435
- }
436
- return unique;
437
- }
438
-
439
- function reciprocalRank(retrievedIds: string[], expectedIds: string[]): number {
440
- const expected = new Set(expectedIds);
441
- if (expected.size === 0) return 0;
442
- const index = retrievedIds.findIndex((id) => expected.has(id));
443
- return index === -1 ? 0 : 1 / (index + 1);
444
- }
445
-
446
- function getReplayExpectation(query: ReplayEvaluationQuery | undefined): ReplayExpectation {
447
- if (!query) return 'match';
448
- return query.expectation ?? (query.expectedIds.length === 0 ? 'no_match' : 'match');
449
- }
450
-
451
- function findForbiddenHitIds(retrievedIds: string[], forbiddenIds: string[]): string[] {
452
- if (forbiddenIds.length === 0) return [];
453
- const forbidden = new Set(forbiddenIds);
454
- return uniqueIds(retrievedIds.filter((id) => forbidden.has(id)));
455
- }
456
-
457
- function tokenize(text: string): string[] {
458
- return text
459
- .toLowerCase()
460
- .replace(/[^\p{L}\p{N}\s_.:-]/gu, ' ')
461
- .split(/\s+/)
462
- .flatMap((token) => token.split(/(?=[._:-])|(?<=[._:-])/g))
463
- .map((token) => token.replace(/^[._:-]+|[._:-]+$/g, ''))
464
- .filter((token) => token.length >= 2)
465
- .slice(0, 128);
466
- }
467
-
468
- function keywordScore(queryTokens: string[], content: string): number {
469
- if (queryTokens.length === 0) return 0;
470
- const contentTokens = new Set(tokenize(content));
471
- const hits = queryTokens.filter((token) => contentTokens.has(token)).length;
472
- return hits / queryTokens.length;
473
- }
474
-
475
- function vectorize(text: string, dimensions = 64): number[] {
476
- const vector = new Array<number>(dimensions).fill(0);
477
- for (const token of tokenize(text)) {
478
- let hash = 2166136261;
479
- for (let i = 0; i < token.length; i += 1) {
480
- hash ^= token.charCodeAt(i);
481
- hash = Math.imul(hash, 16777619);
482
- }
483
- vector[Math.abs(hash) % dimensions] += 1;
484
- }
485
- const norm = Math.sqrt(vector.reduce((sum, value) => sum + value * value, 0)) || 1;
486
- return vector.map((value) => value / norm);
487
- }
488
-
489
- function cosine(a: number[], b: number[]): number {
490
- const length = Math.min(a.length, b.length);
491
- let dot = 0;
492
- for (let i = 0; i < length; i += 1) dot += a[i] * b[i];
493
- return Math.max(0, Math.min(1, dot));
494
- }
495
-
496
- function formatMetric(value: number): string {
497
- return value.toFixed(4).replace(/\.0+$/, '').replace(/(\.\d*?)0+$/, '$1');
498
- }
499
-
500
- function escapeMarkdownCell(value: string): string {
501
- return value.replace(/\|/g, '\\|').replace(/\n/g, ' ');
502
- }
503
-
504
- function average(values: number[]): number {
505
- if (values.length === 0) return 0;
506
- return values.reduce((sum, value) => sum + value, 0) / values.length;
507
- }
508
-
509
- class ReplayEventStore {
510
- readonly events: MemoryEvent[];
511
- private readonly byId: Map<string, MemoryEvent>;
512
-
513
- constructor(memories: ReplayEvaluationMemory[]) {
514
- this.events = memories.map((memory, index) => replayMemoryToEvent(memory, index));
515
- this.byId = new Map(this.events.map((event) => [event.id, event]));
516
- }
517
-
518
- async keywordSearch(query: string, limit = 10): Promise<Array<{ event: MemoryEvent; rank: number }>> {
519
- const queryTokens = tokenize(query);
520
- return this.events
521
- .map((event) => ({ event, score: keywordScore(queryTokens, event.content) }))
522
- .filter((row) => row.score > 0)
523
- .sort((a, b) => b.score - a.score || a.event.id.localeCompare(b.event.id))
524
- .slice(0, limit)
525
- .map((row, index) => ({ event: row.event, rank: -row.score - index / 1000 }));
526
- }
527
-
528
- async getRecentEvents(limit = 100): Promise<MemoryEvent[]> {
529
- return [...this.events]
530
- .sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime() || a.id.localeCompare(b.id))
531
- .slice(0, limit);
532
- }
533
-
534
- async getEvent(id: string): Promise<MemoryEvent | null> {
535
- return this.byId.get(id) ?? null;
536
- }
537
-
538
- async getSessionEvents(sessionId: string): Promise<MemoryEvent[]> {
539
- return this.events
540
- .filter((event) => event.sessionId === sessionId)
541
- .sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime() || a.id.localeCompare(b.id));
542
- }
543
-
544
- async getHelpfulnessStats(): Promise<{ avgScore: number; totalEvaluated: number; totalRetrievals: number; helpful: number; neutral: number; unhelpful: number }> {
545
- return { avgScore: 0, totalEvaluated: 0, totalRetrievals: 0, helpful: 0, neutral: 0, unhelpful: 0 };
546
- }
547
-
548
- async recordRetrievalTrace(): Promise<void> {
549
- return undefined;
550
- }
551
-
552
- async incrementAccessCount(): Promise<void> {
553
- return undefined;
554
- }
555
-
556
- async recordRetrieval(): Promise<void> {
557
- return undefined;
558
- }
559
- }
560
-
561
- class ReplayVectorStore {
562
- private readonly rows: Array<SearchResult & { vector: number[] }>;
563
-
564
- constructor(events: MemoryEvent[]) {
565
- this.rows = events.map((event) => ({
566
- id: `replay-vector-${event.id}`,
567
- eventId: event.id,
568
- content: event.content,
569
- score: 0,
570
- sessionId: event.sessionId,
571
- eventType: event.eventType,
572
- timestamp: event.timestamp.toISOString(),
573
- vector: vectorize(event.content)
574
- }));
575
- }
576
-
577
- async search(queryVector: number[], options: { limit?: number; minScore?: number; sessionId?: string } = {}): Promise<SearchResult[]> {
578
- const limit = options.limit ?? 5;
579
- const minScore = options.minScore ?? 0;
580
- return this.rows
581
- .filter((row) => !options.sessionId || row.sessionId === options.sessionId)
582
- .map((row) => ({ ...row, score: cosine(queryVector, row.vector) }))
583
- .filter((row) => row.score >= minScore)
584
- .sort((a, b) => b.score - a.score || a.eventId.localeCompare(b.eventId))
585
- .slice(0, limit)
586
- .map((row) => ({
587
- id: row.id,
588
- eventId: row.eventId,
589
- content: row.content,
590
- score: row.score,
591
- sessionId: row.sessionId,
592
- eventType: row.eventType,
593
- timestamp: row.timestamp
594
- }));
595
- }
596
-
597
- async count(): Promise<number> {
598
- return this.rows.length;
599
- }
600
- }
601
-
602
- class ReplayEmbedder {
603
- async embed(text: string): Promise<{ vector: number[]; model: string; dimensions: number }> {
604
- const vector = vectorize(text);
605
- return { vector, model: 'deterministic-replay-hash', dimensions: vector.length };
606
- }
607
- }
608
-
609
- function replayMemoryToEvent(memory: ReplayEvaluationMemory, index: number): MemoryEvent {
610
- const sessionId = memory.sourceSessionId ?? 'replay-fixture';
611
- const timestamp = memory.timestamp
612
- ? new Date(memory.timestamp)
613
- : new Date(Date.UTC(2026, 0, 1, 0, 0, index));
614
-
615
- return {
616
- id: memory.id,
617
- sessionId,
618
- eventType: memory.eventType ?? 'agent_response',
619
- content: memory.content,
620
- canonicalKey: memory.canonicalKey ?? `replay/${memory.id}`,
621
- dedupeKey: `replay:${sessionId}:${memory.id}`,
622
- timestamp,
623
- metadata: memory.metadata ?? {}
624
- };
625
- }