causantic 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. package/README.md +70 -56
  2. package/dist/cli/skill-templates.d.ts.map +1 -1
  3. package/dist/cli/skill-templates.js +23 -18
  4. package/dist/cli/skill-templates.js.map +1 -1
  5. package/dist/clusters/cluster-manager.d.ts +16 -0
  6. package/dist/clusters/cluster-manager.d.ts.map +1 -1
  7. package/dist/clusters/cluster-manager.js +119 -1
  8. package/dist/clusters/cluster-manager.js.map +1 -1
  9. package/dist/config/loader.d.ts +16 -0
  10. package/dist/config/loader.d.ts.map +1 -1
  11. package/dist/config/loader.js +51 -0
  12. package/dist/config/loader.js.map +1 -1
  13. package/dist/config/memory-config.d.ts +26 -0
  14. package/dist/config/memory-config.d.ts.map +1 -1
  15. package/dist/config/memory-config.js +22 -0
  16. package/dist/config/memory-config.js.map +1 -1
  17. package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts +20 -0
  18. package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts.map +1 -0
  19. package/dist/eval/experiments/embedding-model-comparison/run-experiment.js +289 -0
  20. package/dist/eval/experiments/embedding-model-comparison/run-experiment.js.map +1 -0
  21. package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts +53 -0
  22. package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts.map +1 -0
  23. package/dist/eval/experiments/index-differentiation/alignment-analysis.js +91 -0
  24. package/dist/eval/experiments/index-differentiation/alignment-analysis.js.map +1 -0
  25. package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts +24 -0
  26. package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts.map +1 -0
  27. package/dist/eval/experiments/index-differentiation/discrimination-test.js +79 -0
  28. package/dist/eval/experiments/index-differentiation/discrimination-test.js.map +1 -0
  29. package/dist/eval/experiments/index-differentiation/index.d.ts +11 -0
  30. package/dist/eval/experiments/index-differentiation/index.d.ts.map +1 -0
  31. package/dist/eval/experiments/index-differentiation/index.js +8 -0
  32. package/dist/eval/experiments/index-differentiation/index.js.map +1 -0
  33. package/dist/eval/experiments/index-differentiation/refinement-test.d.ts +32 -0
  34. package/dist/eval/experiments/index-differentiation/refinement-test.d.ts.map +1 -0
  35. package/dist/eval/experiments/index-differentiation/refinement-test.js +203 -0
  36. package/dist/eval/experiments/index-differentiation/refinement-test.js.map +1 -0
  37. package/dist/eval/experiments/index-differentiation/run-experiment.d.ts +20 -0
  38. package/dist/eval/experiments/index-differentiation/run-experiment.d.ts.map +1 -0
  39. package/dist/eval/experiments/index-differentiation/run-experiment.js +338 -0
  40. package/dist/eval/experiments/index-differentiation/run-experiment.js.map +1 -0
  41. package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts +31 -0
  42. package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts.map +1 -0
  43. package/dist/eval/experiments/index-differentiation/similarity-analysis.js +60 -0
  44. package/dist/eval/experiments/index-differentiation/similarity-analysis.js.map +1 -0
  45. package/dist/eval/experiments/index-differentiation/types.d.ts +114 -0
  46. package/dist/eval/experiments/index-differentiation/types.d.ts.map +1 -0
  47. package/dist/eval/experiments/index-differentiation/types.js +8 -0
  48. package/dist/eval/experiments/index-differentiation/types.js.map +1 -0
  49. package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts +19 -0
  50. package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts.map +1 -0
  51. package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js +328 -0
  52. package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js.map +1 -0
  53. package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts +27 -0
  54. package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts.map +1 -0
  55. package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js +154 -0
  56. package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js.map +1 -0
  57. package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts +23 -0
  58. package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts.map +1 -0
  59. package/dist/eval/experiments/index-vs-chunk/query-generator.js +113 -0
  60. package/dist/eval/experiments/index-vs-chunk/query-generator.js.map +1 -0
  61. package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts +17 -0
  62. package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts.map +1 -0
  63. package/dist/eval/experiments/index-vs-chunk/run-experiment.js +341 -0
  64. package/dist/eval/experiments/index-vs-chunk/run-experiment.js.map +1 -0
  65. package/dist/eval/experiments/index-vs-chunk/types.d.ts +71 -0
  66. package/dist/eval/experiments/index-vs-chunk/types.d.ts.map +1 -0
  67. package/dist/eval/experiments/index-vs-chunk/types.js +8 -0
  68. package/dist/eval/experiments/index-vs-chunk/types.js.map +1 -0
  69. package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts +18 -0
  70. package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts.map +1 -0
  71. package/dist/eval/experiments/pipeline-dropout/run-experiment.js +347 -0
  72. package/dist/eval/experiments/pipeline-dropout/run-experiment.js.map +1 -0
  73. package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts +17 -0
  74. package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts.map +1 -0
  75. package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js +247 -0
  76. package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js.map +1 -0
  77. package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts +18 -0
  78. package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts.map +1 -0
  79. package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js +443 -0
  80. package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js.map +1 -0
  81. package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts +16 -0
  82. package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts.map +1 -0
  83. package/dist/eval/experiments/rescorer-ceiling/run-experiment.js +226 -0
  84. package/dist/eval/experiments/rescorer-ceiling/run-experiment.js.map +1 -0
  85. package/dist/index-entries/index-generator.d.ts +74 -0
  86. package/dist/index-entries/index-generator.d.ts.map +1 -0
  87. package/dist/index-entries/index-generator.js +323 -0
  88. package/dist/index-entries/index-generator.js.map +1 -0
  89. package/dist/index-entries/index-refresher.d.ts +54 -0
  90. package/dist/index-entries/index-refresher.d.ts.map +1 -0
  91. package/dist/index-entries/index-refresher.js +203 -0
  92. package/dist/index-entries/index-refresher.js.map +1 -0
  93. package/dist/index-entries/index.d.ts +6 -0
  94. package/dist/index-entries/index.d.ts.map +1 -0
  95. package/dist/index-entries/index.js +6 -0
  96. package/dist/index-entries/index.js.map +1 -0
  97. package/dist/index.d.ts +4 -0
  98. package/dist/index.d.ts.map +1 -1
  99. package/dist/index.js +5 -0
  100. package/dist/index.js.map +1 -1
  101. package/dist/ingest/index-entry-hook.d.ts +15 -0
  102. package/dist/ingest/index-entry-hook.d.ts.map +1 -0
  103. package/dist/ingest/index-entry-hook.js +84 -0
  104. package/dist/ingest/index-entry-hook.js.map +1 -0
  105. package/dist/ingest/ingest-session.d.ts.map +1 -1
  106. package/dist/ingest/ingest-session.js +72 -18
  107. package/dist/ingest/ingest-session.js.map +1 -1
  108. package/dist/ingest/session-state.d.ts +49 -0
  109. package/dist/ingest/session-state.d.ts.map +1 -0
  110. package/dist/ingest/session-state.js +158 -0
  111. package/dist/ingest/session-state.js.map +1 -0
  112. package/dist/maintenance/scheduler.d.ts.map +1 -1
  113. package/dist/maintenance/scheduler.js +25 -0
  114. package/dist/maintenance/scheduler.js.map +1 -1
  115. package/dist/maintenance/tasks/backfill-index.d.ts +27 -0
  116. package/dist/maintenance/tasks/backfill-index.d.ts.map +1 -0
  117. package/dist/maintenance/tasks/backfill-index.js +44 -0
  118. package/dist/maintenance/tasks/backfill-index.js.map +1 -0
  119. package/dist/mcp/tools.d.ts +4 -0
  120. package/dist/mcp/tools.d.ts.map +1 -1
  121. package/dist/mcp/tools.js +115 -7
  122. package/dist/mcp/tools.js.map +1 -1
  123. package/dist/models/embedder.js +2 -2
  124. package/dist/models/embedder.js.map +1 -1
  125. package/dist/models/model-registry.d.ts +2 -0
  126. package/dist/models/model-registry.d.ts.map +1 -1
  127. package/dist/models/model-registry.js +15 -0
  128. package/dist/models/model-registry.js.map +1 -1
  129. package/dist/repomap/cache.d.ts +58 -0
  130. package/dist/repomap/cache.d.ts.map +1 -0
  131. package/dist/repomap/cache.js +101 -0
  132. package/dist/repomap/cache.js.map +1 -0
  133. package/dist/repomap/graph.d.ts +54 -0
  134. package/dist/repomap/graph.d.ts.map +1 -0
  135. package/dist/repomap/graph.js +113 -0
  136. package/dist/repomap/graph.js.map +1 -0
  137. package/dist/repomap/index.d.ts +83 -0
  138. package/dist/repomap/index.d.ts.map +1 -0
  139. package/dist/repomap/index.js +99 -0
  140. package/dist/repomap/index.js.map +1 -0
  141. package/dist/repomap/parser.d.ts +43 -0
  142. package/dist/repomap/parser.d.ts.map +1 -0
  143. package/dist/repomap/parser.js +994 -0
  144. package/dist/repomap/parser.js.map +1 -0
  145. package/dist/repomap/regex-parser.d.ts +24 -0
  146. package/dist/repomap/regex-parser.d.ts.map +1 -0
  147. package/dist/repomap/regex-parser.js +190 -0
  148. package/dist/repomap/regex-parser.js.map +1 -0
  149. package/dist/repomap/renderer.d.ts +40 -0
  150. package/dist/repomap/renderer.d.ts.map +1 -0
  151. package/dist/repomap/renderer.js +163 -0
  152. package/dist/repomap/renderer.js.map +1 -0
  153. package/dist/repomap/scanner.d.ts +32 -0
  154. package/dist/repomap/scanner.d.ts.map +1 -0
  155. package/dist/repomap/scanner.js +171 -0
  156. package/dist/repomap/scanner.js.map +1 -0
  157. package/dist/retrieval/chain-assembler.d.ts.map +1 -1
  158. package/dist/retrieval/chain-assembler.js +22 -3
  159. package/dist/retrieval/chain-assembler.js.map +1 -1
  160. package/dist/retrieval/index.d.ts +2 -0
  161. package/dist/retrieval/index.d.ts.map +1 -1
  162. package/dist/retrieval/index.js +2 -0
  163. package/dist/retrieval/index.js.map +1 -1
  164. package/dist/retrieval/mmr.d.ts +1 -0
  165. package/dist/retrieval/mmr.d.ts.map +1 -1
  166. package/dist/retrieval/mmr.js +35 -1
  167. package/dist/retrieval/mmr.js.map +1 -1
  168. package/dist/retrieval/search-assembler.d.ts +10 -1
  169. package/dist/retrieval/search-assembler.d.ts.map +1 -1
  170. package/dist/retrieval/search-assembler.js +249 -81
  171. package/dist/retrieval/search-assembler.js.map +1 -1
  172. package/dist/retrieval/session-reconstructor.d.ts +36 -0
  173. package/dist/retrieval/session-reconstructor.d.ts.map +1 -1
  174. package/dist/retrieval/session-reconstructor.js +126 -0
  175. package/dist/retrieval/session-reconstructor.js.map +1 -1
  176. package/dist/storage/db.d.ts.map +1 -1
  177. package/dist/storage/db.js +15 -0
  178. package/dist/storage/db.js.map +1 -1
  179. package/dist/storage/index-entry-store.d.ts +71 -0
  180. package/dist/storage/index-entry-store.d.ts.map +1 -0
  181. package/dist/storage/index-entry-store.js +275 -0
  182. package/dist/storage/index-entry-store.js.map +1 -0
  183. package/dist/storage/index.d.ts +5 -2
  184. package/dist/storage/index.d.ts.map +1 -1
  185. package/dist/storage/index.js +5 -1
  186. package/dist/storage/index.js.map +1 -1
  187. package/dist/storage/migrations.d.ts.map +1 -1
  188. package/dist/storage/migrations.js +102 -0
  189. package/dist/storage/migrations.js.map +1 -1
  190. package/dist/storage/schema.sql +68 -2
  191. package/dist/storage/session-state-store.d.ts +61 -0
  192. package/dist/storage/session-state-store.d.ts.map +1 -0
  193. package/dist/storage/session-state-store.js +119 -0
  194. package/dist/storage/session-state-store.js.map +1 -0
  195. package/dist/storage/types.d.ts +50 -0
  196. package/dist/storage/types.d.ts.map +1 -1
  197. package/dist/storage/vector-store.d.ts +17 -2
  198. package/dist/storage/vector-store.d.ts.map +1 -1
  199. package/dist/storage/vector-store.js +96 -36
  200. package/dist/storage/vector-store.js.map +1 -1
  201. package/package.json +4 -2
@@ -0,0 +1,347 @@
1
+ /**
2
+ * Pipeline Dropout Analysis
3
+ *
4
+ * Traces where target chunks get lost in the retrieval pipeline.
5
+ * For each query, checks whether the ground-truth chunk survives each stage:
6
+ * 1. Vector search (raw top-K)
7
+ * 2. RRF fusion (vector + keyword)
8
+ * 3. Cluster expansion
9
+ * 4. Oversized filtering
10
+ * 5. MMR reranking
11
+ * 6. Budget assembly
12
+ * 7. Chain walking (recall path)
13
+ *
14
+ * Usage:
15
+ * npx tsx src/eval/experiments/pipeline-dropout/run-experiment.ts [--sample-size=50]
16
+ */
17
+ import { getDb } from '../../../storage/db.js';
18
+ import { vectorStore, indexVectorStore } from '../../../storage/vector-store.js';
19
+ import { getChunkById } from '../../../storage/chunk-store.js';
20
+ import { getIndexEntryCount, getIndexedChunkCount, dereferenceToChunkIds, searchIndexEntriesByKeyword, } from '../../../storage/index-entry-store.js';
21
+ import { getAllClusters, getClusterChunkIds } from '../../../storage/cluster-store.js';
22
+ import { Embedder } from '../../../models/embedder.js';
23
+ import { getModel } from '../../../models/model-registry.js';
24
+ import { loadConfig, toRuntimeConfig } from '../../../config/loader.js';
25
+ import { KeywordStore } from '../../../storage/keyword-store.js';
26
+ import { fuseRRF } from '../../../retrieval/rrf.js';
27
+ import { expandViaClusters } from '../../../retrieval/cluster-expander.js';
28
+ import { reorderWithMMR } from '../../../retrieval/mmr.js';
29
+ import { walkChains, selectBestChain } from '../../../retrieval/chain-walker.js';
30
+ import { generateSearchQueries } from '../index-vs-chunk/query-generator.js';
31
+ // ── Helpers ────────────────────────────────────────────────────────────────
32
/**
 * Creates a deterministic pseudo-random number generator from a numeric seed.
 *
 * The generator is a linear congruential step whose state is masked to 31 bits
 * after every update. Each call returns the new state divided by the mask, so
 * results fall in the closed interval [0, 1].
 *
 * @param {number} seed - Initial generator state.
 * @returns {() => number} Function producing the next value of the sequence.
 */
function createRng(seed) {
  const MULTIPLIER = 1664525;
  const INCREMENT = 1013904223;
  const STATE_MASK = 0x7fffffff;
  let state = seed;
  return function next() {
    state = (state * MULTIPLIER + INCREMENT) & STATE_MASK;
    return state / STATE_MASK;
  };
}
39
/**
 * Returns a shuffled copy of `items` using a Fisher–Yates pass driven by `rng`.
 *
 * Unlike `sort(() => rng() - 0.5)`, this never hands the engine an inconsistent
 * comparator, so the resulting order depends only on the `rng` sequence and not
 * on the sort algorithm of the JavaScript engine.
 *
 * @param {Array} items - Values to shuffle; the input array is not modified.
 * @param {() => number} rng - Source of pseudo-random numbers in [0, 1].
 * @returns {Array} New array holding the same elements in shuffled order.
 */
function seededShuffle(items, rng) {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i -= 1) {
    // createRng divides by its mask and can therefore yield exactly 1;
    // clamp so the swap index never runs past position i.
    const j = Math.min(i, Math.floor(rng() * (i + 1)));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}

/**
 * Draws a seeded sample of chunks spread across clusters.
 *
 * Clusters are visited in shuffled order. Clusters with fewer than two chunks
 * are skipped. From each remaining cluster at most
 * `min(2, ceil(sampleSize / clusterCount), chunkCount)` chunks are taken.
 * Chunks that cannot be loaded or whose content is shorter than 100 characters
 * are passed over, and the next candidate of the same cluster is tried instead,
 * so unusable chunks do not shrink the sample.
 *
 * @param {number} sampleSize - Maximum number of chunks to return.
 * @param {number} seed - Seed for the pseudo-random generator.
 * @returns {Array<{id: *, sessionSlug: *, content: string, clusterId: *, clusterName: *}>}
 *   Sampled chunks, each tagged with the cluster it was drawn from.
 * @throws {Error} When no clusters exist.
 */
function sampleChunks(sampleSize, seed) {
  // Called for its side effect only; the return value is unused here
  // (presumably it makes sure the database is opened — confirm in storage/db).
  getDb();
  const clusters = getAllClusters();
  if (clusters.length === 0) {
    throw new Error('No clusters found.');
  }
  const rng = createRng(seed);
  const result = [];
  for (const cluster of seededShuffle(clusters, rng)) {
    if (result.length >= sampleSize) {
      break;
    }
    const chunkIds = getClusterChunkIds(cluster.id);
    if (chunkIds.length < 2) {
      continue;
    }
    const numPicks = Math.min(2, Math.ceil(sampleSize / clusters.length), chunkIds.length);
    let picked = 0;
    for (const chunkId of seededShuffle(chunkIds, rng)) {
      if (picked >= numPicks || result.length >= sampleSize) {
        break;
      }
      const chunk = getChunkById(chunkId);
      if (!chunk || chunk.content.length < 100) {
        // Unusable candidate: try the next chunk of this cluster instead.
        continue;
      }
      result.push({
        id: chunk.id,
        sessionSlug: chunk.sessionSlug,
        content: chunk.content,
        clusterId: cluster.id,
        clusterName: cluster.name,
      });
      picked += 1;
    }
  }
  return result;
}
70
/**
 * Looks up the 1-based position of the first item whose `chunkId` equals `targetId`.
 *
 * @param {Array<{chunkId: *}>} items - Ranked items to scan in order.
 * @param {*} targetId - Chunk id to look for (strict equality).
 * @returns {number} 1-based rank of the first match, or 0 when there is none.
 */
function findIn(items, targetId) {
  for (let position = 0; position < items.length; position += 1) {
    if (items[position].chunkId === targetId) {
      return position + 1;
    }
  }
  return 0;
}
74
+ // ── Main ───────────────────────────────────────────────────────────────────
75
/**
 * Reads the optional `--sample-size=<n>` flag from the CLI arguments.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @param {number} fallback - Value used when the flag is absent.
 * @returns {number} Requested sample size.
 * @throws {Error} When the flag is present but is not a positive integer;
 *   an unparsable value would otherwise silently produce an empty sample.
 */
function parseSampleSize(args, fallback) {
  const prefix = '--sample-size=';
  const flag = args.find((a) => a.startsWith(prefix));
  if (flag === undefined) {
    return fallback;
  }
  const raw = flag.slice(prefix.length);
  const value = Number.parseInt(raw, 10);
  if (!Number.isInteger(value) || value <= 0) {
    throw new Error(`Invalid --sample-size value "${raw}": expected a positive integer.`);
  }
  return value;
}

/** Converts vector-search hits into fusion items; the score is `1 - distance`, floored at 0. */
function toVectorItems(hits) {
  return hits.map((hit) => ({
    chunkId: hit.id,
    score: Math.max(0, 1 - hit.distance),
    source: 'vector',
  }));
}

/** Converts keyword-search hits into fusion items, keeping their scores unchanged. */
function toKeywordItems(hits) {
  return hits.map((hit) => ({
    chunkId: hit.id,
    score: hit.score,
    source: 'keyword',
  }));
}

/** Fuses the vector list with the keyword list (only when it is non-empty) via `fuseRRF`. */
function fuseVectorAndKeyword(vectorItems, keywordItems, hybridSearch) {
  const lists = [{ items: vectorItems, weight: hybridSearch.vectorWeight }];
  if (keywordItems.length > 0) {
    lists.push({ items: keywordItems, weight: hybridSearch.keywordWeight });
  }
  return fuseRRF(lists, hybridSearch.rrfK);
}

/**
 * Runs the index-entry search path for one query and maps the fused index
 * entries back to chunks, keeping the best fused score per chunk.
 *
 * @returns {Promise<{fusedResults: Array, vectorRank: number, rrfRank: number}>}
 *   Ranks are 1-based; 0 means the target chunk was not found at that stage.
 */
async function searchViaIndex({ query, embedding, targetId, vectorSearchLimit, entriesPerChunk, hybridSearch }) {
  // Scale the limit by the average number of entries per chunk.
  const indexSearchLimit = Math.ceil(vectorSearchLimit * entriesPerChunk);
  const indexSimilar = await indexVectorStore.search(embedding, indexSearchLimit);

  // The same entry ids are dereferenced for the rank check and again after
  // fusion, so remember each lookup for the duration of this query.
  const chunkIdsByEntry = new Map();
  const chunkIdsFor = (entryId) => {
    let ids = chunkIdsByEntry.get(entryId);
    if (ids === undefined) {
      ids = dereferenceToChunkIds([entryId]);
      chunkIdsByEntry.set(entryId, ids);
    }
    return ids;
  };

  const vectorRank = indexSimilar.findIndex((hit) => chunkIdsFor(hit.id).includes(targetId)) + 1;

  let keywordHits = [];
  try {
    keywordHits = searchIndexEntriesByKeyword(query, hybridSearch.keywordSearchLimit);
  }
  catch {
    // Keyword search is best-effort: treat a failure as "unavailable" and fuse vector hits only.
  }

  // At this point `chunkId` on the fusion items actually holds an index-entry id.
  const indexFused = fuseVectorAndKeyword(toVectorItems(indexSimilar), toKeywordItems(keywordHits), hybridSearch);

  const bestByChunk = new Map();
  for (const item of indexFused) {
    for (const chunkId of chunkIdsFor(item.chunkId)) {
      const existing = bestByChunk.get(chunkId);
      if (!existing || item.score > existing.score) {
        bestByChunk.set(chunkId, { score: item.score, source: item.source });
      }
    }
  }
  const fusedResults = [...bestByChunk].map(([chunkId, best]) => ({
    chunkId,
    score: best.score,
    source: best.source,
  }));
  return { fusedResults, vectorRank, rrfRank: findIn(fusedResults, targetId) };
}

/**
 * Runs the direct chunk search path (vector search plus optional keyword search) for one query.
 *
 * @returns {Promise<{fusedResults: Array, vectorRank: number, rrfRank: number}>}
 *   Ranks are 1-based; 0 means the target chunk was not found at that stage.
 */
async function searchViaChunks({ query, embedding, targetId, vectorSearchLimit, keywordStore, hybridSearch }) {
  const similar = await vectorStore.search(embedding, vectorSearchLimit);
  const vectorRank = similar.findIndex((hit) => hit.id === targetId) + 1;

  let keywordHits = [];
  if (keywordStore) {
    try {
      keywordHits = keywordStore.search(query, hybridSearch.keywordSearchLimit);
    }
    catch {
      // Keyword search is best-effort: fall back to vector-only fusion.
    }
  }

  const fusedResults = fuseVectorAndKeyword(toVectorItems(similar), toKeywordItems(keywordHits), hybridSearch);
  return { fusedResults, vectorRank, rrfRank: findIn(fusedResults, targetId) };
}

/** Returns the display name of the first sequential stage the target did not survive, or null. */
function firstDroppedStage(stages) {
  const stageOrder = [
    ['vectorSearch', 'Vector Search'],
    ['rrfFusion', 'RRF Fusion'],
    ['clusterExpansion', 'Cluster Expansion'],
    ['oversizedFilter', 'Oversized Filter'],
    ['mmrRerank', 'MMR Rerank'],
    ['budgetAssembly', 'Budget Assembly'],
  ];
  const dropped = stageOrder.find(([key]) => !stages[key]);
  return dropped ? dropped[1] : null;
}

/** Formats per-stage survival percentages over all traces as `Label: NN%` strings. */
function formatStageRates(traces) {
  const stageLabels = [
    ['vectorSearch', 'Vector'],
    ['rrfFusion', 'RRF'],
    ['clusterExpansion', 'Cluster'],
    ['oversizedFilter', 'Size'],
    ['mmrRerank', 'MMR'],
    ['budgetAssembly', 'Budget'],
    ['chainSeeds', 'Seeds'],
    ['chainOutput', 'Chain'],
  ];
  const total = traces.length;
  return stageLabels.map(([key, label]) => {
    const count = traces.filter((trace) => trace.stages[key]).length;
    // Guard the division so an empty trace list reports 0% rather than NaN%.
    const percent = total > 0 ? (count / total) * 100 : 0;
    return `${label}: ${percent.toFixed(0)}%`;
  });
}

/**
 * Traces one query through every pipeline stage and records where the
 * ground-truth chunk is still present.
 *
 * @param {object} ctx - Shared run state: `config`, `keywordStore`, `useIndexSearch`, `entriesPerChunk`.
 * @param {{query: string, groundTruthChunkId: *}} q - Query and its target chunk.
 * @param {*} embedding - Pre-computed embedding of the query.
 * @param {number} vectorSearchLimit - Top-K used for the vector search stage.
 * @returns {Promise<object>} Trace with `stages`, `droppedAt`, `vectorRank` and `rrfRank`.
 */
async function traceQuery(ctx, q, embedding, vectorSearchLimit) {
  const { config, keywordStore, useIndexSearch, entriesPerChunk } = ctx;
  const { hybridSearch, clusterExpansion, mmrReranking } = config;
  const maxTokens = config.mcpMaxResponseTokens;
  const targetId = q.groundTruthChunkId;
  const stages = {
    vectorSearch: false,
    rrfFusion: false,
    clusterExpansion: false,
    oversizedFilter: false,
    mmrRerank: false,
    budgetAssembly: false,
    chainSeeds: false,
    chainOutput: false,
  };

  // Vector search + RRF fusion
  const searchArgs = { query: q.query, embedding, targetId, vectorSearchLimit, hybridSearch };
  const { fusedResults, vectorRank, rrfRank } = useIndexSearch
    ? await searchViaIndex({ ...searchArgs, entriesPerChunk })
    : await searchViaChunks({ ...searchArgs, keywordStore });
  stages.vectorSearch = vectorRank > 0;
  stages.rrfFusion = rrfRank > 0;

  // Cluster expansion
  const expanded = expandViaClusters(fusedResults, clusterExpansion, undefined, undefined, config.feedbackWeight);
  stages.clusterExpansion = findIn(expanded, targetId) > 0;

  // Dedupe, keeping the first occurrence of every chunk id
  const seen = new Set();
  const deduped = expanded.filter((r) => {
    if (seen.has(r.chunkId)) {
      return false;
    }
    seen.add(r.chunkId);
    return true;
  });

  // Token counts for chunks that can be loaded; a falsy count falls back to 500.
  const chunkTokenMap = new Map();
  for (const item of deduped) {
    const chunk = getChunkById(item.chunkId);
    if (chunk) {
      chunkTokenMap.set(item.chunkId, chunk.approxTokens || 500);
    }
  }

  // Oversized filter: drops chunks that are unknown or larger than the whole budget
  const sizeBounded = deduped.filter((item) => {
    const tokens = chunkTokenMap.get(item.chunkId);
    return tokens !== undefined && tokens <= maxTokens;
  });
  stages.oversizedFilter = findIn(sizeBounded, targetId) > 0;

  // MMR reranking
  const reordered = await reorderWithMMR(sizeBounded, embedding, mmrReranking, {
    tokenBudget: maxTokens,
    chunkTokenCounts: chunkTokenMap,
  });
  stages.mmrRerank = findIn(reordered, targetId) > 0;

  // Log MMR dropout details
  if (stages.oversizedFilter && !stages.mmrRerank) {
    const preRank = findIn(sizeBounded, targetId);
    const targetTokens = chunkTokenMap.get(targetId) ?? 0;
    const totalTokensInReordered = reordered.reduce((sum, r) => sum + (chunkTokenMap.get(r.chunkId) ?? 0), 0);
    console.log(` MMR DROP: pre-rank=${preRank}/${sizeBounded.length} target=${targetTokens}tok reordered=${reordered.length} reorderedTokens=${totalTokensInReordered} budget=${maxTokens}`);
  }

  // Budget assembly: take items in order until the next one would exceed the budget
  let budgetUsed = 0;
  for (const item of reordered) {
    const tokens = chunkTokenMap.get(item.chunkId) ?? 500;
    if (budgetUsed + tokens > maxTokens) {
      break;
    }
    budgetUsed += tokens;
    if (item.chunkId === targetId) {
      stages.budgetAssembly = true;
      break;
    }
  }

  // Chain walking, seeded with the first five expanded results
  const seedIds = expanded.slice(0, 5).map((r) => r.chunkId);
  stages.chainSeeds = seedIds.includes(targetId);
  try {
    const chains = await walkChains(seedIds, {
      direction: 'backward',
      tokenBudget: maxTokens,
      queryEmbedding: embedding,
    });
    const bestChain = selectBestChain(chains);
    if (bestChain && bestChain.chunkIds.includes(targetId)) {
      stages.chainOutput = true;
    }
  }
  catch {
    // A failed chain walk is recorded as "target not in chain output".
  }

  return {
    query: q.query,
    groundTruthChunkId: targetId,
    stages,
    droppedAt: firstDroppedStage(stages),
    vectorRank,
    rrfRank,
  };
}

/**
 * Entry point of the pipeline dropout analysis.
 *
 * Samples chunks, generates one search query per chunk, embeds the queries and
 * then, for each vector-search limit in [20, 50, 100, 200], traces every query
 * through the retrieval stages and prints the share of queries whose target
 * chunk is present at each stage.
 *
 * The embedder is disposed even when a stage throws, and the run stops early
 * with a message when no queries could be generated.
 *
 * @returns {Promise<void>}
 * @throws {Error} When `--sample-size` is present but not a positive integer,
 *   or when any non-optional stage (sampling, query generation, embedding,
 *   vector search, MMR) fails.
 */
async function runAnalysis() {
  const sampleSize = parseSampleSize(process.argv.slice(2), 50);
  const seed = 42;
  console.log('=== Pipeline Dropout Analysis ===\n');
  getDb();
  const config = toRuntimeConfig(loadConfig());
  const { embeddingModel } = config;
  const maxTokens = config.mcpMaxResponseTokens;

  // The entry/chunk counts do not depend on the query, so read them once
  // instead of once per query per limit.
  const indexEntryCount = config.semanticIndex.useForSearch ? getIndexEntryCount() : 0;
  const useIndexSearch = indexEntryCount > 0;
  let entriesPerChunk = 1;
  if (useIndexSearch) {
    const indexedChunks = getIndexedChunkCount();
    if (indexedChunks > 0) {
      entriesPerChunk = indexEntryCount / indexedChunks;
    }
  }
  console.log(`Search path: ${useIndexSearch ? 'INDEX' : 'CHUNK'}`);
  console.log(`Max tokens: ${maxTokens}`);
  vectorStore.setModelId(embeddingModel);
  if (useIndexSearch) {
    indexVectorStore.setModelId(embeddingModel);
  }

  // 1. Sample and generate queries
  console.log(`\nSampling ${sampleSize} chunks...`);
  const sampledChunks = sampleChunks(sampleSize, seed);
  console.log(` Sampled ${sampledChunks.length} chunks`);
  console.log('Generating queries...');
  const queries = await generateSearchQueries(sampledChunks, config.clusterRefreshModel);
  console.log(` Generated ${queries.length} queries\n`);
  if (queries.length === 0) {
    // Without queries every rate would be 0/0; stop instead of printing NaN%.
    console.log('No queries generated; nothing to analyze.');
    return;
  }

  // 2. Prepare embedder
  const embedder = new Embedder();
  await embedder.load(getModel(embeddingModel));
  try {
    let keywordStore = null;
    try {
      keywordStore = new KeywordStore();
    }
    catch {
      // Keyword store unavailable: the chunk path then runs on vector search only.
    }
    const vectorLimits = [20, 50, 100, 200];
    console.log(`Vector search limits: ${vectorLimits.join(', ')}`);

    // Pre-embed all queries once; the embeddings are reused for every limit
    console.log('Embedding queries...');
    const queryEmbeddings = [];
    for (const q of queries) {
      const { embedding } = await embedder.embed(q.query, true);
      queryEmbeddings.push(embedding);
    }
    console.log(` Embedded ${queryEmbeddings.length} queries\n`);

    // 3. Sweep vector search limits
    const ctx = { config, keywordStore, useIndexSearch, entriesPerChunk };
    for (const vectorSearchLimit of vectorLimits) {
      console.log(`── Vector limit: ${vectorSearchLimit} ──`);
      const traces = [];
      for (let qi = 0; qi < queries.length; qi++) {
        traces.push(await traceQuery(ctx, queries[qi], queryEmbeddings[qi], vectorSearchLimit));
      }
      // Aggregate for this limit
      console.log(` ${formatStageRates(traces).join(' → ')}`);
    }
  }
  finally {
    // Release the embedding model even if a stage above failed.
    await embedder.dispose();
  }
  console.log('\nDone.');
}
343
// Script entry point: start the analysis; if it rejects, report the failure
// on stderr and terminate the process with exit status 1.
runAnalysis().then(undefined, (failure) => {
  console.error('Analysis failed:', failure);
  process.exit(1);
});
347
+ //# sourceMappingURL=run-experiment.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-experiment.js","sourceRoot":"","sources":["../../../../src/eval/experiments/pipeline-dropout/run-experiment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,wBAAwB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,EACpB,qBAAqB,EACrB,2BAA2B,GAC5B,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AACvF,OAAO,EAAE,QAAQ,EAAE,MAAM,6BAA6B,CAAC;AACvD,OAAO,EAAE,QAAQ,EAAE,MAAM,mCAAmC,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AACxE,OAAO,EAAE,YAAY,EAAE,MAAM,mCAAmC,CAAC;AACjE,OAAO,EAAE,OAAO,EAAmB,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,wCAAwC,CAAC;AAC3E,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,oCAAoC,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAyB,MAAM,sCAAsC,CAAC;AA2BpG,8EAA8E;AAE9E,SAAS,SAAS,CAAC,IAAY;IAC7B,IAAI,CAAC,GAAG,IAAI,CAAC;IACb,OAAO,GAAG,EAAE;QACV,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAC;QAC5C,OAAO,CAAC,GAAG,UAAU,CAAC;IACxB,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,UAAkB,EAAE,IAAY;IACpD,KAAK,EAAE,CAAC;IACR,MAAM,QAAQ,GAAG,cAAc,EAAE,CAAC;IAClC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;IAEjE,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,QAAQ,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC;IAEvD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,MAAM,CAAC,MAAM,IAAI,UAAU;YAAE,MAAM;QACvC,MAAM,QAAQ,GAAG,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAChD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAElC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvF,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,YAAY,CAA
C,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3C,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG;gBAAE,SAAS;YACnD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,KAAK,CAAC,EAAE;gBACZ,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,SAAS,EAAE,OAAO,CAAC,EAAE;gBACrB,WAAW,EAAE,OAAO,CAAC,IAAI;aAC1B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,MAAM,CAAC,KAAiC,EAAE,QAAgB;IACjE,MAAM,GAAG,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC;IAC3D,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAChC,CAAC;AAED,8EAA8E;AAE9E,KAAK,UAAU,WAAW;IACxB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACvE,MAAM,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAClF,MAAM,IAAI,GAAG,EAAE,CAAC;IAEhB,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;IAEnD,KAAK,EAAE,CAAC;IACR,MAAM,cAAc,GAAG,UAAU,EAAE,CAAC;IACpC,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;IAC/C,MAAM,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IAChF,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAE9C,MAAM,cAAc,GAAG,MAAM,CAAC,aAAa,CAAC,YAAY,IAAI,kBAAkB,EAAE,GAAG,CAAC,CAAC;IACrF,OAAO,CAAC,GAAG,CAAC,gBAAgB,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,EAAE,CAAC,CAAC;IAExC,WAAW,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;IACvC,IAAI,cAAc;QAAE,gBAAgB,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;IAEhE,iCAAiC;IACjC,OAAO,CAAC,GAAG,CAAC,cAAc,UAAU,YAAY,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,YAAY,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,aAAa,aAAa,CAAC,MAAM,SAAS,CAAC,CAAC;IAExD,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,aAAa,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC;IACvF,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,MAAM,YAAY,CAAC,CAAC;IAEvD,sBAAsB;IACtB,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC;IAChC,MAAM,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc
,CAAC,CAAC,CAAC;IAE9C,IAAI,YAAY,GAAwB,IAAI,CAAC;IAC7C,IAAI,CAAC;QAAC,YAAY,GAAG,IAAI,YAAY,EAAE,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC,CAAC,iBAAiB,CAAC,CAAC;IAEtE,MAAM,YAAY,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,yBAAyB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEhE,wBAAwB;IACxB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACpC,MAAM,eAAe,GAAe,EAAE,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QACnE,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,cAAc,eAAe,CAAC,MAAM,YAAY,CAAC,CAAC;IAE9D,gCAAgC;IAChC,KAAK,MAAM,iBAAiB,IAAI,YAAY,EAAE,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,oBAAoB,iBAAiB,KAAK,CAAC,CAAC;QACxD,MAAM,MAAM,GAAiB,EAAE,CAAC;QAEhC,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC;YAC3C,MAAM,CAAC,GAAG,OAAO,CAAC,EAAE,CAAC,CAAC;YACtB,MAAM,SAAS,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;YACtC,MAAM,QAAQ,GAAG,CAAC,CAAC,kBAAkB,CAAC;YAEtC,MAAM,MAAM,GAAkB;gBAC5B,YAAY,EAAE,KAAK;gBACnB,SAAS,EAAE,KAAK;gBAChB,gBAAgB,EAAE,KAAK;gBACvB,eAAe,EAAE,KAAK;gBACtB,SAAS,EAAE,KAAK;gBAChB,cAAc,EAAE,KAAK;gBACrB,UAAU,EAAE,KAAK;gBACjB,WAAW,EAAE,KAAK;aACnB,CAAC;YAEF,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,IAAI,YAA0B,CAAC;YAE/B,IAAI,cAAc,EAAE,CAAC;gBACnB,MAAM,UAAU,GAAG,kBAAkB,EAAE,CAAC;gBACxC,MAAM,aAAa,GAAG,oBAAoB,EAAE,CAAC;gBAC7C,MAAM,eAAe,GAAG,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC3E,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,iBAAiB,GAAG,eAAe,CAAC,CAAC;gBAExE,MAAM,YAAY,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;gBAEhF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC7C,MAAM,QAAQ,GAAG,qBAAqB,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBAC7D,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAChC,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC;wBAC3B,IAAI,UAAU,KAAK,CAAC;4BAAE,UAAU,GAAG,CAAC,GAAG,
CAAC,CAAC;wBACzC,MAAM;oBACR,CAAC;gBACH,CAAC;gBAED,IAAI,mBAAmB,GAAyC,EAAE,CAAC;gBACnE,IAAI,CAAC;oBACH,mBAAmB,GAAG,2BAA2B,CAAC,CAAC,CAAC,KAAK,EAAE,YAAY,CAAC,kBAAkB,CAAC,CAAC;gBAC9F,CAAC;gBAAC,MAAM,CAAC,CAAC,iBAAiB,CAAC,CAAC;gBAE7B,MAAM,gBAAgB,GAAiB,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9D,OAAO,EAAE,CAAC,CAAC,EAAE;oBACb,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;oBAClC,MAAM,EAAE,QAAiB;iBAC1B,CAAC,CAAC,CAAC;gBACJ,MAAM,iBAAiB,GAAiB,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBACtE,OAAO,EAAE,CAAC,CAAC,EAAE;oBACb,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,SAAkB;iBAC3B,CAAC,CAAC,CAAC;gBAEJ,MAAM,UAAU,GAAG,OAAO,CACxB;oBACE,EAAE,KAAK,EAAE,gBAAgB,EAAE,MAAM,EAAE,YAAY,CAAC,YAAY,EAAE;oBAC9D,GAAG,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC;wBAC9B,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE,MAAM,EAAE,YAAY,CAAC,aAAa,EAAE,CAAC;wBACpE,CAAC,CAAC,EAAE,CAAC;iBACR,EACD,YAAY,CAAC,IAAI,CAClB,CAAC;gBAEF,MAAM,aAAa,GAAG,IAAI,GAAG,EAA2D,CAAC;gBACzF,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;oBAC9B,MAAM,aAAa,GAAG,qBAAqB,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC5D,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;wBAChC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;wBACxC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC;4BAC7C,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;wBACrE,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,MAAM,WAAW,GAAG,CAAC,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC9C,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACrC,MAAM,KAAK,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC;oBACtC,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;gBACpE,CAAC,CAAC,CAAC;gBAEH,OAAO,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,QAAQ,CAAC,GAAG,CAAC,CAAC;gBACpE,IAAI,OAAO,GAAG,CAAC;oBAAE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;YAC3C,CAAC;iBAAM,CAAC;gBACN,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,SAAS,EAAE,iBAAiB,CAAC,CAAC;gBACvE,UAAU,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,
EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,GAAG,CAAC,CAAC;gBAC7D,IAAI,UAAU,GAAG,CAAC;oBAAE,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC;gBAE/C,IAAI,cAAc,GAAyC,EAAE,CAAC;gBAC9D,IAAI,YAAY,EAAE,CAAC;oBACjB,IAAI,CAAC;wBAAC,cAAc,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,EAAE,YAAY,CAAC,kBAAkB,CAAC,CAAC;oBAAC,CAAC;oBACvF,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;gBACjB,CAAC;gBAED,MAAM,WAAW,GAAiB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBACpD,OAAO,EAAE,CAAC,CAAC,EAAE;oBACb,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;oBAClC,MAAM,EAAE,QAAiB;iBAC1B,CAAC,CAAC,CAAC;gBACJ,MAAM,YAAY,GAAiB,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC5D,OAAO,EAAE,CAAC,CAAC,EAAE;oBACb,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,MAAM,EAAE,SAAkB;iBAC3B,CAAC,CAAC,CAAC;gBAEJ,YAAY,GAAG,OAAO,CACpB;oBACE,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,CAAC,YAAY,EAAE;oBACzD,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;wBACzB,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,aAAa,EAAE,CAAC;wBAC/D,CAAC,CAAC,EAAE,CAAC;iBACR,EACD,YAAY,CAAC,IAAI,CAClB,CAAC;gBAEF,OAAO,GAAG,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;gBACzC,IAAI,OAAO,GAAG,CAAC;oBAAE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;YAC3C,CAAC;YAED,oBAAoB;YACpB,MAAM,QAAQ,GAAG,iBAAiB,CAChC,YAAY,EACZ,gBAAgB,EAChB,SAAS,EACT,SAAS,EACT,MAAM,CAAC,cAAc,CACtB,CAAC;YACF,IAAI,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,GAAG,CAAC;gBAAE,MAAM,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAEnE,SAAS;YACT,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;YAC/B,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBACpC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC;oBAAE,OAAO,KAAK,CAAC;gBACtC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACpB,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;YAEH,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;YAChD,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACzC,IAAI,KAAK;oBAAE,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,YAAY,IAAI,GAAG,CAAC,CAAC;YACxE,CAAC;YAED,mBAAmB;YACnB,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;gBAC1C,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CA
AC,OAAO,CAAC,CAAC;gBAC/C,OAAO,MAAM,KAAK,SAAS,IAAI,MAAM,IAAI,SAAS,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,IAAI,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,GAAG,CAAC;gBAAE,MAAM,CAAC,eAAe,GAAG,IAAI,CAAC;YAErE,gBAAgB;YAChB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE;gBAC3E,WAAW,EAAE,SAAS;gBACtB,gBAAgB,EAAE,aAAa;aAChC,CAAC,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAC5C,IAAI,OAAO,GAAG,CAAC;gBAAE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;YAEzC,0BAA0B;YAC1B,IAAI,MAAM,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAChD,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC9C,MAAM,YAAY,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACtD,MAAM,sBAAsB,GAAG,SAAS,CAAC,MAAM,CAC7C,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CACrD,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,0BAA0B,OAAO,IAAI,WAAW,CAAC,MAAM,WAAW,YAAY,iBAAiB,SAAS,CAAC,MAAM,oBAAoB,sBAAsB,WAAW,SAAS,EAAE,CAAC,CAAC;YAC/L,CAAC;YAED,kBAAkB;YAClB,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;gBACtD,IAAI,UAAU,GAAG,MAAM,GAAG,SAAS;oBAAE,MAAM;gBAC3C,UAAU,IAAI,MAAM,CAAC;gBACrB,IAAI,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;oBAC9B,MAAM,CAAC,cAAc,GAAG,IAAI,CAAC;oBAC7B,MAAM;gBACR,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YAC3D,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAE/C,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,EAAE;oBACvC,SAAS,EAAE,UAAU;oBACrB,WAAW,EAAE,SAAS;oBACtB,cAAc,EAAE,SAAS;iBAC1B,CAAC,CAAC;gBACH,MAAM,SAAS,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;gBAC1C,IAAI,SAAS,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACvD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC;gBAC5B,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,CAAC;YAEnC,IAAI,SAAS,GAAkB,IAAI,CAAC;YACpC,MAAM,UAAU,GAAoC;gBAClD,CAAC,cAAc,EAAE,eAAe,CAAC;gBACjC,CAAC,WAAW,EAAE,YAAY,CAAC;gBAC3B,CAAC,kBAAkB,EAAE,mBA
AmB,CAAC;gBACzC,CAAC,iBAAiB,EAAE,kBAAkB,CAAC;gBACvC,CAAC,WAAW,EAAE,YAAY,CAAC;gBAC3B,CAAC,gBAAgB,EAAE,iBAAiB,CAAC;aACtC,CAAC;YAEF,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;gBACrC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjB,SAAS,GAAG,IAAI,CAAC;oBACjB,MAAM;gBACR,CAAC;YACH,CAAC;YAED,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,kBAAkB,EAAE,QAAQ;gBAC5B,MAAM;gBACN,SAAS;gBACT,UAAU;gBACV,OAAO;aACR,CAAC,CAAC;QACL,CAAC;QAED,2BAA2B;QAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,MAAM,WAAW,GAA2B,EAAE,CAAC;QAC/C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gBACtD,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAuB;YACtC,CAAC,cAAc,EAAE,QAAQ,CAAC;YAC1B,CAAC,WAAW,EAAE,KAAK,CAAC;YACpB,CAAC,kBAAkB,EAAE,SAAS,CAAC;YAC/B,CAAC,iBAAiB,EAAE,MAAM,CAAC;YAC3B,CAAC,WAAW,EAAE,KAAK,CAAC;YACpB,CAAC,gBAAgB,EAAE,QAAQ,CAAC;YAC5B,CAAC,YAAY,EAAE,OAAO,CAAC;YACvB,CAAC,aAAa,EAAE,OAAO,CAAC;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACpC,OAAO,GAAG,KAAK,KAAK,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAC5D,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEzB,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AACzB,CAAC;AAED,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IAC1B,OAAO,CAAC,KAAK,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;IACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Miss Population Analysis
3
+ *
4
+ * For chunks that are never found by vector search (even at K=2000),
5
+ * diagnoses why by comparing hit vs miss populations across:
6
+ *
7
+ * 1. Embedding distance: query → chunk embedding, query → best index entry embedding
8
+ * 2. Index entry count per chunk
9
+ * 3. Chunk content length (tokens)
10
+ * 4. Cluster size (competition from similar chunks)
11
+ * 5. Content samples from hits vs misses
12
+ *
13
+ * Usage:
14
+ * npx tsx src/eval/experiments/rescorer-ceiling/analyze-misses.ts [--sample-size=50]
15
+ */
16
+ export {};
17
+ //# sourceMappingURL=analyze-misses.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyze-misses.d.ts","sourceRoot":"","sources":["../../../../src/eval/experiments/rescorer-ceiling/analyze-misses.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG"}
@@ -0,0 +1,247 @@
1
+ /**
2
+ * Miss Population Analysis
3
+ *
4
+ * For chunks that are never found by vector search (even at K=2000),
5
+ * diagnoses why by comparing hit vs miss populations across:
6
+ *
7
+ * 1. Embedding distance: query → chunk embedding, query → best index entry embedding
8
+ * 2. Index entry count per chunk
9
+ * 3. Chunk content length (tokens)
10
+ * 4. Cluster size (competition from similar chunks)
11
+ * 5. Content samples from hits vs misses
12
+ *
13
+ * Usage:
14
+ * npx tsx src/eval/experiments/rescorer-ceiling/analyze-misses.ts [--sample-size=50]
15
+ */
16
+ import { getDb } from '../../../storage/db.js';
17
+ import { vectorStore, indexVectorStore } from '../../../storage/vector-store.js';
18
+ import { getChunkById } from '../../../storage/chunk-store.js';
19
+ import { getIndexEntryCount, getIndexedChunkCount, getIndexEntriesForChunk, dereferenceToChunkIds, } from '../../../storage/index-entry-store.js';
20
+ import { getAllClusters, getClusterChunkIds } from '../../../storage/cluster-store.js';
21
+ import { Embedder } from '../../../models/embedder.js';
22
+ import { getModel } from '../../../models/model-registry.js';
23
+ import { loadConfig, toRuntimeConfig } from '../../../config/loader.js';
24
+ import { cosineSimilarity } from '../../../utils/angular-distance.js';
25
+ import { generateSearchQueries } from '../index-vs-chunk/query-generator.js';
26
+ // ── Helpers ────────────────────────────────────────────────────────────────
27
/**
 * Builds a small seeded pseudo-random generator (linear congruential step,
 * state masked to 31 bits) so that sampling is repeatable for a given seed.
 *
 * @param {number} seed - Initial generator state.
 * @returns {() => number} Function that advances the state and returns it
 *   divided by 0x7fffffff (a value between 0 and 1 for non-negative states).
 */
function createRng(seed) {
    const MULTIPLIER = 1664525;
    const INCREMENT = 1013904223;
    const MASK = 0x7fffffff;
    let state = seed;
    return function next() {
        // The bitwise AND truncates to 32 bits first, then keeps the low 31.
        state = (state * MULTIPLIER + INCREMENT) & MASK;
        return state / MASK;
    };
}
34
/**
 * Draws a seeded sample of chunks spread across clusters.
 *
 * Clusters are visited in a seeded random order; clusters with fewer than two
 * chunks are skipped. From each remaining cluster at most two chunks are
 * considered (fewer when the per-cluster share of `sampleSize` is smaller),
 * and chunks that cannot be loaded or whose content is shorter than 100
 * characters are dropped without being replaced.
 *
 * @param {number} sampleSize - Upper bound on the number of chunks returned.
 * @param {number} seed - Seed for the deterministic random generator.
 * @returns {Array<{id: *, sessionSlug: *, content: *, clusterId: *, clusterName: *}>}
 *   The sampled chunk records (possibly fewer than `sampleSize`).
 * @throws {Error} When no clusters exist.
 */
function sampleChunks(sampleSize, seed) {
    getDb();
    const allClusters = getAllClusters();
    if (allClusters.length === 0) {
        throw new Error('No clusters found.');
    }
    const nextRandom = createRng(seed);
    const randomOrder = () => nextRandom() - 0.5;
    const picked = [];
    // Per-cluster share of the requested sample; loop-invariant.
    const clusterShare = Math.ceil(sampleSize / allClusters.length);
    const visitOrder = [...allClusters].sort(randomOrder);
    for (const cluster of visitOrder) {
        if (picked.length >= sampleSize) {
            break;
        }
        const memberIds = getClusterChunkIds(cluster.id);
        if (memberIds.length < 2) {
            continue;
        }
        const quota = Math.min(2, clusterShare, memberIds.length);
        const candidates = [...memberIds].sort(randomOrder);
        for (let pick = 0; pick < quota && picked.length < sampleSize; pick += 1) {
            const chunk = getChunkById(candidates[pick]);
            if (!chunk) {
                continue;
            }
            if (chunk.content.length < 100) {
                continue;
            }
            picked.push({
                id: chunk.id,
                sessionSlug: chunk.sessionSlug,
                content: chunk.content,
                clusterId: cluster.id,
                clusterName: cluster.name,
            });
        }
    }
    return picked;
}
65
/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * For an odd number of values this is the middle element of the sorted list;
 * for an even number it is the average of the two middle elements.
 *
 * @param {number[]} values - Numbers to summarise.
 * @returns {number} The median, or 0 when `values` is empty.
 */
function median(values) {
    if (values.length === 0) {
        return 0;
    }
    // Sort a copy numerically; the default sort would compare as strings.
    const sorted = [...values].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 1) {
        return sorted[mid];
    }
    return (sorted[mid - 1] + sorted[mid]) / 2;
}
71
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number} Sum of the values divided by their count, or 0 for an
 *   empty list.
 */
function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return total / count;
}
76
+ // ── Main ───────────────────────────────────────────────────────────────────
77
/**
 * Runs the miss-population analysis and prints the report to stdout.
 *
 * Flow: sample chunks from clusters, generate search queries for them, embed
 * the queries, decide for every query whether its ground-truth chunk is found
 * within the top K=2000 results (via the index or the chunk vector store,
 * depending on configuration), then print comparative statistics for the
 * "hit" and "miss" populations plus a few samples of each.
 *
 * CLI flag: `--sample-size=N` (positive integer, default 50).
 *
 * @returns {Promise<void>} Resolves once the report is printed and the
 *   embedder has been disposed.
 * @throws {Error} When `--sample-size` is not a positive integer; errors from
 *   the storage, query-generation and embedding layers propagate unchanged.
 */
async function runAnalysis() {
    const args = process.argv.slice(2);
    const sampleSizeArg = args.find((a) => a.startsWith('--sample-size='));
    const sampleSize = sampleSizeArg ? Number.parseInt(sampleSizeArg.split('=')[1], 10) : 50;
    // Fail fast: a NaN or non-positive size would otherwise yield an empty sample silently.
    if (Number.isNaN(sampleSize) || sampleSize < 1) {
        throw new Error(`Invalid ${sampleSizeArg}: expected a positive integer.`);
    }
    const seed = 42;
    const K = 2000; // high K for determining hit/miss
    console.log('=== Miss Population Analysis ===\n');
    getDb();
    const externalConfig = loadConfig();
    const config = toRuntimeConfig(externalConfig);
    const useIndex = config.semanticIndex.useForSearch && getIndexEntryCount() > 0;
    // Average number of index entries per indexed chunk; used to scale the
    // index search limit so it corresponds to roughly K chunks.
    let entriesPerChunk = 1;
    if (useIndex) {
        const indexedChunks = getIndexedChunkCount();
        entriesPerChunk = indexedChunks > 0 ? getIndexEntryCount() / indexedChunks : 1;
    }
    console.log(`Search path: ${useIndex ? 'INDEX' : 'CHUNK'}`);
    console.log(`K: ${K}`);
    vectorStore.setModelId(config.embeddingModel);
    if (useIndex) {
        indexVectorStore.setModelId(config.embeddingModel);
    }
    // Build cluster size map
    const clusterSizeMap = new Map();
    for (const cluster of getAllClusters()) {
        const chunkIds = getClusterChunkIds(cluster.id);
        for (const cid of chunkIds) {
            clusterSizeMap.set(cid, chunkIds.length);
        }
    }
    // 1. Sample and generate queries
    console.log(`\nSampling ${sampleSize} chunks...`);
    const sampledChunks = sampleChunks(sampleSize, seed);
    console.log(` Sampled ${sampledChunks.length} chunks`);
    console.log('Generating queries...');
    const queries = await generateSearchQueries(sampledChunks, config.clusterRefreshModel);
    console.log(` Generated ${queries.length} queries`);
    // 2. Prepare embedder
    const embedder = new Embedder();
    await embedder.load(getModel(config.embeddingModel));
    // From here on the embedder is loaded; release it even if a later step throws.
    try {
        console.log('Embedding queries...');
        const queryEmbeddings = [];
        for (const q of queries) {
            const { embedding } = await embedder.embed(q.query, true);
            queryEmbeddings.push(embedding);
        }
        console.log(` Embedded ${queryEmbeddings.length} queries\n`);
        // 3. Profile each query-chunk pair
        const profiles = [];
        for (let qi = 0; qi < queries.length; qi++) {
            const q = queries[qi];
            const queryEmb = queryEmbeddings[qi];
            const targetId = q.groundTruthChunkId;
            // Determine hit/miss at K=2000
            let found = false;
            if (useIndex) {
                const indexLimit = Math.ceil(K * entriesPerChunk);
                const results = await indexVectorStore.search(queryEmb, indexLimit);
                for (const r of results) {
                    const chunkIds = dereferenceToChunkIds([r.id]);
                    if (chunkIds.includes(targetId)) {
                        found = true;
                        break;
                    }
                }
            }
            else {
                const results = await vectorStore.search(queryEmb, K);
                found = results.some((r) => r.id === targetId);
            }
            // Get chunk's own embedding (-1 is the sentinel for "no embedding stored")
            const chunkEmb = await vectorStore.get(targetId);
            const queryToChunkSim = chunkEmb ? cosineSimilarity(queryEmb, chunkEmb) : -1;
            // Get index entry embeddings and find best match
            // (-1 / '(no entries)' remain when the chunk has no usable entry embedding)
            const entries = getIndexEntriesForChunk(targetId);
            let queryToBestEntrySim = -1;
            let bestEntryText = '(no entries)';
            for (const entry of entries) {
                const entryEmb = await indexVectorStore.get(entry.id);
                if (entryEmb) {
                    const sim = cosineSimilarity(queryEmb, entryEmb);
                    if (sim > queryToBestEntrySim) {
                        queryToBestEntrySim = sim;
                        bestEntryText = entry.description;
                    }
                }
            }
            const chunk = getChunkById(targetId);
            profiles.push({
                chunkId: targetId,
                query: q.query,
                found,
                queryToChunkSim,
                queryToBestEntrySim,
                indexEntryCount: entries.length,
                chunkTokens: chunk?.approxTokens ?? 0,
                clusterSize: clusterSizeMap.get(targetId) ?? 0,
                contentPreview: (chunk?.content ?? '').slice(0, 120).replace(/\n/g, ' '),
                bestEntryText,
            });
        }
        // 4. Split into hits vs misses and compare
        // NOTE: the -1 sentinels above are included in the means/medians below.
        const hits = profiles.filter((p) => p.found);
        const misses = profiles.filter((p) => !p.found);
        console.log(`══ Population Summary ══\n`);
        console.log(` Hits: ${hits.length}/${profiles.length}`);
        console.log(` Misses: ${misses.length}/${profiles.length}\n`);
        console.log(`══ Embedding Distance Comparison ══\n`);
        console.log(' Query → Chunk embedding (cosine similarity):');
        console.log(` Hits: mean=${mean(hits.map((p) => p.queryToChunkSim)).toFixed(3)} median=${median(hits.map((p) => p.queryToChunkSim)).toFixed(3)}`);
        console.log(` Misses: mean=${mean(misses.map((p) => p.queryToChunkSim)).toFixed(3)} median=${median(misses.map((p) => p.queryToChunkSim)).toFixed(3)}`);
        console.log('\n Query → Best index entry embedding (cosine similarity):');
        console.log(` Hits: mean=${mean(hits.map((p) => p.queryToBestEntrySim)).toFixed(3)} median=${median(hits.map((p) => p.queryToBestEntrySim)).toFixed(3)}`);
        console.log(` Misses: mean=${mean(misses.map((p) => p.queryToBestEntrySim)).toFixed(3)} median=${median(misses.map((p) => p.queryToBestEntrySim)).toFixed(3)}`);
        console.log(`\n══ Index Entry Count ══\n`);
        console.log(` Hits: mean=${mean(hits.map((p) => p.indexEntryCount)).toFixed(1)} median=${median(hits.map((p) => p.indexEntryCount))}`);
        console.log(` Misses: mean=${mean(misses.map((p) => p.indexEntryCount)).toFixed(1)} median=${median(misses.map((p) => p.indexEntryCount))}`);
        console.log(`\n══ Chunk Size (tokens) ══\n`);
        console.log(` Hits: mean=${mean(hits.map((p) => p.chunkTokens)).toFixed(0)} median=${median(hits.map((p) => p.chunkTokens))}`);
        console.log(` Misses: mean=${mean(misses.map((p) => p.chunkTokens)).toFixed(0)} median=${median(misses.map((p) => p.chunkTokens))}`);
        console.log(`\n══ Cluster Size ══\n`);
        console.log(` Hits: mean=${mean(hits.map((p) => p.clusterSize)).toFixed(1)} median=${median(hits.map((p) => p.clusterSize))}`);
        console.log(` Misses: mean=${mean(misses.map((p) => p.clusterSize)).toFixed(1)} median=${median(misses.map((p) => p.clusterSize))}`);
        // 5. Similarity distribution buckets
        console.log(`\n══ Best Entry Similarity Distribution ══\n`);
        // Buckets are half-open [min, max). The outer bounds are unbounded so a
        // similarity of exactly 1.0 (or a value pushed just past ±1 by rounding)
        // is still counted instead of silently falling out of every bucket.
        const simBuckets = [
            { label: '0.9-1.0', min: 0.9, max: Infinity },
            { label: '0.8-0.9', min: 0.8, max: 0.9 },
            { label: '0.7-0.8', min: 0.7, max: 0.8 },
            { label: '0.6-0.7', min: 0.6, max: 0.7 },
            { label: '0.5-0.6', min: 0.5, max: 0.6 },
            { label: '0.4-0.5', min: 0.4, max: 0.5 },
            { label: '<0.4', min: -Infinity, max: 0.4 },
        ];
        console.log(' Bucket Hits Misses');
        console.log(' ─────────────────────────');
        for (const { label, min, max } of simBuckets) {
            const hitCount = hits.filter((p) => p.queryToBestEntrySim >= min && p.queryToBestEntrySim < max).length;
            const missCount = misses.filter((p) => p.queryToBestEntrySim >= min && p.queryToBestEntrySim < max).length;
            console.log(` ${label.padStart(7)} ${String(hitCount).padStart(4)} ${String(missCount).padStart(6)}`);
        }
        // 6. Sample misses for qualitative review
        console.log(`\n══ Sample Misses (worst by entry similarity) ══\n`);
        const sortedMisses = [...misses].sort((a, b) => a.queryToBestEntrySim - b.queryToBestEntrySim);
        const samplesToShow = Math.min(10, sortedMisses.length);
        for (let i = 0; i < samplesToShow; i++) {
            const p = sortedMisses[i];
            console.log(` [${i + 1}] chunk=${p.chunkId.slice(0, 12)}… sim_chunk=${p.queryToChunkSim.toFixed(3)} sim_entry=${p.queryToBestEntrySim.toFixed(3)} entries=${p.indexEntryCount}`);
            console.log(` Query: ${p.query}`);
            console.log(` Entry: ${p.bestEntryText}`);
            console.log(` Chunk: ${p.contentPreview}…`);
            console.log();
        }
        // 7. Sample hits for comparison
        console.log(`══ Sample Hits (best by entry similarity) ══\n`);
        const sortedHits = [...hits].sort((a, b) => b.queryToBestEntrySim - a.queryToBestEntrySim);
        const hitSamplesToShow = Math.min(5, sortedHits.length);
        for (let i = 0; i < hitSamplesToShow; i++) {
            const p = sortedHits[i];
            console.log(` [${i + 1}] chunk=${p.chunkId.slice(0, 12)}… sim_chunk=${p.queryToChunkSim.toFixed(3)} sim_entry=${p.queryToBestEntrySim.toFixed(3)} entries=${p.indexEntryCount}`);
            console.log(` Query: ${p.query}`);
            console.log(` Entry: ${p.bestEntryText}`);
            console.log();
        }
    }
    finally {
        await embedder.dispose();
    }
    console.log('Done.');
}
243
// Script entry point: run the analysis; on any rejection, report it on stderr
// and terminate the process with exit code 1.
runAnalysis().catch((failure) => {
    console.error('Analysis failed:', failure);
    process.exit(1);
});
247
+ //# sourceMappingURL=analyze-misses.js.map