causantic 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -56
- package/dist/cli/skill-templates.d.ts.map +1 -1
- package/dist/cli/skill-templates.js +23 -18
- package/dist/cli/skill-templates.js.map +1 -1
- package/dist/clusters/cluster-manager.d.ts +16 -0
- package/dist/clusters/cluster-manager.d.ts.map +1 -1
- package/dist/clusters/cluster-manager.js +119 -1
- package/dist/clusters/cluster-manager.js.map +1 -1
- package/dist/config/loader.d.ts +16 -0
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +51 -0
- package/dist/config/loader.js.map +1 -1
- package/dist/config/memory-config.d.ts +26 -0
- package/dist/config/memory-config.d.ts.map +1 -1
- package/dist/config/memory-config.js +22 -0
- package/dist/config/memory-config.js.map +1 -1
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts +20 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.js +289 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts +53 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.js +91 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts +24 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.js +79 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/index.d.ts +11 -0
- package/dist/eval/experiments/index-differentiation/index.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/index.js +8 -0
- package/dist/eval/experiments/index-differentiation/index.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.d.ts +32 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.js +203 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.d.ts +20 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.js +338 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts +31 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.js +60 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/types.d.ts +114 -0
- package/dist/eval/experiments/index-differentiation/types.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/types.js +8 -0
- package/dist/eval/experiments/index-differentiation/types.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts +19 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js +328 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts +27 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js +154 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts +23 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.js +113 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts +17 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.js +341 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/types.d.ts +71 -0
- package/dist/eval/experiments/index-vs-chunk/types.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/types.js +8 -0
- package/dist/eval/experiments/index-vs-chunk/types.js.map +1 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts +18 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.js +347 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts +17 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js +247 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts +18 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js +443 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts +16 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.js +226 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.js.map +1 -0
- package/dist/index-entries/index-generator.d.ts +74 -0
- package/dist/index-entries/index-generator.d.ts.map +1 -0
- package/dist/index-entries/index-generator.js +323 -0
- package/dist/index-entries/index-generator.js.map +1 -0
- package/dist/index-entries/index-refresher.d.ts +54 -0
- package/dist/index-entries/index-refresher.d.ts.map +1 -0
- package/dist/index-entries/index-refresher.js +203 -0
- package/dist/index-entries/index-refresher.js.map +1 -0
- package/dist/index-entries/index.d.ts +6 -0
- package/dist/index-entries/index.d.ts.map +1 -0
- package/dist/index-entries/index.js +6 -0
- package/dist/index-entries/index.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest/index-entry-hook.d.ts +15 -0
- package/dist/ingest/index-entry-hook.d.ts.map +1 -0
- package/dist/ingest/index-entry-hook.js +84 -0
- package/dist/ingest/index-entry-hook.js.map +1 -0
- package/dist/ingest/ingest-session.d.ts.map +1 -1
- package/dist/ingest/ingest-session.js +72 -18
- package/dist/ingest/ingest-session.js.map +1 -1
- package/dist/ingest/session-state.d.ts +49 -0
- package/dist/ingest/session-state.d.ts.map +1 -0
- package/dist/ingest/session-state.js +158 -0
- package/dist/ingest/session-state.js.map +1 -0
- package/dist/maintenance/scheduler.d.ts.map +1 -1
- package/dist/maintenance/scheduler.js +25 -0
- package/dist/maintenance/scheduler.js.map +1 -1
- package/dist/maintenance/tasks/backfill-index.d.ts +27 -0
- package/dist/maintenance/tasks/backfill-index.d.ts.map +1 -0
- package/dist/maintenance/tasks/backfill-index.js +44 -0
- package/dist/maintenance/tasks/backfill-index.js.map +1 -0
- package/dist/mcp/tools.d.ts +4 -0
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +115 -7
- package/dist/mcp/tools.js.map +1 -1
- package/dist/models/embedder.js +2 -2
- package/dist/models/embedder.js.map +1 -1
- package/dist/models/model-registry.d.ts +2 -0
- package/dist/models/model-registry.d.ts.map +1 -1
- package/dist/models/model-registry.js +15 -0
- package/dist/models/model-registry.js.map +1 -1
- package/dist/repomap/cache.d.ts +58 -0
- package/dist/repomap/cache.d.ts.map +1 -0
- package/dist/repomap/cache.js +101 -0
- package/dist/repomap/cache.js.map +1 -0
- package/dist/repomap/graph.d.ts +54 -0
- package/dist/repomap/graph.d.ts.map +1 -0
- package/dist/repomap/graph.js +113 -0
- package/dist/repomap/graph.js.map +1 -0
- package/dist/repomap/index.d.ts +83 -0
- package/dist/repomap/index.d.ts.map +1 -0
- package/dist/repomap/index.js +99 -0
- package/dist/repomap/index.js.map +1 -0
- package/dist/repomap/parser.d.ts +43 -0
- package/dist/repomap/parser.d.ts.map +1 -0
- package/dist/repomap/parser.js +994 -0
- package/dist/repomap/parser.js.map +1 -0
- package/dist/repomap/regex-parser.d.ts +24 -0
- package/dist/repomap/regex-parser.d.ts.map +1 -0
- package/dist/repomap/regex-parser.js +190 -0
- package/dist/repomap/regex-parser.js.map +1 -0
- package/dist/repomap/renderer.d.ts +40 -0
- package/dist/repomap/renderer.d.ts.map +1 -0
- package/dist/repomap/renderer.js +163 -0
- package/dist/repomap/renderer.js.map +1 -0
- package/dist/repomap/scanner.d.ts +32 -0
- package/dist/repomap/scanner.d.ts.map +1 -0
- package/dist/repomap/scanner.js +171 -0
- package/dist/repomap/scanner.js.map +1 -0
- package/dist/retrieval/chain-assembler.d.ts.map +1 -1
- package/dist/retrieval/chain-assembler.js +22 -3
- package/dist/retrieval/chain-assembler.js.map +1 -1
- package/dist/retrieval/index.d.ts +2 -0
- package/dist/retrieval/index.d.ts.map +1 -1
- package/dist/retrieval/index.js +2 -0
- package/dist/retrieval/index.js.map +1 -1
- package/dist/retrieval/mmr.d.ts +1 -0
- package/dist/retrieval/mmr.d.ts.map +1 -1
- package/dist/retrieval/mmr.js +35 -1
- package/dist/retrieval/mmr.js.map +1 -1
- package/dist/retrieval/search-assembler.d.ts +10 -1
- package/dist/retrieval/search-assembler.d.ts.map +1 -1
- package/dist/retrieval/search-assembler.js +249 -81
- package/dist/retrieval/search-assembler.js.map +1 -1
- package/dist/retrieval/session-reconstructor.d.ts +36 -0
- package/dist/retrieval/session-reconstructor.d.ts.map +1 -1
- package/dist/retrieval/session-reconstructor.js +126 -0
- package/dist/retrieval/session-reconstructor.js.map +1 -1
- package/dist/storage/db.d.ts.map +1 -1
- package/dist/storage/db.js +15 -0
- package/dist/storage/db.js.map +1 -1
- package/dist/storage/index-entry-store.d.ts +71 -0
- package/dist/storage/index-entry-store.d.ts.map +1 -0
- package/dist/storage/index-entry-store.js +275 -0
- package/dist/storage/index-entry-store.js.map +1 -0
- package/dist/storage/index.d.ts +5 -2
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +5 -1
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/migrations.d.ts.map +1 -1
- package/dist/storage/migrations.js +102 -0
- package/dist/storage/migrations.js.map +1 -1
- package/dist/storage/schema.sql +68 -2
- package/dist/storage/session-state-store.d.ts +61 -0
- package/dist/storage/session-state-store.d.ts.map +1 -0
- package/dist/storage/session-state-store.js +119 -0
- package/dist/storage/session-state-store.js.map +1 -0
- package/dist/storage/types.d.ts +50 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/vector-store.d.ts +17 -2
- package/dist/storage/vector-store.d.ts.map +1 -1
- package/dist/storage/vector-store.js +96 -36
- package/dist/storage/vector-store.js.map +1 -1
- package/package.json +4 -2
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generate natural language search queries from chunks using LLM.
|
|
3
|
+
*
|
|
4
|
+
* Each query is something a user might search for that should find
|
|
5
|
+
* the source chunk. The LLM sees a truncated chunk and generates
|
|
6
|
+
* a plausible search query.
|
|
7
|
+
*/
|
|
8
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
9
|
+
import { createSecretStore } from '../../../utils/secret-store.js';
|
|
10
|
+
/**
 * Produce benchmark search queries for every chunk in `chunks`.
 *
 * The chunks are processed sequentially in groups of ten; each group is
 * handed to `generateBatch` and the per-group results are concatenated in
 * order. A group that fails contributes no entries, so the returned array
 * may be shorter than `chunks`.
 *
 * @param {Array<object>} chunks - Sampled chunks to write queries for.
 * @param {string} model - Model identifier forwarded to the API call.
 * @returns {Promise<Array<object>>} Generated query records.
 * @throws {Error} When no Anthropic client could be created.
 */
export async function generateSearchQueries(chunks, model) {
    const anthropic = await getClient();
    if (!anthropic) {
        throw new Error('No Anthropic API key available');
    }
    const groupSize = 10;
    const collected = [];
    let offset = 0;
    while (offset < chunks.length) {
        const group = chunks.slice(offset, offset + groupSize);
        const generated = await generateBatch(anthropic, group, model);
        collected.push(...generated);
        offset += groupSize;
    }
    return collected;
}
|
|
29
|
+
/**
 * Ask the model for one search query per chunk in a single request.
 *
 * Each chunk's content is cut to at most 2000 characters before being
 * placed in the prompt. The reply is parsed with `parseResponse`; chunks
 * for which no query line was found are dropped from the result.
 *
 * @param {object} client - Client exposing `messages.create`.
 * @param {Array<object>} chunks - Chunks of this batch.
 * @param {string} model - Model identifier for the request.
 * @returns {Promise<Array<object>>} Query records; empty when the request
 *   or its handling throws (a warning is logged in that case).
 */
async function generateBatch(client, chunks, model) {
    const charBudget = 500 * 4; // ~500 tokens per chunk
    const sections = [];
    chunks.forEach((chunk, position) => {
        let body = chunk.content;
        if (body.length > charBudget) {
            body = body.slice(0, charBudget) + '\n...[truncated]';
        }
        sections.push(`--- Chunk ${position} ---\n${body}`);
    });
    const chunkTexts = sections.join('\n\n');
    const prompt = `You are generating natural language search queries for a retrieval benchmark. For each conversation chunk below, write a short search query (5-15 words) that a user would type to find this specific chunk.

Requirements:
- The query should be something a real user would search for
- It should target the SPECIFIC content of this chunk, not the general topic
- Use natural language, not keywords
- Do NOT quote or copy text directly from the chunk
- Focus on the key decision, action, or outcome in the chunk

${chunkTexts}

Respond with exactly ${chunks.length} queries, one per line, prefixed with the chunk number:
0: [query]
1: [query]
...`;
    try {
        const request = {
            model,
            max_tokens: Math.min(2048, chunks.length * 100),
            messages: [{ role: 'user', content: prompt }],
        };
        const response = await client.messages.create(request);
        // Only the first content block is inspected; non-text yields no queries.
        const lead = response.content[0];
        const text = lead.type === 'text' ? lead.text : '';
        const parsed = parseResponse(text, chunks.length);
        const generated = [];
        for (let position = 0; position < chunks.length; position++) {
            const query = parsed[position];
            if (!query) {
                continue;
            }
            const chunk = chunks[position];
            generated.push({
                query,
                groundTruthChunkId: chunk.id,
                sessionSlug: chunk.sessionSlug,
                clusterId: chunk.clusterId,
                clusterName: chunk.clusterName,
            });
        }
        return generated;
    }
    catch (error) {
        // Best effort: a failed batch is reported and skipped, not fatal.
        console.warn(` Query generation batch failed: ${error.message}`);
        return [];
    }
}
|
|
82
|
+
/**
 * Extract `<index>: <query>` lines from the model reply.
 *
 * @param {string} text - Raw reply text.
 * @param {number} expectedCount - Number of slots in the returned array.
 * @returns {Array<string|null>} Array of length `expectedCount`; slot `i`
 *   holds the trimmed query for index `i`, or `null` when no line with an
 *   in-range index `i` was found. Later lines overwrite earlier ones.
 */
function parseResponse(text, expectedCount) {
    const slots = new Array(expectedCount).fill(null);
    text.split('\n').forEach((rawLine) => {
        const parsed = /^(\d+):\s*(.+)/.exec(rawLine);
        if (!parsed) {
            return;
        }
        const [, digits, body] = parsed;
        const slot = parseInt(digits, 10);
        const inRange = slot >= 0 && slot < expectedCount;
        if (inRange) {
            slots[slot] = body.trim();
        }
    });
    return slots;
}
|
|
96
|
+
/**
 * Build an Anthropic client, or return `null` when no API key is available.
 *
 * If `ANTHROPIC_API_KEY` is not set in the environment, the secret store is
 * queried for `anthropic-api-key`; a key found there is written into
 * `process.env.ANTHROPIC_API_KEY`. Errors from the secret store are ignored.
 *
 * @returns {Promise<object|null>} A new client, or `null` without a key.
 */
async function getClient() {
    if (process.env.ANTHROPIC_API_KEY) {
        return new Anthropic();
    }
    try {
        const storedKey = await createSecretStore().get('anthropic-api-key');
        if (storedKey) {
            process.env.ANTHROPIC_API_KEY = storedKey;
        }
    }
    catch {
        // Keychain not available
    }
    return process.env.ANTHROPIC_API_KEY ? new Anthropic() : null;
}
|
|
113
|
+
//# sourceMappingURL=query-generator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query-generator.js","sourceRoot":"","sources":["../../../../src/eval/experiments/index-vs-chunk/query-generator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAYnE;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,MAA0B,EAC1B,KAAa;IAEb,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;IACjC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,MAAM,OAAO,GAAqB,EAAE,CAAC;IACrC,MAAM,SAAS,GAAG,EAAE,CAAC;IAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,MAAiB,EACjB,MAA0B,EAC1B,KAAa;IAEb,MAAM,eAAe,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,wBAAwB;IAEzD,MAAM,UAAU,GAAG,MAAM;SACtB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GACX,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,eAAe;YAChC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,eAAe,CAAC,GAAG,kBAAkB;YAC1D,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;QAChB,OAAO,aAAa,CAAC,SAAS,OAAO,EAAE,CAAC;IAC1C,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG;;;;;;;;;EASf,UAAU;;uBAEW,MAAM,CAAC,MAAM;;;IAGhC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC5C,KAAK;YACL,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC;YAC/C,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;SAC9C,CAAC,CAAC;QAEH,MAAM,IAAI,GACR,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACtE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;QAEnD,OAAO,MAAM;aACV,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;YAChB,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YACzB,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC;YACxB,OAAO;gBACL,KAAK;gBACL,kBAAkB,EAAE,KAAK,CAAC
,EAAE;gBAC5B,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;gBAC1B,WAAW,EAAE,KAAK,CAAC,WAAW;aAC/B,CAAC;QACJ,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,CAAC,EAAuB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CACV,oCAAqC,KAAe,CAAC,OAAO,EAAE,CAC/D,CAAC;QACF,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CACpB,IAAY,EACZ,aAAqB;IAErB,MAAM,OAAO,GAAsB,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAE/B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACrC,IAAI,KAAK,IAAI,CAAC,IAAI,KAAK,GAAG,aAAa,EAAE,CAAC;gBACxC,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,SAAS;IACtB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,iBAAiB,EAAE,CAAC;YAClC,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;YACvD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,SAAS,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAAE,OAAO,IAAI,CAAC;IAChD,OAAO,IAAI,SAAS,EAAE,CAAC;AACzB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Index vs Chunk Retrieval Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Compares search quality when using the semantic index layer vs
|
|
5
|
+
* direct chunk search, using LLM-generated natural language queries.
|
|
6
|
+
*
|
|
7
|
+
* Steps:
|
|
8
|
+
* 1. Sample chunks across diverse clusters
|
|
9
|
+
* 2. Generate natural language search queries via LLM
|
|
10
|
+
* 3. Run each query through both search paths (index + chunk)
|
|
11
|
+
* 4. Compare recall@K, MRR, latency
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* npx tsx src/eval/experiments/index-vs-chunk/run-experiment.ts [--sample-size=N]
|
|
15
|
+
*/
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=run-experiment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-experiment.d.ts","sourceRoot":"","sources":["../../../../src/eval/experiments/index-vs-chunk/run-experiment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG"}
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Index vs Chunk Retrieval Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Compares search quality when using the semantic index layer vs
|
|
5
|
+
* direct chunk search, using LLM-generated natural language queries.
|
|
6
|
+
*
|
|
7
|
+
* Steps:
|
|
8
|
+
* 1. Sample chunks across diverse clusters
|
|
9
|
+
* 2. Generate natural language search queries via LLM
|
|
10
|
+
* 3. Run each query through both search paths (index + chunk)
|
|
11
|
+
* 4. Compare recall@K, MRR, latency
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* npx tsx src/eval/experiments/index-vs-chunk/run-experiment.ts [--sample-size=N]
|
|
15
|
+
*/
|
|
16
|
+
import { writeFileSync } from 'fs';
|
|
17
|
+
import { getDb } from '../../../storage/db.js';
|
|
18
|
+
import { vectorStore, indexVectorStore } from '../../../storage/vector-store.js';
|
|
19
|
+
import { getChunkById } from '../../../storage/chunk-store.js';
|
|
20
|
+
import { getIndexEntryCount, getIndexedChunkCount, dereferenceToChunkIds, searchIndexEntriesByKeyword, } from '../../../storage/index-entry-store.js';
|
|
21
|
+
import { getAllClusters, getClusterChunkIds } from '../../../storage/cluster-store.js';
|
|
22
|
+
import { Embedder } from '../../../models/embedder.js';
|
|
23
|
+
import { getModel } from '../../../models/model-registry.js';
|
|
24
|
+
import { loadConfig, toRuntimeConfig } from '../../../config/loader.js';
|
|
25
|
+
import { KeywordStore } from '../../../storage/keyword-store.js';
|
|
26
|
+
import { fuseRRF } from '../../../retrieval/rrf.js';
|
|
27
|
+
import { generateSearchQueries } from './query-generator.js';
|
|
28
|
+
/**
 * Seeded linear congruential generator for reproducible sampling.
 *
 * @param {number} seed - Initial generator state.
 * @returns {() => number} Function that advances the state and returns it
 *   divided by 0x7fffffff.
 */
function createRng(seed) {
    const MULTIPLIER = 1664525;
    const INCREMENT = 1013904223;
    const MASK = 0x7fffffff;
    let state = seed;
    function next() {
        state = (state * MULTIPLIER + INCREMENT) & MASK;
        return state / MASK;
    }
    return next;
}
|
|
36
|
+
/**
 * Return a shuffled copy of `items` using a Fisher-Yates pass driven by
 * `rng`. Unlike sorting with a random comparator, every permutation is
 * equally likely and the result does not depend on the engine's sort
 * algorithm.
 */
function seededShuffle(items, rng) {
    const copy = [...items];
    for (let i = copy.length - 1; i > 0; i--) {
        // Clamp defensively in case rng() returns exactly 1.
        const j = Math.min(i, Math.floor(rng() * (i + 1)));
        [copy[i], copy[j]] = [copy[j], copy[i]];
    }
    return copy;
}
/**
 * Sample chunks across diverse clusters.
 * Takes 1-2 chunks per cluster to ensure diversity.
 *
 * Clusters are visited in a seeded random order. Clusters with fewer than
 * two chunks are skipped, as are chunks that cannot be loaded or whose
 * content is shorter than 100 characters. When a candidate chunk is
 * rejected, the next candidate from the same cluster is tried so the
 * per-cluster quota is still met when possible.
 *
 * @param {number} sampleSize - Maximum number of chunks to return.
 * @param {number} seed - Seed for the random generator.
 * @returns {Array<{id, sessionSlug, content, clusterId, clusterName}>}
 *   At most `sampleSize` sampled chunks.
 * @throws {Error} When no clusters exist.
 */
function sampleChunks(sampleSize, seed) {
    getDb(); // ensure init
    const clusters = getAllClusters();
    if (clusters.length === 0) {
        throw new Error('No clusters found. Run clustering first.');
    }
    const rng = createRng(seed);
    const result = [];
    for (const cluster of seededShuffle(clusters, rng)) {
        if (result.length >= sampleSize) {
            break;
        }
        const chunkIds = getClusterChunkIds(cluster.id);
        if (chunkIds.length < 2) {
            continue;
        }
        // Pick 1-2 random chunks from this cluster
        const numPicks = Math.min(2, Math.ceil(sampleSize / clusters.length), chunkIds.length);
        let picked = 0;
        for (const chunkId of seededShuffle(chunkIds, rng)) {
            if (picked >= numPicks || result.length >= sampleSize) {
                break;
            }
            const chunk = getChunkById(chunkId);
            if (!chunk || chunk.content.length < 100) {
                // Unusable candidate: try the next one instead of losing the slot.
                continue;
            }
            result.push({
                id: chunk.id,
                sessionSlug: chunk.sessionSlug,
                content: chunk.content,
                clusterId: cluster.id,
                clusterName: cluster.name,
            });
            picked++;
        }
    }
    return result;
}
|
|
74
|
+
/**
 * Search using the index-based path (index entry embeddings + FTS).
 * Returns ranked chunk IDs.
 *
 * Vector hits from the index store and keyword hits on index entries are
 * fused with RRF; each fused index entry is then dereferenced to its chunk
 * IDs, keeping the first occurrence of every chunk ID in fused order.
 *
 * @param queryEmbedding - Embedding of the query text.
 * @param {string} queryText - Raw query for the keyword search.
 * @param {number} vectorSearchLimit - Chunk-level vector search budget.
 * @param {object} hybridSearch - Settings read here: `keywordSearchLimit`,
 *   `vectorWeight`, `keywordWeight`, `rrfK`.
 * @returns {Promise<Array>} De-duplicated chunk IDs, best first; empty when
 *   neither search returned anything.
 */
async function searchViaIndex(queryEmbedding, queryText, vectorSearchLimit, hybridSearch) {
    const runtimeConfig = toRuntimeConfig(loadConfig());
    indexVectorStore.setModelId(runtimeConfig.embeddingModel);
    // Scale search limit by entries-per-chunk ratio so the index path
    // covers roughly the same number of unique chunks as the chunk path.
    const entryCount = getIndexEntryCount();
    const indexedChunks = getIndexedChunkCount();
    const entriesPerChunk = indexedChunks > 0 ? entryCount / indexedChunks : 1;
    const indexSearchLimit = Math.ceil(vectorSearchLimit * entriesPerChunk);
    const indexSimilar = await indexVectorStore.search(queryEmbedding, indexSearchLimit);
    let indexKeywordResults = [];
    try {
        indexKeywordResults = searchIndexEntriesByKeyword(queryText, hybridSearch.keywordSearchLimit);
    }
    catch {
        // FTS unavailable
    }
    if (indexSimilar.length === 0 && indexKeywordResults.length === 0) {
        return [];
    }
    // Here `chunkId` carries the index entry ID; it is dereferenced below.
    const indexVectorItems = indexSimilar.map((s) => ({
        chunkId: s.id,
        score: Math.max(0, 1 - s.distance),
        source: 'vector',
    }));
    const indexKeywordItems = indexKeywordResults.map((r) => ({
        chunkId: r.id,
        score: r.score,
        source: 'keyword',
    }));
    const indexFused = fuseRRF([
        { items: indexVectorItems, weight: hybridSearch.vectorWeight },
        ...(indexKeywordItems.length > 0
            ? [{ items: indexKeywordItems, weight: hybridSearch.keywordWeight }]
            : []),
    ], hybridSearch.rrfK);
    // Dereference to chunk IDs. A Set iterates in insertion order, so this
    // keeps first-seen ranking while avoiding a linear scan per candidate.
    const seenChunkIds = new Set();
    for (const item of indexFused) {
        for (const cid of dereferenceToChunkIds([item.chunkId])) {
            seenChunkIds.add(cid);
        }
    }
    return [...seenChunkIds];
}
|
|
125
|
+
/**
 * Chunk-based search path: chunk embeddings plus keyword (FTS) search,
 * fused with RRF.
 *
 * @param queryEmbedding - Embedding of the query text.
 * @param {string} queryText - Raw query for the keyword search.
 * @param {number} vectorSearchLimit - Maximum number of vector hits.
 * @param {object} hybridSearch - Settings read here: `keywordSearchLimit`,
 *   `vectorWeight`, `keywordWeight`, `rrfK`.
 * @returns {Promise<Array>} Chunk IDs in fused rank order; empty when
 *   neither search returned anything.
 */
async function searchViaChunks(queryEmbedding, queryText, vectorSearchLimit, hybridSearch) {
    const { embeddingModel } = toRuntimeConfig(loadConfig());
    vectorStore.setModelId(embeddingModel);
    const vectorHits = await vectorStore.search(queryEmbedding, vectorSearchLimit);
    let keywordHits = [];
    try {
        keywordHits = new KeywordStore().search(queryText, hybridSearch.keywordSearchLimit);
    }
    catch {
        // FTS unavailable
    }
    const nothingFound = vectorHits.length === 0 && keywordHits.length === 0;
    if (nothingFound) {
        return [];
    }
    const toVectorItem = (hit) => ({
        chunkId: hit.id,
        score: Math.max(0, 1 - hit.distance),
        source: 'vector',
    });
    const toKeywordItem = (hit) => ({
        chunkId: hit.id,
        score: hit.score,
        source: 'keyword',
    });
    const rankedLists = [
        { items: vectorHits.map(toVectorItem), weight: hybridSearch.vectorWeight },
    ];
    // The keyword list only takes part in fusion when it has entries.
    if (keywordHits.length > 0) {
        rankedLists.push({
            items: keywordHits.map(toKeywordItem),
            weight: hybridSearch.keywordWeight,
        });
    }
    const fused = fuseRRF(rankedLists, hybridSearch.rrfK);
    return fused.map(({ chunkId }) => chunkId);
}
|
|
161
|
+
/**
 * Compute metrics from per-query results.
 *
 * A rank of 0 means the ground truth was not returned; positive ranks are
 * 1-based positions. All ratios are taken over the total number of queries.
 * An empty `results` array yields zeros for every metric instead of NaN.
 *
 * @param {Array<object>} results - Per-query records; `results[i][path]`
 *   must provide `rank` and `durationMs`.
 * @param {string} path - Key of the search path to score ('index' or 'chunk').
 * @returns {{recallAt5: number, recallAt10: number, recallAt20: number,
 *   mrr: number, hitRate: number, meanLatencyMs: number,
 *   medianLatencyMs: number}} Aggregated metrics.
 */
function computeMetrics(results, path) {
    const total = results.length;
    if (total === 0) {
        // No queries: report zeros rather than dividing by zero.
        return {
            recallAt5: 0,
            recallAt10: 0,
            recallAt20: 0,
            mrr: 0,
            hitRate: 0,
            meanLatencyMs: 0,
            medianLatencyMs: 0,
        };
    }
    const ranks = results.map((r) => r[path].rank);
    const durations = results.map((r) => r[path].durationMs);
    const hits = ranks.filter((r) => r > 0);
    const recallAtK = (k) => hits.filter((r) => r <= k).length / total;
    const mrr = hits.reduce((sum, r) => sum + 1 / r, 0) / total;
    const hitRate = hits.length / total;
    const sortedDurations = [...durations].sort((a, b) => a - b);
    const mid = Math.floor(total / 2);
    // Even-length input has no single middle value: average the two central ones.
    const medianLatency = total % 2 === 1
        ? sortedDurations[mid]
        : (sortedDurations[mid - 1] + sortedDurations[mid]) / 2;
    const meanLatency = durations.reduce((a, b) => a + b, 0) / total;
    return {
        recallAt5: recallAtK(5),
        recallAt10: recallAtK(10),
        recallAt20: recallAtK(20),
        mrr,
        hitRate,
        meanLatencyMs: meanLatency,
        medianLatencyMs: medianLatency,
    };
}
|
|
189
|
+
/**
 * Render a number with a fixed count of decimal places.
 *
 * @param {number} n - Value to format.
 * @param {number} [d=3] - Number of digits after the decimal point.
 * @returns {string} Result of `n.toFixed(d)`.
 */
function fmt(n, d = 3) {
    const rendered = n.toFixed(d);
    return rendered;
}
|
|
192
|
+
/**
|
|
193
|
+
* Run the full A/B benchmark.
|
|
194
|
+
*/
|
|
195
|
+
async function runBenchmark() {
|
|
196
|
+
const args = process.argv.slice(2);
|
|
197
|
+
const sampleSizeArg = args.find((a) => a.startsWith('--sample-size='));
|
|
198
|
+
const sampleSize = sampleSizeArg
|
|
199
|
+
? parseInt(sampleSizeArg.split('=')[1], 10)
|
|
200
|
+
: 100;
|
|
201
|
+
const seed = 42;
|
|
202
|
+
console.log('=== Index vs Chunk Retrieval Benchmark ===\n');
|
|
203
|
+
// Check prerequisites
|
|
204
|
+
const _db = getDb();
|
|
205
|
+
const entryCount = getIndexEntryCount();
|
|
206
|
+
console.log(`Index entries: ${entryCount}`);
|
|
207
|
+
if (entryCount === 0) {
|
|
208
|
+
console.log('No index entries. Run backfill first.');
|
|
209
|
+
process.exit(1);
|
|
210
|
+
}
|
|
211
|
+
const externalConfig = loadConfig();
|
|
212
|
+
const config = toRuntimeConfig(externalConfig);
|
|
213
|
+
// 1. Sample chunks
|
|
214
|
+
console.log(`\nSampling ${sampleSize} chunks across clusters...`);
|
|
215
|
+
const sampledChunks = sampleChunks(sampleSize, seed);
|
|
216
|
+
console.log(` Sampled ${sampledChunks.length} chunks from ${new Set(sampledChunks.map((c) => c.clusterId)).size} clusters`);
|
|
217
|
+
// 2. Generate search queries
|
|
218
|
+
console.log('\nGenerating natural language search queries via LLM...');
|
|
219
|
+
const queries = await generateSearchQueries(sampledChunks, config.clusterRefreshModel);
|
|
220
|
+
console.log(` Generated ${queries.length} queries (${sampledChunks.length - queries.length} failed)`);
|
|
221
|
+
if (queries.length === 0) {
|
|
222
|
+
console.log('No queries generated. Check API key.');
|
|
223
|
+
process.exit(1);
|
|
224
|
+
}
|
|
225
|
+
// 3. Prepare embedder
|
|
226
|
+
const embedder = new Embedder();
|
|
227
|
+
await embedder.load(getModel(config.embeddingModel));
|
|
228
|
+
const hybridSearch = config.hybridSearch;
|
|
229
|
+
const vectorSearchLimit = 20;
|
|
230
|
+
// 4. Run queries through both paths
|
|
231
|
+
console.log(`\nRunning ${queries.length} queries through both search paths...`);
|
|
232
|
+
const perQuery = [];
|
|
233
|
+
for (let i = 0; i < queries.length; i++) {
|
|
234
|
+
const q = queries[i];
|
|
235
|
+
if ((i + 1) % 10 === 0 || i === queries.length - 1) {
|
|
236
|
+
console.log(` Query ${i + 1}/${queries.length}`);
|
|
237
|
+
}
|
|
238
|
+
// Embed query
|
|
239
|
+
const embedResult = await embedder.embed(q.query, true);
|
|
240
|
+
const queryEmbedding = embedResult.embedding;
|
|
241
|
+
// Index path
|
|
242
|
+
const indexStart = Date.now();
|
|
243
|
+
const indexResults = await searchViaIndex(queryEmbedding, q.query, vectorSearchLimit, hybridSearch);
|
|
244
|
+
const indexDuration = Date.now() - indexStart;
|
|
245
|
+
// Chunk path
|
|
246
|
+
const chunkStart = Date.now();
|
|
247
|
+
const chunkResults = await searchViaChunks(queryEmbedding, q.query, vectorSearchLimit, hybridSearch);
|
|
248
|
+
const chunkDuration = Date.now() - chunkStart;
|
|
249
|
+
// Find rank of ground truth
|
|
250
|
+
const indexRank = indexResults.indexOf(q.groundTruthChunkId) + 1; // 0 = not found
|
|
251
|
+
const chunkRank = chunkResults.indexOf(q.groundTruthChunkId) + 1;
|
|
252
|
+
perQuery.push({
|
|
253
|
+
query: q.query,
|
|
254
|
+
groundTruthChunkId: q.groundTruthChunkId,
|
|
255
|
+
index: {
|
|
256
|
+
rank: indexRank,
|
|
257
|
+
totalReturned: indexResults.length,
|
|
258
|
+
durationMs: indexDuration,
|
|
259
|
+
},
|
|
260
|
+
chunk: {
|
|
261
|
+
rank: chunkRank,
|
|
262
|
+
totalReturned: chunkResults.length,
|
|
263
|
+
durationMs: chunkDuration,
|
|
264
|
+
},
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
await embedder.dispose();
|
|
268
|
+
// 5. Compute metrics
|
|
269
|
+
const indexMetrics = computeMetrics(perQuery, 'index');
|
|
270
|
+
const chunkMetrics = computeMetrics(perQuery, 'chunk');
|
|
271
|
+
// 6. Display results
|
|
272
|
+
console.log('\n══ Results ══\n');
|
|
273
|
+
console.log(' Metric Index Path Chunk Path Delta');
|
|
274
|
+
console.log(' ' + '─'.repeat(55));
|
|
275
|
+
console.log(` Recall@5 ${fmt(indexMetrics.recallAt5 * 100, 1)}% ${fmt(chunkMetrics.recallAt5 * 100, 1)}% ${fmt((indexMetrics.recallAt5 - chunkMetrics.recallAt5) * 100, 1)}%`);
|
|
276
|
+
console.log(` Recall@10 ${fmt(indexMetrics.recallAt10 * 100, 1)}% ${fmt(chunkMetrics.recallAt10 * 100, 1)}% ${fmt((indexMetrics.recallAt10 - chunkMetrics.recallAt10) * 100, 1)}%`);
|
|
277
|
+
console.log(` Recall@20 ${fmt(indexMetrics.recallAt20 * 100, 1)}% ${fmt(chunkMetrics.recallAt20 * 100, 1)}% ${fmt((indexMetrics.recallAt20 - chunkMetrics.recallAt20) * 100, 1)}%`);
|
|
278
|
+
console.log(` MRR ${fmt(indexMetrics.mrr)} ${fmt(chunkMetrics.mrr)} ${fmt(indexMetrics.mrr - chunkMetrics.mrr)}`);
|
|
279
|
+
console.log(` Hit Rate ${fmt(indexMetrics.hitRate * 100, 1)}% ${fmt(chunkMetrics.hitRate * 100, 1)}% ${fmt((indexMetrics.hitRate - chunkMetrics.hitRate) * 100, 1)}%`);
|
|
280
|
+
console.log(` Mean Latency ${fmt(indexMetrics.meanLatencyMs, 0)}ms ${fmt(chunkMetrics.meanLatencyMs, 0)}ms ${fmt(indexMetrics.meanLatencyMs - chunkMetrics.meanLatencyMs, 0)}ms`);
|
|
281
|
+
console.log(` Median Latency ${fmt(indexMetrics.medianLatencyMs, 0)}ms ${fmt(chunkMetrics.medianLatencyMs, 0)}ms ${fmt(indexMetrics.medianLatencyMs - chunkMetrics.medianLatencyMs, 0)}ms`);
|
|
282
|
+
// Show examples where paths disagree
|
|
283
|
+
const indexWins = perQuery.filter((q) => q.index.rank > 0 &&
|
|
284
|
+
(q.chunk.rank === 0 || q.index.rank < q.chunk.rank));
|
|
285
|
+
const chunkWins = perQuery.filter((q) => q.chunk.rank > 0 &&
|
|
286
|
+
(q.index.rank === 0 || q.chunk.rank < q.index.rank));
|
|
287
|
+
const ties = perQuery.filter((q) => q.index.rank > 0 && q.index.rank === q.chunk.rank);
|
|
288
|
+
console.log(`\n Path comparison: Index wins ${indexWins.length}, Chunk wins ${chunkWins.length}, Ties ${ties.length}, Both miss ${perQuery.length - indexWins.length - chunkWins.length - ties.length}`);
|
|
289
|
+
if (indexWins.length > 0) {
|
|
290
|
+
console.log('\n Sample queries where INDEX path wins:');
|
|
291
|
+
for (const q of indexWins.slice(0, 3)) {
|
|
292
|
+
console.log(` "${q.query}" → index rank ${q.index.rank}, chunk rank ${q.chunk.rank || 'miss'}`);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
if (chunkWins.length > 0) {
|
|
296
|
+
console.log('\n Sample queries where CHUNK path wins:');
|
|
297
|
+
for (const q of chunkWins.slice(0, 3)) {
|
|
298
|
+
console.log(` "${q.query}" → chunk rank ${q.chunk.rank}, index rank ${q.index.rank || 'miss'}`);
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
// 7. Summary
|
|
302
|
+
const summary = [];
|
|
303
|
+
summary.push(`Benchmark: ${queries.length} natural language queries across ${new Set(queries.map((q) => q.clusterId)).size} clusters`);
|
|
304
|
+
const recallDelta5 = indexMetrics.recallAt5 - chunkMetrics.recallAt5;
|
|
305
|
+
const mrrDelta = indexMetrics.mrr - chunkMetrics.mrr;
|
|
306
|
+
if (recallDelta5 > 0.05) {
|
|
307
|
+
summary.push(`Index path BETTER: +${fmt(recallDelta5 * 100, 1)}% recall@5, +${fmt(mrrDelta, 3)} MRR`);
|
|
308
|
+
}
|
|
309
|
+
else if (recallDelta5 < -0.05) {
|
|
310
|
+
summary.push(`Chunk path BETTER: ${fmt(recallDelta5 * 100, 1)}% recall@5, ${fmt(mrrDelta, 3)} MRR`);
|
|
311
|
+
}
|
|
312
|
+
else {
|
|
313
|
+
summary.push(`Paths comparable: ${fmt(recallDelta5 * 100, 1)}% recall@5 delta, ${fmt(mrrDelta, 3)} MRR delta`);
|
|
314
|
+
}
|
|
315
|
+
summary.push(`Index path wins ${indexWins.length}/${perQuery.length} queries, chunk path wins ${chunkWins.length}/${perQuery.length}`);
|
|
316
|
+
console.log('\n══ Summary ══\n');
|
|
317
|
+
for (const line of summary) {
|
|
318
|
+
console.log(` • ${line}`);
|
|
319
|
+
}
|
|
320
|
+
return {
|
|
321
|
+
timestamp: new Date().toISOString(),
|
|
322
|
+
queryCount: queries.length,
|
|
323
|
+
failedQueryCount: sampledChunks.length - queries.length,
|
|
324
|
+
indexMetrics,
|
|
325
|
+
chunkMetrics,
|
|
326
|
+
perQuery,
|
|
327
|
+
summary,
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
// ── CLI entrypoint ──────────────────────────────────────────────────────────
|
|
331
|
+
// Run the benchmark, serialize the returned report as pretty-printed JSON to a
// relative path, and report where it was written. Any failure (from the
// benchmark itself or from writing the file) is logged and the process exits
// with status 1.
void (async () => {
    try {
        const report = await runBenchmark();
        const reportFile = 'index-vs-chunk-report.json';
        const serialized = JSON.stringify(report, null, 2);
        writeFileSync(reportFile, serialized);
        console.log(`\nReport written to ${reportFile}`);
    }
    catch (failure) {
        console.error('Benchmark failed:', failure);
        process.exit(1);
    }
})();
|
|
341
|
+
//# sourceMappingURL=run-experiment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-experiment.js","sourceRoot":"","sources":["../../../../src/eval/experiments/index-vs-chunk/run-experiment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,wBAAwB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,EACpB,qBAAqB,EACrB,2BAA2B,GAC5B,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AACvF,OAAO,EAAE,QAAQ,EAAE,MAAM,6BAA6B,CAAC;AACvD,OAAO,EAAE,QAAQ,EAAE,MAAM,mCAAmC,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AACxE,OAAO,EAAE,YAAY,EAAE,MAAM,mCAAmC,CAAC;AACjE,OAAO,EAAE,OAAO,EAAmB,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAyB,MAAM,sBAAsB,CAAC;AAOpF,6CAA6C;AAC7C,SAAS,SAAS,CAAC,IAAY;IAC7B,IAAI,CAAC,GAAG,IAAI,CAAC;IACb,OAAO,GAAG,EAAE;QACV,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAC;QAC5C,OAAO,CAAC,GAAG,UAAU,CAAC;IACxB,CAAC,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CACnB,UAAkB,EAClB,IAAY;IAEZ,KAAK,EAAE,CAAC,CAAC,cAAc;IAEvB,MAAM,QAAQ,GAAG,cAAc,EAAE,CAAC;IAClC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;IAC9D,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,MAAM,GAAuB,EAAE,CAAC;IAEtC,mBAAmB;IACnB,MAAM,QAAQ,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC;IAEvD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,MAAM,CAAC,MAAM,IAAI,UAAU;YAAE,MAAM;QAEvC,MAAM,QAAQ,GAAG,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAChD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAElC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvF,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YAC
3C,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG;gBAAE,SAAS;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,KAAK,CAAC,EAAE;gBACZ,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,SAAS,EAAE,OAAO,CAAC,EAAE;gBACrB,WAAW,EAAE,OAAO,CAAC,IAAI;aAC1B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,cAAc,CAC3B,cAAwB,EACxB,SAAiB,EACjB,iBAAyB,EACzB,YAAuG;IAEvG,MAAM,aAAa,GAAG,eAAe,CAAC,UAAU,EAAE,CAAC,CAAC;IACpD,gBAAgB,CAAC,UAAU,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAE1D,kEAAkE;IAClE,qEAAqE;IACrE,MAAM,UAAU,GAAG,kBAAkB,EAAE,CAAC;IACxC,MAAM,aAAa,GAAG,oBAAoB,EAAE,CAAC;IAC7C,MAAM,eAAe,GAAG,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3E,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,iBAAiB,GAAG,eAAe,CAAC,CAAC;IAExE,MAAM,YAAY,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,cAAc,EAAE,gBAAgB,CAAC,CAAC;IAErF,IAAI,mBAAmB,GAAyC,EAAE,CAAC;IACnE,IAAI,CAAC;QACH,mBAAmB,GAAG,2BAA2B,CAC/C,SAAS,EACT,YAAY,CAAC,kBAAkB,CAChC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,kBAAkB;IACpB,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7E,MAAM,gBAAgB,GAAiB,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9D,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAClC,MAAM,EAAE,QAAiB;KAC1B,CAAC,CAAC,CAAC;IAEJ,MAAM,iBAAiB,GAAiB,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtE,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,MAAM,EAAE,SAAkB;KAC3B,CAAC,CAAC,CAAC;IAEJ,MAAM,UAAU,GAAG,OAAO,CACxB;QACE,EAAE,KAAK,EAAE,gBAAgB,EAAE,MAAM,EAAE,YAAY,CAAC,YAAY,EAAE;QAC9D,GAAG,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC;YAC9B,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE,MAAM,EAAE,YAAY,CAAC,aAAa,EAAE,CAAC;YACpE,CAAC,CAAC,EAAE,CAAC;KACR,EACD,YAAY,CAAC,IAAI,CAClB,CAAC;IAEF,2BAA2B;IAC3B,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,qBAAqB,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QACvD,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,IAAI,CAAC,WAAW,CAAC,QAA
Q,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/B,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,eAAe,CAC5B,cAAwB,EACxB,SAAiB,EACjB,iBAAyB,EACzB,YAAuG;IAEvG,MAAM,aAAa,GAAG,eAAe,CAAC,UAAU,EAAE,CAAC,CAAC;IACpD,WAAW,CAAC,UAAU,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;IAErD,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC;IAE5E,IAAI,cAAc,GAAyC,EAAE,CAAC;IAC9D,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,IAAI,YAAY,EAAE,CAAC;QACxC,cAAc,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,EAAE,YAAY,CAAC,kBAAkB,CAAC,CAAC;IACnF,CAAC;IAAC,MAAM,CAAC;QACP,kBAAkB;IACpB,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnE,MAAM,WAAW,GAAiB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACpD,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAClC,MAAM,EAAE,QAAiB;KAC1B,CAAC,CAAC,CAAC;IAEJ,MAAM,YAAY,GAAiB,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5D,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,MAAM,EAAE,SAAkB;KAC3B,CAAC,CAAC,CAAC;IAEJ,MAAM,KAAK,GAAG,OAAO,CACnB;QACE,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,CAAC,YAAY,EAAE;QACzD,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;YACzB,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,aAAa,EAAE,CAAC;YAC/D,CAAC,CAAC,EAAE,CAAC;KACR,EACD,YAAY,CAAC,IAAI,CAClB,CAAC;IAEF,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAsB,EACtB,IAAuB;IAEvB,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC;IAEzD,MAAM,SAAS,GAAG,CAAC,CAAS,EAAE,EAAE,CAC9B,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAE7D,MAAM,GAAG,GACP,KAAK;SACF,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;SACpB,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,E
AAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IAEvD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAEjE,MAAM,eAAe,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,MAAM,aAAa,GACjB,eAAe,CAAC,MAAM,GAAG,CAAC;QACxB,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC,CAAC;IACR,MAAM,WAAW,GACf,SAAS,CAAC,MAAM,GAAG,CAAC;QAClB,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM;QACzD,CAAC,CAAC,CAAC,CAAC;IAER,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC;QACvB,UAAU,EAAE,SAAS,CAAC,EAAE,CAAC;QACzB,UAAU,EAAE,SAAS,CAAC,EAAE,CAAC;QACzB,GAAG;QACH,OAAO;QACP,aAAa,EAAE,WAAW;QAC1B,eAAe,EAAE,aAAa;KAC/B,CAAC;AACJ,CAAC;AAED,SAAS,GAAG,CAAC,CAAS,EAAE,CAAC,GAAG,CAAC;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACvE,MAAM,UAAU,GAAG,aAAa;QAC9B,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC3C,CAAC,CAAC,GAAG,CAAC;IACR,MAAM,IAAI,GAAG,EAAE,CAAC;IAEhB,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAE5D,sBAAsB;IACtB,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;IACpB,MAAM,UAAU,GAAG,kBAAkB,EAAE,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,kBAAkB,UAAU,EAAE,CAAC,CAAC;IAC5C,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QACrD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,cAAc,GAAG,UAAU,EAAE,CAAC;IACpC,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;IAE/C,mBAAmB;IACnB,OAAO,CAAC,GAAG,CAAC,cAAc,UAAU,4BAA4B,CAAC,CAAC;IAClE,MAAM,aAAa,GAAG,YAAY,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,aAAa,aAAa,CAAC,MAAM,gBAAgB,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,WAA
W,CAAC,CAAC;IAE7H,6BAA6B;IAC7B,OAAO,CAAC,GAAG,CAAC,yDAAyD,CAAC,CAAC;IACvE,MAAM,OAAO,GAAG,MAAM,qBAAqB,CACzC,aAAa,EACb,MAAM,CAAC,mBAAmB,CAC3B,CAAC;IACF,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,MAAM,aAAa,aAAa,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;IAEvG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;QACpD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,sBAAsB;IACtB,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC;IAChC,MAAM,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC;IAErD,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;IACzC,MAAM,iBAAiB,GAAG,EAAE,CAAC;IAE7B,oCAAoC;IACpC,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,CAAC,MAAM,uCAAuC,CAAC,CAAC;IAChF,MAAM,QAAQ,GAAkB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,cAAc;QACd,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QACxD,MAAM,cAAc,GAAG,WAAW,CAAC,SAAS,CAAC;QAE7C,aAAa;QACb,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,MAAM,cAAc,CACvC,cAAc,EACd,CAAC,CAAC,KAAK,EACP,iBAAiB,EACjB,YAAY,CACb,CAAC;QACF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC;QAE9C,aAAa;QACb,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,MAAM,eAAe,CACxC,cAAc,EACd,CAAC,CAAC,KAAK,EACP,iBAAiB,EACjB,YAAY,CACb,CAAC;QACF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC;QAE9C,4BAA4B;QAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;QAClF,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC;QAEjE,QAAQ,CAAC,IAAI,CAAC;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,kBAAkB,EAAE,CAAC,CAAC,kBAAkB;YACxC,KAAK,EAAE;gBACL,IAAI,EAAE,SAAS;gBACf,aAAa,EAAE,YAAY,CAAC,MAAM;gBAClC,UAAU,EAAE,aAAa;aAC1B;YACD,KAAK,EAAE;gBACL,IAAI,EAAE,SAAS;gBACf,aAAa,EAAE,YAAY,CAAC,MAAM;g
BAClC,UAAU,EAAE,aAAa;aAC1B;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEzB,qBAAqB;IACrB,MAAM,YAAY,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACvD,MAAM,YAAY,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEvD,qBAAqB;IACrB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,YAAY,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,SAAS,GAAG,YAAY,CAAC,SAAS,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CACxL,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,UAAU,GAAG,YAAY,CAAC,UAAU,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAC5L,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,UAAU,GAAG,YAAY,CAAC,UAAU,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAC5L,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,aAAa,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,aAAa,GAAG,CAAC,YAAY,CAAC,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,EAAE,CACpI,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,OAAO,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,YAAY,CAAC,OAAO,GAAG,GAAG,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAChL,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,aAAa,EAAE,CAAC,CAAC,cAAc,GAAG,CAAC,YAAY,CAAC,aAAa,EAAE,CAAC,CAAC,cAAc,GAAG,CAAC,YAAY,CAAC,aAAa,GAAG,YAAY,CAAC,aAAa,EAAE,CAAC,CAAC,IAAI,CACzL,CAAC;IACF,OAAO,CAAC,GAAG,CACT,qBAAqB,GAAG,CAAC,YAAY,CAAC,eAAe,EAAE,CAAC,CAAC,cAAc,GAAG,CAAC,YAAY,CAAC,eAAe,EAAE,CAAC,CAAC,cAAc,GAAG,CAAC,YAAY,CAAC,eAAe,GAAG,YAAY,CAAC,eAAe,EAAE,CAAC,CAAC,IAAI,CACjM,CAAC;IAEF,qCAAqC;IACrC,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAC/B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,KAAK,CA
AC,IAAI,GAAG,CAAC;QAChB,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CACtD,CAAC;IACF,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAC/B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;QAChB,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CACtD,CAAC;IACF,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAC1B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,CAAC,IAAI,CACzD,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,mCAAmC,SAAS,CAAC,MAAM,gBAAgB,SAAS,CAAC,MAAM,UAAU,IAAI,CAAC,MAAM,eAAe,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAE1M,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QACzD,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CACT,QAAQ,CAAC,CAAC,KAAK,kBAAkB,CAAC,CAAC,KAAK,CAAC,IAAI,gBAAgB,CAAC,CAAC,KAAK,CAAC,IAAI,IAAI,MAAM,EAAE,CACtF,CAAC;QACJ,CAAC;IACH,CAAC;IACD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QACzD,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CACT,QAAQ,CAAC,CAAC,KAAK,kBAAkB,CAAC,CAAC,KAAK,CAAC,IAAI,gBAAgB,CAAC,CAAC,KAAK,CAAC,IAAI,IAAI,MAAM,EAAE,CACtF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,aAAa;IACb,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,OAAO,CAAC,IAAI,CACV,cAAc,OAAO,CAAC,MAAM,oCAAoC,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,WAAW,CACzH,CAAC;IAEF,MAAM,YAAY,GAAG,YAAY,CAAC,SAAS,GAAG,YAAY,CAAC,SAAS,CAAC;IACrE,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC;IAErD,IAAI,YAAY,GAAG,IAAI,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CACV,uBAAuB,GAAG,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC,CAAC,gBAAgB,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CACxF,CAAC;IACJ,CAAC;SAAM,IAAI,YAAY,GAAG,CAAC,IAAI,EAAE,CAAC;QAChC,OAAO,CAAC,IAAI,CACV,sBAAsB,GAAG,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC,CAAC,eAAe,GAAG,CAAC,QAAQ,EAAE,CAAC,
CAAC,MAAM,CACtF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC,CAAC,qBAAqB,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,YAAY,CACjG,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,IAAI,CACV,mBAAmB,SAAS,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,6BAA6B,SAAS,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,CACzH,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,OAAO,CAAC,MAAM;QAC1B,gBAAgB,EAAE,aAAa,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM;QACvD,YAAY;QACZ,YAAY;QACZ,QAAQ;QACR,OAAO;KACR,CAAC;AACJ,CAAC;AAED,+EAA+E;AAE/E,YAAY,EAAE;KACX,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE;IACf,MAAM,OAAO,GAAG,4BAA4B,CAAC;IAC7C,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,uBAAuB,OAAO,EAAE,CAAC,CAAC;AAChD,CAAC,CAAC;KACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACb,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the index-vs-chunk retrieval benchmark.
|
|
3
|
+
*
|
|
4
|
+
* Compares search quality when using the semantic index layer
|
|
5
|
+
* vs direct chunk search, using LLM-generated natural language queries.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * One benchmark query paired with the chunk it is expected to retrieve.
 */
export interface BenchmarkQuery {
    /** Natural language search query produced by the LLM. */
    query: string;
    /** ID of the ground-truth chunk that the search should find. */
    groundTruthChunkId: string;
    /** Session slug, kept for context. */
    sessionSlug: string;
    /** ID of the cluster the chunk belongs to. */
    clusterId: string;
    /** Cluster name, used for display; may be `null`. */
    clusterName: string | null;
}
|
|
20
|
+
/**
 * Outcome of running a single query through both search paths.
 */
export interface QueryResult {
    /** The query text that was searched. */
    query: string;
    /** ID of the ground-truth chunk for this query. */
    groundTruthChunkId: string;
    /** Results from the index-based search path. */
    index: {
        /** 1-indexed rank of the ground-truth chunk; 0 means it was not found. */
        rank: number;
        /** Total number of chunks returned. */
        totalReturned: number;
        /** Duration in milliseconds. */
        durationMs: number;
    };
    /** Results from the chunk-based search path. */
    chunk: {
        /** 1-indexed rank of the ground-truth chunk; 0 means it was not found. */
        rank: number;
        /** Total number of chunks returned. */
        totalReturned: number;
        /** Duration in milliseconds. */
        durationMs: number;
    };
}
|
|
40
|
+
/**
 * Aggregate retrieval metrics for one search path.
 */
export interface PathMetrics {
    /** Recall@5: fraction of queries whose ground truth was in the top 5. */
    recallAt5: number;
    /** Recall@10: fraction of queries whose ground truth was in the top 10. */
    recallAt10: number;
    /** Recall@20: fraction of queries whose ground truth was in the top 20. */
    recallAt20: number;
    /** Mean Reciprocal Rank. */
    mrr: number;
    /** Fraction of queries whose ground truth was found at all. */
    hitRate: number;
    /** Mean latency in milliseconds. */
    meanLatencyMs: number;
    /** Median latency in milliseconds. */
    medianLatencyMs: number;
}
|
|
55
|
+
/**
 * Complete report produced by one index-vs-chunk benchmark run.
 */
export interface IndexVsChunkReport {
    /** When the report was created, as an ISO 8601 string. */
    timestamp: string;
    /** Number of queries used in the benchmark. */
    queryCount: number;
    /** Number of queries that failed to generate. */
    failedQueryCount: number;
    /** Metrics for the index-based search path. */
    indexMetrics: PathMetrics;
    /** Metrics for the chunk-based search path. */
    chunkMetrics: PathMetrics;
    /** One result entry per query. */
    perQuery: QueryResult[];
    /** Human-readable summary lines. */
    summary: string[];
}
|
|
71
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/eval/experiments/index-vs-chunk/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,kDAAkD;AAClD,MAAM,WAAW,cAAc;IAC7B,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,0DAA0D;AAC1D,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB,EAAE,MAAM,CAAC;IAE3B,uCAAuC;IACvC,KAAK,EAAE;QACL,6DAA6D;QAC7D,IAAI,EAAE,MAAM,CAAC;QACb,6BAA6B;QAC7B,aAAa,EAAE,MAAM,CAAC;QACtB,gCAAgC;QAChC,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IAEF,uCAAuC;IACvC,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,aAAa,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED,6CAA6C;AAC7C,MAAM,WAAW,WAAW;IAC1B,yEAAyE;IACzE,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,4BAA4B;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,0EAA0E;IAC1E,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,aAAa,EAAE,MAAM,CAAC;IACtB,4BAA4B;IAC5B,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,6BAA6B;AAC7B,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iDAAiD;IACjD,gBAAgB,EAAE,MAAM,CAAC;IAEzB,uCAAuC;IACvC,YAAY,EAAE,WAAW,CAAC;IAC1B,uCAAuC;IACvC,YAAY,EAAE,WAAW,CAAC;IAE1B,yBAAyB;IACzB,QAAQ,EAAE,WAAW,EAAE,CAAC;IAExB,8BAA8B;IAC9B,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/eval/experiments/index-vs-chunk/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Dropout Analysis
|
|
3
|
+
*
|
|
4
|
+
* Traces where target chunks get lost in the retrieval pipeline.
|
|
5
|
+
* For each query, checks whether the ground-truth chunk survives each stage:
|
|
6
|
+
* 1. Vector search (raw top-K)
|
|
7
|
+
* 2. RRF fusion (vector + keyword)
|
|
8
|
+
* 3. Cluster expansion
|
|
9
|
+
* 4. Oversized filtering
|
|
10
|
+
* 5. MMR reranking
|
|
11
|
+
* 6. Budget assembly
|
|
12
|
+
* 7. Chain walking (recall path)
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npx tsx src/eval/experiments/pipeline-dropout/run-experiment.ts [--sample-size=50]
|
|
16
|
+
*/
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=run-experiment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-experiment.d.ts","sourceRoot":"","sources":["../../../../src/eval/experiments/pipeline-dropout/run-experiment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG"}
|