@gmickel/gno 0.3.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -7
- package/package.json +30 -1
- package/src/cli/commands/ask.ts +12 -187
- package/src/cli/commands/embed.ts +10 -4
- package/src/cli/commands/models/pull.ts +9 -4
- package/src/cli/commands/serve.ts +19 -0
- package/src/cli/commands/vsearch.ts +5 -2
- package/src/cli/program.ts +28 -0
- package/src/config/types.ts +11 -6
- package/src/llm/registry.ts +3 -1
- package/src/mcp/tools/vsearch.ts +5 -2
- package/src/pipeline/answer.ts +224 -0
- package/src/pipeline/contextual.ts +57 -0
- package/src/pipeline/expansion.ts +49 -31
- package/src/pipeline/explain.ts +11 -3
- package/src/pipeline/fusion.ts +20 -9
- package/src/pipeline/hybrid.ts +57 -40
- package/src/pipeline/index.ts +7 -0
- package/src/pipeline/rerank.ts +55 -27
- package/src/pipeline/types.ts +0 -3
- package/src/pipeline/vsearch.ts +3 -2
- package/src/serve/CLAUDE.md +91 -0
- package/src/serve/bunfig.toml +2 -0
- package/src/serve/context.ts +181 -0
- package/src/serve/index.ts +7 -0
- package/src/serve/public/app.tsx +56 -0
- package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
- package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
- package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
- package/src/serve/public/components/ai-elements/loader.tsx +96 -0
- package/src/serve/public/components/ai-elements/message.tsx +443 -0
- package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
- package/src/serve/public/components/ai-elements/sources.tsx +75 -0
- package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
- package/src/serve/public/components/preset-selector.tsx +403 -0
- package/src/serve/public/components/ui/badge.tsx +46 -0
- package/src/serve/public/components/ui/button-group.tsx +82 -0
- package/src/serve/public/components/ui/button.tsx +62 -0
- package/src/serve/public/components/ui/card.tsx +92 -0
- package/src/serve/public/components/ui/carousel.tsx +244 -0
- package/src/serve/public/components/ui/collapsible.tsx +31 -0
- package/src/serve/public/components/ui/command.tsx +181 -0
- package/src/serve/public/components/ui/dialog.tsx +141 -0
- package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
- package/src/serve/public/components/ui/hover-card.tsx +42 -0
- package/src/serve/public/components/ui/input-group.tsx +167 -0
- package/src/serve/public/components/ui/input.tsx +21 -0
- package/src/serve/public/components/ui/progress.tsx +28 -0
- package/src/serve/public/components/ui/scroll-area.tsx +56 -0
- package/src/serve/public/components/ui/select.tsx +188 -0
- package/src/serve/public/components/ui/separator.tsx +26 -0
- package/src/serve/public/components/ui/table.tsx +114 -0
- package/src/serve/public/components/ui/textarea.tsx +18 -0
- package/src/serve/public/components/ui/tooltip.tsx +59 -0
- package/src/serve/public/globals.css +226 -0
- package/src/serve/public/hooks/use-api.ts +112 -0
- package/src/serve/public/index.html +13 -0
- package/src/serve/public/pages/Ask.tsx +442 -0
- package/src/serve/public/pages/Browse.tsx +270 -0
- package/src/serve/public/pages/Dashboard.tsx +202 -0
- package/src/serve/public/pages/DocView.tsx +302 -0
- package/src/serve/public/pages/Search.tsx +335 -0
- package/src/serve/routes/api.ts +763 -0
- package/src/serve/server.ts +249 -0
- package/src/store/migrations/002-documents-fts.ts +40 -0
- package/src/store/migrations/index.ts +2 -1
- package/src/store/sqlite/adapter.ts +216 -33
- package/src/store/sqlite/fts5-snowball.ts +144 -0
- package/src/store/types.ts +33 -3
- package/src/store/vector/stats.ts +3 -0
- package/src/store/vector/types.ts +1 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grounded answer generation.
|
|
3
|
+
* Shared between CLI ask command and web API.
|
|
4
|
+
*
|
|
5
|
+
* @module src/pipeline/answer
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { GenerationPort } from '../llm/types';
|
|
9
|
+
import type { StorePort } from '../store/types';
|
|
10
|
+
import type { Citation, SearchResult } from './types';
|
|
11
|
+
|
|
12
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
13
|
+
// Constants
|
|
14
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
const ANSWER_PROMPT = `You are answering a question using ONLY the provided context blocks.
|
|
17
|
+
|
|
18
|
+
Rules you MUST follow:
|
|
19
|
+
1) Use ONLY facts stated in the context blocks. Do NOT use outside knowledge.
|
|
20
|
+
2) Every factual statement must include an inline citation like [1] or [2] referring to a context block.
|
|
21
|
+
3) If the context does not contain enough information to answer, reply EXACTLY:
|
|
22
|
+
"I don't have enough information in the provided sources to answer this question."
|
|
23
|
+
4) Do not cite sources you did not use. Do not invent citation numbers.
|
|
24
|
+
|
|
25
|
+
Question: {query}
|
|
26
|
+
|
|
27
|
+
Context blocks:
|
|
28
|
+
{context}
|
|
29
|
+
|
|
30
|
+
Write a concise answer (1-3 paragraphs).`;
|
|
31
|
+
|
|
32
|
+
/** Abstention message when LLM cannot ground answer */
|
|
33
|
+
export const ABSTENTION_MESSAGE =
|
|
34
|
+
"I don't have enough information in the provided sources to answer this question.";
|
|
35
|
+
|
|
36
|
+
/** Max characters per document (~8K tokens) */
|
|
37
|
+
const MAX_DOC_CHARS = 32_000;
|
|
38
|
+
|
|
39
|
+
/** Max number of sources - fewer docs but full content */
|
|
40
|
+
const MAX_CONTEXT_SOURCES = 3;
|
|
41
|
+
|
|
42
|
+
/** Fallback snippet limit when full content unavailable */
|
|
43
|
+
const MAX_SNIPPET_CHARS = 1500;
|
|
44
|
+
|
|
45
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
46
|
+
// Citation Processing
|
|
47
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
/**
 * Collect the distinct bracketed citation markers (e.g. `[2]`) found in an
 * answer that fall inside the inclusive range 1..maxCitation.
 *
 * @param answer - Answer text that may contain numeric markers like `[3]`.
 * @param maxCitation - Highest citation number that counts as valid.
 * @returns Valid citation numbers, de-duplicated and sorted ascending.
 */
export function extractValidCitationNumbers(
  answer: string,
  maxCitation: number
): number[] {
  const seen = new Set<number>();

  for (const [, digits] of answer.matchAll(/\[(\d+)\]/g)) {
    const value = Number(digits);
    const inRange = value >= 1 && value <= maxCitation;
    // Number.isInteger also rejects Infinity produced by absurdly long digit runs
    if (inRange && Number.isInteger(value)) {
      seen.add(value);
    }
  }

  return Array.from(seen).sort((left, right) => left - right);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Keep only the citations that the answer actually referenced.
 *
 * Citation numbers are 1-based positions into `citations`, so the entry at
 * index `i` is kept when `i + 1` appears in `validUsedNumbers`.
 *
 * @param citations - All citations offered to the model, in context order.
 * @param validUsedNumbers - 1-based citation numbers found in the answer.
 * @returns The referenced citations, in their original relative order.
 */
export function filterCitationsByUse(
  citations: Citation[],
  validUsedNumbers: number[]
): Citation[] {
  const referenced = new Set<number>(validUsedNumbers);
  const kept: Citation[] = [];

  citations.forEach((citation, position) => {
    if (referenced.has(position + 1)) {
      kept.push(citation);
    }
  });

  return kept;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Rewrite citation markers in the answer so they are numbered consecutively.
 *
 * The n-th entry of `validUsedNumbers` becomes marker `[n]`; for example an
 * answer citing `[2]` and `[5]` with `validUsedNumbers = [2, 5]` ends up
 * citing `[1]` and `[2]`. Markers whose number is not listed are deleted.
 * Runs of two or more spaces are then collapsed to one and the result is
 * trimmed.
 *
 * @param answer - Answer text containing `[n]` markers.
 * @param validUsedNumbers - Old citation numbers, in their new order.
 * @returns The answer with renumbered markers and normalized spacing.
 */
export function renumberAnswerCitations(
  answer: string,
  validUsedNumbers: number[]
): string {
  // old citation number -> new 1-based number
  const renumbered = new Map<number, number>();
  validUsedNumbers.forEach((original, position) => {
    if (original !== undefined) {
      renumbered.set(original, position + 1);
    }
  });

  const rewritten = answer.replace(
    /\[(\d+)\]/g,
    (_marker, digits: string) => {
      const target = renumbered.get(Number(digits));
      if (target === undefined) {
        return '';
      }
      return `[${target}]`;
    }
  );

  // Dropping a marker can leave doubled spaces behind
  const collapsed = rewritten.replace(/ {2,}/g, ' ');
  return collapsed.trim();
}
|
|
106
|
+
|
|
107
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
108
|
+
// Answer Generation
|
|
109
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
/** Raw output of answer generation, before citation filtering and renumbering. */
export interface AnswerGenerationResult {
  /** Generated answer text; may contain `[n]` citation markers. */
  answer: string;
  /** Citations offered as context; entry at index `i` corresponds to marker `[i + 1]`. */
  citations: Citation[];
}
|
|
115
|
+
|
|
116
|
+
/** Dependencies required by `generateGroundedAnswer`. */
export interface AnswerGenerationDeps {
  /** Generation port used to produce the answer from the assembled prompt. */
  genPort: GenerationPort;
  /** Store used to fetch full document content; `null` means snippets only. */
  store: StorePort | null;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Generate a grounded answer from search results.
 *
 * At most MAX_CONTEXT_SOURCES results are turned into numbered context blocks
 * (`[1] ...`, `[2] ...`). For each result the full document is fetched via
 * `store.getContent` when a store and a `conversion.mirrorHash` are available
 * (truncated to MAX_DOC_CHARS). Otherwise the result's snippet is used
 * (truncated to MAX_SNIPPET_CHARS). Results with neither usable content nor a
 * non-blank snippet are skipped and do not consume a citation number.
 *
 * @param deps - Generation port and optional store (see AnswerGenerationDeps).
 * @param query - The user's question, inserted verbatim into the prompt.
 * @param results - Ranked search results; only the leading ones are used.
 * @param maxTokens - Token limit forwarded to the generation port.
 * @returns The raw answer plus one citation per context block, or `null` when
 *   no context block could be built or the generation port reports failure.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sequential content processing with fallbacks
export async function generateGroundedAnswer(
  deps: AnswerGenerationDeps,
  query: string,
  results: SearchResult[],
  maxTokens: number
): Promise<AnswerGenerationResult | null> {
  const { genPort, store } = deps;
  const contextParts: string[] = [];
  const citations: Citation[] = [];
  let citationIndex = 0;

  for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
    let content: string | null = null;
    let usedFullContent = false;

    // Try to fetch full document content if store available
    if (store && r.conversion?.mirrorHash) {
      const contentResult = await store.getContent(r.conversion.mirrorHash);
      if (contentResult.ok && contentResult.value) {
        content = contentResult.value;
        usedFullContent = true;
        // Truncate to max doc chars
        if (content.length > MAX_DOC_CHARS) {
          content = `${content.slice(0, MAX_DOC_CHARS)}\n\n[... truncated ...]`;
        }
      }
    }

    // Fallback to snippet if full content unavailable
    if (!content) {
      if (!r.snippet || r.snippet.trim().length === 0) {
        continue;
      }
      content =
        r.snippet.length > MAX_SNIPPET_CHARS
          ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
          : r.snippet;
    }

    citationIndex += 1;
    contextParts.push(`[${citationIndex}] ${content}`);
    // Clear line range when citing full content (not a specific snippet)
    citations.push({
      docid: r.docid,
      uri: r.uri,
      startLine: usedFullContent ? undefined : r.snippetRange?.startLine,
      endLine: usedFullContent ? undefined : r.snippetRange?.endLine,
    });
  }

  if (contextParts.length === 0) {
    return null;
  }

  // Fill both placeholders in a single pass with a function replacer:
  // - a string replacement would expand `$&`, `$'`, `$$` etc. found in the
  //   query or in document text, corrupting the prompt;
  // - a single pass never rescans inserted text, so a query that literally
  //   contains "{context}" cannot capture the context substitution.
  const substitutions: Record<string, string> = {
    query,
    context: contextParts.join('\n\n'),
  };
  const prompt = ANSWER_PROMPT.replace(
    /\{(query|context)\}/g,
    (placeholder: string, key: string) => substitutions[key] ?? placeholder
  );

  const result = await genPort.generate(prompt, {
    temperature: 0,
    maxTokens,
  });

  if (!result.ok) {
    return null;
  }

  return { answer: result.value, citations };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Turn a raw generation result into the final answer with cleaned citations.
 *
 * Only citation numbers that appear in the answer and point at an offered
 * citation are kept. The surviving citations are renumbered consecutively in
 * the answer text. If nothing valid was cited, the abstention message is
 * returned with an empty citation list.
 *
 * @param rawResult - Answer text and the full list of offered citations.
 * @returns The cleaned answer and the citations it references.
 */
export function processAnswerResult(rawResult: AnswerGenerationResult): {
  answer: string;
  citations: Citation[];
} {
  const { answer: rawAnswer, citations: offered } = rawResult;

  const usedNumbers = extractValidCitationNumbers(rawAnswer, offered.length);
  const usedCitations = filterCitationsByUse(offered, usedNumbers);

  // An answer without any valid citation is treated as ungrounded
  const isGrounded = usedNumbers.length > 0 && usedCitations.length > 0;
  if (!isGrounded) {
    return { answer: ABSTENTION_MESSAGE, citations: [] };
  }

  return {
    answer: renumberAnswerCitations(rawAnswer, usedNumbers),
    citations: usedCitations,
  };
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Contextual embedding formatting.
|
|
3
|
+
* Prepends document context to chunks for better retrieval.
|
|
4
|
+
*
|
|
5
|
+
* Based on Anthropic Contextual Retrieval research:
|
|
6
|
+
* - Query relevance jumps from 0.1 to 0.92 for context-dependent queries
|
|
7
|
+
* - 49% reduction in retrieval failure with contextual embeddings + BM25
|
|
8
|
+
* - 67% reduction with reranking added
|
|
9
|
+
*
|
|
10
|
+
* @module src/pipeline/contextual
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Top-level regex for performance.
// The separator after the hashes is restricted to spaces/tabs: `\s` also
// matches newlines, which let an empty heading ("#" followed by a blank line)
// capture the following paragraph as if it were the heading text.
const HEADING_REGEX = /^##?[ \t]+(.+)$/m;
const SUBHEADING_REGEX = /^##[ \t]+(.+)$/m;
// Trailing ".ext" of a file name (last dot followed by word characters)
const EXT_REGEX = /\.\w+$/;
|
|
17
|
+
|
|
18
|
+
/**
 * Build the string that is embedded for a document chunk.
 *
 * The title is prepended so the embedding carries document context; a missing
 * or blank title is represented by the literal `none`.
 *
 * @param text - Chunk text to embed.
 * @param title - Optional document title.
 * @returns `title: <title> | text: <text>`.
 */
export function formatDocForEmbedding(text: string, title?: string): string {
  const trimmed = title?.trim();
  const label = trimmed ? trimmed : 'none';
  return `title: ${label} | text: ${text}`;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build the string that is embedded for a search query.
 *
 * Queries get a task prefix, which differs from the document format, so that
 * queries and documents are embedded asymmetrically.
 *
 * @param query - Raw query text.
 * @returns `task: search result | query: <query>`.
 */
export function formatQueryForEmbedding(query: string): string {
  const prefix = 'task: search result | query: ';
  return `${prefix}${query}`;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Extract a title from markdown content, falling back to the file name.
 *
 * The first `#` or `##` heading is used. If that heading is the generic
 * title "Notes" (case-insensitive), the next `##` heading AFTER it is used
 * instead when one exists. Without any heading, the last `/`-separated
 * segment of `filename` is returned with its extension removed.
 *
 * @param content - Markdown content to scan for headings.
 * @param filename - File name or path used as the fallback.
 * @returns The extracted title.
 */
export function extractTitle(content: string, filename: string): string {
  // Try to find first heading (# or ##)
  const heading = content.match(HEADING_REGEX);
  if (heading?.[1]) {
    const title = heading[1].trim();
    if (title.toLowerCase() !== 'notes') {
      return title;
    }

    // Generic title: look for the next subheading, searching only the text
    // that follows the matched heading. Scanning from the start would match
    // the same heading again when it is itself written as "## Notes".
    const rest = content.slice((heading.index ?? 0) + heading[0].length);
    const next = rest.match(SUBHEADING_REGEX);
    return next?.[1] ? next[1].trim() : title;
  }

  // Fall back to filename without extension
  const basename = filename.split('/').pop() ?? filename;
  return basename.replace(EXT_REGEX, '');
}
|
|
@@ -15,9 +15,10 @@ import type { ExpansionResult } from './types';
|
|
|
15
15
|
// Constants
|
|
16
16
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
17
17
|
|
|
18
|
-
const EXPANSION_PROMPT_VERSION = '
|
|
18
|
+
const EXPANSION_PROMPT_VERSION = 'v2';
|
|
19
19
|
const DEFAULT_TIMEOUT_MS = 5000;
|
|
20
|
-
|
|
20
|
+
// Non-greedy to avoid matching from first { to last } across multiple objects
|
|
21
|
+
const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
|
|
21
22
|
|
|
22
23
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
23
24
|
// Cache Key Generation
|
|
@@ -40,45 +41,53 @@ export function generateCacheKey(
|
|
|
40
41
|
// Prompt Templates
|
|
41
42
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
42
43
|
|
|
43
|
-
const EXPANSION_PROMPT_EN = `You
|
|
44
|
+
const EXPANSION_PROMPT_EN = `You expand search queries for a hybrid search system.
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
Query: "{query}"
|
|
46
47
|
|
|
47
|
-
Generate
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
Generate JSON with:
|
|
49
|
+
1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
|
|
50
|
+
2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
|
|
51
|
+
3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
|
|
51
52
|
|
|
52
|
-
|
|
53
|
+
Rules:
|
|
54
|
+
- Keep proper nouns exactly as written
|
|
55
|
+
- Be concise - each variation 3-8 words
|
|
56
|
+
- HyDE should read like actual documentation, not a question
|
|
53
57
|
|
|
54
|
-
|
|
55
|
-
{
|
|
56
|
-
"lexicalQueries": ["deployment process", "how to deploy", "deploying application"],
|
|
57
|
-
"vectorQueries": ["steps to release software to production", "guide for application deployment"],
|
|
58
|
-
"hyde": "To deploy the application, first run the build command, then push to the staging environment..."
|
|
59
|
-
}`;
|
|
58
|
+
Respond with valid JSON only.`;
|
|
60
59
|
|
|
61
|
-
const EXPANSION_PROMPT_DE = `Du
|
|
60
|
+
const EXPANSION_PROMPT_DE = `Du erweiterst Suchanfragen für ein hybrides Suchsystem.
|
|
62
61
|
|
|
63
|
-
|
|
62
|
+
Anfrage: "{query}"
|
|
64
63
|
|
|
65
|
-
Generiere
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
64
|
+
Generiere JSON mit:
|
|
65
|
+
1. "lexicalQueries": 2-3 Keyword-Variationen mit Synonymen (für BM25)
|
|
66
|
+
2. "vectorQueries": 2-3 semantische Umformulierungen (für Embeddings)
|
|
67
|
+
3. "hyde": Ein 50-100 Wort Abschnitt, der die Anfrage direkt beantwortet, wie aus einem relevanten Dokument
|
|
69
68
|
|
|
70
|
-
|
|
69
|
+
Regeln:
|
|
70
|
+
- Eigennamen exakt beibehalten
|
|
71
|
+
- Kurz halten - jede Variation 3-8 Wörter
|
|
72
|
+
- HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
|
|
71
73
|
|
|
72
|
-
|
|
74
|
+
Antworte nur mit validem JSON.`;
|
|
73
75
|
|
|
74
|
-
|
|
76
|
+
const EXPANSION_PROMPT_MULTILINGUAL = `You expand search queries for a hybrid search system. Respond in the same language as the query.
|
|
75
77
|
|
|
76
|
-
|
|
77
|
-
- "lexicalQueries": array of 2-3 keyword-based variations
|
|
78
|
-
- "vectorQueries": array of 2-3 semantic rephrasing
|
|
79
|
-
- "hyde": a short hypothetical document passage (optional)
|
|
78
|
+
Query: "{query}"
|
|
80
79
|
|
|
81
|
-
|
|
80
|
+
Generate JSON with:
|
|
81
|
+
1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
|
|
82
|
+
2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
|
|
83
|
+
3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
|
|
84
|
+
|
|
85
|
+
Rules:
|
|
86
|
+
- Keep proper nouns exactly as written
|
|
87
|
+
- Be concise - each variation 3-8 words
|
|
88
|
+
- HyDE should read like actual documentation, not a question
|
|
89
|
+
|
|
90
|
+
Respond with valid JSON only.`;
|
|
82
91
|
|
|
83
92
|
/**
|
|
84
93
|
* Get prompt template for language.
|
|
@@ -178,9 +187,10 @@ export async function expandQuery(
|
|
|
178
187
|
const template = getPromptTemplate(options.lang);
|
|
179
188
|
const prompt = template.replace('{query}', query);
|
|
180
189
|
|
|
181
|
-
// Run with timeout
|
|
190
|
+
// Run with timeout (clear timer to avoid resource leak)
|
|
191
|
+
let timeoutId: ReturnType<typeof setTimeout> | undefined;
|
|
182
192
|
const timeoutPromise = new Promise<null>((resolve) => {
|
|
183
|
-
setTimeout(() => resolve(null), timeout);
|
|
193
|
+
timeoutId = setTimeout(() => resolve(null), timeout);
|
|
184
194
|
});
|
|
185
195
|
|
|
186
196
|
try {
|
|
@@ -193,6 +203,11 @@ export async function expandQuery(
|
|
|
193
203
|
timeoutPromise,
|
|
194
204
|
]);
|
|
195
205
|
|
|
206
|
+
// Clear timeout if generation completed first
|
|
207
|
+
if (timeoutId) {
|
|
208
|
+
clearTimeout(timeoutId);
|
|
209
|
+
}
|
|
210
|
+
|
|
196
211
|
// Timeout
|
|
197
212
|
if (result === null) {
|
|
198
213
|
return ok(null);
|
|
@@ -207,6 +222,9 @@ export async function expandQuery(
|
|
|
207
222
|
const parsed = parseExpansionResult(result.value);
|
|
208
223
|
return ok(parsed);
|
|
209
224
|
} catch {
|
|
225
|
+
if (timeoutId) {
|
|
226
|
+
clearTimeout(timeoutId);
|
|
227
|
+
}
|
|
210
228
|
return ok(null); // Graceful degradation
|
|
211
229
|
}
|
|
212
230
|
}
|
package/src/pipeline/explain.ts
CHANGED
|
@@ -49,15 +49,23 @@ export function formatResultExplain(results: ExplainResult[]): string {
|
|
|
49
49
|
// Explain Line Builders
|
|
50
50
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
51
51
|
|
|
52
|
+
export type ExpansionStatus =
|
|
53
|
+
| 'disabled' // User chose --no-expand
|
|
54
|
+
| 'skipped_strong' // Strong BM25 signal detected
|
|
55
|
+
| 'attempted'; // Expansion was attempted (may have succeeded or timed out)
|
|
56
|
+
|
|
52
57
|
export function explainExpansion(
|
|
53
|
-
|
|
58
|
+
status: ExpansionStatus,
|
|
54
59
|
result: ExpansionResult | null
|
|
55
60
|
): ExplainLine {
|
|
56
|
-
if (
|
|
61
|
+
if (status === 'disabled') {
|
|
57
62
|
return { stage: 'expansion', message: 'disabled' };
|
|
58
63
|
}
|
|
64
|
+
if (status === 'skipped_strong') {
|
|
65
|
+
return { stage: 'expansion', message: 'skipped (strong BM25)' };
|
|
66
|
+
}
|
|
59
67
|
if (!result) {
|
|
60
|
-
return { stage: 'expansion', message: 'skipped (
|
|
68
|
+
return { stage: 'expansion', message: 'skipped (timeout)' };
|
|
61
69
|
}
|
|
62
70
|
const lex = result.lexicalQueries.length;
|
|
63
71
|
const sem = result.vectorQueries.length;
|
package/src/pipeline/fusion.ts
CHANGED
|
@@ -64,9 +64,12 @@ export function rrfFuse(
|
|
|
64
64
|
);
|
|
65
65
|
|
|
66
66
|
// Process BM25 sources
|
|
67
|
+
// Original query gets 2x weight to prevent dilution by expansion variants
|
|
67
68
|
for (const input of bm25Inputs) {
|
|
68
69
|
const weight =
|
|
69
|
-
input.source === 'bm25'
|
|
70
|
+
input.source === 'bm25'
|
|
71
|
+
? config.bm25Weight * 2.0
|
|
72
|
+
: config.bm25Weight * 0.5;
|
|
70
73
|
|
|
71
74
|
for (const result of input.results) {
|
|
72
75
|
const key = `${result.mirrorHash}:${result.seq}`;
|
|
@@ -98,8 +101,9 @@ export function rrfFuse(
|
|
|
98
101
|
}
|
|
99
102
|
|
|
100
103
|
// Process vector sources
|
|
104
|
+
// Original query gets 2x weight to prevent dilution by expansion variants
|
|
101
105
|
for (const input of vectorInputs) {
|
|
102
|
-
let weight = config.vecWeight;
|
|
106
|
+
let weight = config.vecWeight * 2.0; // Default for original vector
|
|
103
107
|
if (input.source === 'vector_variant') {
|
|
104
108
|
weight = config.vecWeight * 0.5;
|
|
105
109
|
} else if (input.source === 'hyde') {
|
|
@@ -135,16 +139,23 @@ export function rrfFuse(
|
|
|
135
139
|
}
|
|
136
140
|
}
|
|
137
141
|
|
|
138
|
-
// Apply top-rank bonus
|
|
142
|
+
// Apply tiered top-rank bonus
|
|
143
|
+
// Rewards documents ranking highly in ANY list (not requiring both)
|
|
139
144
|
for (const candidate of candidates.values()) {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
) {
|
|
145
|
+
const bm25Rank = candidate.bm25Rank;
|
|
146
|
+
const vecRank = candidate.vecRank;
|
|
147
|
+
|
|
148
|
+
// Tier 1: #1 in any list
|
|
149
|
+
if (bm25Rank === 1 || vecRank === 1) {
|
|
146
150
|
candidate.fusionScore += config.topRankBonus;
|
|
147
151
|
}
|
|
152
|
+
// Tier 2: Top-3 in any list (but not #1)
|
|
153
|
+
else if (
|
|
154
|
+
(bm25Rank !== null && bm25Rank <= config.topRankThreshold) ||
|
|
155
|
+
(vecRank !== null && vecRank <= config.topRankThreshold)
|
|
156
|
+
) {
|
|
157
|
+
candidate.fusionScore += config.topRankBonus * 0.4; // 40% of tier 1
|
|
158
|
+
}
|
|
148
159
|
}
|
|
149
160
|
|
|
150
161
|
// Sort by fusion score (descending), then by mirrorHash:seq for determinism
|