@gmickel/gno 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -6
- package/package.json +1 -1
- package/src/cli/commands/ask.ts +1 -1
- package/src/cli/commands/embed.ts +10 -4
- package/src/cli/commands/vsearch.ts +5 -2
- package/src/config/types.ts +11 -6
- package/src/mcp/tools/vsearch.ts +5 -2
- package/src/pipeline/answer.ts +47 -14
- package/src/pipeline/contextual.ts +57 -0
- package/src/pipeline/expansion.ts +49 -31
- package/src/pipeline/explain.ts +11 -3
- package/src/pipeline/fusion.ts +20 -9
- package/src/pipeline/hybrid.ts +57 -40
- package/src/pipeline/index.ts +7 -0
- package/src/pipeline/rerank.ts +55 -27
- package/src/pipeline/types.ts +0 -3
- package/src/pipeline/vsearch.ts +3 -2
- package/src/serve/routes/api.ts +1 -1
- package/src/store/migrations/002-documents-fts.ts +40 -0
- package/src/store/migrations/index.ts +2 -1
- package/src/store/sqlite/adapter.ts +169 -33
- package/src/store/sqlite/fts5-snowball.ts +144 -0
- package/src/store/types.ts +23 -3
- package/src/store/vector/stats.ts +3 -0
- package/src/store/vector/types.ts +1 -0
package/README.md
CHANGED
|
@@ -99,11 +99,14 @@ gno skill install --target all # Both Claude + Codex
|
|
|
99
99
|
|
|
100
100
|
| Command | Mode | Best For |
|
|
101
101
|
|:--------|:-----|:---------|
|
|
102
|
-
| `gno search` | BM25 | Exact phrases, code identifiers |
|
|
103
|
-
| `gno vsearch` | Vector | Natural language, concepts |
|
|
102
|
+
| `gno search` | Document-level BM25 | Exact phrases, code identifiers |
|
|
103
|
+
| `gno vsearch` | Contextual Vector | Natural language, concepts |
|
|
104
104
|
| `gno query` | Hybrid | Best accuracy (BM25 + vector + reranking) |
|
|
105
105
|
| `gno ask --answer` | RAG | Direct answers with citations |
|
|
106
106
|
|
|
107
|
+
**BM25** indexes full documents (not chunks) with Snowball stemming—"running" matches "run".
|
|
108
|
+
**Vector** embeds chunks with document titles for context awareness.
|
|
109
|
+
|
|
107
110
|
```bash
|
|
108
111
|
gno search "handleAuth" # Find exact matches
|
|
109
112
|
gno vsearch "error handling patterns" # Semantic similarity
|
|
@@ -230,10 +233,11 @@ graph TD
|
|
|
230
233
|
M --> N[Final Results]
|
|
231
234
|
```
|
|
232
235
|
|
|
236
|
+
0. **Strong Signal Check** — Skip expansion if BM25 already has a confident match (saves 1-3s)
|
|
233
237
|
1. **Query Expansion** — LLM generates lexical variants, semantic rephrases, and a [HyDE](https://arxiv.org/abs/2212.10496) passage
|
|
234
|
-
2. **Parallel Retrieval** — BM25 + vector search
|
|
235
|
-
3. **Fusion** —
|
|
236
|
-
4. **Reranking** —
|
|
238
|
+
2. **Parallel Retrieval** — Document-level BM25 + chunk-level vector search on all variants
|
|
239
|
+
3. **Fusion** — RRF with 2× weight for original query, tiered bonus for top ranks
|
|
240
|
+
4. **Reranking** — Qwen3-Reranker scores full documents (32K context), blended with fusion
|
|
237
241
|
|
|
238
242
|
> **Deep dive**: [How Search Works](https://gno.sh/docs/HOW-SEARCH-WORKS/)
|
|
239
243
|
|
|
@@ -263,7 +267,7 @@ Models auto-download on first use to `~/.cache/gno/models/`.
|
|
|
263
267
|
| Model | Purpose | Size |
|
|
264
268
|
|:------|:--------|:-----|
|
|
265
269
|
| bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
|
|
266
|
-
|
|
|
270
|
+
| Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
|
|
267
271
|
| Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
|
|
268
272
|
|
|
269
273
|
### Model Presets
|
package/package.json
CHANGED
package/src/cli/commands/ask.ts
CHANGED
|
@@ -11,6 +11,7 @@ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
|
|
|
11
11
|
import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
|
|
12
12
|
import { getActivePreset } from '../../llm/registry';
|
|
13
13
|
import type { EmbeddingPort } from '../../llm/types';
|
|
14
|
+
import { formatDocForEmbedding } from '../../pipeline/contextual';
|
|
14
15
|
import { SqliteAdapter } from '../../store/sqlite/adapter';
|
|
15
16
|
import type { StoreResult } from '../../store/types';
|
|
16
17
|
import { err, ok } from '../../store/types';
|
|
@@ -131,9 +132,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
|
|
|
131
132
|
cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
|
|
132
133
|
}
|
|
133
134
|
|
|
134
|
-
// Embed batch
|
|
135
|
+
// Embed batch with contextual formatting (title prefix)
|
|
135
136
|
const batchEmbedResult = await ctx.embedPort.embedBatch(
|
|
136
|
-
batch.map((b) => b.text)
|
|
137
|
+
batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
|
|
137
138
|
);
|
|
138
139
|
if (!batchEmbedResult.ok) {
|
|
139
140
|
errors += batch.length;
|
|
@@ -365,9 +366,12 @@ function getActiveChunks(
|
|
|
365
366
|
after?: { mirrorHash: string; seq: number }
|
|
366
367
|
): Promise<StoreResult<BacklogItem[]>> {
|
|
367
368
|
try {
|
|
369
|
+
// Include title for contextual embedding
|
|
368
370
|
const sql = after
|
|
369
371
|
? `
|
|
370
|
-
SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
|
|
372
|
+
SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
|
|
373
|
+
(SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
|
|
374
|
+
'force' as reason
|
|
371
375
|
FROM content_chunks c
|
|
372
376
|
WHERE EXISTS (
|
|
373
377
|
SELECT 1 FROM documents d
|
|
@@ -378,7 +382,9 @@ function getActiveChunks(
|
|
|
378
382
|
LIMIT ?
|
|
379
383
|
`
|
|
380
384
|
: `
|
|
381
|
-
SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
|
|
385
|
+
SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
|
|
386
|
+
(SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
|
|
387
|
+
'force' as reason
|
|
382
388
|
FROM content_chunks c
|
|
383
389
|
WHERE EXISTS (
|
|
384
390
|
SELECT 1 FROM documents d
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
|
|
9
9
|
import { getActivePreset } from '../../llm/registry';
|
|
10
|
+
import { formatQueryForEmbedding } from '../../pipeline/contextual';
|
|
10
11
|
import type { SearchOptions, SearchResults } from '../../pipeline/types';
|
|
11
12
|
import {
|
|
12
13
|
searchVectorWithEmbedding,
|
|
@@ -86,8 +87,10 @@ export async function vsearch(
|
|
|
86
87
|
const embedPort = embedResult.value;
|
|
87
88
|
|
|
88
89
|
try {
|
|
89
|
-
// Embed query (also determines dimensions
|
|
90
|
-
const queryEmbedResult = await embedPort.embed(
|
|
90
|
+
// Embed query with contextual formatting (also determines dimensions)
|
|
91
|
+
const queryEmbedResult = await embedPort.embed(
|
|
92
|
+
formatQueryForEmbedding(query)
|
|
93
|
+
);
|
|
91
94
|
if (!queryEmbedResult.ok) {
|
|
92
95
|
return { success: false, error: queryEmbedResult.error.message };
|
|
93
96
|
}
|
package/src/config/types.ts
CHANGED
|
@@ -32,11 +32,16 @@ export const DEFAULT_EXCLUDES: readonly string[] = [
|
|
|
32
32
|
];
|
|
33
33
|
|
|
34
34
|
/** Valid FTS tokenizer options */
|
|
35
|
-
export const FTS_TOKENIZERS = [
|
|
35
|
+
export const FTS_TOKENIZERS = [
|
|
36
|
+
'unicode61',
|
|
37
|
+
'porter',
|
|
38
|
+
'trigram',
|
|
39
|
+
'snowball english',
|
|
40
|
+
] as const;
|
|
36
41
|
export type FtsTokenizer = (typeof FTS_TOKENIZERS)[number];
|
|
37
42
|
|
|
38
|
-
/** Default FTS tokenizer */
|
|
39
|
-
export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = '
|
|
43
|
+
/** Default FTS tokenizer - Snowball stemmer with English rules (e.g. "running" matches "run") */
|
|
44
|
+
export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'snowball english';
|
|
40
45
|
|
|
41
46
|
/**
|
|
42
47
|
* BCP-47 language tag pattern (simplified, case-insensitive).
|
|
@@ -173,7 +178,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
173
178
|
name: 'Slim (Fast, ~1GB)',
|
|
174
179
|
embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
|
|
175
180
|
rerank:
|
|
176
|
-
'hf:
|
|
181
|
+
'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
|
|
177
182
|
gen: 'hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf',
|
|
178
183
|
},
|
|
179
184
|
{
|
|
@@ -181,7 +186,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
181
186
|
name: 'Balanced (Default, ~2GB)',
|
|
182
187
|
embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
|
|
183
188
|
rerank:
|
|
184
|
-
'hf:
|
|
189
|
+
'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
|
|
185
190
|
gen: 'hf:ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
|
|
186
191
|
},
|
|
187
192
|
{
|
|
@@ -189,7 +194,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
189
194
|
name: 'Quality (Best Answers, ~2.5GB)',
|
|
190
195
|
embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
|
|
191
196
|
rerank:
|
|
192
|
-
'hf:
|
|
197
|
+
'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
|
|
193
198
|
gen: 'hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf',
|
|
194
199
|
},
|
|
195
200
|
];
|
package/src/mcp/tools/vsearch.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { join as pathJoin } from 'node:path';
|
|
|
8
8
|
import { parseUri } from '../../app/constants';
|
|
9
9
|
import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
|
|
10
10
|
import { getActivePreset } from '../../llm/registry';
|
|
11
|
+
import { formatQueryForEmbedding } from '../../pipeline/contextual';
|
|
11
12
|
import type { SearchResult, SearchResults } from '../../pipeline/types';
|
|
12
13
|
import {
|
|
13
14
|
searchVectorWithEmbedding,
|
|
@@ -121,8 +122,10 @@ export function handleVsearch(
|
|
|
121
122
|
const embedPort = embedResult.value;
|
|
122
123
|
|
|
123
124
|
try {
|
|
124
|
-
// Embed query
|
|
125
|
-
const queryEmbedResult = await embedPort.embed(
|
|
125
|
+
// Embed query with contextual formatting
|
|
126
|
+
const queryEmbedResult = await embedPort.embed(
|
|
127
|
+
formatQueryForEmbedding(args.query)
|
|
128
|
+
);
|
|
126
129
|
if (!queryEmbedResult.ok) {
|
|
127
130
|
throw new Error(queryEmbedResult.error.message);
|
|
128
131
|
}
|
package/src/pipeline/answer.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { GenerationPort } from '../llm/types';
|
|
9
|
+
import type { StorePort } from '../store/types';
|
|
9
10
|
import type { Citation, SearchResult } from './types';
|
|
10
11
|
|
|
11
12
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -32,11 +33,14 @@ Write a concise answer (1-3 paragraphs).`;
|
|
|
32
33
|
export const ABSTENTION_MESSAGE =
|
|
33
34
|
"I don't have enough information in the provided sources to answer this question.";
|
|
34
35
|
|
|
35
|
-
/** Max characters per
|
|
36
|
-
const
|
|
36
|
+
/** Max characters per document (~8K tokens) */
|
|
37
|
+
const MAX_DOC_CHARS = 32_000;
|
|
38
|
+
|
|
39
|
+
/** Max number of sources - fewer docs but full content */
|
|
40
|
+
const MAX_CONTEXT_SOURCES = 3;
|
|
37
41
|
|
|
38
|
-
/**
|
|
39
|
-
const
|
|
42
|
+
/** Fallback snippet limit when full content unavailable */
|
|
43
|
+
const MAX_SNIPPET_CHARS = 1500;
|
|
40
44
|
|
|
41
45
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
42
46
|
// Citation Processing
|
|
@@ -109,37 +113,66 @@ export interface AnswerGenerationResult {
|
|
|
109
113
|
citations: Citation[];
|
|
110
114
|
}
|
|
111
115
|
|
|
116
|
+
export interface AnswerGenerationDeps {
|
|
117
|
+
genPort: GenerationPort;
|
|
118
|
+
store: StorePort | null;
|
|
119
|
+
}
|
|
120
|
+
|
|
112
121
|
/**
|
|
113
122
|
* Generate a grounded answer from search results.
|
|
114
123
|
* Returns null if no valid context or generation fails.
|
|
124
|
+
*
|
|
125
|
+
* When store is provided, fetches full document content for better context.
|
|
126
|
+
* Falls back to snippets if store unavailable or content fetch fails.
|
|
115
127
|
*/
|
|
128
|
+
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sequential content processing with fallbacks
|
|
116
129
|
export async function generateGroundedAnswer(
|
|
117
|
-
|
|
130
|
+
deps: AnswerGenerationDeps,
|
|
118
131
|
query: string,
|
|
119
132
|
results: SearchResult[],
|
|
120
133
|
maxTokens: number
|
|
121
134
|
): Promise<AnswerGenerationResult | null> {
|
|
135
|
+
const { genPort, store } = deps;
|
|
122
136
|
const contextParts: string[] = [];
|
|
123
137
|
const citations: Citation[] = [];
|
|
124
138
|
let citationIndex = 0;
|
|
125
139
|
|
|
126
140
|
for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
|
|
127
|
-
|
|
128
|
-
|
|
141
|
+
let content: string | null = null;
|
|
142
|
+
let usedFullContent = false;
|
|
143
|
+
|
|
144
|
+
// Try to fetch full document content if store available
|
|
145
|
+
if (store && r.conversion?.mirrorHash) {
|
|
146
|
+
const contentResult = await store.getContent(r.conversion.mirrorHash);
|
|
147
|
+
if (contentResult.ok && contentResult.value) {
|
|
148
|
+
content = contentResult.value;
|
|
149
|
+
usedFullContent = true;
|
|
150
|
+
// Truncate to max doc chars
|
|
151
|
+
if (content.length > MAX_DOC_CHARS) {
|
|
152
|
+
content = `${content.slice(0, MAX_DOC_CHARS)}\n\n[... truncated ...]`;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
129
155
|
}
|
|
130
156
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
157
|
+
// Fallback to snippet if full content unavailable
|
|
158
|
+
if (!content) {
|
|
159
|
+
if (!r.snippet || r.snippet.trim().length === 0) {
|
|
160
|
+
continue;
|
|
161
|
+
}
|
|
162
|
+
content =
|
|
163
|
+
r.snippet.length > MAX_SNIPPET_CHARS
|
|
164
|
+
? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
|
|
165
|
+
: r.snippet;
|
|
166
|
+
}
|
|
135
167
|
|
|
136
168
|
citationIndex += 1;
|
|
137
|
-
contextParts.push(`[${citationIndex}] ${
|
|
169
|
+
contextParts.push(`[${citationIndex}] ${content}`);
|
|
170
|
+
// Clear line range when citing full content (not a specific snippet)
|
|
138
171
|
citations.push({
|
|
139
172
|
docid: r.docid,
|
|
140
173
|
uri: r.uri,
|
|
141
|
-
startLine: r.snippetRange?.startLine,
|
|
142
|
-
endLine: r.snippetRange?.endLine,
|
|
174
|
+
startLine: usedFullContent ? undefined : r.snippetRange?.startLine,
|
|
175
|
+
endLine: usedFullContent ? undefined : r.snippetRange?.endLine,
|
|
143
176
|
});
|
|
144
177
|
}
|
|
145
178
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Contextual embedding formatting.
|
|
3
|
+
* Prepends document context to chunks for better retrieval.
|
|
4
|
+
*
|
|
5
|
+
* Based on Anthropic Contextual Retrieval research:
|
|
6
|
+
* - Query relevance jumps from 0.1 to 0.92 for context-dependent queries
|
|
7
|
+
* - 49% reduction in retrieval failure with contextual embeddings + BM25
|
|
8
|
+
* - 67% reduction with reranking added
|
|
9
|
+
*
|
|
10
|
+
* @module src/pipeline/contextual
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Top-level regex for performance
|
|
14
|
+
const HEADING_REGEX = /^##?\s+(.+)$/m;
|
|
15
|
+
const SUBHEADING_REGEX = /^##\s+(.+)$/m;
|
|
16
|
+
const EXT_REGEX = /\.\w+$/;
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Format document text for embedding.
|
|
20
|
+
* Prepends title for contextual retrieval.
|
|
21
|
+
*/
|
|
22
|
+
export function formatDocForEmbedding(text: string, title?: string): string {
|
|
23
|
+
const safeTitle = title?.trim() || 'none';
|
|
24
|
+
return `title: ${safeTitle} | text: ${text}`;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Format query for embedding.
|
|
29
|
+
* Uses task-prefixed format for asymmetric retrieval.
|
|
30
|
+
*/
|
|
31
|
+
export function formatQueryForEmbedding(query: string): string {
|
|
32
|
+
return `task: search result | query: ${query}`;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Extract title from markdown content or filename.
|
|
37
|
+
* Prefers first heading, falls back to filename without extension.
|
|
38
|
+
*/
|
|
39
|
+
export function extractTitle(content: string, filename: string): string {
|
|
40
|
+
// Try to find first heading (# or ##)
|
|
41
|
+
const match = content.match(HEADING_REGEX);
|
|
42
|
+
if (match?.[1]) {
|
|
43
|
+
const title = match[1].trim();
|
|
44
|
+
// Skip the generic title "Notes" (case-insensitive) and use the first ## heading instead, if there is one
|
|
45
|
+
if (title.toLowerCase() === 'notes') {
|
|
46
|
+
const nextMatch = content.match(SUBHEADING_REGEX);
|
|
47
|
+
if (nextMatch?.[1]) {
|
|
48
|
+
return nextMatch[1].trim();
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
return title;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Fall back to filename without extension
|
|
55
|
+
const basename = filename.split('/').pop() ?? filename;
|
|
56
|
+
return basename.replace(EXT_REGEX, '');
|
|
57
|
+
}
|
|
@@ -15,9 +15,10 @@ import type { ExpansionResult } from './types';
|
|
|
15
15
|
// Constants
|
|
16
16
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
17
17
|
|
|
18
|
-
const EXPANSION_PROMPT_VERSION = '
|
|
18
|
+
const EXPANSION_PROMPT_VERSION = 'v2';
|
|
19
19
|
const DEFAULT_TIMEOUT_MS = 5000;
|
|
20
|
-
|
|
20
|
+
// Non-greedy: match from the first { to the first } so the match never spans multiple objects (note: stops early on nested objects or a } inside a string value)
|
|
21
|
+
const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
|
|
21
22
|
|
|
22
23
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
23
24
|
// Cache Key Generation
|
|
@@ -40,45 +41,53 @@ export function generateCacheKey(
|
|
|
40
41
|
// Prompt Templates
|
|
41
42
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
42
43
|
|
|
43
|
-
const EXPANSION_PROMPT_EN = `You
|
|
44
|
+
const EXPANSION_PROMPT_EN = `You expand search queries for a hybrid search system.
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
Query: "{query}"
|
|
46
47
|
|
|
47
|
-
Generate
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
Generate JSON with:
|
|
49
|
+
1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
|
|
50
|
+
2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
|
|
51
|
+
3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
|
|
51
52
|
|
|
52
|
-
|
|
53
|
+
Rules:
|
|
54
|
+
- Keep proper nouns exactly as written
|
|
55
|
+
- Be concise - each variation 3-8 words
|
|
56
|
+
- HyDE should read like actual documentation, not a question
|
|
53
57
|
|
|
54
|
-
|
|
55
|
-
{
|
|
56
|
-
"lexicalQueries": ["deployment process", "how to deploy", "deploying application"],
|
|
57
|
-
"vectorQueries": ["steps to release software to production", "guide for application deployment"],
|
|
58
|
-
"hyde": "To deploy the application, first run the build command, then push to the staging environment..."
|
|
59
|
-
}`;
|
|
58
|
+
Respond with valid JSON only.`;
|
|
60
59
|
|
|
61
|
-
const EXPANSION_PROMPT_DE = `Du
|
|
60
|
+
const EXPANSION_PROMPT_DE = `Du erweiterst Suchanfragen für ein hybrides Suchsystem.
|
|
62
61
|
|
|
63
|
-
|
|
62
|
+
Anfrage: "{query}"
|
|
64
63
|
|
|
65
|
-
Generiere
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
64
|
+
Generiere JSON mit:
|
|
65
|
+
1. "lexicalQueries": 2-3 Keyword-Variationen mit Synonymen (für BM25)
|
|
66
|
+
2. "vectorQueries": 2-3 semantische Umformulierungen (für Embeddings)
|
|
67
|
+
3. "hyde": Ein 50-100 Wort Abschnitt, der die Anfrage direkt beantwortet, wie aus einem relevanten Dokument
|
|
69
68
|
|
|
70
|
-
|
|
69
|
+
Regeln:
|
|
70
|
+
- Eigennamen exakt beibehalten
|
|
71
|
+
- Kurz halten - jede Variation 3-8 Wörter
|
|
72
|
+
- HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
|
|
71
73
|
|
|
72
|
-
|
|
74
|
+
Antworte nur mit validem JSON.`;
|
|
73
75
|
|
|
74
|
-
|
|
76
|
+
const EXPANSION_PROMPT_MULTILINGUAL = `You expand search queries for a hybrid search system. Respond in the same language as the query.
|
|
75
77
|
|
|
76
|
-
|
|
77
|
-
- "lexicalQueries": array of 2-3 keyword-based variations
|
|
78
|
-
- "vectorQueries": array of 2-3 semantic rephrasing
|
|
79
|
-
- "hyde": a short hypothetical document passage (optional)
|
|
78
|
+
Query: "{query}"
|
|
80
79
|
|
|
81
|
-
|
|
80
|
+
Generate JSON with:
|
|
81
|
+
1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
|
|
82
|
+
2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
|
|
83
|
+
3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
|
|
84
|
+
|
|
85
|
+
Rules:
|
|
86
|
+
- Keep proper nouns exactly as written
|
|
87
|
+
- Be concise - each variation 3-8 words
|
|
88
|
+
- HyDE should read like actual documentation, not a question
|
|
89
|
+
|
|
90
|
+
Respond with valid JSON only.`;
|
|
82
91
|
|
|
83
92
|
/**
|
|
84
93
|
* Get prompt template for language.
|
|
@@ -178,9 +187,10 @@ export async function expandQuery(
|
|
|
178
187
|
const template = getPromptTemplate(options.lang);
|
|
179
188
|
const prompt = template.replace('{query}', query);
|
|
180
189
|
|
|
181
|
-
// Run with timeout
|
|
190
|
+
// Run with timeout (clear timer to avoid resource leak)
|
|
191
|
+
let timeoutId: ReturnType<typeof setTimeout> | undefined;
|
|
182
192
|
const timeoutPromise = new Promise<null>((resolve) => {
|
|
183
|
-
setTimeout(() => resolve(null), timeout);
|
|
193
|
+
timeoutId = setTimeout(() => resolve(null), timeout);
|
|
184
194
|
});
|
|
185
195
|
|
|
186
196
|
try {
|
|
@@ -193,6 +203,11 @@ export async function expandQuery(
|
|
|
193
203
|
timeoutPromise,
|
|
194
204
|
]);
|
|
195
205
|
|
|
206
|
+
// Clear the timer once the race has settled (harmless no-op if the timeout already fired)
|
|
207
|
+
if (timeoutId) {
|
|
208
|
+
clearTimeout(timeoutId);
|
|
209
|
+
}
|
|
210
|
+
|
|
196
211
|
// Timeout
|
|
197
212
|
if (result === null) {
|
|
198
213
|
return ok(null);
|
|
@@ -207,6 +222,9 @@ export async function expandQuery(
|
|
|
207
222
|
const parsed = parseExpansionResult(result.value);
|
|
208
223
|
return ok(parsed);
|
|
209
224
|
} catch {
|
|
225
|
+
if (timeoutId) {
|
|
226
|
+
clearTimeout(timeoutId);
|
|
227
|
+
}
|
|
210
228
|
return ok(null); // Graceful degradation
|
|
211
229
|
}
|
|
212
230
|
}
|
package/src/pipeline/explain.ts
CHANGED
|
@@ -49,15 +49,23 @@ export function formatResultExplain(results: ExplainResult[]): string {
|
|
|
49
49
|
// Explain Line Builders
|
|
50
50
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
51
51
|
|
|
52
|
+
export type ExpansionStatus =
|
|
53
|
+
| 'disabled' // User chose --no-expand
|
|
54
|
+
| 'skipped_strong' // Strong BM25 signal detected
|
|
55
|
+
| 'attempted'; // Expansion was attempted (may have succeeded or timed out)
|
|
56
|
+
|
|
52
57
|
export function explainExpansion(
|
|
53
|
-
|
|
58
|
+
status: ExpansionStatus,
|
|
54
59
|
result: ExpansionResult | null
|
|
55
60
|
): ExplainLine {
|
|
56
|
-
if (
|
|
61
|
+
if (status === 'disabled') {
|
|
57
62
|
return { stage: 'expansion', message: 'disabled' };
|
|
58
63
|
}
|
|
64
|
+
if (status === 'skipped_strong') {
|
|
65
|
+
return { stage: 'expansion', message: 'skipped (strong BM25)' };
|
|
66
|
+
}
|
|
59
67
|
if (!result) {
|
|
60
|
-
return { stage: 'expansion', message: 'skipped (
|
|
68
|
+
return { stage: 'expansion', message: 'skipped (timeout)' };
|
|
61
69
|
}
|
|
62
70
|
const lex = result.lexicalQueries.length;
|
|
63
71
|
const sem = result.vectorQueries.length;
|
package/src/pipeline/fusion.ts
CHANGED
|
@@ -64,9 +64,12 @@ export function rrfFuse(
|
|
|
64
64
|
);
|
|
65
65
|
|
|
66
66
|
// Process BM25 sources
|
|
67
|
+
// Original query gets 2x weight to prevent dilution by expansion variants
|
|
67
68
|
for (const input of bm25Inputs) {
|
|
68
69
|
const weight =
|
|
69
|
-
input.source === 'bm25'
|
|
70
|
+
input.source === 'bm25'
|
|
71
|
+
? config.bm25Weight * 2.0
|
|
72
|
+
: config.bm25Weight * 0.5;
|
|
70
73
|
|
|
71
74
|
for (const result of input.results) {
|
|
72
75
|
const key = `${result.mirrorHash}:${result.seq}`;
|
|
@@ -98,8 +101,9 @@ export function rrfFuse(
|
|
|
98
101
|
}
|
|
99
102
|
|
|
100
103
|
// Process vector sources
|
|
104
|
+
// Original query gets 2x weight to prevent dilution by expansion variants
|
|
101
105
|
for (const input of vectorInputs) {
|
|
102
|
-
let weight = config.vecWeight;
|
|
106
|
+
let weight = config.vecWeight * 2.0; // Default for original vector
|
|
103
107
|
if (input.source === 'vector_variant') {
|
|
104
108
|
weight = config.vecWeight * 0.5;
|
|
105
109
|
} else if (input.source === 'hyde') {
|
|
@@ -135,16 +139,23 @@ export function rrfFuse(
|
|
|
135
139
|
}
|
|
136
140
|
}
|
|
137
141
|
|
|
138
|
-
// Apply top-rank bonus
|
|
142
|
+
// Apply tiered top-rank bonus
|
|
143
|
+
// Rewards documents ranking highly in ANY list (not requiring both)
|
|
139
144
|
for (const candidate of candidates.values()) {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
) {
|
|
145
|
+
const bm25Rank = candidate.bm25Rank;
|
|
146
|
+
const vecRank = candidate.vecRank;
|
|
147
|
+
|
|
148
|
+
// Tier 1: #1 in any list
|
|
149
|
+
if (bm25Rank === 1 || vecRank === 1) {
|
|
146
150
|
candidate.fusionScore += config.topRankBonus;
|
|
147
151
|
}
|
|
152
|
+
// Tier 2: within config.topRankThreshold in any list (but not #1)
|
|
153
|
+
else if (
|
|
154
|
+
(bm25Rank !== null && bm25Rank <= config.topRankThreshold) ||
|
|
155
|
+
(vecRank !== null && vecRank <= config.topRankThreshold)
|
|
156
|
+
) {
|
|
157
|
+
candidate.fusionScore += config.topRankBonus * 0.4; // 40% of tier 1
|
|
158
|
+
}
|
|
148
159
|
}
|
|
149
160
|
|
|
150
161
|
// Sort by fusion score (descending), then by mirrorHash:seq for determinism
|