@gmickel/gno 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/assets/skill/SKILL.md +112 -0
- package/assets/skill/cli-reference.md +327 -0
- package/assets/skill/examples.md +234 -0
- package/assets/skill/mcp-reference.md +159 -0
- package/package.json +90 -0
- package/src/app/constants.ts +313 -0
- package/src/cli/colors.ts +65 -0
- package/src/cli/commands/ask.ts +545 -0
- package/src/cli/commands/cleanup.ts +105 -0
- package/src/cli/commands/collection/add.ts +120 -0
- package/src/cli/commands/collection/index.ts +10 -0
- package/src/cli/commands/collection/list.ts +108 -0
- package/src/cli/commands/collection/remove.ts +64 -0
- package/src/cli/commands/collection/rename.ts +95 -0
- package/src/cli/commands/context/add.ts +67 -0
- package/src/cli/commands/context/check.ts +153 -0
- package/src/cli/commands/context/index.ts +10 -0
- package/src/cli/commands/context/list.ts +109 -0
- package/src/cli/commands/context/rm.ts +52 -0
- package/src/cli/commands/doctor.ts +393 -0
- package/src/cli/commands/embed.ts +462 -0
- package/src/cli/commands/get.ts +356 -0
- package/src/cli/commands/index-cmd.ts +119 -0
- package/src/cli/commands/index.ts +102 -0
- package/src/cli/commands/init.ts +328 -0
- package/src/cli/commands/ls.ts +217 -0
- package/src/cli/commands/mcp/config.ts +300 -0
- package/src/cli/commands/mcp/index.ts +24 -0
- package/src/cli/commands/mcp/install.ts +203 -0
- package/src/cli/commands/mcp/paths.ts +470 -0
- package/src/cli/commands/mcp/status.ts +222 -0
- package/src/cli/commands/mcp/uninstall.ts +158 -0
- package/src/cli/commands/mcp.ts +20 -0
- package/src/cli/commands/models/clear.ts +103 -0
- package/src/cli/commands/models/index.ts +32 -0
- package/src/cli/commands/models/list.ts +214 -0
- package/src/cli/commands/models/path.ts +51 -0
- package/src/cli/commands/models/pull.ts +199 -0
- package/src/cli/commands/models/use.ts +85 -0
- package/src/cli/commands/multi-get.ts +400 -0
- package/src/cli/commands/query.ts +220 -0
- package/src/cli/commands/ref-parser.ts +108 -0
- package/src/cli/commands/reset.ts +191 -0
- package/src/cli/commands/search.ts +136 -0
- package/src/cli/commands/shared.ts +156 -0
- package/src/cli/commands/skill/index.ts +19 -0
- package/src/cli/commands/skill/install.ts +197 -0
- package/src/cli/commands/skill/paths-cmd.ts +81 -0
- package/src/cli/commands/skill/paths.ts +191 -0
- package/src/cli/commands/skill/show.ts +73 -0
- package/src/cli/commands/skill/uninstall.ts +141 -0
- package/src/cli/commands/status.ts +205 -0
- package/src/cli/commands/update.ts +68 -0
- package/src/cli/commands/vsearch.ts +188 -0
- package/src/cli/context.ts +64 -0
- package/src/cli/errors.ts +64 -0
- package/src/cli/format/search-results.ts +211 -0
- package/src/cli/options.ts +183 -0
- package/src/cli/program.ts +1330 -0
- package/src/cli/run.ts +213 -0
- package/src/cli/ui.ts +92 -0
- package/src/config/defaults.ts +20 -0
- package/src/config/index.ts +55 -0
- package/src/config/loader.ts +161 -0
- package/src/config/paths.ts +87 -0
- package/src/config/saver.ts +153 -0
- package/src/config/types.ts +280 -0
- package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
- package/src/converters/adapters/officeparser/adapter.ts +126 -0
- package/src/converters/canonicalize.ts +89 -0
- package/src/converters/errors.ts +218 -0
- package/src/converters/index.ts +51 -0
- package/src/converters/mime.ts +163 -0
- package/src/converters/native/markdown.ts +115 -0
- package/src/converters/native/plaintext.ts +56 -0
- package/src/converters/path.ts +48 -0
- package/src/converters/pipeline.ts +159 -0
- package/src/converters/registry.ts +74 -0
- package/src/converters/types.ts +123 -0
- package/src/converters/versions.ts +24 -0
- package/src/index.ts +27 -0
- package/src/ingestion/chunker.ts +238 -0
- package/src/ingestion/index.ts +32 -0
- package/src/ingestion/language.ts +276 -0
- package/src/ingestion/sync.ts +671 -0
- package/src/ingestion/types.ts +219 -0
- package/src/ingestion/walker.ts +235 -0
- package/src/llm/cache.ts +467 -0
- package/src/llm/errors.ts +191 -0
- package/src/llm/index.ts +58 -0
- package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
- package/src/llm/nodeLlamaCpp/generation.ts +88 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
- package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
- package/src/llm/registry.ts +86 -0
- package/src/llm/types.ts +129 -0
- package/src/mcp/resources/index.ts +151 -0
- package/src/mcp/server.ts +229 -0
- package/src/mcp/tools/get.ts +220 -0
- package/src/mcp/tools/index.ts +160 -0
- package/src/mcp/tools/multi-get.ts +263 -0
- package/src/mcp/tools/query.ts +226 -0
- package/src/mcp/tools/search.ts +119 -0
- package/src/mcp/tools/status.ts +81 -0
- package/src/mcp/tools/vsearch.ts +198 -0
- package/src/pipeline/chunk-lookup.ts +44 -0
- package/src/pipeline/expansion.ts +256 -0
- package/src/pipeline/explain.ts +115 -0
- package/src/pipeline/fusion.ts +185 -0
- package/src/pipeline/hybrid.ts +535 -0
- package/src/pipeline/index.ts +64 -0
- package/src/pipeline/query-language.ts +118 -0
- package/src/pipeline/rerank.ts +223 -0
- package/src/pipeline/search.ts +261 -0
- package/src/pipeline/types.ts +328 -0
- package/src/pipeline/vsearch.ts +348 -0
- package/src/store/index.ts +41 -0
- package/src/store/migrations/001-initial.ts +196 -0
- package/src/store/migrations/index.ts +20 -0
- package/src/store/migrations/runner.ts +187 -0
- package/src/store/sqlite/adapter.ts +1242 -0
- package/src/store/sqlite/index.ts +7 -0
- package/src/store/sqlite/setup.ts +129 -0
- package/src/store/sqlite/types.ts +28 -0
- package/src/store/types.ts +506 -0
- package/src/store/vector/index.ts +13 -0
- package/src/store/vector/sqlite-vec.ts +373 -0
- package/src/store/vector/stats.ts +152 -0
- package/src/store/vector/types.ts +115 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search pipeline types.
|
|
3
|
+
* Defines SearchPipelinePort and related types for search operations.
|
|
4
|
+
*
|
|
5
|
+
* @module src/pipeline/types
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { StoreResult } from '../store/types';
|
|
9
|
+
|
|
10
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
11
|
+
// Search Result Types
|
|
12
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/**
 * Describes the original file behind a search hit: where it lives and
 * basic facts about it.
 */
export interface SearchResultSource {
  /** Path of the file relative to its collection root. */
  relPath: string;
  /** Absolute path; omitted when the collection root is not known. */
  absPath?: string;
  /** MIME type of the source file. */
  mime: string;
  /** Extension of the source file. */
  ext: string;
  /** Modification time of the source file. */
  modifiedAt?: string;
  /** Size of the source file in bytes. */
  sizeBytes?: number;
  /** Hash of the source file. */
  sourceHash?: string;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Describes how the source was converted into the mirror content that
 * was indexed.
 */
export interface SearchResultConversion {
  /** Identifier of the converter, when recorded. */
  converterId?: string;
  /** Version of the converter, when recorded. */
  converterVersion?: string;
  /** Hash identifying the mirror (converted) content. */
  mirrorHash: string;
  /** Conversion warnings, each with a code and a message. */
  warnings?: Array<{ code: string; message: string }>;
}
|
|
32
|
+
|
|
33
|
+
/** Line span of a snippet within the mirror content. */
export interface SnippetRange {
  /** First line of the snippet. */
  startLine: number;
  /** Last line of the snippet. */
  endLine: number;
}
|
|
38
|
+
|
|
39
|
+
/** One search hit, shaped to match the output schema. */
export interface SearchResult {
  /** Identifier of the matched document. */
  docid: string;
  /** Relevance score of the hit. */
  score: number;
  /** URI of the matched document. */
  uri: string;
  /** Document title, when one is known. */
  title?: string;
  /** Text shown for the hit (a chunk, or the whole document for full output). */
  snippet: string;
  /** Language recorded for the snippet, when known. */
  snippetLanguage?: string;
  /** Line span of the snippet; omitted when the snippet is the full document. */
  snippetRange?: SnippetRange;
  /** Optional context string attached to the hit. */
  context?: string;
  /** Metadata about the original source file. */
  source: SearchResultSource;
  /** Metadata about the conversion to mirror content, when available. */
  conversion?: SearchResultConversion;
}
|
|
52
|
+
|
|
53
|
+
/** Names of the retrieval modes a search can report. */
export type SearchMode =
  | 'bm25'
  | 'vector'
  | 'hybrid'
  | 'bm25_only';
|
|
55
|
+
|
|
56
|
+
/** Metadata that accompanies a list of search results. */
export interface SearchMeta {
  /** The query string that was searched. */
  query: string;
  /** Retrieval mode that produced the results. */
  mode: SearchMode;
  /** Whether query expansion was applied. */
  expanded?: boolean;
  /** Whether results were reranked. */
  reranked?: boolean;
  /** Whether vector search contributed to the results. */
  vectorsUsed?: boolean;
  /** Number of results returned. */
  totalResults: number;
  /** Collection filter that was applied, if any. */
  collection?: string;
  /** Language option that was supplied, if any. */
  lang?: string;
  /**
   * Detected or overridden query language used for prompt selection
   * (typically BCP-47; may be user-provided via --lang).
   */
  queryLanguage?: string;
  /** Explain data, present when --explain is used. */
  explain?: {
    lines: ExplainLine[];
    results: ExplainResult[];
  };
}
|
|
74
|
+
|
|
75
|
+
/** A complete search response: the hits plus metadata about the search. */
export interface SearchResults {
  /** The individual hits. */
  results: SearchResult[];
  /** Metadata describing how the hits were produced. */
  meta: SearchMeta;
}
|
|
80
|
+
|
|
81
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
82
|
+
// Search Options
|
|
83
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
/** Options shared by every search command. */
export interface SearchOptions {
  /** Maximum number of results to return. */
  limit?: number;
  /** Minimum score a result must reach (0-1). */
  minScore?: number;
  /** Restrict the search to one collection. */
  collection?: string;
  /** Language filter/hint (BCP-47). */
  lang?: string;
  /** Return full document content instead of a snippet. */
  full?: boolean;
  /** Include line numbers in the output. */
  lineNumbers?: boolean;
}
|
|
100
|
+
|
|
101
|
+
/** Options for hybrid search (gno query): the common options plus pipeline switches. */
export type HybridSearchOptions = SearchOptions & {
  /** Turn query expansion off. */
  noExpand?: boolean;
  /** Turn reranking off. */
  noRerank?: boolean;
  /** Produce explain output. */
  explain?: boolean;
  /**
   * Language hint for prompt selection. It does NOT filter retrieval;
   * it only affects expansion prompts.
   */
  queryLanguageHint?: string;
};
|
|
112
|
+
|
|
113
|
+
/** Options for the ask command: hybrid search options plus answer controls. */
export type AskOptions = HybridSearchOptions & {
  /** Generate a grounded answer. */
  answer?: boolean;
  /** Force retrieval-only output. */
  noAnswer?: boolean;
  /** Upper bound on tokens for the answer. */
  maxAnswerTokens?: number;
};
|
|
122
|
+
|
|
123
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
124
|
+
// Query Expansion Types
|
|
125
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
/** Query expansion output produced by the LLM. */
export interface ExpansionResult {
  /** Query variants intended for lexical search. */
  lexicalQueries: string[];
  /** Query variants intended for vector search. */
  vectorQueries: string[];
  /** Optional HyDE text. */
  hyde?: string;
  /** Optional free-form notes. */
  notes?: string;
}
|
|
134
|
+
|
|
135
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
136
|
+
// Fusion Types
|
|
137
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
/** Tunable parameters for RRF fusion. */
export interface RrfConfig {
  /** RRF constant (default: 60). */
  k: number;
  /** Weight applied to the BM25 source. */
  bm25Weight: number;
  /** Weight applied to the vector source. */
  vecWeight: number;
  /** Bonus granted for a top rank in both modes. */
  topRankBonus: number;
  /** Highest rank that still qualifies for the top-rank bonus. */
  topRankThreshold: number;
}
|
|
152
|
+
|
|
153
|
+
/** RRF settings used when none are supplied. */
export const DEFAULT_RRF_CONFIG: RrfConfig = {
  // RRF constant
  k: 60,
  // BM25 and vector sources are weighted equally
  bm25Weight: 1,
  vecWeight: 1,
  // Bonus for a top rank in both modes, limited to ranks up to the threshold
  topRankBonus: 0.1,
  topRankThreshold: 5,
};
|
|
161
|
+
|
|
162
|
+
/** Identifies a chunk for fusion tracking: mirror hash plus sequence number. */
export interface ChunkId {
  /** Hash of the mirror content the chunk belongs to. */
  mirrorHash: string;
  /** Sequence number of the chunk. */
  seq: number;
}
|
|
167
|
+
|
|
168
|
+
/** Labels for the retrieval source that contributed a fusion candidate. */
export type FusionSource = 'bm25' | 'vector' | 'bm25_variant' | 'vector_variant' | 'hyde';
|
|
175
|
+
|
|
176
|
+
/** A chunk under consideration during fusion, with its rank from each source. */
export interface FusionCandidate {
  /** Hash of the mirror content the chunk belongs to. */
  mirrorHash: string;
  /** Sequence number of the chunk. */
  seq: number;
  /** Rank from BM25, or null when there is none. */
  bm25Rank: number | null;
  /** Rank from vector search, or null when there is none. */
  vecRank: number | null;
  /** Combined fusion score. */
  fusionScore: number;
  /** Sources that contributed this candidate. */
  sources: FusionSource[];
}
|
|
185
|
+
|
|
186
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
187
|
+
// Rerank & Blending Types
|
|
188
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
/** One tier of the blending schedule: a rank bound and the two weights used there. */
export interface BlendingTier {
  /** Upper rank bound for this tier. */
  maxRank: number;
  /** Weight given to the fusion score. */
  fusionWeight: number;
  /** Weight given to the rerank score. */
  rerankWeight: number;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Blending schedule used when none is supplied. The fusion weight falls
 * and the rerank weight rises from one tier to the next.
 */
export const DEFAULT_BLENDING_SCHEDULE: BlendingTier[] = [
  {
    maxRank: 3,
    fusionWeight: 0.75,
    rerankWeight: 0.25,
  },
  {
    maxRank: 10,
    fusionWeight: 0.6,
    rerankWeight: 0.4,
  },
  {
    // Unbounded final tier
    maxRank: Number.POSITIVE_INFINITY,
    fusionWeight: 0.4,
    rerankWeight: 0.6,
  },
];
|
|
203
|
+
|
|
204
|
+
/** A fusion candidate after reranking, with the rerank and blended scores added. */
export type RerankedCandidate = FusionCandidate & {
  /** Score from the reranker, or null when there is none. */
  rerankScore: number | null;
  /** Final blended score. */
  blendedScore: number;
};
|
|
209
|
+
|
|
210
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
211
|
+
// Pipeline Config
|
|
212
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
213
|
+
|
|
214
|
+
/** Settings that control the search pipeline. */
export interface PipelineConfig {
  /** BM25 score considered strong enough to skip expansion. */
  strongBm25Threshold: number;
  /** Time allowed for expansion, in milliseconds. */
  expansionTimeout: number;
  /** Maximum number of candidates passed to the reranker. */
  rerankCandidates: number;
  /** RRF fusion settings. */
  rrf: RrfConfig;
  /** Tiers used to blend fusion and rerank scores. */
  blendingSchedule: BlendingTier[];
}
|
|
227
|
+
|
|
228
|
+
/**
 * Pipeline settings used when none are supplied.
 * `rrf` and `blendingSchedule` reference the shared default objects
 * directly (not copies).
 */
export const DEFAULT_PIPELINE_CONFIG: PipelineConfig = {
  // Skip expansion when BM25 is at least this strong
  strongBm25Threshold: 0.7,
  // Milliseconds
  expansionTimeout: 5000,
  // Cap on candidates sent to the reranker
  rerankCandidates: 20,
  rrf: DEFAULT_RRF_CONFIG,
  blendingSchedule: DEFAULT_BLENDING_SCHEDULE,
};
|
|
236
|
+
|
|
237
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
238
|
+
// Ask Types
|
|
239
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
240
|
+
|
|
241
|
+
/** Reference to a document, optionally narrowed to a line span. */
export interface Citation {
  /** Identifier of the cited document. */
  docid: string;
  /** URI of the cited document. */
  uri: string;
  /** First cited line, when a span is given. */
  startLine?: number;
  /** Last cited line, when a span is given. */
  endLine?: number;
}
|
|
248
|
+
|
|
249
|
+
/** Metadata reported with an ask result. */
export interface AskMeta {
  /** Whether query expansion was applied. */
  expanded: boolean;
  /** Whether results were reranked. */
  reranked: boolean;
  /** Whether vector search contributed. */
  vectorsUsed: boolean;
  /** Whether an answer was generated. */
  answerGenerated?: boolean;
  /** Number of results, when reported. */
  totalResults?: number;
}
|
|
257
|
+
|
|
258
|
+
/** Result of the ask command. */
export interface AskResult {
  /** The question that was asked. */
  query: string;
  /** Retrieval mode used; limited to the hybrid and BM25-only modes. */
  mode: Extract<SearchMode, 'hybrid' | 'bm25_only'>;
  /** Language associated with the query. */
  queryLanguage: string;
  /** Generated answer, when there is one. */
  answer?: string;
  /** Citations, when there are any. */
  citations?: Citation[];
  /** Retrieved search hits. */
  results: SearchResult[];
  /** Metadata about how the result was produced. */
  meta: AskMeta;
}
|
|
268
|
+
|
|
269
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
270
|
+
// Port Interfaces
|
|
271
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
272
|
+
|
|
273
|
+
/** Port for BM25 search. */
export interface Bm25SearchPort {
  /**
   * Run a BM25 search.
   *
   * @param query - Query text.
   * @param options - Optional common search options.
   * @returns A store result wrapping the search results.
   */
  search(query: string, options?: SearchOptions): Promise<StoreResult<SearchResults>>;
}
|
|
280
|
+
|
|
281
|
+
/** Port for vector search. */
export interface VectorSearchPort {
  /**
   * Run a vector search.
   *
   * @param query - Query text.
   * @param options - Optional common search options.
   * @returns A store result wrapping the search results.
   */
  search(query: string, options?: SearchOptions): Promise<StoreResult<SearchResults>>;
}
|
|
288
|
+
|
|
289
|
+
/** Port for query expansion. */
export interface ExpansionPort {
  /**
   * Expand a query.
   *
   * @param query - Query text.
   * @param lang - Optional language.
   * @returns A store result wrapping the expansion, or null.
   */
  expand(query: string, lang?: string): Promise<StoreResult<ExpansionResult | null>>;
}
|
|
296
|
+
|
|
297
|
+
/** Port for hybrid search. */
export interface HybridSearchPort {
  /**
   * Run a hybrid search.
   *
   * @param query - Query text.
   * @param options - Optional hybrid search options.
   * @returns A store result wrapping the search results.
   */
  search(query: string, options?: HybridSearchOptions): Promise<StoreResult<SearchResults>>;
}
|
|
304
|
+
|
|
305
|
+
/** Port for the ask operation. */
export interface AskPort {
  /**
   * Ask a question.
   *
   * @param query - Question text.
   * @param options - Optional ask options.
   * @returns A store result wrapping the ask result.
   */
  ask(
    query: string,
    options?: AskOptions
  ): Promise<StoreResult<AskResult>>;
}
|
|
309
|
+
|
|
310
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
311
|
+
// Explain Types
|
|
312
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
/** One line of explain output. */
export interface ExplainLine {
  /** Stage the line belongs to. */
  stage: string;
  /** Text of the line. */
  message: string;
}
|
|
319
|
+
|
|
320
|
+
/** Per-result explain detail: final rank and score plus optional component scores. */
export interface ExplainResult {
  /** Rank of the result. */
  rank: number;
  /** Identifier of the document. */
  docid: string;
  /** Score of the result. */
  score: number;
  /** BM25 score, when available. */
  bm25Score?: number;
  /** Vector score, when available. */
  vecScore?: number;
  /** Rerank score, when available. */
  rerankScore?: number;
}
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector search pipeline.
|
|
3
|
+
* Wraps VectorIndexPort.searchNearest() to produce SearchResults.
|
|
4
|
+
*
|
|
5
|
+
* @module src/pipeline/vsearch
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Config } from '../config/types';
|
|
9
|
+
import type { EmbeddingPort } from '../llm/types';
|
|
10
|
+
import type { StorePort } from '../store/types';
|
|
11
|
+
import { err, ok } from '../store/types';
|
|
12
|
+
import type { VectorIndexPort } from '../store/vector/types';
|
|
13
|
+
import { createChunkLookup } from './chunk-lookup';
|
|
14
|
+
import { detectQueryLanguage } from './query-language';
|
|
15
|
+
import type { SearchOptions, SearchResult, SearchResults } from './types';
|
|
16
|
+
|
|
17
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
18
|
+
// Score Normalization
|
|
19
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Normalize cosine distance to a 0-1 similarity score.
 * Cosine distance: 0 = identical, 2 = opposite.
 * Similarity = 1 - (distance / 2), clamped to [0, 1].
 *
 * A NaN distance yields 0. Without this guard NaN would pass through
 * Math.min/Math.max unchanged, and because `NaN < minScore` is false
 * the hit would be kept with a NaN score.
 *
 * @param distance - Cosine distance reported by the vector index.
 * @returns Similarity in the range [0, 1].
 */
function normalizeVectorScore(distance: number): number {
  if (Number.isNaN(distance)) {
    return 0;
  }
  return Math.max(0, Math.min(1, 1 - distance / 2));
}
|
|
29
|
+
|
|
30
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
31
|
+
// Vector Search Dependencies
|
|
32
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/** Collaborators required by the vector search functions in this module. */
export interface VectorSearchDeps {
  /** Store used to look up collections, documents, chunks and content. */
  store: StorePort;
  /** Vector index queried for nearest neighbours. */
  vectorIndex: VectorIndexPort;
  /** Embedding port used to embed the query text. */
  embedPort: EmbeddingPort;
  /** Application configuration. */
  config: Config;
}
|
|
40
|
+
|
|
41
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
42
|
+
// Search Function (with pre-computed embedding)
|
|
43
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
/**
 * Assemble one SearchResult from a cached document and its matched chunk.
 * When `fullContent` is given it becomes the snippet and the snippet range
 * is dropped, because the snippet is then the whole document.
 */
function toVectorSearchResult(input: {
  doc: DocumentInfo;
  chunk: ChunkInfo;
  score: number;
  collectionPath: string | undefined;
  fullContent?: string;
}): SearchResult {
  const { doc, chunk, score, collectionPath, fullContent } = input;

  return {
    docid: doc.docid,
    score,
    uri: doc.uri,
    title: doc.title ?? undefined,
    snippet: fullContent ?? chunk.text,
    snippetLanguage: chunk.language ?? undefined,
    snippetRange: fullContent
      ? undefined
      : { startLine: chunk.startLine, endLine: chunk.endLine },
    source: {
      relPath: doc.relPath,
      absPath: collectionPath
        ? `${collectionPath}/${doc.relPath}`
        : undefined,
      mime: doc.sourceMime,
      ext: doc.sourceExt,
      modifiedAt: doc.sourceMtime,
      sizeBytes: doc.sourceSize,
      sourceHash: doc.sourceHash,
    },
    conversion: doc.mirrorHash
      ? {
          mirrorHash: doc.mirrorHash,
          converterId: doc.converterId ?? undefined,
          converterVersion: doc.converterVersion ?? undefined,
        }
      : undefined,
  };
}

/**
 * Execute vector search with pre-computed embedding.
 * Use this to avoid double-embedding when caller already has the query vector.
 *
 * Hits from the index are post-filtered (minimum score, strict --lang match,
 * and membership in the active documents of the optional collection), so
 * fewer than `limit` results may be returned.
 *
 * With `options.full`, at most one result per document is returned (its
 * best-scoring chunk) and the snippet is the full document content when it
 * can be loaded, otherwise the chunk text.
 *
 * @param deps - Store and vector index used for the search.
 * @param query - Original query text; used for language detection and metadata.
 * @param queryEmbedding - Embedding of the query.
 * @param options - Common search options; `limit` defaults to 20 and `minScore` to 0.
 * @returns `VEC_SEARCH_UNAVAILABLE` when the index cannot search,
 *   `QUERY_FAILED` when the nearest-neighbour or chunk lookup fails,
 *   otherwise the results with `mode: 'vector'` metadata.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: vector hit filtering, --full de-duplication and result assembly
export async function searchVectorWithEmbedding(
  deps: VectorSearchDeps,
  query: string,
  queryEmbedding: Float32Array,
  options: SearchOptions = {}
): Promise<
  ReturnType<typeof ok<SearchResults>> | ReturnType<typeof err<SearchResults>>
> {
  const { store, vectorIndex } = deps;
  const limit = options.limit ?? 20;
  const minScore = options.minScore ?? 0;

  // Detect query language for metadata (DOES NOT affect retrieval filtering)
  const detection = detectQueryLanguage(query);
  const queryLanguage = options.lang ?? detection.bcp47;

  // Wrap a result list together with the metadata for this search
  const toResponse = (found: SearchResult[]): SearchResults => ({
    results: found,
    meta: {
      query,
      mode: 'vector',
      vectorsUsed: true,
      totalResults: found.length,
      collection: options.collection,
      lang: options.lang,
      queryLanguage,
    },
  });

  // Check if vector search is available
  if (!vectorIndex.searchAvailable) {
    return err(
      'VEC_SEARCH_UNAVAILABLE',
      'Vector search requires sqlite-vec. Run: gno embed'
    );
  }

  // Search nearest neighbors
  const searchResult = await vectorIndex.searchNearest(queryEmbedding, limit, {
    minScore,
  });

  if (!searchResult.ok) {
    return err('QUERY_FAILED', searchResult.error.message);
  }

  const vecResults = searchResult.value;

  // Nothing matched: skip the collection, document and chunk lookups entirely
  if (vecResults.length === 0) {
    return ok(toResponse([]));
  }

  // Get collection paths for absPath resolution; a failed lookup only means
  // absPath is left undefined on the results
  const collectionsResult = await store.getCollections();
  const collectionPaths = new Map<string, string>();
  if (collectionsResult.ok) {
    for (const c of collectionsResult.value) {
      collectionPaths.set(c.name, c.path);
    }
  }

  // Cache docs to avoid N+1 queries
  const docByMirrorHash = await buildDocumentMap(store, options.collection);

  // Pre-fetch all chunks in one batch query (eliminates N+1)
  const uniqueHashes = [...new Set(vecResults.map((v) => v.mirrorHash))];
  const chunksMapResult = await store.getChunksBatch(uniqueHashes);
  if (!chunksMapResult.ok) {
    return err('QUERY_FAILED', chunksMapResult.error.message);
  }
  const getChunk = createChunkLookup(chunksMapResult.value);

  // Build search results
  const results: SearchResult[] = [];

  // For --full, track best score per docid to de-dupe
  const bestByDocid = new Map<
    string,
    { doc: DocumentInfo; chunk: ChunkInfo; score: number }
  >();

  for (const vec of vecResults) {
    const score = normalizeVectorScore(vec.distance);
    if (score < minScore) {
      continue;
    }

    // Get chunk via the pre-built lookup
    const chunk = getChunk(vec.mirrorHash, vec.seq);
    if (!chunk) {
      continue;
    }

    // STRICT --lang filter: require exact match (excludes null/undefined)
    if (options.lang && chunk.language !== options.lang) {
      continue;
    }

    // Get document (cached); hits without a matching document are dropped
    const doc = docByMirrorHash.get(vec.mirrorHash);
    if (!doc) {
      continue;
    }

    // For --full, de-dupe by docid (keep best scoring chunk per doc)
    if (options.full) {
      const existing = bestByDocid.get(doc.docid);
      if (!existing || score > existing.score) {
        bestByDocid.set(doc.docid, {
          doc,
          chunk: {
            text: chunk.text,
            language: chunk.language,
            startLine: chunk.startLine,
            endLine: chunk.endLine,
          },
          score,
        });
      }
      continue;
    }

    results.push(
      toVectorSearchResult({
        doc,
        chunk,
        score,
        collectionPath: collectionPaths.get(doc.collection),
      })
    );
  }

  // For --full, fetch full content and build results
  if (options.full) {
    for (const { doc, chunk, score } of bestByDocid.values()) {
      let fullContent: string | undefined;
      if (doc.mirrorHash) {
        const contentResult = await store.getContent(doc.mirrorHash);
        if (contentResult.ok && contentResult.value) {
          fullContent = contentResult.value;
        }
      }

      // Without full content the helper falls back to the chunk text and range
      results.push(
        toVectorSearchResult({
          doc,
          chunk,
          score,
          collectionPath: collectionPaths.get(doc.collection),
          fullContent,
        })
      );
    }
  }

  return ok(toResponse(results));
}
|
|
249
|
+
|
|
250
|
+
/**
 * Run a vector search for a text query.
 * The query is embedded here first; call searchVectorWithEmbedding directly
 * when the embedding is already at hand.
 *
 * @param deps - Vector index, embedding port and store.
 * @param query - Query text to embed and search for.
 * @param options - Common search options, passed through unchanged.
 * @returns `VEC_SEARCH_UNAVAILABLE` when the index cannot search,
 *   `QUERY_FAILED` when embedding fails, otherwise whatever
 *   searchVectorWithEmbedding returns.
 */
export async function searchVector(
  deps: VectorSearchDeps,
  query: string,
  options: SearchOptions = {}
): Promise<
  ReturnType<typeof ok<SearchResults>> | ReturnType<typeof err<SearchResults>>
> {
  const { vectorIndex: index, embedPort: embedder } = deps;

  // Bail out before embedding when the index cannot search
  if (!index.searchAvailable) {
    return err(
      'VEC_SEARCH_UNAVAILABLE',
      'Vector search requires sqlite-vec. Run: gno embed'
    );
  }

  const embedded = await embedder.embed(query);
  if (embedded.ok) {
    return searchVectorWithEmbedding(
      deps,
      query,
      new Float32Array(embedded.value),
      options
    );
  }

  return err(
    'QUERY_FAILED',
    `Failed to embed query: ${embedded.error.message}`
  );
}
|
|
284
|
+
|
|
285
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
286
|
+
// Helper Types
|
|
287
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
288
|
+
|
|
289
|
+
/** The chunk fields needed to build a search result. */
interface ChunkInfo {
  /** Chunk text. */
  text: string;
  /** Language recorded for the chunk, or null when there is none. */
  language: string | null;
  /** First line of the chunk. */
  startLine: number;
  /** Last line of the chunk. */
  endLine: number;
}
|
|
295
|
+
|
|
296
|
+
/** The document fields needed to build a search result. */
interface DocumentInfo {
  /** Document identifier. */
  docid: string;
  /** Document URI. */
  uri: string;
  /** Title, or null when there is none. */
  title: string | null;
  /** Name of the collection the document belongs to. */
  collection: string;
  /** Path relative to the collection root. */
  relPath: string;
  /** Hash of the source file. */
  sourceHash: string;
  /** MIME type of the source file. */
  sourceMime: string;
  /** Extension of the source file. */
  sourceExt: string;
  /** Modification time of the source file. */
  sourceMtime: string;
  /** Size of the source file. */
  sourceSize: number;
  /** Hash of the mirror content, or null when there is none. */
  mirrorHash: string | null;
  /** Converter identifier, or null when not recorded. */
  converterId: string | null;
  /** Converter version, or null when not recorded. */
  converterVersion: string | null;
}
|
|
311
|
+
|
|
312
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
313
|
+
// Helper: Build document map by mirrorHash
|
|
314
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
315
|
+
|
|
316
|
+
/**
 * Load documents from the store and index the active ones by mirror hash.
 *
 * Documents without a mirror hash or not marked active are skipped. If
 * listing fails, an empty map is returned and the error is not reported.
 * When several active documents share a mirror hash, the one listed last
 * wins.
 *
 * @param store - Store to list documents from.
 * @param collectionFilter - Optional collection passed through to the listing.
 * @returns Map from mirror hash to the document's result-building fields.
 */
async function buildDocumentMap(
  store: StorePort,
  collectionFilter?: string
): Promise<Map<string, DocumentInfo>> {
  const byMirrorHash = new Map<string, DocumentInfo>();

  const listing = await store.listDocuments(collectionFilter);
  if (!listing.ok) {
    return byMirrorHash;
  }

  for (const entry of listing.value) {
    const { mirrorHash } = entry;
    if (!(mirrorHash && entry.active)) {
      continue;
    }

    const info: DocumentInfo = {
      docid: entry.docid,
      uri: entry.uri,
      title: entry.title,
      collection: entry.collection,
      relPath: entry.relPath,
      sourceHash: entry.sourceHash,
      sourceMime: entry.sourceMime,
      sourceExt: entry.sourceExt,
      sourceMtime: entry.sourceMtime,
      sourceSize: entry.sourceSize,
      mirrorHash,
      converterId: entry.converterId,
      converterVersion: entry.converterVersion,
    };
    byMirrorHash.set(mirrorHash, info);
  }

  return byMirrorHash;
}
|