@gmickel/gno 0.3.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -7
- package/package.json +30 -1
- package/src/cli/commands/ask.ts +12 -187
- package/src/cli/commands/embed.ts +10 -4
- package/src/cli/commands/models/pull.ts +9 -4
- package/src/cli/commands/serve.ts +19 -0
- package/src/cli/commands/vsearch.ts +5 -2
- package/src/cli/program.ts +28 -0
- package/src/config/types.ts +11 -6
- package/src/llm/registry.ts +3 -1
- package/src/mcp/tools/vsearch.ts +5 -2
- package/src/pipeline/answer.ts +224 -0
- package/src/pipeline/contextual.ts +57 -0
- package/src/pipeline/expansion.ts +49 -31
- package/src/pipeline/explain.ts +11 -3
- package/src/pipeline/fusion.ts +20 -9
- package/src/pipeline/hybrid.ts +57 -40
- package/src/pipeline/index.ts +7 -0
- package/src/pipeline/rerank.ts +55 -27
- package/src/pipeline/types.ts +0 -3
- package/src/pipeline/vsearch.ts +3 -2
- package/src/serve/CLAUDE.md +91 -0
- package/src/serve/bunfig.toml +2 -0
- package/src/serve/context.ts +181 -0
- package/src/serve/index.ts +7 -0
- package/src/serve/public/app.tsx +56 -0
- package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
- package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
- package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
- package/src/serve/public/components/ai-elements/loader.tsx +96 -0
- package/src/serve/public/components/ai-elements/message.tsx +443 -0
- package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
- package/src/serve/public/components/ai-elements/sources.tsx +75 -0
- package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
- package/src/serve/public/components/preset-selector.tsx +403 -0
- package/src/serve/public/components/ui/badge.tsx +46 -0
- package/src/serve/public/components/ui/button-group.tsx +82 -0
- package/src/serve/public/components/ui/button.tsx +62 -0
- package/src/serve/public/components/ui/card.tsx +92 -0
- package/src/serve/public/components/ui/carousel.tsx +244 -0
- package/src/serve/public/components/ui/collapsible.tsx +31 -0
- package/src/serve/public/components/ui/command.tsx +181 -0
- package/src/serve/public/components/ui/dialog.tsx +141 -0
- package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
- package/src/serve/public/components/ui/hover-card.tsx +42 -0
- package/src/serve/public/components/ui/input-group.tsx +167 -0
- package/src/serve/public/components/ui/input.tsx +21 -0
- package/src/serve/public/components/ui/progress.tsx +28 -0
- package/src/serve/public/components/ui/scroll-area.tsx +56 -0
- package/src/serve/public/components/ui/select.tsx +188 -0
- package/src/serve/public/components/ui/separator.tsx +26 -0
- package/src/serve/public/components/ui/table.tsx +114 -0
- package/src/serve/public/components/ui/textarea.tsx +18 -0
- package/src/serve/public/components/ui/tooltip.tsx +59 -0
- package/src/serve/public/globals.css +226 -0
- package/src/serve/public/hooks/use-api.ts +112 -0
- package/src/serve/public/index.html +13 -0
- package/src/serve/public/pages/Ask.tsx +442 -0
- package/src/serve/public/pages/Browse.tsx +270 -0
- package/src/serve/public/pages/Dashboard.tsx +202 -0
- package/src/serve/public/pages/DocView.tsx +302 -0
- package/src/serve/public/pages/Search.tsx +335 -0
- package/src/serve/routes/api.ts +763 -0
- package/src/serve/server.ts +249 -0
- package/src/store/migrations/002-documents-fts.ts +40 -0
- package/src/store/migrations/index.ts +2 -1
- package/src/store/sqlite/adapter.ts +216 -33
- package/src/store/sqlite/fts5-snowball.ts +144 -0
- package/src/store/types.ts +33 -3
- package/src/store/vector/stats.ts +3 -0
- package/src/store/vector/types.ts +1 -0
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -11,9 +11,11 @@ import type { StorePort } from '../store/types';
|
|
|
11
11
|
import { err, ok } from '../store/types';
|
|
12
12
|
import type { VectorIndexPort } from '../store/vector/types';
|
|
13
13
|
import { createChunkLookup } from './chunk-lookup';
|
|
14
|
+
import { formatQueryForEmbedding } from './contextual';
|
|
14
15
|
import { expandQuery } from './expansion';
|
|
15
16
|
import {
|
|
16
17
|
buildExplainResults,
|
|
18
|
+
type ExpansionStatus,
|
|
17
19
|
explainBm25,
|
|
18
20
|
explainExpansion,
|
|
19
21
|
explainFusion,
|
|
@@ -51,56 +53,64 @@ export interface HybridSearchDeps {
|
|
|
51
53
|
// Score Normalization
|
|
52
54
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
53
55
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
// Removed: _normalizeVectorScore was dead code (vector distances normalized in vector index)
|
|
57
|
+
|
|
58
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
59
|
+
// BM25 Score Normalization
|
|
60
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Normalize raw BM25 score to 0-1 range using sigmoid.
|
|
64
|
+
* BM25 scores are negative in SQLite FTS5 (more negative = better match).
|
|
65
|
+
* Typical range: -15 (excellent) to -2 (weak match).
|
|
66
|
+
* Maps to 0-1 where higher is better.
|
|
67
|
+
*/
|
|
68
|
+
function normalizeBm25Score(rawScore: number): number {
|
|
69
|
+
const absScore = Math.abs(rawScore);
|
|
70
|
+
// Sigmoid with center=4.5, scale=2.8
|
|
71
|
+
// Maps: -15 → ~0.98, -5 → ~0.54, -2 → ~0.29
|
|
72
|
+
return 1 / (1 + Math.exp(-(absScore - 4.5) / 2.8));
|
|
56
73
|
}
|
|
57
74
|
|
|
58
75
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
59
76
|
// BM25 Strength Check
|
|
60
77
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
61
78
|
|
|
79
|
+
// Thresholds for strong signal detection (conservative - prefer expansion over speed)
|
|
80
|
+
const STRONG_TOP_SCORE = 0.84; // ~84th percentile confidence
|
|
81
|
+
const STRONG_GAP = 0.14; // Clear separation from #2
|
|
82
|
+
|
|
62
83
|
/**
|
|
63
84
|
* Check if BM25 results are strong enough to skip expansion.
|
|
64
|
-
*
|
|
65
|
-
*
|
|
66
|
-
* Raw BM25: smaller (more negative) is better.
|
|
85
|
+
* Returns true if top result is both confident AND clearly separated.
|
|
86
|
+
* This prevents skipping on weak-but-separated results.
|
|
67
87
|
*/
|
|
68
88
|
async function checkBm25Strength(
|
|
69
89
|
store: StorePort,
|
|
70
90
|
query: string,
|
|
71
91
|
options?: { collection?: string; lang?: string }
|
|
72
|
-
): Promise<
|
|
92
|
+
): Promise<boolean> {
|
|
73
93
|
const result = await store.searchFts(query, {
|
|
74
94
|
limit: 5,
|
|
75
95
|
collection: options?.collection,
|
|
76
96
|
language: options?.lang,
|
|
77
97
|
});
|
|
98
|
+
|
|
78
99
|
if (!result.ok || result.value.length === 0) {
|
|
79
|
-
return
|
|
100
|
+
return false;
|
|
80
101
|
}
|
|
81
102
|
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
103
|
+
// Normalize scores (higher = better)
|
|
104
|
+
const scores = result.value
|
|
105
|
+
.map((r) => normalizeBm25Score(r.score))
|
|
106
|
+
.sort((a, b) => b - a); // Descending
|
|
86
107
|
|
|
87
|
-
|
|
88
|
-
const
|
|
89
|
-
const
|
|
90
|
-
const second = scores[1] ?? best;
|
|
91
|
-
const worst = scores.at(-1) ?? best;
|
|
92
|
-
|
|
93
|
-
// Compute gap-based strength
|
|
94
|
-
// If best and second are equal, gap = 0
|
|
95
|
-
// If second is much worse (larger), gap approaches 1
|
|
96
|
-
const range = worst - best;
|
|
97
|
-
if (range === 0) {
|
|
98
|
-
return 0; // All scores equal, no clear winner
|
|
99
|
-
}
|
|
108
|
+
const topScore = scores[0] ?? 0;
|
|
109
|
+
const secondScore = scores[1] ?? 0;
|
|
110
|
+
const gap = topScore - secondScore;
|
|
100
111
|
|
|
101
|
-
//
|
|
102
|
-
|
|
103
|
-
return Math.max(0, Math.min(1, gap));
|
|
112
|
+
// Strong signal requires BOTH: high confidence AND clear separation
|
|
113
|
+
return topScore >= STRONG_TOP_SCORE && gap >= STRONG_GAP;
|
|
104
114
|
}
|
|
105
115
|
|
|
106
116
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -155,7 +165,8 @@ async function searchVectorChunks(
|
|
|
155
165
|
return [];
|
|
156
166
|
}
|
|
157
167
|
|
|
158
|
-
|
|
168
|
+
// Embed query with contextual formatting
|
|
169
|
+
const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
|
|
159
170
|
if (!embedResult.ok) {
|
|
160
171
|
return [];
|
|
161
172
|
}
|
|
@@ -225,17 +236,18 @@ export async function searchHybrid(
|
|
|
225
236
|
// 1. Check if expansion needed
|
|
226
237
|
// ─────────────────────────────────────────────────────────────────────────
|
|
227
238
|
const shouldExpand = !options.noExpand && genPort !== null;
|
|
228
|
-
let
|
|
239
|
+
let expansionStatus: ExpansionStatus = 'disabled';
|
|
229
240
|
|
|
230
241
|
if (shouldExpand) {
|
|
231
|
-
const
|
|
242
|
+
const hasStrongSignal = await checkBm25Strength(store, query, {
|
|
232
243
|
collection: options.collection,
|
|
233
244
|
lang: options.lang,
|
|
234
245
|
});
|
|
235
|
-
skipExpansionDueToStrength =
|
|
236
|
-
bm25Strength >= pipelineConfig.strongBm25Threshold;
|
|
237
246
|
|
|
238
|
-
if (
|
|
247
|
+
if (hasStrongSignal) {
|
|
248
|
+
expansionStatus = 'skipped_strong';
|
|
249
|
+
} else {
|
|
250
|
+
expansionStatus = 'attempted';
|
|
239
251
|
const expandResult = await expandQuery(genPort, query, {
|
|
240
252
|
// Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
|
|
241
253
|
lang: queryLanguage,
|
|
@@ -247,9 +259,7 @@ export async function searchHybrid(
|
|
|
247
259
|
}
|
|
248
260
|
}
|
|
249
261
|
|
|
250
|
-
explainLines.push(
|
|
251
|
-
explainExpansion(shouldExpand && !skipExpansionDueToStrength, expansion)
|
|
252
|
-
);
|
|
262
|
+
explainLines.push(explainExpansion(expansionStatus, expansion));
|
|
253
263
|
|
|
254
264
|
// ─────────────────────────────────────────────────────────────────────────
|
|
255
265
|
// 2. Parallel retrieval using raw store/vector APIs for correct seq tracking
|
|
@@ -293,7 +303,8 @@ export async function searchHybrid(
|
|
|
293
303
|
|
|
294
304
|
// Vector search
|
|
295
305
|
let vecCount = 0;
|
|
296
|
-
const vectorAvailable =
|
|
306
|
+
const vectorAvailable =
|
|
307
|
+
(vectorIndex?.searchAvailable && embedPort !== null) ?? false;
|
|
297
308
|
|
|
298
309
|
if (vectorAvailable && vectorIndex && embedPort) {
|
|
299
310
|
// Original query
|
|
@@ -335,7 +346,7 @@ export async function searchHybrid(
|
|
|
335
346
|
}
|
|
336
347
|
}
|
|
337
348
|
|
|
338
|
-
explainLines.push(explainVector(vecCount, vectorAvailable
|
|
349
|
+
explainLines.push(explainVector(vecCount, vectorAvailable));
|
|
339
350
|
|
|
340
351
|
// ─────────────────────────────────────────────────────────────────────────
|
|
341
352
|
// 3. RRF Fusion
|
|
@@ -441,7 +452,13 @@ export async function searchHybrid(
|
|
|
441
452
|
}
|
|
442
453
|
|
|
443
454
|
// Get chunk via O(1) lookup
|
|
444
|
-
|
|
455
|
+
// For doc-level FTS (seq=0), fall back to first available chunk if exact lookup fails
|
|
456
|
+
let chunk = getChunk(candidate.mirrorHash, candidate.seq);
|
|
457
|
+
if (!chunk && candidate.seq === 0) {
|
|
458
|
+
// Doc-level FTS uses seq=0 as placeholder - try first chunk
|
|
459
|
+
const docChunks = chunksMap.get(candidate.mirrorHash);
|
|
460
|
+
chunk = docChunks?.[0];
|
|
461
|
+
}
|
|
445
462
|
if (!chunk) {
|
|
446
463
|
continue;
|
|
447
464
|
}
|
|
@@ -524,7 +541,7 @@ export async function searchHybrid(
|
|
|
524
541
|
mode: vectorAvailable ? 'hybrid' : 'bm25_only',
|
|
525
542
|
expanded: expansion !== null,
|
|
526
543
|
reranked: rerankResult.reranked,
|
|
527
|
-
vectorsUsed: vectorAvailable
|
|
544
|
+
vectorsUsed: vectorAvailable,
|
|
528
545
|
totalResults: results.length,
|
|
529
546
|
collection: options.collection,
|
|
530
547
|
lang: options.lang,
|
package/src/pipeline/index.ts
CHANGED
|
@@ -4,11 +4,18 @@
|
|
|
4
4
|
* @module src/pipeline
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
// Contextual embedding
|
|
8
|
+
export {
|
|
9
|
+
extractTitle,
|
|
10
|
+
formatDocForEmbedding,
|
|
11
|
+
formatQueryForEmbedding,
|
|
12
|
+
} from './contextual';
|
|
7
13
|
// Expansion
|
|
8
14
|
export { expandQuery, generateCacheKey } from './expansion';
|
|
9
15
|
// Explain
|
|
10
16
|
export {
|
|
11
17
|
buildExplainResults,
|
|
18
|
+
type ExpansionStatus,
|
|
12
19
|
explainBm25,
|
|
13
20
|
explainExpansion,
|
|
14
21
|
explainFusion,
|
package/src/pipeline/rerank.ts
CHANGED
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
|
|
8
8
|
import type { RerankPort } from '../llm/types';
|
|
9
9
|
import type { StorePort } from '../store/types';
|
|
10
|
-
import { createChunkLookup } from './chunk-lookup';
|
|
11
10
|
import type { BlendingTier, FusionCandidate, RerankedCandidate } from './types';
|
|
12
11
|
import { DEFAULT_BLENDING_SCHEDULE } from './types';
|
|
13
12
|
|
|
@@ -121,32 +120,44 @@ export async function rerankCandidates(
|
|
|
121
120
|
const toRerank = candidates.slice(0, maxCandidates);
|
|
122
121
|
const remaining = candidates.slice(maxCandidates);
|
|
123
122
|
|
|
124
|
-
//
|
|
123
|
+
// Dedupe by document - multiple chunks from same doc use single full-doc rerank
|
|
125
124
|
const uniqueHashes = [...new Set(toRerank.map((c) => c.mirrorHash))];
|
|
126
|
-
const chunksMapResult = await store.getChunksBatch(uniqueHashes);
|
|
127
125
|
|
|
128
|
-
//
|
|
129
|
-
//
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
126
|
+
// Fetch full document content for each unique document (parallel)
|
|
127
|
+
// Max 128K chars per doc to fit in reranker context
|
|
128
|
+
const MAX_DOC_CHARS = 128_000;
|
|
129
|
+
const contentResults = await Promise.all(
|
|
130
|
+
uniqueHashes.map((hash) => store.getContent(hash))
|
|
131
|
+
);
|
|
132
|
+
const docContents = new Map<string, string>();
|
|
133
|
+
for (let i = 0; i < uniqueHashes.length; i++) {
|
|
134
|
+
const hash = uniqueHashes[i] as string;
|
|
135
|
+
const result = contentResults[i] as Awaited<
|
|
136
|
+
ReturnType<typeof store.getContent>
|
|
137
|
+
>;
|
|
138
|
+
if (result.ok && result.value) {
|
|
139
|
+
const content = result.value;
|
|
140
|
+
docContents.set(
|
|
141
|
+
hash,
|
|
142
|
+
content.length > MAX_DOC_CHARS
|
|
143
|
+
? `${content.slice(0, MAX_DOC_CHARS)}...`
|
|
144
|
+
: content
|
|
145
|
+
);
|
|
146
|
+
} else {
|
|
147
|
+
// Fallback to empty string if content not available
|
|
148
|
+
docContents.set(hash, '');
|
|
149
|
+
}
|
|
139
150
|
}
|
|
140
|
-
const chunksMap = chunksMapResult.value;
|
|
141
|
-
const getChunk = createChunkLookup(chunksMap);
|
|
142
151
|
|
|
143
|
-
// Build texts array for reranking (
|
|
144
|
-
const
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
152
|
+
// Build texts array for reranking (one per unique document)
|
|
153
|
+
const hashToIndex = new Map<string, number>();
|
|
154
|
+
const texts: string[] = [];
|
|
155
|
+
for (const hash of uniqueHashes) {
|
|
156
|
+
hashToIndex.set(hash, texts.length);
|
|
157
|
+
texts.push(docContents.get(hash) ?? '');
|
|
158
|
+
}
|
|
148
159
|
|
|
149
|
-
// Run reranking
|
|
160
|
+
// Run reranking on full documents
|
|
150
161
|
const rerankResult = await rerankPort.rerank(query, texts);
|
|
151
162
|
|
|
152
163
|
if (!rerankResult.ok) {
|
|
@@ -163,16 +174,33 @@ export async function rerankCandidates(
|
|
|
163
174
|
|
|
164
175
|
// Map rerank scores to candidates
|
|
165
176
|
// Note: We use normalizeFusionScore defined above (across ALL candidates)
|
|
166
|
-
// Build index->score map for O(1) lookup
|
|
167
|
-
|
|
177
|
+
// Build doc index->score map for O(1) lookup
|
|
178
|
+
// All chunks from same document share the same rerank score
|
|
179
|
+
const scoreByDocIndex = new Map(
|
|
168
180
|
rerankResult.value.map((s) => [s.index, s.score])
|
|
169
181
|
);
|
|
182
|
+
|
|
183
|
+
// Normalize rerank scores using min-max (models return varying scales)
|
|
184
|
+
const rerankScores = rerankResult.value.map((s) => s.score);
|
|
185
|
+
const minRerank = Math.min(...rerankScores);
|
|
186
|
+
const maxRerank = Math.max(...rerankScores);
|
|
187
|
+
const rerankRange = maxRerank - minRerank;
|
|
188
|
+
|
|
189
|
+
function normalizeRerankScore(score: number): number {
|
|
190
|
+
if (rerankRange < 1e-9) {
|
|
191
|
+
return 1; // All tied for best
|
|
192
|
+
}
|
|
193
|
+
return (score - minRerank) / rerankRange;
|
|
194
|
+
}
|
|
195
|
+
|
|
170
196
|
const rerankedCandidates: RerankedCandidate[] = toRerank.map((c, i) => {
|
|
171
|
-
|
|
197
|
+
// Get document-level rerank score (shared by all chunks from same doc)
|
|
198
|
+
const docIndex = hashToIndex.get(c.mirrorHash) ?? -1;
|
|
199
|
+
const rerankScore = scoreByDocIndex.get(docIndex) ?? null;
|
|
172
200
|
|
|
173
|
-
// Normalize rerank score to 0-1 range
|
|
201
|
+
// Normalize rerank score to 0-1 range using min-max
|
|
174
202
|
const normalizedRerankScore =
|
|
175
|
-
rerankScore !== null ?
|
|
203
|
+
rerankScore !== null ? normalizeRerankScore(rerankScore) : null;
|
|
176
204
|
|
|
177
205
|
// Calculate blended score using normalized fusion score
|
|
178
206
|
const position = i + 1;
|
package/src/pipeline/types.ts
CHANGED
|
@@ -213,8 +213,6 @@ export type RerankedCandidate = FusionCandidate & {
|
|
|
213
213
|
|
|
214
214
|
/** Search pipeline configuration */
|
|
215
215
|
export interface PipelineConfig {
|
|
216
|
-
/** Strong BM25 threshold to skip expansion */
|
|
217
|
-
strongBm25Threshold: number;
|
|
218
216
|
/** Expansion timeout in ms */
|
|
219
217
|
expansionTimeout: number;
|
|
220
218
|
/** Max candidates to rerank */
|
|
@@ -227,7 +225,6 @@ export interface PipelineConfig {
|
|
|
227
225
|
|
|
228
226
|
/** Default pipeline configuration */
|
|
229
227
|
export const DEFAULT_PIPELINE_CONFIG: PipelineConfig = {
|
|
230
|
-
strongBm25Threshold: 0.7,
|
|
231
228
|
expansionTimeout: 5000,
|
|
232
229
|
rerankCandidates: 20,
|
|
233
230
|
rrf: DEFAULT_RRF_CONFIG,
|
package/src/pipeline/vsearch.ts
CHANGED
|
@@ -11,6 +11,7 @@ import type { StorePort } from '../store/types';
|
|
|
11
11
|
import { err, ok } from '../store/types';
|
|
12
12
|
import type { VectorIndexPort } from '../store/vector/types';
|
|
13
13
|
import { createChunkLookup } from './chunk-lookup';
|
|
14
|
+
import { formatQueryForEmbedding } from './contextual';
|
|
14
15
|
import { detectQueryLanguage } from './query-language';
|
|
15
16
|
import type { SearchOptions, SearchResult, SearchResults } from './types';
|
|
16
17
|
|
|
@@ -268,8 +269,8 @@ export async function searchVector(
|
|
|
268
269
|
);
|
|
269
270
|
}
|
|
270
271
|
|
|
271
|
-
// Embed query
|
|
272
|
-
const embedResult = await embedPort.embed(query);
|
|
272
|
+
// Embed query with contextual formatting
|
|
273
|
+
const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
|
|
273
274
|
if (!embedResult.ok) {
|
|
274
275
|
return err(
|
|
275
276
|
'QUERY_FAILED',
|
|
package/src/serve/CLAUDE.md
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Web UI (gno serve)
|
|
2
|
+
|
|
3
|
+
Local web server for GNO search and document browsing.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
Uses the same **"Ports without DI"** pattern as the CLI and MCP server (see root CLAUDE.md):
|
|
8
|
+
- Adapters instantiated directly in `context.ts`
|
|
9
|
+
- Pipeline code receives port interfaces
|
|
10
|
+
- No dependency injection
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
src/serve/
|
|
14
|
+
├── server.ts # Bun.serve() entry point
|
|
15
|
+
├── context.ts # ServerContext with LLM ports
|
|
16
|
+
├── routes/
|
|
17
|
+
│ └── api.ts # REST API handlers
|
|
18
|
+
└── public/ # React frontend (Bun HTML imports)
|
|
19
|
+
├── app.tsx # Router
|
|
20
|
+
├── pages/ # Page components
|
|
21
|
+
├── components/ # UI components (ShadCN + AI Elements)
|
|
22
|
+
└── hooks/ # Custom hooks (useApi, etc.)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Key Patterns
|
|
26
|
+
|
|
27
|
+
### Ports (interfaces)
|
|
28
|
+
- `EmbeddingPort` - vector embeddings
|
|
29
|
+
- `GenerationPort` - LLM text generation
|
|
30
|
+
- `RerankPort` - cross-encoder reranking
|
|
31
|
+
- `VectorIndexPort` - vector search
|
|
32
|
+
|
|
33
|
+
### ServerContext
|
|
34
|
+
Created at startup, holds all LLM ports and capabilities:
|
|
35
|
+
```typescript
|
|
36
|
+
interface ServerContext {
|
|
37
|
+
store: SqliteAdapter;
|
|
38
|
+
config: Config;
|
|
39
|
+
vectorIndex: VectorIndexPort | null;
|
|
40
|
+
embedPort: EmbeddingPort | null;
|
|
41
|
+
genPort: GenerationPort | null;
|
|
42
|
+
rerankPort: RerankPort | null;
|
|
43
|
+
capabilities: { bm25: boolean; vector: boolean; hybrid: boolean; answer: boolean };
|
|
44
|
+
}
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Shared Pipeline Code
|
|
48
|
+
Answer generation uses a shared module to stay in sync with the CLI:
|
|
49
|
+
- `src/pipeline/answer.ts` - generateGroundedAnswer, processAnswerResult
|
|
50
|
+
|
|
51
|
+
## API Endpoints
|
|
52
|
+
|
|
53
|
+
| Endpoint | Method | Description |
|
|
54
|
+
|----------|--------|-------------|
|
|
55
|
+
| `/api/health` | GET | Health check |
|
|
56
|
+
| `/api/status` | GET | Index stats, collections |
|
|
57
|
+
| `/api/capabilities` | GET | Available features |
|
|
58
|
+
| `/api/collections` | GET | List collections |
|
|
59
|
+
| `/api/docs` | GET | List documents |
|
|
60
|
+
| `/api/doc` | GET | Get document content |
|
|
61
|
+
| `/api/search` | POST | BM25 search |
|
|
62
|
+
| `/api/query` | POST | Hybrid search |
|
|
63
|
+
| `/api/ask` | POST | AI answer with citations |
|
|
64
|
+
| `/api/presets` | GET | List model presets |
|
|
65
|
+
| `/api/presets` | POST | Switch preset (hot-reload) |
|
|
66
|
+
| `/api/models/status` | GET | Download progress |
|
|
67
|
+
| `/api/models/pull` | POST | Start model download |
|
|
68
|
+
|
|
69
|
+
## Frontend
|
|
70
|
+
|
|
71
|
+
- **Framework**: React (via Bun HTML imports)
|
|
72
|
+
- **Styling**: Tailwind CSS + ShadCN components
|
|
73
|
+
- **AI Elements**: Conversation, Message, Sources, CodeBlock, Loader
|
|
74
|
+
- **Routing**: Simple hash-free SPA routing in app.tsx
|
|
75
|
+
|
|
76
|
+
## Development
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Start dev server with HMR
|
|
80
|
+
bun run src/serve/index.ts
|
|
81
|
+
|
|
82
|
+
# Or via CLI
|
|
83
|
+
gno serve --port 3000
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Security
|
|
87
|
+
|
|
88
|
+
- Binds to `127.0.0.1` only (no LAN exposure)
|
|
89
|
+
- CSP headers on all responses
|
|
90
|
+
- CORS protection on POST endpoints
|
|
91
|
+
- No external font/script loading
|
|
package/src/serve/context.ts
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server context for web UI.
|
|
3
|
+
* Manages LLM ports and vector index for hybrid search and AI answers.
|
|
4
|
+
*
|
|
5
|
+
* @module src/serve/context
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Config } from '../config/types';
|
|
9
|
+
import { LlmAdapter } from '../llm/nodeLlamaCpp/adapter';
|
|
10
|
+
import { getActivePreset } from '../llm/registry';
|
|
11
|
+
import type {
|
|
12
|
+
DownloadProgress,
|
|
13
|
+
EmbeddingPort,
|
|
14
|
+
GenerationPort,
|
|
15
|
+
ModelType,
|
|
16
|
+
RerankPort,
|
|
17
|
+
} from '../llm/types';
|
|
18
|
+
import type { SqliteAdapter } from '../store/sqlite/adapter';
|
|
19
|
+
import { createVectorIndexPort, type VectorIndexPort } from '../store/vector';
|
|
20
|
+
|
|
21
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
22
|
+
// Download State (in-memory, single user)
|
|
23
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
export interface DownloadState {
|
|
26
|
+
active: boolean;
|
|
27
|
+
currentType: ModelType | null;
|
|
28
|
+
progress: DownloadProgress | null;
|
|
29
|
+
completed: ModelType[];
|
|
30
|
+
failed: Array<{ type: ModelType; error: string }>;
|
|
31
|
+
startedAt: number | null;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/** Global download state for polling */
|
|
35
|
+
export const downloadState: DownloadState = {
|
|
36
|
+
active: false,
|
|
37
|
+
currentType: null,
|
|
38
|
+
progress: null,
|
|
39
|
+
completed: [],
|
|
40
|
+
failed: [],
|
|
41
|
+
startedAt: null,
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
/** Reset download state */
|
|
45
|
+
export function resetDownloadState(): void {
|
|
46
|
+
downloadState.active = false;
|
|
47
|
+
downloadState.currentType = null;
|
|
48
|
+
downloadState.progress = null;
|
|
49
|
+
downloadState.completed = [];
|
|
50
|
+
downloadState.failed = [];
|
|
51
|
+
downloadState.startedAt = null;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
55
|
+
// Server Context
|
|
56
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
export interface ServerContext {
|
|
59
|
+
store: SqliteAdapter;
|
|
60
|
+
config: Config;
|
|
61
|
+
vectorIndex: VectorIndexPort | null;
|
|
62
|
+
embedPort: EmbeddingPort | null;
|
|
63
|
+
genPort: GenerationPort | null;
|
|
64
|
+
rerankPort: RerankPort | null;
|
|
65
|
+
capabilities: {
|
|
66
|
+
bm25: boolean;
|
|
67
|
+
vector: boolean;
|
|
68
|
+
hybrid: boolean;
|
|
69
|
+
answer: boolean;
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Initialize server context with LLM ports.
|
|
75
|
+
* Attempts to load models; missing models are logged but don't fail.
|
|
76
|
+
*/
|
|
77
|
+
export async function createServerContext(
|
|
78
|
+
store: SqliteAdapter,
|
|
79
|
+
config: Config
|
|
80
|
+
): Promise<ServerContext> {
|
|
81
|
+
let embedPort: EmbeddingPort | null = null;
|
|
82
|
+
let genPort: GenerationPort | null = null;
|
|
83
|
+
let rerankPort: RerankPort | null = null;
|
|
84
|
+
let vectorIndex: VectorIndexPort | null = null;
|
|
85
|
+
|
|
86
|
+
try {
|
|
87
|
+
const preset = getActivePreset(config);
|
|
88
|
+
const llm = new LlmAdapter(config);
|
|
89
|
+
|
|
90
|
+
// Try to create embedding port
|
|
91
|
+
const embedResult = await llm.createEmbeddingPort(preset.embed);
|
|
92
|
+
if (embedResult.ok) {
|
|
93
|
+
embedPort = embedResult.value;
|
|
94
|
+
const initResult = await embedPort.init();
|
|
95
|
+
if (initResult.ok) {
|
|
96
|
+
// Create vector index
|
|
97
|
+
const dimensions = embedPort.dimensions();
|
|
98
|
+
const db = store.getRawDb();
|
|
99
|
+
const vectorResult = await createVectorIndexPort(db, {
|
|
100
|
+
model: preset.embed,
|
|
101
|
+
dimensions,
|
|
102
|
+
});
|
|
103
|
+
if (vectorResult.ok) {
|
|
104
|
+
vectorIndex = vectorResult.value;
|
|
105
|
+
console.log('Vector search enabled');
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Try to create generation port
|
|
111
|
+
const genResult = await llm.createGenerationPort(preset.gen);
|
|
112
|
+
if (genResult.ok) {
|
|
113
|
+
genPort = genResult.value;
|
|
114
|
+
console.log('AI answer generation enabled');
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Try to create rerank port
|
|
118
|
+
const rerankResult = await llm.createRerankPort(preset.rerank);
|
|
119
|
+
if (rerankResult.ok) {
|
|
120
|
+
rerankPort = rerankResult.value;
|
|
121
|
+
console.log('Reranking enabled');
|
|
122
|
+
}
|
|
123
|
+
} catch (e) {
|
|
124
|
+
// Log but don't fail - models are optional
|
|
125
|
+
console.log(
|
|
126
|
+
'LLM initialization skipped:',
|
|
127
|
+
e instanceof Error ? e.message : String(e)
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
const capabilities = {
|
|
132
|
+
bm25: true, // Always available
|
|
133
|
+
vector: vectorIndex?.searchAvailable ?? false,
|
|
134
|
+
hybrid: (vectorIndex?.searchAvailable ?? false) && embedPort !== null,
|
|
135
|
+
answer: genPort !== null,
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
return {
|
|
139
|
+
store,
|
|
140
|
+
config,
|
|
141
|
+
vectorIndex,
|
|
142
|
+
embedPort,
|
|
143
|
+
genPort,
|
|
144
|
+
rerankPort,
|
|
145
|
+
capabilities,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Dispose server context resources.
|
|
151
|
+
* Each port is disposed independently to prevent one failure from blocking others.
|
|
152
|
+
*/
|
|
153
|
+
export async function disposeServerContext(ctx: ServerContext): Promise<void> {
|
|
154
|
+
const ports = [
|
|
155
|
+
{ name: 'embed', port: ctx.embedPort },
|
|
156
|
+
{ name: 'gen', port: ctx.genPort },
|
|
157
|
+
{ name: 'rerank', port: ctx.rerankPort },
|
|
158
|
+
];
|
|
159
|
+
|
|
160
|
+
for (const { name, port } of ports) {
|
|
161
|
+
if (port) {
|
|
162
|
+
try {
|
|
163
|
+
await port.dispose();
|
|
164
|
+
} catch (e) {
|
|
165
|
+
console.error(`Failed to dispose ${name} port:`, e);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Reload server context with potentially new config.
|
|
173
|
+
* Disposes existing ports and recreates them.
|
|
174
|
+
*/
|
|
175
|
+
export async function reloadServerContext(
|
|
176
|
+
ctx: ServerContext,
|
|
177
|
+
newConfig?: Config
|
|
178
|
+
): Promise<ServerContext> {
|
|
179
|
+
await disposeServerContext(ctx);
|
|
180
|
+
return createServerContext(ctx.store, newConfig ?? ctx.config);
|
|
181
|
+
}
|