@gmickel/gno 0.3.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +74 -7
  2. package/package.json +30 -1
  3. package/src/cli/commands/ask.ts +12 -187
  4. package/src/cli/commands/embed.ts +10 -4
  5. package/src/cli/commands/models/pull.ts +9 -4
  6. package/src/cli/commands/serve.ts +19 -0
  7. package/src/cli/commands/vsearch.ts +5 -2
  8. package/src/cli/program.ts +28 -0
  9. package/src/config/types.ts +11 -6
  10. package/src/llm/registry.ts +3 -1
  11. package/src/mcp/tools/vsearch.ts +5 -2
  12. package/src/pipeline/answer.ts +224 -0
  13. package/src/pipeline/contextual.ts +57 -0
  14. package/src/pipeline/expansion.ts +49 -31
  15. package/src/pipeline/explain.ts +11 -3
  16. package/src/pipeline/fusion.ts +20 -9
  17. package/src/pipeline/hybrid.ts +57 -40
  18. package/src/pipeline/index.ts +7 -0
  19. package/src/pipeline/rerank.ts +55 -27
  20. package/src/pipeline/types.ts +0 -3
  21. package/src/pipeline/vsearch.ts +3 -2
  22. package/src/serve/CLAUDE.md +91 -0
  23. package/src/serve/bunfig.toml +2 -0
  24. package/src/serve/context.ts +181 -0
  25. package/src/serve/index.ts +7 -0
  26. package/src/serve/public/app.tsx +56 -0
  27. package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
  28. package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
  29. package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
  30. package/src/serve/public/components/ai-elements/loader.tsx +96 -0
  31. package/src/serve/public/components/ai-elements/message.tsx +443 -0
  32. package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
  33. package/src/serve/public/components/ai-elements/sources.tsx +75 -0
  34. package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
  35. package/src/serve/public/components/preset-selector.tsx +403 -0
  36. package/src/serve/public/components/ui/badge.tsx +46 -0
  37. package/src/serve/public/components/ui/button-group.tsx +82 -0
  38. package/src/serve/public/components/ui/button.tsx +62 -0
  39. package/src/serve/public/components/ui/card.tsx +92 -0
  40. package/src/serve/public/components/ui/carousel.tsx +244 -0
  41. package/src/serve/public/components/ui/collapsible.tsx +31 -0
  42. package/src/serve/public/components/ui/command.tsx +181 -0
  43. package/src/serve/public/components/ui/dialog.tsx +141 -0
  44. package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
  45. package/src/serve/public/components/ui/hover-card.tsx +42 -0
  46. package/src/serve/public/components/ui/input-group.tsx +167 -0
  47. package/src/serve/public/components/ui/input.tsx +21 -0
  48. package/src/serve/public/components/ui/progress.tsx +28 -0
  49. package/src/serve/public/components/ui/scroll-area.tsx +56 -0
  50. package/src/serve/public/components/ui/select.tsx +188 -0
  51. package/src/serve/public/components/ui/separator.tsx +26 -0
  52. package/src/serve/public/components/ui/table.tsx +114 -0
  53. package/src/serve/public/components/ui/textarea.tsx +18 -0
  54. package/src/serve/public/components/ui/tooltip.tsx +59 -0
  55. package/src/serve/public/globals.css +226 -0
  56. package/src/serve/public/hooks/use-api.ts +112 -0
  57. package/src/serve/public/index.html +13 -0
  58. package/src/serve/public/pages/Ask.tsx +442 -0
  59. package/src/serve/public/pages/Browse.tsx +270 -0
  60. package/src/serve/public/pages/Dashboard.tsx +202 -0
  61. package/src/serve/public/pages/DocView.tsx +302 -0
  62. package/src/serve/public/pages/Search.tsx +335 -0
  63. package/src/serve/routes/api.ts +763 -0
  64. package/src/serve/server.ts +249 -0
  65. package/src/store/migrations/002-documents-fts.ts +40 -0
  66. package/src/store/migrations/index.ts +2 -1
  67. package/src/store/sqlite/adapter.ts +216 -33
  68. package/src/store/sqlite/fts5-snowball.ts +144 -0
  69. package/src/store/types.ts +33 -3
  70. package/src/store/vector/stats.ts +3 -0
  71. package/src/store/vector/types.ts +1 -0
@@ -11,9 +11,11 @@ import type { StorePort } from '../store/types';
11
11
  import { err, ok } from '../store/types';
12
12
  import type { VectorIndexPort } from '../store/vector/types';
13
13
  import { createChunkLookup } from './chunk-lookup';
14
+ import { formatQueryForEmbedding } from './contextual';
14
15
  import { expandQuery } from './expansion';
15
16
  import {
16
17
  buildExplainResults,
18
+ type ExpansionStatus,
17
19
  explainBm25,
18
20
  explainExpansion,
19
21
  explainFusion,
@@ -51,56 +53,64 @@ export interface HybridSearchDeps {
51
53
  // Score Normalization
52
54
  // ─────────────────────────────────────────────────────────────────────────────
53
55
 
54
- function _normalizeVectorScore(distance: number): number {
55
- return Math.max(0, Math.min(1, 1 - distance / 2));
56
+ // Removed: _normalizeVectorScore was dead code (vector distances normalized in vector index)
57
+
58
+ // ─────────────────────────────────────────────────────────────────────────────
59
+ // BM25 Score Normalization
60
+ // ─────────────────────────────────────────────────────────────────────────────
61
+
62
+ /**
63
+ * Normalize raw BM25 score to 0-1 range using sigmoid.
64
+ * BM25 scores are negative in SQLite FTS5 (more negative = better match).
65
+ * Typical range: -15 (excellent) to -2 (weak match).
66
+ * Maps to 0-1 where higher is better.
67
+ */
68
+ function normalizeBm25Score(rawScore: number): number {
69
+ const absScore = Math.abs(rawScore);
70
+ // Sigmoid with center=4.5, scale=2.8
71
+ // Maps: -15 → ~0.99, -5 → ~0.55, -2 → ~0.29
72
+ return 1 / (1 + Math.exp(-(absScore - 4.5) / 2.8));
56
73
  }
57
74
 
58
75
  // ─────────────────────────────────────────────────────────────────────────────
59
76
  // BM25 Strength Check
60
77
  // ─────────────────────────────────────────────────────────────────────────────
61
78
 
79
+ // Thresholds for strong signal detection (conservative - prefer expansion over speed)
80
+ const STRONG_TOP_SCORE = 0.84; // ~84th percentile confidence
81
+ const STRONG_GAP = 0.14; // Clear separation from #2
82
+
62
83
  /**
63
84
  * Check if BM25 results are strong enough to skip expansion.
64
- * Uses gap-based metric: how much better is #1 than #2?
65
- * Returns 0-1 where 1 = #1 is clearly dominant, 0 = results are similar.
66
- * Raw BM25: smaller (more negative) is better.
85
+ * Returns true if top result is both confident AND clearly separated.
86
+ * This prevents skipping on weak-but-separated results.
67
87
  */
68
88
  async function checkBm25Strength(
69
89
  store: StorePort,
70
90
  query: string,
71
91
  options?: { collection?: string; lang?: string }
72
- ): Promise<number> {
92
+ ): Promise<boolean> {
73
93
  const result = await store.searchFts(query, {
74
94
  limit: 5,
75
95
  collection: options?.collection,
76
96
  language: options?.lang,
77
97
  });
98
+
78
99
  if (!result.ok || result.value.length === 0) {
79
- return 0;
100
+ return false;
80
101
  }
81
102
 
82
- // Only one result = strong signal
83
- if (result.value.length === 1) {
84
- return 1;
85
- }
103
+ // Normalize scores (higher = better)
104
+ const scores = result.value
105
+ .map((r) => normalizeBm25Score(r.score))
106
+ .sort((a, b) => b - a); // Descending
86
107
 
87
- // Get top 2 scores (smaller is better)
88
- const scores = result.value.map((r) => r.score).sort((a, b) => a - b);
89
- const best = scores[0] ?? 0;
90
- const second = scores[1] ?? best;
91
- const worst = scores.at(-1) ?? best;
92
-
93
- // Compute gap-based strength
94
- // If best and second are equal, gap = 0
95
- // If second is much worse (larger), gap approaches 1
96
- const range = worst - best;
97
- if (range === 0) {
98
- return 0; // All scores equal, no clear winner
99
- }
108
+ const topScore = scores[0] ?? 0;
109
+ const secondScore = scores[1] ?? 0;
110
+ const gap = topScore - secondScore;
100
111
 
101
- // Gap = how much worse is #2 relative to the range (clamped for safety)
102
- const gap = (second - best) / range;
103
- return Math.max(0, Math.min(1, gap));
112
+ // Strong signal requires BOTH: high confidence AND clear separation
113
+ return topScore >= STRONG_TOP_SCORE && gap >= STRONG_GAP;
104
114
  }
105
115
 
106
116
  // ─────────────────────────────────────────────────────────────────────────────
@@ -155,7 +165,8 @@ async function searchVectorChunks(
155
165
  return [];
156
166
  }
157
167
 
158
- const embedResult = await embedPort.embed(query);
168
+ // Embed query with contextual formatting
169
+ const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
159
170
  if (!embedResult.ok) {
160
171
  return [];
161
172
  }
@@ -225,17 +236,18 @@ export async function searchHybrid(
225
236
  // 1. Check if expansion needed
226
237
  // ─────────────────────────────────────────────────────────────────────────
227
238
  const shouldExpand = !options.noExpand && genPort !== null;
228
- let skipExpansionDueToStrength = false;
239
+ let expansionStatus: ExpansionStatus = 'disabled';
229
240
 
230
241
  if (shouldExpand) {
231
- const bm25Strength = await checkBm25Strength(store, query, {
242
+ const hasStrongSignal = await checkBm25Strength(store, query, {
232
243
  collection: options.collection,
233
244
  lang: options.lang,
234
245
  });
235
- skipExpansionDueToStrength =
236
- bm25Strength >= pipelineConfig.strongBm25Threshold;
237
246
 
238
- if (!skipExpansionDueToStrength) {
247
+ if (hasStrongSignal) {
248
+ expansionStatus = 'skipped_strong';
249
+ } else {
250
+ expansionStatus = 'attempted';
239
251
  const expandResult = await expandQuery(genPort, query, {
240
252
  // Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
241
253
  lang: queryLanguage,
@@ -247,9 +259,7 @@ export async function searchHybrid(
247
259
  }
248
260
  }
249
261
 
250
- explainLines.push(
251
- explainExpansion(shouldExpand && !skipExpansionDueToStrength, expansion)
252
- );
262
+ explainLines.push(explainExpansion(expansionStatus, expansion));
253
263
 
254
264
  // ─────────────────────────────────────────────────────────────────────────
255
265
  // 2. Parallel retrieval using raw store/vector APIs for correct seq tracking
@@ -293,7 +303,8 @@ export async function searchHybrid(
293
303
 
294
304
  // Vector search
295
305
  let vecCount = 0;
296
- const vectorAvailable = vectorIndex?.searchAvailable && embedPort !== null;
306
+ const vectorAvailable =
307
+ (vectorIndex?.searchAvailable && embedPort !== null) ?? false;
297
308
 
298
309
  if (vectorAvailable && vectorIndex && embedPort) {
299
310
  // Original query
@@ -335,7 +346,7 @@ export async function searchHybrid(
335
346
  }
336
347
  }
337
348
 
338
- explainLines.push(explainVector(vecCount, vectorAvailable ?? false));
349
+ explainLines.push(explainVector(vecCount, vectorAvailable));
339
350
 
340
351
  // ─────────────────────────────────────────────────────────────────────────
341
352
  // 3. RRF Fusion
@@ -441,7 +452,13 @@ export async function searchHybrid(
441
452
  }
442
453
 
443
454
  // Get chunk via O(1) lookup
444
- const chunk = getChunk(candidate.mirrorHash, candidate.seq);
455
+ // For doc-level FTS (seq=0), fall back to first available chunk if exact lookup fails
456
+ let chunk = getChunk(candidate.mirrorHash, candidate.seq);
457
+ if (!chunk && candidate.seq === 0) {
458
+ // Doc-level FTS uses seq=0 as placeholder - try first chunk
459
+ const docChunks = chunksMap.get(candidate.mirrorHash);
460
+ chunk = docChunks?.[0];
461
+ }
445
462
  if (!chunk) {
446
463
  continue;
447
464
  }
@@ -524,7 +541,7 @@ export async function searchHybrid(
524
541
  mode: vectorAvailable ? 'hybrid' : 'bm25_only',
525
542
  expanded: expansion !== null,
526
543
  reranked: rerankResult.reranked,
527
- vectorsUsed: vectorAvailable ?? false,
544
+ vectorsUsed: vectorAvailable,
528
545
  totalResults: results.length,
529
546
  collection: options.collection,
530
547
  lang: options.lang,
@@ -4,11 +4,18 @@
4
4
  * @module src/pipeline
5
5
  */
6
6
 
7
+ // Contextual embedding
8
+ export {
9
+ extractTitle,
10
+ formatDocForEmbedding,
11
+ formatQueryForEmbedding,
12
+ } from './contextual';
7
13
  // Expansion
8
14
  export { expandQuery, generateCacheKey } from './expansion';
9
15
  // Explain
10
16
  export {
11
17
  buildExplainResults,
18
+ type ExpansionStatus,
12
19
  explainBm25,
13
20
  explainExpansion,
14
21
  explainFusion,
@@ -7,7 +7,6 @@
7
7
 
8
8
  import type { RerankPort } from '../llm/types';
9
9
  import type { StorePort } from '../store/types';
10
- import { createChunkLookup } from './chunk-lookup';
11
10
  import type { BlendingTier, FusionCandidate, RerankedCandidate } from './types';
12
11
  import { DEFAULT_BLENDING_SCHEDULE } from './types';
13
12
 
@@ -121,32 +120,44 @@ export async function rerankCandidates(
121
120
  const toRerank = candidates.slice(0, maxCandidates);
122
121
  const remaining = candidates.slice(maxCandidates);
123
122
 
124
- // Pre-fetch all chunks in one batch query (eliminates N+1)
123
+ // Dedupe by document - multiple chunks from same doc use single full-doc rerank
125
124
  const uniqueHashes = [...new Set(toRerank.map((c) => c.mirrorHash))];
126
- const chunksMapResult = await store.getChunksBatch(uniqueHashes);
127
125
 
128
- // If chunk fetch fails, degrade gracefully (fusion-only)
129
- // Don't rerank on empty/missing texts - produces non-deterministic results
130
- if (!chunksMapResult.ok) {
131
- return {
132
- candidates: candidates.map((c) => ({
133
- ...c,
134
- rerankScore: null,
135
- blendedScore: normalizeFusionScore(c.fusionScore),
136
- })),
137
- reranked: false,
138
- };
126
+ // Fetch full document content for each unique document (parallel)
127
+ // Max 128K chars per doc to fit in reranker context
128
+ const MAX_DOC_CHARS = 128_000;
129
+ const contentResults = await Promise.all(
130
+ uniqueHashes.map((hash) => store.getContent(hash))
131
+ );
132
+ const docContents = new Map<string, string>();
133
+ for (let i = 0; i < uniqueHashes.length; i++) {
134
+ const hash = uniqueHashes[i] as string;
135
+ const result = contentResults[i] as Awaited<
136
+ ReturnType<typeof store.getContent>
137
+ >;
138
+ if (result.ok && result.value) {
139
+ const content = result.value;
140
+ docContents.set(
141
+ hash,
142
+ content.length > MAX_DOC_CHARS
143
+ ? `${content.slice(0, MAX_DOC_CHARS)}...`
144
+ : content
145
+ );
146
+ } else {
147
+ // Fallback to empty string if content not available
148
+ docContents.set(hash, '');
149
+ }
139
150
  }
140
- const chunksMap = chunksMapResult.value;
141
- const getChunk = createChunkLookup(chunksMap);
142
151
 
143
- // Build texts array for reranking (O(1) lookup per candidate)
144
- const texts: string[] = toRerank.map((c) => {
145
- const chunk = getChunk(c.mirrorHash, c.seq);
146
- return chunk?.text ?? '';
147
- });
152
+ // Build texts array for reranking (one per unique document)
153
+ const hashToIndex = new Map<string, number>();
154
+ const texts: string[] = [];
155
+ for (const hash of uniqueHashes) {
156
+ hashToIndex.set(hash, texts.length);
157
+ texts.push(docContents.get(hash) ?? '');
158
+ }
148
159
 
149
- // Run reranking
160
+ // Run reranking on full documents
150
161
  const rerankResult = await rerankPort.rerank(query, texts);
151
162
 
152
163
  if (!rerankResult.ok) {
@@ -163,16 +174,33 @@ export async function rerankCandidates(
163
174
 
164
175
  // Map rerank scores to candidates
165
176
  // Note: We use normalizeFusionScore defined above (across ALL candidates)
166
- // Build index->score map for O(1) lookup instead of O(n) find per candidate
167
- const scoreByIndex = new Map(
177
+ // Build doc index->score map for O(1) lookup
178
+ // All chunks from same document share the same rerank score
179
+ const scoreByDocIndex = new Map(
168
180
  rerankResult.value.map((s) => [s.index, s.score])
169
181
  );
182
+
183
+ // Normalize rerank scores using min-max (models return varying scales)
184
+ const rerankScores = rerankResult.value.map((s) => s.score);
185
+ const minRerank = Math.min(...rerankScores);
186
+ const maxRerank = Math.max(...rerankScores);
187
+ const rerankRange = maxRerank - minRerank;
188
+
189
+ function normalizeRerankScore(score: number): number {
190
+ if (rerankRange < 1e-9) {
191
+ return 1; // All tied for best
192
+ }
193
+ return (score - minRerank) / rerankRange;
194
+ }
195
+
170
196
  const rerankedCandidates: RerankedCandidate[] = toRerank.map((c, i) => {
171
- const rerankScore = scoreByIndex.get(i) ?? null;
197
+ // Get document-level rerank score (shared by all chunks from same doc)
198
+ const docIndex = hashToIndex.get(c.mirrorHash) ?? -1;
199
+ const rerankScore = scoreByDocIndex.get(docIndex) ?? null;
172
200
 
173
- // Normalize rerank score to 0-1 range (models may return different scales)
201
+ // Normalize rerank score to 0-1 range using min-max
174
202
  const normalizedRerankScore =
175
- rerankScore !== null ? Math.max(0, Math.min(1, rerankScore)) : null;
203
+ rerankScore !== null ? normalizeRerankScore(rerankScore) : null;
176
204
 
177
205
  // Calculate blended score using normalized fusion score
178
206
  const position = i + 1;
@@ -213,8 +213,6 @@ export type RerankedCandidate = FusionCandidate & {
213
213
 
214
214
  /** Search pipeline configuration */
215
215
  export interface PipelineConfig {
216
- /** Strong BM25 threshold to skip expansion */
217
- strongBm25Threshold: number;
218
216
  /** Expansion timeout in ms */
219
217
  expansionTimeout: number;
220
218
  /** Max candidates to rerank */
@@ -227,7 +225,6 @@ export interface PipelineConfig {
227
225
 
228
226
  /** Default pipeline configuration */
229
227
  export const DEFAULT_PIPELINE_CONFIG: PipelineConfig = {
230
- strongBm25Threshold: 0.7,
231
228
  expansionTimeout: 5000,
232
229
  rerankCandidates: 20,
233
230
  rrf: DEFAULT_RRF_CONFIG,
@@ -11,6 +11,7 @@ import type { StorePort } from '../store/types';
11
11
  import { err, ok } from '../store/types';
12
12
  import type { VectorIndexPort } from '../store/vector/types';
13
13
  import { createChunkLookup } from './chunk-lookup';
14
+ import { formatQueryForEmbedding } from './contextual';
14
15
  import { detectQueryLanguage } from './query-language';
15
16
  import type { SearchOptions, SearchResult, SearchResults } from './types';
16
17
 
@@ -268,8 +269,8 @@ export async function searchVector(
268
269
  );
269
270
  }
270
271
 
271
- // Embed query
272
- const embedResult = await embedPort.embed(query);
272
+ // Embed query with contextual formatting
273
+ const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
273
274
  if (!embedResult.ok) {
274
275
  return err(
275
276
  'QUERY_FAILED',
@@ -0,0 +1,91 @@
1
+ # Web UI (gno serve)
2
+
3
+ Local web server for GNO search and document browsing.
4
+
5
+ ## Architecture
6
+
7
+ Uses the same **"Ports without DI"** pattern as CLI/MCP (see root CLAUDE.md):
8
+ - Adapters instantiated directly in `context.ts`
9
+ - Pipeline code receives port interfaces
10
+ - No dependency injection
11
+
12
+ ```
13
+ src/serve/
14
+ ├── server.ts # Bun.serve() entry point
15
+ ├── context.ts # ServerContext with LLM ports
16
+ ├── routes/
17
+ │ └── api.ts # REST API handlers
18
+ └── public/ # React frontend (Bun HTML imports)
19
+     ├── app.tsx # Router
20
+     ├── pages/ # Page components
21
+     ├── components/ # UI components (ShadCN + AI Elements)
22
+     └── hooks/ # Custom hooks (useApi, etc.)
23
+ ```
24
+
25
+ ## Key Patterns
26
+
27
+ ### Ports (interfaces)
28
+ - `EmbeddingPort` - vector embeddings
29
+ - `GenerationPort` - LLM text generation
30
+ - `RerankPort` - cross-encoder reranking
31
+ - `VectorIndexPort` - vector search
32
+
33
+ ### ServerContext
34
+ Created at startup, holds all LLM ports and capabilities:
35
+ ```typescript
36
+ interface ServerContext {
37
+ store: SqliteAdapter;
38
+ config: Config;
39
+ vectorIndex: VectorIndexPort | null;
40
+ embedPort: EmbeddingPort | null;
41
+ genPort: GenerationPort | null;
42
+ rerankPort: RerankPort | null;
43
+ capabilities: { bm25, vector, hybrid, answer };
44
+ }
45
+ ```
46
+
47
+ ### Shared Pipeline Code
48
+ Answer generation uses shared module to stay in sync with CLI:
49
+ - `src/pipeline/answer.ts` - generateGroundedAnswer, processAnswerResult
50
+
51
+ ## API Endpoints
52
+
53
+ | Endpoint | Method | Description |
54
+ |----------|--------|-------------|
55
+ | `/api/health` | GET | Health check |
56
+ | `/api/status` | GET | Index stats, collections |
57
+ | `/api/capabilities` | GET | Available features |
58
+ | `/api/collections` | GET | List collections |
59
+ | `/api/docs` | GET | List documents |
60
+ | `/api/doc` | GET | Get document content |
61
+ | `/api/search` | POST | BM25 search |
62
+ | `/api/query` | POST | Hybrid search |
63
+ | `/api/ask` | POST | AI answer with citations |
64
+ | `/api/presets` | GET | List model presets |
65
+ | `/api/presets` | POST | Switch preset (hot-reload) |
66
+ | `/api/models/status` | GET | Download progress |
67
+ | `/api/models/pull` | POST | Start model download |
68
+
69
+ ## Frontend
70
+
71
+ - **Framework**: React (via Bun HTML imports)
72
+ - **Styling**: Tailwind CSS + ShadCN components
73
+ - **AI Elements**: Conversation, Message, Sources, CodeBlock, Loader
74
+ - **Routing**: Simple hash-free SPA routing in app.tsx
75
+
76
+ ## Development
77
+
78
+ ```bash
79
+ # Start dev server with HMR
80
+ bun run src/serve/index.ts
81
+
82
+ # Or via CLI
83
+ gno serve --port 3000
84
+ ```
85
+
86
+ ## Security
87
+
88
+ - Binds to `127.0.0.1` only (no LAN exposure)
89
+ - CSP headers on all responses
90
+ - CORS protection on POST endpoints
91
+ - No external font/script loading
@@ -0,0 +1,2 @@
1
+ [serve.static]
2
+ plugins = ["bun-plugin-tailwind"]
@@ -0,0 +1,181 @@
1
+ /**
2
+ * Server context for web UI.
3
+ * Manages LLM ports and vector index for hybrid search and AI answers.
4
+ *
5
+ * @module src/serve/context
6
+ */
7
+
8
+ import type { Config } from '../config/types';
9
+ import { LlmAdapter } from '../llm/nodeLlamaCpp/adapter';
10
+ import { getActivePreset } from '../llm/registry';
11
+ import type {
12
+ DownloadProgress,
13
+ EmbeddingPort,
14
+ GenerationPort,
15
+ ModelType,
16
+ RerankPort,
17
+ } from '../llm/types';
18
+ import type { SqliteAdapter } from '../store/sqlite/adapter';
19
+ import { createVectorIndexPort, type VectorIndexPort } from '../store/vector';
20
+
21
+ // ─────────────────────────────────────────────────────────────────────────────
22
+ // Download State (in-memory, single user)
23
+ // ─────────────────────────────────────────────────────────────────────────────
24
+
25
+ export interface DownloadState {
26
+ active: boolean;
27
+ currentType: ModelType | null;
28
+ progress: DownloadProgress | null;
29
+ completed: ModelType[];
30
+ failed: Array<{ type: ModelType; error: string }>;
31
+ startedAt: number | null;
32
+ }
33
+
34
+ /** Global download state for polling */
35
+ export const downloadState: DownloadState = {
36
+ active: false,
37
+ currentType: null,
38
+ progress: null,
39
+ completed: [],
40
+ failed: [],
41
+ startedAt: null,
42
+ };
43
+
44
+ /** Reset download state */
45
+ export function resetDownloadState(): void {
46
+ downloadState.active = false;
47
+ downloadState.currentType = null;
48
+ downloadState.progress = null;
49
+ downloadState.completed = [];
50
+ downloadState.failed = [];
51
+ downloadState.startedAt = null;
52
+ }
53
+
54
+ // ─────────────────────────────────────────────────────────────────────────────
55
+ // Server Context
56
+ // ─────────────────────────────────────────────────────────────────────────────
57
+
58
+ export interface ServerContext {
59
+ store: SqliteAdapter;
60
+ config: Config;
61
+ vectorIndex: VectorIndexPort | null;
62
+ embedPort: EmbeddingPort | null;
63
+ genPort: GenerationPort | null;
64
+ rerankPort: RerankPort | null;
65
+ capabilities: {
66
+ bm25: boolean;
67
+ vector: boolean;
68
+ hybrid: boolean;
69
+ answer: boolean;
70
+ };
71
+ }
72
+
73
+ /**
74
+ * Initialize server context with LLM ports.
75
+ * Attempts to load models; missing models are logged but don't fail.
76
+ */
77
+ export async function createServerContext(
78
+ store: SqliteAdapter,
79
+ config: Config
80
+ ): Promise<ServerContext> {
81
+ let embedPort: EmbeddingPort | null = null;
82
+ let genPort: GenerationPort | null = null;
83
+ let rerankPort: RerankPort | null = null;
84
+ let vectorIndex: VectorIndexPort | null = null;
85
+
86
+ try {
87
+ const preset = getActivePreset(config);
88
+ const llm = new LlmAdapter(config);
89
+
90
+ // Try to create embedding port
91
+ const embedResult = await llm.createEmbeddingPort(preset.embed);
92
+ if (embedResult.ok) {
93
+ embedPort = embedResult.value;
94
+ const initResult = await embedPort.init();
95
+ if (initResult.ok) {
96
+ // Create vector index
97
+ const dimensions = embedPort.dimensions();
98
+ const db = store.getRawDb();
99
+ const vectorResult = await createVectorIndexPort(db, {
100
+ model: preset.embed,
101
+ dimensions,
102
+ });
103
+ if (vectorResult.ok) {
104
+ vectorIndex = vectorResult.value;
105
+ console.log('Vector search enabled');
106
+ }
107
+ }
108
+ }
109
+
110
+ // Try to create generation port
111
+ const genResult = await llm.createGenerationPort(preset.gen);
112
+ if (genResult.ok) {
113
+ genPort = genResult.value;
114
+ console.log('AI answer generation enabled');
115
+ }
116
+
117
+ // Try to create rerank port
118
+ const rerankResult = await llm.createRerankPort(preset.rerank);
119
+ if (rerankResult.ok) {
120
+ rerankPort = rerankResult.value;
121
+ console.log('Reranking enabled');
122
+ }
123
+ } catch (e) {
124
+ // Log but don't fail - models are optional
125
+ console.log(
126
+ 'LLM initialization skipped:',
127
+ e instanceof Error ? e.message : String(e)
128
+ );
129
+ }
130
+
131
+ const capabilities = {
132
+ bm25: true, // Always available
133
+ vector: vectorIndex?.searchAvailable ?? false,
134
+ hybrid: (vectorIndex?.searchAvailable ?? false) && embedPort !== null,
135
+ answer: genPort !== null,
136
+ };
137
+
138
+ return {
139
+ store,
140
+ config,
141
+ vectorIndex,
142
+ embedPort,
143
+ genPort,
144
+ rerankPort,
145
+ capabilities,
146
+ };
147
+ }
148
+
149
+ /**
150
+ * Dispose server context resources.
151
+ * Each port is disposed independently to prevent one failure from blocking others.
152
+ */
153
+ export async function disposeServerContext(ctx: ServerContext): Promise<void> {
154
+ const ports = [
155
+ { name: 'embed', port: ctx.embedPort },
156
+ { name: 'gen', port: ctx.genPort },
157
+ { name: 'rerank', port: ctx.rerankPort },
158
+ ];
159
+
160
+ for (const { name, port } of ports) {
161
+ if (port) {
162
+ try {
163
+ await port.dispose();
164
+ } catch (e) {
165
+ console.error(`Failed to dispose ${name} port:`, e);
166
+ }
167
+ }
168
+ }
169
+ }
170
+
171
+ /**
172
+ * Reload server context with potentially new config.
173
+ * Disposes existing ports and recreates them.
174
+ */
175
+ export async function reloadServerContext(
176
+ ctx: ServerContext,
177
+ newConfig?: Config
178
+ ): Promise<ServerContext> {
179
+ await disposeServerContext(ctx);
180
+ return createServerContext(ctx.store, newConfig ?? ctx.config);
181
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Serve module exports.
3
+ *
4
+ * @module src/serve
5
+ */
6
+
7
+ export { type ServeOptions, type ServeResult, startServer } from './server';