@gmickel/gno 0.3.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +74 -7
  2. package/package.json +30 -1
  3. package/src/cli/commands/ask.ts +12 -187
  4. package/src/cli/commands/embed.ts +10 -4
  5. package/src/cli/commands/models/pull.ts +9 -4
  6. package/src/cli/commands/serve.ts +19 -0
  7. package/src/cli/commands/vsearch.ts +5 -2
  8. package/src/cli/program.ts +28 -0
  9. package/src/config/types.ts +11 -6
  10. package/src/llm/registry.ts +3 -1
  11. package/src/mcp/tools/vsearch.ts +5 -2
  12. package/src/pipeline/answer.ts +224 -0
  13. package/src/pipeline/contextual.ts +57 -0
  14. package/src/pipeline/expansion.ts +49 -31
  15. package/src/pipeline/explain.ts +11 -3
  16. package/src/pipeline/fusion.ts +20 -9
  17. package/src/pipeline/hybrid.ts +57 -40
  18. package/src/pipeline/index.ts +7 -0
  19. package/src/pipeline/rerank.ts +55 -27
  20. package/src/pipeline/types.ts +0 -3
  21. package/src/pipeline/vsearch.ts +3 -2
  22. package/src/serve/CLAUDE.md +91 -0
  23. package/src/serve/bunfig.toml +2 -0
  24. package/src/serve/context.ts +181 -0
  25. package/src/serve/index.ts +7 -0
  26. package/src/serve/public/app.tsx +56 -0
  27. package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
  28. package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
  29. package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
  30. package/src/serve/public/components/ai-elements/loader.tsx +96 -0
  31. package/src/serve/public/components/ai-elements/message.tsx +443 -0
  32. package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
  33. package/src/serve/public/components/ai-elements/sources.tsx +75 -0
  34. package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
  35. package/src/serve/public/components/preset-selector.tsx +403 -0
  36. package/src/serve/public/components/ui/badge.tsx +46 -0
  37. package/src/serve/public/components/ui/button-group.tsx +82 -0
  38. package/src/serve/public/components/ui/button.tsx +62 -0
  39. package/src/serve/public/components/ui/card.tsx +92 -0
  40. package/src/serve/public/components/ui/carousel.tsx +244 -0
  41. package/src/serve/public/components/ui/collapsible.tsx +31 -0
  42. package/src/serve/public/components/ui/command.tsx +181 -0
  43. package/src/serve/public/components/ui/dialog.tsx +141 -0
  44. package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
  45. package/src/serve/public/components/ui/hover-card.tsx +42 -0
  46. package/src/serve/public/components/ui/input-group.tsx +167 -0
  47. package/src/serve/public/components/ui/input.tsx +21 -0
  48. package/src/serve/public/components/ui/progress.tsx +28 -0
  49. package/src/serve/public/components/ui/scroll-area.tsx +56 -0
  50. package/src/serve/public/components/ui/select.tsx +188 -0
  51. package/src/serve/public/components/ui/separator.tsx +26 -0
  52. package/src/serve/public/components/ui/table.tsx +114 -0
  53. package/src/serve/public/components/ui/textarea.tsx +18 -0
  54. package/src/serve/public/components/ui/tooltip.tsx +59 -0
  55. package/src/serve/public/globals.css +226 -0
  56. package/src/serve/public/hooks/use-api.ts +112 -0
  57. package/src/serve/public/index.html +13 -0
  58. package/src/serve/public/pages/Ask.tsx +442 -0
  59. package/src/serve/public/pages/Browse.tsx +270 -0
  60. package/src/serve/public/pages/Dashboard.tsx +202 -0
  61. package/src/serve/public/pages/DocView.tsx +302 -0
  62. package/src/serve/public/pages/Search.tsx +335 -0
  63. package/src/serve/routes/api.ts +763 -0
  64. package/src/serve/server.ts +249 -0
  65. package/src/store/migrations/002-documents-fts.ts +40 -0
  66. package/src/store/migrations/index.ts +2 -1
  67. package/src/store/sqlite/adapter.ts +216 -33
  68. package/src/store/sqlite/fts5-snowball.ts +144 -0
  69. package/src/store/types.ts +33 -3
  70. package/src/store/vector/stats.ts +3 -0
  71. package/src/store/vector/types.ts +1 -0
package/README.md CHANGED
@@ -16,6 +16,8 @@ GNO is a local knowledge engine for privacy-conscious developers and AI agents.
16
16
  - [Quick Start](#quick-start)
17
17
  - [Installation](#installation)
18
18
  - [Search Modes](#search-modes)
19
+ - [Web UI](#web-ui)
20
+ - [REST API](#rest-api)
19
21
  - [Agent Integration](#agent-integration)
20
22
  - [How It Works](#how-it-works)
21
23
  - [Features](#features)
@@ -97,11 +99,14 @@ gno skill install --target all # Both Claude + Codex
97
99
 
98
100
  | Command | Mode | Best For |
99
101
  |:--------|:-----|:---------|
100
- | `gno search` | BM25 | Exact phrases, code identifiers |
101
- | `gno vsearch` | Vector | Natural language, concepts |
102
+ | `gno search` | Document-level BM25 | Exact phrases, code identifiers |
103
+ | `gno vsearch` | Contextual Vector | Natural language, concepts |
102
104
  | `gno query` | Hybrid | Best accuracy (BM25 + vector + reranking) |
103
105
  | `gno ask --answer` | RAG | Direct answers with citations |
104
106
 
107
+ **BM25** indexes full documents (not chunks) with Snowball stemming—"running" matches "run".
108
+ **Vector** embeds chunks with document titles for context awareness.
109
+
105
110
  ```bash
106
111
  gno search "handleAuth" # Find exact matches
107
112
  gno vsearch "error handling patterns" # Semantic similarity
@@ -113,6 +118,65 @@ Output formats: `--json`, `--files`, `--csv`, `--md`, `--xml`
113
118
 
114
119
  ---
115
120
 
121
+ ## Web UI
122
+
123
+ Visual dashboard for search, browsing, and AI answers—right in your browser.
124
+
125
+ ```bash
126
+ gno serve # Start on port 3000
127
+ gno serve --port 8080 # Custom port
128
+ ```
129
+
130
+ Open `http://localhost:3000` to:
131
+
132
+ - **Search** — BM25, vector, or hybrid modes with visual results
133
+ - **Browse** — Paginated document list, filter by collection
134
+ - **Ask** — AI-powered Q&A with citations
135
+ - **Switch presets** — Change models live without restart
136
+
137
+ Everything runs locally. No cloud, no accounts, no data leaving your machine.
138
+
139
+ > **Detailed docs**: [Web UI Guide](https://gno.sh/docs/WEB-UI/)
140
+
141
+ ---
142
+
143
+ ## REST API
144
+
145
+ Programmatic access to all GNO features via HTTP.
146
+
147
+ ```bash
148
+ # Hybrid search
149
+ curl -X POST http://localhost:3000/api/query \
150
+ -H "Content-Type: application/json" \
151
+ -d '{"query": "authentication patterns", "limit": 10}'
152
+
153
+ # AI answer
154
+ curl -X POST http://localhost:3000/api/ask \
155
+ -H "Content-Type: application/json" \
156
+ -d '{"query": "What is our deployment process?"}'
157
+
158
+ # Index status
159
+ curl http://localhost:3000/api/status
160
+ ```
161
+
162
+ | Endpoint | Method | Description |
163
+ |:---------|:-------|:------------|
164
+ | `/api/query` | POST | Hybrid search (recommended) |
165
+ | `/api/search` | POST | BM25 keyword search |
166
+ | `/api/ask` | POST | AI-powered Q&A |
167
+ | `/api/docs` | GET | List documents |
168
+ | `/api/doc` | GET | Get document content |
169
+ | `/api/status` | GET | Index statistics |
170
+ | `/api/presets` | GET/POST | Model preset management |
171
+ | `/api/models/pull` | POST | Download models |
172
+ | `/api/models/status` | GET | Download progress |
173
+
174
+ No authentication. No rate limits. Build custom tools, automate workflows, integrate with any language.
175
+
176
+ > **Full reference**: [API Documentation](https://gno.sh/docs/API/)
177
+
178
+ ---
179
+
116
180
  ## Agent Integration
117
181
 
118
182
  ### MCP Server
@@ -169,10 +233,11 @@ graph TD
169
233
  M --> N[Final Results]
170
234
  ```
171
235
 
236
+ 0. **Strong Signal Check** — Skip expansion if BM25 has confident match (saves 1-3s)
172
237
  1. **Query Expansion** — LLM generates lexical variants, semantic rephrases, and a [HyDE](https://arxiv.org/abs/2212.10496) passage
173
- 2. **Parallel Retrieval** — BM25 + vector search run concurrently on all variants
174
- 3. **Fusion** — Reciprocal Rank Fusion merges results with position-based scoring
175
- 4. **Reranking** — Cross-encoder rescores top 20, blended with fusion scores
238
+ 2. **Parallel Retrieval** — Document-level BM25 + chunk-level vector search on all variants
239
+ 3. **Fusion** — RRF with weight for original query, tiered bonus for top ranks
240
+ 4. **Reranking** — Qwen3-Reranker scores full documents (32K context), blended with fusion
176
241
 
177
242
  > **Deep dive**: [How Search Works](https://gno.sh/docs/HOW-SEARCH-WORKS/)
178
243
 
@@ -183,6 +248,8 @@ graph TD
183
248
  | Feature | Description |
184
249
  |:--------|:------------|
185
250
  | **Hybrid Search** | BM25 + vector + RRF fusion + cross-encoder reranking |
251
+ | **Web UI** | Visual dashboard for search, browse, and AI Q&A |
252
+ | **REST API** | HTTP API for custom tools and integrations |
186
253
  | **Multi-Format** | Markdown, PDF, DOCX, XLSX, PPTX, plain text |
187
254
  | **Local LLM** | AI answers via llama.cpp—no API keys |
188
255
  | **Privacy First** | 100% offline, zero telemetry, your data stays yours |
@@ -200,7 +267,7 @@ Models auto-download on first use to `~/.cache/gno/models/`.
200
267
  | Model | Purpose | Size |
201
268
  |:------|:--------|:-----|
202
269
  | bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
203
- | bge-reranker-v2-m3 | Cross-encoder reranking | ~700MB |
270
+ | Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
204
271
  | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
205
272
 
206
273
  ### Model Presets
@@ -224,7 +291,7 @@ gno models pull --all
224
291
 
225
292
  ```
226
293
  ┌─────────────────────────────────────────────────┐
227
- GNO CLI / MCP
294
+ GNO CLI / MCP / Web UI / API
228
295
  ├─────────────────────────────────────────────────┤
229
296
  │ Ports: Converter, Store, Embedding, Rerank │
230
297
  ├─────────────────────────────────────────────────┤
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "0.3.5",
3
+ "version": "0.5.0",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "search",
@@ -56,6 +56,8 @@
56
56
  "website:dev": "cd website && make serve",
57
57
  "website:build": "cd website && make build",
58
58
  "website:demos": "cd website/demos && ./build-demos.sh",
59
+ "serve": "bun src/index.ts serve",
60
+ "serve:dev": "NODE_ENV=development bun --hot src/index.ts serve",
59
61
  "version:patch": "npm version patch --no-git-tag-version",
60
62
  "version:minor": "npm version minor --no-git-tag-version",
61
63
  "version:major": "npm version major --no-git-tag-version",
@@ -65,18 +67,44 @@
65
67
  },
66
68
  "dependencies": {
67
69
  "@modelcontextprotocol/sdk": "^1.25.1",
70
+ "@radix-ui/react-collapsible": "^1.1.12",
71
+ "@radix-ui/react-dialog": "^1.1.15",
72
+ "@radix-ui/react-dropdown-menu": "^2.1.16",
73
+ "@radix-ui/react-hover-card": "^1.1.15",
74
+ "@radix-ui/react-progress": "^1.1.8",
75
+ "@radix-ui/react-scroll-area": "^1.2.10",
76
+ "@radix-ui/react-select": "^2.2.6",
77
+ "@radix-ui/react-separator": "^1.1.8",
78
+ "@radix-ui/react-slot": "^1.2.4",
79
+ "@radix-ui/react-tooltip": "^1.2.8",
80
+ "ai": "^6.0.5",
81
+ "bun-plugin-tailwind": "^0.1.2",
82
+ "class-variance-authority": "^0.7.1",
83
+ "clsx": "^2.1.1",
84
+ "cmdk": "^1.1.1",
68
85
  "commander": "^14.0.2",
86
+ "embla-carousel-react": "^8.6.0",
69
87
  "franc": "^6.2.0",
88
+ "lucide-react": "^0.562.0",
70
89
  "markitdown-ts": "^0.0.8",
90
+ "nanoid": "^5.1.6",
71
91
  "node-llama-cpp": "^3.14.5",
72
92
  "officeparser": "^5.2.2",
73
93
  "picocolors": "^1.1.1",
94
+ "react": "^19.2.3",
95
+ "react-dom": "^19.2.3",
96
+ "shiki": "^3.20.0",
74
97
  "sqlite-vec": "^0.1.7-alpha.2",
98
+ "streamdown": "^1.6.10",
99
+ "tailwind-merge": "^3.4.0",
100
+ "use-stick-to-bottom": "^1.1.1",
75
101
  "zod": "^4.2.1"
76
102
  },
77
103
  "devDependencies": {
78
104
  "@biomejs/biome": "2.3.10",
79
105
  "@types/bun": "latest",
106
+ "@types/react": "^19.2.7",
107
+ "@types/react-dom": "^19.2.3",
80
108
  "@typescript/native-preview": "^7.0.0-dev.20251215.1",
81
109
  "ajv": "^8.17.1",
82
110
  "ajv-formats": "^3.0.1",
@@ -87,6 +115,7 @@
87
115
  "oxlint-tsgolint": "^0.10.0",
88
116
  "pdf-lib": "^1.17.1",
89
117
  "pptxgenjs": "^4.0.1",
118
+ "tailwindcss": "^4.1.18",
90
119
  "ultracite": "^6.5.0"
91
120
  },
92
121
  "peerDependencies": {
@@ -12,13 +12,12 @@ import type {
12
12
  GenerationPort,
13
13
  RerankPort,
14
14
  } from '../../llm/types';
15
+ import {
16
+ generateGroundedAnswer,
17
+ processAnswerResult,
18
+ } from '../../pipeline/answer';
15
19
  import { type HybridSearchDeps, searchHybrid } from '../../pipeline/hybrid';
16
- import type {
17
- AskOptions,
18
- AskResult,
19
- Citation,
20
- SearchResult,
21
- } from '../../pipeline/types';
20
+ import type { AskOptions, AskResult, Citation } from '../../pipeline/types';
22
21
  import {
23
22
  createVectorIndexPort,
24
23
  type VectorIndexPort,
@@ -50,163 +49,6 @@ export type AskCommandResult =
50
49
  | { success: true; data: AskResult }
51
50
  | { success: false; error: string };
52
51
 
53
- // ─────────────────────────────────────────────────────────────────────────────
54
- // Grounded Answer Generation
55
- // ─────────────────────────────────────────────────────────────────────────────
56
-
57
- const ANSWER_PROMPT = `You are answering a question using ONLY the provided context blocks.
58
-
59
- Rules you MUST follow:
60
- 1) Use ONLY facts stated in the context blocks. Do NOT use outside knowledge.
61
- 2) Every factual statement must include an inline citation like [1] or [2] referring to a context block.
62
- 3) If the context does not contain enough information to answer, reply EXACTLY:
63
- "I don't have enough information in the provided sources to answer this question."
64
- 4) Do not cite sources you did not use. Do not invent citation numbers.
65
-
66
- Question: {query}
67
-
68
- Context blocks:
69
- {context}
70
-
71
- Write a concise answer (1-3 paragraphs).`;
72
-
73
- /** Abstention message when LLM cannot ground answer */
74
- const ABSTENTION_MESSAGE =
75
- "I don't have enough information in the provided sources to answer this question.";
76
-
77
- // Max characters per snippet to avoid blowing up prompt size
78
- const MAX_SNIPPET_CHARS = 1500;
79
- // Max number of sources to include in context
80
- const MAX_CONTEXT_SOURCES = 5;
81
-
82
- /**
83
- * Extract VALID citation numbers from answer text.
84
- * Only returns numbers in range [1, maxCitation].
85
- * @param answer Answer text to parse
86
- * @param maxCitation Maximum valid citation number
87
- * @returns Sorted unique valid citation numbers (1-indexed)
88
- */
89
- function extractValidCitationNumbers(
90
- answer: string,
91
- maxCitation: number
92
- ): number[] {
93
- const nums = new Set<number>();
94
- // Use fresh regex to avoid lastIndex issues
95
- const re = /\[(\d+)\]/g;
96
- const matches = answer.matchAll(re);
97
- for (const match of matches) {
98
- const n = Number(match[1]);
99
- // Only accept valid citation numbers in range [1, maxCitation]
100
- if (Number.isInteger(n) && n >= 1 && n <= maxCitation) {
101
- nums.add(n);
102
- }
103
- }
104
- return [...nums].sort((a, b) => a - b);
105
- }
106
-
107
- /**
108
- * Filter citations to only those actually referenced in the answer.
109
- * @param citations All citations provided to LLM
110
- * @param validUsedNumbers Valid 1-indexed citation numbers from answer
111
- */
112
- function filterCitationsByUse(
113
- citations: Citation[],
114
- validUsedNumbers: number[]
115
- ): Citation[] {
116
- const usedSet = new Set(validUsedNumbers);
117
- return citations.filter((_, idx) => usedSet.has(idx + 1));
118
- }
119
-
120
- /**
121
- * Renumber citations in answer text to match filtered citations.
122
- * E.g., if answer uses [2] and [5], renumber to [1] and [2].
123
- * Invalid citations (not in validUsedNumbers) are removed.
124
- */
125
- function renumberAnswerCitations(
126
- answer: string,
127
- validUsedNumbers: number[]
128
- ): string {
129
- // Build mapping: old number -> new number (1-indexed)
130
- const mapping = new Map<number, number>();
131
- for (let i = 0; i < validUsedNumbers.length; i++) {
132
- const oldNum = validUsedNumbers[i];
133
- if (oldNum !== undefined) {
134
- mapping.set(oldNum, i + 1);
135
- }
136
- }
137
-
138
- // Use fresh regex to avoid lastIndex issues
139
- const re = /\[(\d+)\]/g;
140
- // Replace valid [n] with renumbered [m], remove invalid citations
141
- const replaced = answer.replace(re, (_match, numStr: string) => {
142
- const oldNum = Number(numStr);
143
- const newNum = mapping.get(oldNum);
144
- // If not in mapping, remove the citation entirely
145
- return newNum !== undefined ? `[${newNum}]` : '';
146
- });
147
-
148
- // Clean up whitespace artifacts from removed citations
149
- // e.g., "See [99] for" → "See  for" (double space left behind) → "See for"
150
- return replaced.replace(/ {2,}/g, ' ').trim();
151
- }
152
-
153
- async function generateGroundedAnswer(
154
- genPort: GenerationPort,
155
- query: string,
156
- results: SearchResult[],
157
- maxTokens: number
158
- ): Promise<{ answer: string; citations: Citation[] } | null> {
159
- // Build context from top results with bounded snippet sizes
160
- const contextParts: string[] = [];
161
- const citations: Citation[] = [];
162
-
163
- // Track citation index separately to ensure it matches context blocks exactly
164
- let citationIndex = 0;
165
-
166
- for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
167
- // Skip results with empty snippets
168
- if (!r.snippet || r.snippet.trim().length === 0) {
169
- continue;
170
- }
171
-
172
- // Cap snippet length to avoid prompt blowup
173
- const snippet =
174
- r.snippet.length > MAX_SNIPPET_CHARS
175
- ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
176
- : r.snippet;
177
-
178
- citationIndex += 1;
179
- contextParts.push(`[${citationIndex}] ${snippet}`);
180
- citations.push({
181
- docid: r.docid,
182
- uri: r.uri,
183
- startLine: r.snippetRange?.startLine,
184
- endLine: r.snippetRange?.endLine,
185
- });
186
- }
187
-
188
- // If no valid context, can't generate answer
189
- if (contextParts.length === 0) {
190
- return null;
191
- }
192
-
193
- const prompt = ANSWER_PROMPT.replace('{query}', query).replace(
194
- '{context}',
195
- contextParts.join('\n\n')
196
- );
197
-
198
- const result = await genPort.generate(prompt, {
199
- temperature: 0,
200
- maxTokens,
201
- });
202
-
203
- if (!result.ok) {
204
- return null;
205
- }
206
-
207
- return { answer: result.value, citations };
208
- }
209
-
210
52
  // ─────────────────────────────────────────────────────────────────────────────
211
53
  // Command Implementation
212
54
  // ─────────────────────────────────────────────────────────────────────────────
@@ -327,15 +169,15 @@ export async function ask(
327
169
 
328
170
  if (shouldGenerateAnswer && genPort) {
329
171
  const maxTokens = options.maxAnswerTokens ?? 512;
330
- const answerResult = await generateGroundedAnswer(
331
- genPort,
172
+ const rawResult = await generateGroundedAnswer(
173
+ { genPort, store },
332
174
  query,
333
175
  results,
334
176
  maxTokens
335
177
  );
336
178
 
337
179
  // Fail loudly if generation was requested but failed
338
- if (!answerResult) {
180
+ if (!rawResult) {
339
181
  return {
340
182
  success: false,
341
183
  error:
@@ -343,27 +185,10 @@ export async function ask(
343
185
  };
344
186
  }
345
187
 
346
- // Extract only VALID citation numbers (in range 1..citations.length)
347
- const maxCitation = answerResult.citations.length;
348
- const validUsedNums = extractValidCitationNumbers(
349
- answerResult.answer,
350
- maxCitation
351
- );
352
- const filteredCitations = filterCitationsByUse(
353
- answerResult.citations,
354
- validUsedNums
355
- );
356
-
357
- // Abstention guard: if no valid citations, LLM didn't ground the answer
358
- if (validUsedNums.length === 0 || filteredCitations.length === 0) {
359
- answer = ABSTENTION_MESSAGE;
360
- citations = [];
361
- } else {
362
- // Renumber citations in answer to match filtered list (e.g., [2],[5] -> [1],[2])
363
- // Invalid citations are removed from the answer text
364
- answer = renumberAnswerCitations(answerResult.answer, validUsedNums);
365
- citations = filteredCitations;
366
- }
188
+ // Process answer: extract valid citations, filter, renumber
189
+ const processed = processAnswerResult(rawResult);
190
+ answer = processed.answer;
191
+ citations = processed.citations;
367
192
  answerGenerated = true;
368
193
  }
369
194
 
@@ -11,6 +11,7 @@ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
11
11
  import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
12
12
  import { getActivePreset } from '../../llm/registry';
13
13
  import type { EmbeddingPort } from '../../llm/types';
14
+ import { formatDocForEmbedding } from '../../pipeline/contextual';
14
15
  import { SqliteAdapter } from '../../store/sqlite/adapter';
15
16
  import type { StoreResult } from '../../store/types';
16
17
  import { err, ok } from '../../store/types';
@@ -131,9 +132,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
131
132
  cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
132
133
  }
133
134
 
134
- // Embed batch
135
+ // Embed batch with contextual formatting (title prefix)
135
136
  const batchEmbedResult = await ctx.embedPort.embedBatch(
136
- batch.map((b) => b.text)
137
+ batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
137
138
  );
138
139
  if (!batchEmbedResult.ok) {
139
140
  errors += batch.length;
@@ -365,9 +366,12 @@ function getActiveChunks(
365
366
  after?: { mirrorHash: string; seq: number }
366
367
  ): Promise<StoreResult<BacklogItem[]>> {
367
368
  try {
369
+ // Include title for contextual embedding
368
370
  const sql = after
369
371
  ? `
370
- SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
372
+ SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
373
+ (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
374
+ 'force' as reason
371
375
  FROM content_chunks c
372
376
  WHERE EXISTS (
373
377
  SELECT 1 FROM documents d
@@ -378,7 +382,9 @@ function getActiveChunks(
378
382
  LIMIT ?
379
383
  `
380
384
  : `
381
- SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
385
+ SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
386
+ (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
387
+ 'force' as reason
382
388
  FROM content_chunks c
383
389
  WHERE EXISTS (
384
390
  SELECT 1 FROM documents d
@@ -18,6 +18,8 @@ import type { DownloadProgress, ModelType } from '../../../llm/types';
18
18
  export interface ModelsPullOptions {
19
19
  /** Override config path */
20
20
  configPath?: string;
21
+ /** Override config object (takes precedence over configPath) */
22
+ config?: import('../../../config/types').Config;
21
23
  /** Pull all models */
22
24
  all?: boolean;
23
25
  /** Pull embedding model */
@@ -81,10 +83,13 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
81
83
  export async function modelsPull(
82
84
  options: ModelsPullOptions = {}
83
85
  ): Promise<ModelsPullResult> {
84
- // Load config (use defaults if not initialized)
85
- const { createDefaultConfig } = await import('../../../config');
86
- const configResult = await loadConfig(options.configPath);
87
- const config = configResult.ok ? configResult.value : createDefaultConfig();
86
+ // Use provided config, or load from disk (use defaults if not initialized)
87
+ let config = options.config;
88
+ if (!config) {
89
+ const { createDefaultConfig } = await import('../../../config');
90
+ const configResult = await loadConfig(options.configPath);
91
+ config = configResult.ok ? configResult.value : createDefaultConfig();
92
+ }
88
93
 
89
94
  const preset = getActivePreset(config);
90
95
  const cache = new ModelCache(getModelsCachePath());
@@ -0,0 +1,19 @@
1
+ /**
2
+ * gno serve command implementation.
3
+ * Start web UI server.
4
+ *
5
+ * @module src/cli/commands/serve
6
+ */
7
+
8
+ export type { ServeOptions, ServeResult } from '../../serve';
9
+
10
+ /**
11
+ * Execute gno serve command.
12
+ * Server runs until SIGINT/SIGTERM.
13
+ */
14
+ export async function serve(
15
+ options: import('../../serve').ServeOptions = {}
16
+ ): Promise<import('../../serve').ServeResult> {
17
+ const { startServer } = await import('../../serve');
18
+ return startServer(options);
19
+ }
@@ -7,6 +7,7 @@
7
7
 
8
8
  import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
9
9
  import { getActivePreset } from '../../llm/registry';
10
+ import { formatQueryForEmbedding } from '../../pipeline/contextual';
10
11
  import type { SearchOptions, SearchResults } from '../../pipeline/types';
11
12
  import {
12
13
  searchVectorWithEmbedding,
@@ -86,8 +87,10 @@ export async function vsearch(
86
87
  const embedPort = embedResult.value;
87
88
 
88
89
  try {
89
- // Embed query (also determines dimensions - avoids double embed)
90
- const queryEmbedResult = await embedPort.embed(query);
90
+ // Embed query with contextual formatting (also determines dimensions)
91
+ const queryEmbedResult = await embedPort.embed(
92
+ formatQueryForEmbedding(query)
93
+ );
91
94
  if (!queryEmbedResult.ok) {
92
95
  return { success: false, error: queryEmbedResult.error.message };
93
96
  }
@@ -149,6 +149,7 @@ export function createProgram(): Command {
149
149
  wireRetrievalCommands(program);
150
150
  wireMcpCommand(program);
151
151
  wireSkillCommands(program);
152
+ wireServeCommand(program);
152
153
 
153
154
  // Add docs/support links to help footer
154
155
  program.addHelpText(
@@ -1328,3 +1329,30 @@ function wireSkillCommands(program: Command): void {
1328
1329
  });
1329
1330
  });
1330
1331
  }
1332
+
1333
+ // ─────────────────────────────────────────────────────────────────────────────
1334
+ // Serve Command (web UI)
1335
+ // ─────────────────────────────────────────────────────────────────────────────
1336
+
1337
+ function wireServeCommand(program: Command): void {
1338
+ program
1339
+ .command('serve')
1340
+ .description('Start web UI server')
1341
+ .option('-p, --port <num>', 'port to listen on', '3000')
1342
+ .action(async (cmdOpts: Record<string, unknown>) => {
1343
+ const globals = getGlobals();
1344
+ const port = parsePositiveInt('port', cmdOpts.port);
1345
+
1346
+ const { serve } = await import('./commands/serve.js');
1347
+ const result = await serve({
1348
+ port,
1349
+ configPath: globals.config,
1350
+ index: globals.index,
1351
+ });
1352
+
1353
+ if (!result.success) {
1354
+ throw new CliError('RUNTIME', result.error ?? 'Server failed to start');
1355
+ }
1356
+ // Server runs until SIGINT/SIGTERM - no output needed here
1357
+ });
1358
+ }
@@ -32,11 +32,16 @@ export const DEFAULT_EXCLUDES: readonly string[] = [
32
32
  ];
33
33
 
34
34
  /** Valid FTS tokenizer options */
35
- export const FTS_TOKENIZERS = ['unicode61', 'porter', 'trigram'] as const;
35
+ export const FTS_TOKENIZERS = [
36
+ 'unicode61',
37
+ 'porter',
38
+ 'trigram',
39
+ 'snowball english',
40
+ ] as const;
36
41
  export type FtsTokenizer = (typeof FTS_TOKENIZERS)[number];
37
42
 
38
- /** Default FTS tokenizer */
39
- export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'unicode61';
43
+ /** Default FTS tokenizer - snowball english for multilingual stemming */
44
+ export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'snowball english';
40
45
 
41
46
  /**
42
47
  * BCP-47 language tag pattern (simplified, case-insensitive).
@@ -173,7 +178,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
173
178
  name: 'Slim (Fast, ~1GB)',
174
179
  embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
175
180
  rerank:
176
- 'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
181
+ 'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
177
182
  gen: 'hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf',
178
183
  },
179
184
  {
@@ -181,7 +186,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
181
186
  name: 'Balanced (Default, ~2GB)',
182
187
  embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
183
188
  rerank:
184
- 'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
189
+ 'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
185
190
  gen: 'hf:ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
186
191
  },
187
192
  {
@@ -189,7 +194,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
189
194
  name: 'Quality (Best Answers, ~2.5GB)',
190
195
  embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
191
196
  rerank:
192
- 'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
197
+ 'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
193
198
  gen: 'hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf',
194
199
  },
195
200
  ];
@@ -19,7 +19,9 @@ import type { ModelType } from './types';
19
19
  export function getModelConfig(config: Config): ModelConfig {
20
20
  return {
21
21
  activePreset: config.models?.activePreset ?? 'balanced',
22
- presets: config.models?.presets ?? DEFAULT_MODEL_PRESETS,
22
+ presets: config.models?.presets?.length
23
+ ? config.models.presets
24
+ : DEFAULT_MODEL_PRESETS,
23
25
  loadTimeout: config.models?.loadTimeout ?? 60_000,
24
26
  inferenceTimeout: config.models?.inferenceTimeout ?? 30_000,
25
27
  warmModelTtl: config.models?.warmModelTtl ?? 300_000,
@@ -8,6 +8,7 @@ import { join as pathJoin } from 'node:path';
8
8
  import { parseUri } from '../../app/constants';
9
9
  import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
10
10
  import { getActivePreset } from '../../llm/registry';
11
+ import { formatQueryForEmbedding } from '../../pipeline/contextual';
11
12
  import type { SearchResult, SearchResults } from '../../pipeline/types';
12
13
  import {
13
14
  searchVectorWithEmbedding,
@@ -121,8 +122,10 @@ export function handleVsearch(
121
122
  const embedPort = embedResult.value;
122
123
 
123
124
  try {
124
- // Embed query
125
- const queryEmbedResult = await embedPort.embed(args.query);
125
+ // Embed query with contextual formatting
126
+ const queryEmbedResult = await embedPort.embed(
127
+ formatQueryForEmbedding(args.query)
128
+ );
126
129
  if (!queryEmbedResult.ok) {
127
130
  throw new Error(queryEmbedResult.error.message);
128
131
  }