npm - @gmickel/gno - Versions diffs - 0.3.5 → 0.5.0 - Mend

@gmickel/gno 0.3.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

package/README.md +74 -7
package/package.json +30 -1
package/src/cli/commands/ask.ts +12 -187
package/src/cli/commands/embed.ts +10 -4
package/src/cli/commands/models/pull.ts +9 -4
package/src/cli/commands/serve.ts +19 -0
package/src/cli/commands/vsearch.ts +5 -2
package/src/cli/program.ts +28 -0
package/src/config/types.ts +11 -6
package/src/llm/registry.ts +3 -1
package/src/mcp/tools/vsearch.ts +5 -2
package/src/pipeline/answer.ts +224 -0
package/src/pipeline/contextual.ts +57 -0
package/src/pipeline/expansion.ts +49 -31
package/src/pipeline/explain.ts +11 -3
package/src/pipeline/fusion.ts +20 -9
package/src/pipeline/hybrid.ts +57 -40
package/src/pipeline/index.ts +7 -0
package/src/pipeline/rerank.ts +55 -27
package/src/pipeline/types.ts +0 -3
package/src/pipeline/vsearch.ts +3 -2
package/src/serve/CLAUDE.md +91 -0
package/src/serve/bunfig.toml +2 -0
package/src/serve/context.ts +181 -0
package/src/serve/index.ts +7 -0
package/src/serve/public/app.tsx +56 -0
package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
package/src/serve/public/components/ai-elements/loader.tsx +96 -0
package/src/serve/public/components/ai-elements/message.tsx +443 -0
package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
package/src/serve/public/components/ai-elements/sources.tsx +75 -0
package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
package/src/serve/public/components/preset-selector.tsx +403 -0
package/src/serve/public/components/ui/badge.tsx +46 -0
package/src/serve/public/components/ui/button-group.tsx +82 -0
package/src/serve/public/components/ui/button.tsx +62 -0
package/src/serve/public/components/ui/card.tsx +92 -0
package/src/serve/public/components/ui/carousel.tsx +244 -0
package/src/serve/public/components/ui/collapsible.tsx +31 -0
package/src/serve/public/components/ui/command.tsx +181 -0
package/src/serve/public/components/ui/dialog.tsx +141 -0
package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
package/src/serve/public/components/ui/hover-card.tsx +42 -0
package/src/serve/public/components/ui/input-group.tsx +167 -0
package/src/serve/public/components/ui/input.tsx +21 -0
package/src/serve/public/components/ui/progress.tsx +28 -0
package/src/serve/public/components/ui/scroll-area.tsx +56 -0
package/src/serve/public/components/ui/select.tsx +188 -0
package/src/serve/public/components/ui/separator.tsx +26 -0
package/src/serve/public/components/ui/table.tsx +114 -0
package/src/serve/public/components/ui/textarea.tsx +18 -0
package/src/serve/public/components/ui/tooltip.tsx +59 -0
package/src/serve/public/globals.css +226 -0
package/src/serve/public/hooks/use-api.ts +112 -0
package/src/serve/public/index.html +13 -0
package/src/serve/public/pages/Ask.tsx +442 -0
package/src/serve/public/pages/Browse.tsx +270 -0
package/src/serve/public/pages/Dashboard.tsx +202 -0
package/src/serve/public/pages/DocView.tsx +302 -0
package/src/serve/public/pages/Search.tsx +335 -0
package/src/serve/routes/api.ts +763 -0
package/src/serve/server.ts +249 -0
package/src/store/migrations/002-documents-fts.ts +40 -0
package/src/store/migrations/index.ts +2 -1
package/src/store/sqlite/adapter.ts +216 -33
package/src/store/sqlite/fts5-snowball.ts +144 -0
package/src/store/types.ts +33 -3
package/src/store/vector/stats.ts +3 -0
package/src/store/vector/types.ts +1 -0

package/README.md CHANGED Viewed

@@ -16,6 +16,8 @@ GNO is a local knowledge engine for privacy-conscious developers and AI agents.
 - [Quick Start](#quick-start)
 - [Installation](#installation)
 - [Search Modes](#search-modes)
+- [Web UI](#web-ui)
+- [REST API](#rest-api)
 - [Agent Integration](#agent-integration)
 - [How It Works](#how-it-works)
 - [Features](#features)
@@ -97,11 +99,14 @@ gno skill install --target all       # Both Claude + Codex
 | Command | Mode | Best For |
 |:--------|:-----|:---------|
-| `gno search` | BM25 | Exact phrases, code identifiers |
-| `gno vsearch` | Vector | Natural language, concepts |
+| `gno search` | Document-level BM25 | Exact phrases, code identifiers |
+| `gno vsearch` | Contextual Vector | Natural language, concepts |
 | `gno query` | Hybrid | Best accuracy (BM25 + vector + reranking) |
 | `gno ask --answer` | RAG | Direct answers with citations |
+**BM25** indexes full documents (not chunks) with Snowball stemming—"running" matches "run".
+**Vector** embeds chunks with document titles for context awareness.
 ```bash
 gno search "handleAuth"              # Find exact matches
 gno vsearch "error handling patterns" # Semantic similarity
@@ -113,6 +118,65 @@ Output formats: `--json`, `--files`, `--csv`, `--md`, `--xml`
 ---
+## Web UI
+Visual dashboard for search, browsing, and AI answers—right in your browser.
+```bash
+gno serve                    # Start on port 3000
+gno serve --port 8080        # Custom port
+```
+Open `http://localhost:3000` to:
+- **Search** — BM25, vector, or hybrid modes with visual results
+- **Browse** — Paginated document list, filter by collection
+- **Ask** — AI-powered Q&A with citations
+- **Switch presets** — Change models live without restart
+Everything runs locally. No cloud, no accounts, no data leaving your machine.
+> **Detailed docs**: [Web UI Guide](https://gno.sh/docs/WEB-UI/)
+---
+## REST API
+Programmatic access to all GNO features via HTTP.
+```bash
+# Hybrid search
+curl -X POST http://localhost:3000/api/query \
+  -H "Content-Type: application/json" \
+  -d '{"query": "authentication patterns", "limit": 10}'
+# AI answer
+curl -X POST http://localhost:3000/api/ask \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is our deployment process?"}'
+# Index status
+curl http://localhost:3000/api/status
+```
+| Endpoint | Method | Description |
+|:---------|:-------|:------------|
+| `/api/query` | POST | Hybrid search (recommended) |
+| `/api/search` | POST | BM25 keyword search |
+| `/api/ask` | POST | AI-powered Q&A |
+| `/api/docs` | GET | List documents |
+| `/api/doc` | GET | Get document content |
+| `/api/status` | GET | Index statistics |
+| `/api/presets` | GET/POST | Model preset management |
+| `/api/models/pull` | POST | Download models |
+| `/api/models/status` | GET | Download progress |
+No authentication. No rate limits. Build custom tools, automate workflows, integrate with any language.
+> **Full reference**: [API Documentation](https://gno.sh/docs/API/)
+---
 ## Agent Integration
 ### MCP Server
@@ -169,10 +233,11 @@ graph TD
     M --> N[Final Results]
 ```
+0. **Strong Signal Check** — Skip expansion if BM25 has a confident match (saves 1-3s)
 1. **Query Expansion** — LLM generates lexical variants, semantic rephrases, and a [HyDE](https://arxiv.org/abs/2212.10496) passage
-2. **Parallel Retrieval** — BM25 + vector search run concurrently on all variants
-3. **Fusion** — Reciprocal Rank Fusion merges results with position-based scoring
-4. **Reranking** — Cross-encoder rescores top 20, blended with fusion scores
+2. **Parallel Retrieval** — Document-level BM25 + chunk-level vector search on all variants
+3. **Fusion** — RRF with 2× weight for original query, tiered bonus for top ranks
+4. **Reranking** — Qwen3-Reranker scores full documents (32K context), blended with fusion
 > **Deep dive**: [How Search Works](https://gno.sh/docs/HOW-SEARCH-WORKS/)
@@ -183,6 +248,8 @@ graph TD
 | Feature | Description |
 |:--------|:------------|
 | **Hybrid Search** | BM25 + vector + RRF fusion + cross-encoder reranking |
+| **Web UI** | Visual dashboard for search, browse, and AI Q&A |
+| **REST API** | HTTP API for custom tools and integrations |
 | **Multi-Format** | Markdown, PDF, DOCX, XLSX, PPTX, plain text |
 | **Local LLM** | AI answers via llama.cpp—no API keys |
 | **Privacy First** | 100% offline, zero telemetry, your data stays yours |
@@ -200,7 +267,7 @@ Models auto-download on first use to `~/.cache/gno/models/`.
 | Model | Purpose | Size |
 |:------|:--------|:-----|
 | bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
-| bge-reranker-v2-m3 | Cross-encoder reranking | ~700MB |
+| Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
 | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
 ### Model Presets
@@ -224,7 +291,7 @@ gno models pull --all
 ```
 ┌─────────────────────────────────────────────────┐
-│                 GNO CLI / MCP                   │
+│            GNO CLI / MCP / Web UI / API         │
 ├─────────────────────────────────────────────────┤
 │  Ports: Converter, Store, Embedding, Rerank    │
 ├─────────────────────────────────────────────────┤

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gmickel/gno",
-  "version": "0.3.5",
+  "version": "0.5.0",
   "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
   "keywords": [
     "search",
@@ -56,6 +56,8 @@
     "website:dev": "cd website && make serve",
     "website:build": "cd website && make build",
     "website:demos": "cd website/demos && ./build-demos.sh",
+    "serve": "bun src/index.ts serve",
+    "serve:dev": "NODE_ENV=development bun --hot src/index.ts serve",
     "version:patch": "npm version patch --no-git-tag-version",
     "version:minor": "npm version minor --no-git-tag-version",
     "version:major": "npm version major --no-git-tag-version",
@@ -65,18 +67,44 @@
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.25.1",
+    "@radix-ui/react-collapsible": "^1.1.12",
+    "@radix-ui/react-dialog": "^1.1.15",
+    "@radix-ui/react-dropdown-menu": "^2.1.16",
+    "@radix-ui/react-hover-card": "^1.1.15",
+    "@radix-ui/react-progress": "^1.1.8",
+    "@radix-ui/react-scroll-area": "^1.2.10",
+    "@radix-ui/react-select": "^2.2.6",
+    "@radix-ui/react-separator": "^1.1.8",
+    "@radix-ui/react-slot": "^1.2.4",
+    "@radix-ui/react-tooltip": "^1.2.8",
+    "ai": "^6.0.5",
+    "bun-plugin-tailwind": "^0.1.2",
+    "class-variance-authority": "^0.7.1",
+    "clsx": "^2.1.1",
+    "cmdk": "^1.1.1",
     "commander": "^14.0.2",
+    "embla-carousel-react": "^8.6.0",
     "franc": "^6.2.0",
+    "lucide-react": "^0.562.0",
     "markitdown-ts": "^0.0.8",
+    "nanoid": "^5.1.6",
     "node-llama-cpp": "^3.14.5",
     "officeparser": "^5.2.2",
     "picocolors": "^1.1.1",
+    "react": "^19.2.3",
+    "react-dom": "^19.2.3",
+    "shiki": "^3.20.0",
     "sqlite-vec": "^0.1.7-alpha.2",
+    "streamdown": "^1.6.10",
+    "tailwind-merge": "^3.4.0",
+    "use-stick-to-bottom": "^1.1.1",
     "zod": "^4.2.1"
   },
   "devDependencies": {
     "@biomejs/biome": "2.3.10",
     "@types/bun": "latest",
+    "@types/react": "^19.2.7",
+    "@types/react-dom": "^19.2.3",
     "@typescript/native-preview": "^7.0.0-dev.20251215.1",
     "ajv": "^8.17.1",
     "ajv-formats": "^3.0.1",
@@ -87,6 +115,7 @@
     "oxlint-tsgolint": "^0.10.0",
     "pdf-lib": "^1.17.1",
     "pptxgenjs": "^4.0.1",
+    "tailwindcss": "^4.1.18",
     "ultracite": "^6.5.0"
   },
   "peerDependencies": {

package/src/cli/commands/ask.ts CHANGED Viewed

@@ -12,13 +12,12 @@ import type {
   GenerationPort,
   RerankPort,
 } from '../../llm/types';
+import {
+  generateGroundedAnswer,
+  processAnswerResult,
+} from '../../pipeline/answer';
 import { type HybridSearchDeps, searchHybrid } from '../../pipeline/hybrid';
-import type {
-  AskOptions,
-  AskResult,
-  Citation,
-  SearchResult,
-} from '../../pipeline/types';
+import type { AskOptions, AskResult, Citation } from '../../pipeline/types';
 import {
   createVectorIndexPort,
   type VectorIndexPort,
@@ -50,163 +49,6 @@ export type AskCommandResult =
   | { success: true; data: AskResult }
   | { success: false; error: string };
-// ─────────────────────────────────────────────────────────────────────────────
-// Grounded Answer Generation
-// ─────────────────────────────────────────────────────────────────────────────
-const ANSWER_PROMPT = `You are answering a question using ONLY the provided context blocks.
-Rules you MUST follow:
-1) Use ONLY facts stated in the context blocks. Do NOT use outside knowledge.
-2) Every factual statement must include an inline citation like [1] or [2] referring to a context block.
-3) If the context does not contain enough information to answer, reply EXACTLY:
-   "I don't have enough information in the provided sources to answer this question."
-4) Do not cite sources you did not use. Do not invent citation numbers.
-Question: {query}
-Context blocks:
-{context}
-Write a concise answer (1-3 paragraphs).`;
-/** Abstention message when LLM cannot ground answer */
-const ABSTENTION_MESSAGE =
-  "I don't have enough information in the provided sources to answer this question.";
-// Max characters per snippet to avoid blowing up prompt size
-const MAX_SNIPPET_CHARS = 1500;
-// Max number of sources to include in context
-const MAX_CONTEXT_SOURCES = 5;
-/**
- * Extract VALID citation numbers from answer text.
- * Only returns numbers in range [1, maxCitation].
- * @param answer Answer text to parse
- * @param maxCitation Maximum valid citation number
- * @returns Sorted unique valid citation numbers (1-indexed)
- */
-function extractValidCitationNumbers(
-  answer: string,
-  maxCitation: number
-): number[] {
-  const nums = new Set<number>();
-  // Use fresh regex to avoid lastIndex issues
-  const re = /\[(\d+)\]/g;
-  const matches = answer.matchAll(re);
-  for (const match of matches) {
-    const n = Number(match[1]);
-    // Only accept valid citation numbers in range [1, maxCitation]
-    if (Number.isInteger(n) && n >= 1 && n <= maxCitation) {
-      nums.add(n);
-    }
-  }
-  return [...nums].sort((a, b) => a - b);
-}
-/**
- * Filter citations to only those actually referenced in the answer.
- * @param citations All citations provided to LLM
- * @param validUsedNumbers Valid 1-indexed citation numbers from answer
- */
-function filterCitationsByUse(
-  citations: Citation[],
-  validUsedNumbers: number[]
-): Citation[] {
-  const usedSet = new Set(validUsedNumbers);
-  return citations.filter((_, idx) => usedSet.has(idx + 1));
-}
-/**
- * Renumber citations in answer text to match filtered citations.
- * E.g., if answer uses [2] and [5], renumber to [1] and [2].
- * Invalid citations (not in validUsedNumbers) are removed.
- */
-function renumberAnswerCitations(
-  answer: string,
-  validUsedNumbers: number[]
-): string {
-  // Build mapping: old number -> new number (1-indexed)
-  const mapping = new Map<number, number>();
-  for (let i = 0; i < validUsedNumbers.length; i++) {
-    const oldNum = validUsedNumbers[i];
-    if (oldNum !== undefined) {
-      mapping.set(oldNum, i + 1);
-    }
-  }
-  // Use fresh regex to avoid lastIndex issues
-  const re = /\[(\d+)\]/g;
-  // Replace valid [n] with renumbered [m], remove invalid citations
-  const replaced = answer.replace(re, (_match, numStr: string) => {
-    const oldNum = Number(numStr);
-    const newNum = mapping.get(oldNum);
-    // If not in mapping, remove the citation entirely
-    return newNum !== undefined ? `[${newNum}]` : '';
-  });
-  // Clean up whitespace artifacts from removed citations
-  // e.g., "See [99] for" → "See  for" → "See for"
-  return replaced.replace(/ {2,}/g, ' ').trim();
-}
-async function generateGroundedAnswer(
-  genPort: GenerationPort,
-  query: string,
-  results: SearchResult[],
-  maxTokens: number
-): Promise<{ answer: string; citations: Citation[] } | null> {
-  // Build context from top results with bounded snippet sizes
-  const contextParts: string[] = [];
-  const citations: Citation[] = [];
-  // Track citation index separately to ensure it matches context blocks exactly
-  let citationIndex = 0;
-  for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
-    // Skip results with empty snippets
-    if (!r.snippet || r.snippet.trim().length === 0) {
-      continue;
-    }
-    // Cap snippet length to avoid prompt blowup
-    const snippet =
-      r.snippet.length > MAX_SNIPPET_CHARS
-        ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
-        : r.snippet;
-    citationIndex += 1;
-    contextParts.push(`[${citationIndex}] ${snippet}`);
-    citations.push({
-      docid: r.docid,
-      uri: r.uri,
-      startLine: r.snippetRange?.startLine,
-      endLine: r.snippetRange?.endLine,
-    });
-  }
-  // If no valid context, can't generate answer
-  if (contextParts.length === 0) {
-    return null;
-  }
-  const prompt = ANSWER_PROMPT.replace('{query}', query).replace(
-    '{context}',
-    contextParts.join('\n\n')
-  );
-  const result = await genPort.generate(prompt, {
-    temperature: 0,
-    maxTokens,
-  });
-  if (!result.ok) {
-    return null;
-  }
-  return { answer: result.value, citations };
-}
 // ─────────────────────────────────────────────────────────────────────────────
 // Command Implementation
 // ─────────────────────────────────────────────────────────────────────────────
@@ -327,15 +169,15 @@ export async function ask(
     if (shouldGenerateAnswer && genPort) {
       const maxTokens = options.maxAnswerTokens ?? 512;
-      const answerResult = await generateGroundedAnswer(
-        genPort,
+      const rawResult = await generateGroundedAnswer(
+        { genPort, store },
         query,
         results,
         maxTokens
       );
       // Fail loudly if generation was requested but failed
-      if (!answerResult) {
+      if (!rawResult) {
         return {
           success: false,
           error:
@@ -343,27 +185,10 @@ export async function ask(
         };
       }
-      // Extract only VALID citation numbers (in range 1..citations.length)
-      const maxCitation = answerResult.citations.length;
-      const validUsedNums = extractValidCitationNumbers(
-        answerResult.answer,
-        maxCitation
-      );
-      const filteredCitations = filterCitationsByUse(
-        answerResult.citations,
-        validUsedNums
-      );
-      // Abstention guard: if no valid citations, LLM didn't ground the answer
-      if (validUsedNums.length === 0 || filteredCitations.length === 0) {
-        answer = ABSTENTION_MESSAGE;
-        citations = [];
-      } else {
-        // Renumber citations in answer to match filtered list (e.g., [2],[5] -> [1],[2])
-        // Invalid citations are removed from the answer text
-        answer = renumberAnswerCitations(answerResult.answer, validUsedNums);
-        citations = filteredCitations;
-      }
+      // Process answer: extract valid citations, filter, renumber
+      const processed = processAnswerResult(rawResult);
+      answer = processed.answer;
+      citations = processed.citations;
       answerGenerated = true;
     }

package/src/cli/commands/embed.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
 import type { EmbeddingPort } from '../../llm/types';
+import { formatDocForEmbedding } from '../../pipeline/contextual';
 import { SqliteAdapter } from '../../store/sqlite/adapter';
 import type { StoreResult } from '../../store/types';
 import { err, ok } from '../../store/types';
@@ -131,9 +132,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
       cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
     }
-    // Embed batch
+    // Embed batch with contextual formatting (title prefix)
     const batchEmbedResult = await ctx.embedPort.embedBatch(
-      batch.map((b) => b.text)
+      batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
     );
     if (!batchEmbedResult.ok) {
       errors += batch.length;
@@ -365,9 +366,12 @@ function getActiveChunks(
   after?: { mirrorHash: string; seq: number }
 ): Promise<StoreResult<BacklogItem[]>> {
   try {
+    // Include title for contextual embedding
     const sql = after
       ? `
-        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+        SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+          (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+          'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d
@@ -378,7 +382,9 @@ function getActiveChunks(
         LIMIT ?
       `
       : `
-        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+        SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+          (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+          'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d

package/src/cli/commands/models/pull.ts CHANGED Viewed

@@ -18,6 +18,8 @@ import type { DownloadProgress, ModelType } from '../../../llm/types';
 export interface ModelsPullOptions {
   /** Override config path */
   configPath?: string;
+  /** Override config object (takes precedence over configPath) */
+  config?: import('../../../config/types').Config;
   /** Pull all models */
   all?: boolean;
   /** Pull embedding model */
@@ -81,10 +83,13 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
 export async function modelsPull(
   options: ModelsPullOptions = {}
 ): Promise<ModelsPullResult> {
-  // Load config (use defaults if not initialized)
-  const { createDefaultConfig } = await import('../../../config');
-  const configResult = await loadConfig(options.configPath);
-  const config = configResult.ok ? configResult.value : createDefaultConfig();
+  // Use provided config, or load from disk (use defaults if not initialized)
+  let config = options.config;
+  if (!config) {
+    const { createDefaultConfig } = await import('../../../config');
+    const configResult = await loadConfig(options.configPath);
+    config = configResult.ok ? configResult.value : createDefaultConfig();
+  }
   const preset = getActivePreset(config);
   const cache = new ModelCache(getModelsCachePath());

package/src/cli/commands/serve.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * gno serve command implementation.
+ * Start web UI server.
+ *
+ * @module src/cli/commands/serve
+ */
+export type { ServeOptions, ServeResult } from '../../serve';
+/**
+ * Execute gno serve command.
+ * Server runs until SIGINT/SIGTERM.
+ */
+export async function serve(
+  options: import('../../serve').ServeOptions = {}
+): Promise<import('../../serve').ServeResult> {
+  const { startServer } = await import('../../serve');
+  return startServer(options);
+}

package/src/cli/commands/vsearch.ts CHANGED Viewed

@@ -7,6 +7,7 @@
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchOptions, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -86,8 +87,10 @@ export async function vsearch(
     const embedPort = embedResult.value;
     try {
-      // Embed query (also determines dimensions - avoids double embed)
-      const queryEmbedResult = await embedPort.embed(query);
+      // Embed query with contextual formatting (also determines dimensions)
+      const queryEmbedResult = await embedPort.embed(
+        formatQueryForEmbedding(query)
+      );
       if (!queryEmbedResult.ok) {
         return { success: false, error: queryEmbedResult.error.message };
       }

package/src/cli/program.ts CHANGED Viewed

@@ -149,6 +149,7 @@ export function createProgram(): Command {
   wireRetrievalCommands(program);
   wireMcpCommand(program);
   wireSkillCommands(program);
+  wireServeCommand(program);
   // Add docs/support links to help footer
   program.addHelpText(
@@ -1328,3 +1329,30 @@ function wireSkillCommands(program: Command): void {
       });
     });
 }
+// ─────────────────────────────────────────────────────────────────────────────
+// Serve Command (web UI)
+// ─────────────────────────────────────────────────────────────────────────────
+function wireServeCommand(program: Command): void {
+  program
+    .command('serve')
+    .description('Start web UI server')
+    .option('-p, --port <num>', 'port to listen on', '3000')
+    .action(async (cmdOpts: Record<string, unknown>) => {
+      const globals = getGlobals();
+      const port = parsePositiveInt('port', cmdOpts.port);
+      const { serve } = await import('./commands/serve.js');
+      const result = await serve({
+        port,
+        configPath: globals.config,
+        index: globals.index,
+      });
+      if (!result.success) {
+        throw new CliError('RUNTIME', result.error ?? 'Server failed to start');
+      }
+      // Server runs until SIGINT/SIGTERM - no output needed here
+    });
+}

package/src/config/types.ts CHANGED Viewed

@@ -32,11 +32,16 @@ export const DEFAULT_EXCLUDES: readonly string[] = [
 ];
 /** Valid FTS tokenizer options */
-export const FTS_TOKENIZERS = ['unicode61', 'porter', 'trigram'] as const;
+export const FTS_TOKENIZERS = [
+  'unicode61',
+  'porter',
+  'trigram',
+  'snowball english',
+] as const;
 export type FtsTokenizer = (typeof FTS_TOKENIZERS)[number];
-/** Default FTS tokenizer */
-export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'unicode61';
+/** Default FTS tokenizer - snowball english (Snowball stemmer, English only) */
+export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'snowball english';
 /**
  * BCP-47 language tag pattern (simplified, case-insensitive).
@@ -173,7 +178,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Slim (Fast, ~1GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf',
   },
   {
@@ -181,7 +186,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Balanced (Default, ~2GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
   },
   {
@@ -189,7 +194,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Quality (Best Answers, ~2.5GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf',
   },
 ];

package/src/llm/registry.ts CHANGED Viewed

@@ -19,7 +19,9 @@ import type { ModelType } from './types';
 export function getModelConfig(config: Config): ModelConfig {
   return {
     activePreset: config.models?.activePreset ?? 'balanced',
-    presets: config.models?.presets ?? DEFAULT_MODEL_PRESETS,
+    presets: config.models?.presets?.length
+      ? config.models.presets
+      : DEFAULT_MODEL_PRESETS,
     loadTimeout: config.models?.loadTimeout ?? 60_000,
     inferenceTimeout: config.models?.inferenceTimeout ?? 30_000,
     warmModelTtl: config.models?.warmModelTtl ?? 300_000,

package/src/mcp/tools/vsearch.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { join as pathJoin } from 'node:path';
 import { parseUri } from '../../app/constants';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchResult, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -121,8 +122,10 @@ export function handleVsearch(
       const embedPort = embedResult.value;
       try {
-        // Embed query
-        const queryEmbedResult = await embedPort.embed(args.query);
+        // Embed query with contextual formatting
+        const queryEmbedResult = await embedPort.embed(
+          formatQueryForEmbedding(args.query)
+        );
         if (!queryEmbedResult.ok) {
           throw new Error(queryEmbedResult.error.message);
         }