npm - @gmickel/gno - Versions diffs - 0.3.5 → 0.5.0 - Mend

@gmickel/gno 0.3.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/README.md +74 -7
package/package.json +30 -1
package/src/cli/commands/ask.ts +12 -187
package/src/cli/commands/embed.ts +10 -4
package/src/cli/commands/models/pull.ts +9 -4
package/src/cli/commands/serve.ts +19 -0
package/src/cli/commands/vsearch.ts +5 -2
package/src/cli/program.ts +28 -0
package/src/config/types.ts +11 -6
package/src/llm/registry.ts +3 -1
package/src/mcp/tools/vsearch.ts +5 -2
package/src/pipeline/answer.ts +224 -0
package/src/pipeline/contextual.ts +57 -0
package/src/pipeline/expansion.ts +49 -31
package/src/pipeline/explain.ts +11 -3
package/src/pipeline/fusion.ts +20 -9
package/src/pipeline/hybrid.ts +57 -40
package/src/pipeline/index.ts +7 -0
package/src/pipeline/rerank.ts +55 -27
package/src/pipeline/types.ts +0 -3
package/src/pipeline/vsearch.ts +3 -2
package/src/serve/CLAUDE.md +91 -0
package/src/serve/bunfig.toml +2 -0
package/src/serve/context.ts +181 -0
package/src/serve/index.ts +7 -0
package/src/serve/public/app.tsx +56 -0
package/src/serve/public/components/ai-elements/code-block.tsx +176 -0
package/src/serve/public/components/ai-elements/conversation.tsx +98 -0
package/src/serve/public/components/ai-elements/inline-citation.tsx +285 -0
package/src/serve/public/components/ai-elements/loader.tsx +96 -0
package/src/serve/public/components/ai-elements/message.tsx +443 -0
package/src/serve/public/components/ai-elements/prompt-input.tsx +1421 -0
package/src/serve/public/components/ai-elements/sources.tsx +75 -0
package/src/serve/public/components/ai-elements/suggestion.tsx +51 -0
package/src/serve/public/components/preset-selector.tsx +403 -0
package/src/serve/public/components/ui/badge.tsx +46 -0
package/src/serve/public/components/ui/button-group.tsx +82 -0
package/src/serve/public/components/ui/button.tsx +62 -0
package/src/serve/public/components/ui/card.tsx +92 -0
package/src/serve/public/components/ui/carousel.tsx +244 -0
package/src/serve/public/components/ui/collapsible.tsx +31 -0
package/src/serve/public/components/ui/command.tsx +181 -0
package/src/serve/public/components/ui/dialog.tsx +141 -0
package/src/serve/public/components/ui/dropdown-menu.tsx +255 -0
package/src/serve/public/components/ui/hover-card.tsx +42 -0
package/src/serve/public/components/ui/input-group.tsx +167 -0
package/src/serve/public/components/ui/input.tsx +21 -0
package/src/serve/public/components/ui/progress.tsx +28 -0
package/src/serve/public/components/ui/scroll-area.tsx +56 -0
package/src/serve/public/components/ui/select.tsx +188 -0
package/src/serve/public/components/ui/separator.tsx +26 -0
package/src/serve/public/components/ui/table.tsx +114 -0
package/src/serve/public/components/ui/textarea.tsx +18 -0
package/src/serve/public/components/ui/tooltip.tsx +59 -0
package/src/serve/public/globals.css +226 -0
package/src/serve/public/hooks/use-api.ts +112 -0
package/src/serve/public/index.html +13 -0
package/src/serve/public/pages/Ask.tsx +442 -0
package/src/serve/public/pages/Browse.tsx +270 -0
package/src/serve/public/pages/Dashboard.tsx +202 -0
package/src/serve/public/pages/DocView.tsx +302 -0
package/src/serve/public/pages/Search.tsx +335 -0
package/src/serve/routes/api.ts +763 -0
package/src/serve/server.ts +249 -0
package/src/store/migrations/002-documents-fts.ts +40 -0
package/src/store/migrations/index.ts +2 -1
package/src/store/sqlite/adapter.ts +216 -33
package/src/store/sqlite/fts5-snowball.ts +144 -0
package/src/store/types.ts +33 -3
package/src/store/vector/stats.ts +3 -0
package/src/store/vector/types.ts +1 -0

package/src/pipeline/answer.ts ADDED Viewed

@@ -0,0 +1,224 @@
+/**
+ * Grounded answer generation.
+ * Shared between CLI ask command and web API.
+ *
+ * @module src/pipeline/answer
+ */
+import type { GenerationPort } from '../llm/types';
+import type { StorePort } from '../store/types';
+import type { Citation, SearchResult } from './types';
+// ─────────────────────────────────────────────────────────────────────────────
+// Constants
+// ─────────────────────────────────────────────────────────────────────────────
+const ANSWER_PROMPT = `You are answering a question using ONLY the provided context blocks.
+Rules you MUST follow:
+1) Use ONLY facts stated in the context blocks. Do NOT use outside knowledge.
+2) Every factual statement must include an inline citation like [1] or [2] referring to a context block.
+3) If the context does not contain enough information to answer, reply EXACTLY:
+   "I don't have enough information in the provided sources to answer this question."
+4) Do not cite sources you did not use. Do not invent citation numbers.
+Question: {query}
+Context blocks:
+{context}
+Write a concise answer (1-3 paragraphs).`; // {query} and {context} are placeholders filled in by generateGroundedAnswer
+/**
+ * Abstention message when LLM cannot ground answer.
+ * Same sentence that rule 3 of the prompt tells the model to reply with;
+ * processAnswerResult also returns it when the answer has no valid citation.
+ */
+export const ABSTENTION_MESSAGE =
+  "I don't have enough information in the provided sources to answer this question.";
+/**
+ * Max characters per document (~8K tokens).
+ * Longer full-document content is cut at this length and marked "[... truncated ...]".
+ */
+const MAX_DOC_CHARS = 32_000;
+/**
+ * Max number of sources - fewer docs but full content.
+ * Only the first results up to this count are turned into context blocks.
+ */
+const MAX_CONTEXT_SOURCES = 3;
+/**
+ * Fallback snippet limit when full content unavailable.
+ * Longer snippets are cut at this length and suffixed with "...".
+ */
+const MAX_SNIPPET_CHARS = 1500;
+// ─────────────────────────────────────────────────────────────────────────────
+// Citation Processing
+// ─────────────────────────────────────────────────────────────────────────────
+/**
+ * Collect the distinct citation markers of the form `[n]` found in `answer`
+ * whose number lies within 1..maxCitation.
+ *
+ * @param answer - Text to scan for bracketed numbers.
+ * @param maxCitation - Highest citation number considered valid.
+ * @returns Unique valid citation numbers in ascending order.
+ */
+export function extractValidCitationNumbers(
+  answer: string,
+  maxCitation: number
+): number[] {
+  const isValid = (value: number): boolean =>
+    Number.isInteger(value) && value >= 1 && value <= maxCitation;
+  const cited = Array.from(answer.matchAll(/\[(\d+)\]/g), (hit) =>
+    Number(hit[1])
+  );
+  const unique = new Set(cited.filter(isValid));
+  return Array.from(unique).sort((lhs, rhs) => lhs - rhs);
+}
+/**
+ * Keep only the citations whose 1-based position appears in `validUsedNumbers`.
+ *
+ * @param citations - All citations, where index i corresponds to marker [i + 1].
+ * @param validUsedNumbers - Citation numbers that the answer actually references.
+ * @returns The referenced citations, in their original order.
+ */
+export function filterCitationsByUse(
+  citations: Citation[],
+  validUsedNumbers: number[]
+): Citation[] {
+  const referenced = new Set<number>(validUsedNumbers);
+  const kept: Citation[] = [];
+  citations.forEach((citation, position) => {
+    // Citation markers are 1-based, array positions are 0-based
+    if (referenced.has(position + 1)) {
+      kept.push(citation);
+    }
+  });
+  return kept;
+}
+/**
+ * Renumber citations in answer text to match filtered citations.
+ * E.g., if answer uses [2] and [5], renumber to [1] and [2].
+ * Invalid citations (not in validUsedNumbers) are removed together with the
+ * spaces immediately before them, so "fact [9]." becomes "fact." rather than
+ * "fact .".
+ *
+ * @param answer - Raw answer text containing bracketed citations such as `[3]`.
+ * @param validUsedNumbers - Citation numbers to keep; the number at position i
+ *   is rewritten to i + 1.
+ * @returns The answer with citations renumbered, runs of spaces collapsed to a
+ *   single space, and leading/trailing whitespace trimmed.
+ */
+export function renumberAnswerCitations(
+  answer: string,
+  validUsedNumbers: number[]
+): string {
+  // Old citation number -> new 1-based number
+  const mapping = new Map<number, number>();
+  validUsedNumbers.forEach((oldNum, position) => {
+    mapping.set(oldNum, position + 1);
+  });
+  // Capture the spaces in front of each marker so they can be dropped along
+  // with an invalid citation (spaces only: line breaks are left untouched).
+  const re = /( *)\[(\d+)\]/g;
+  const replaced = answer.replace(
+    re,
+    (_match, spaces: string, numStr: string) => {
+      const newNum = mapping.get(Number(numStr));
+      return newNum !== undefined ? `${spaces}[${newNum}]` : '';
+    }
+  );
+  return replaced.replace(/ {2,}/g, ' ').trim();
+}
+// ─────────────────────────────────────────────────────────────────────────────
+// Answer Generation
+// ─────────────────────────────────────────────────────────────────────────────
+export interface AnswerGenerationResult { // returned by generateGroundedAnswer; accepted by processAnswerResult
+  answer: string; // answer text with inline [n] citation markers
+  citations: Citation[]; // as built by generateGroundedAnswer, citations[i] describes context block [i + 1]
+}
+export interface AnswerGenerationDeps { // dependencies passed to generateGroundedAnswer
+  genPort: GenerationPort; // generates the answer text from the assembled prompt
+  store: StorePort | null; // used to fetch full document content; when null, result snippets are used instead
+}
+/**
+ * Generate a grounded answer from search results.
+ * Returns null if no valid context or generation fails.
+ *
+ * When store is provided, fetches full document content for better context.
+ * Falls back to snippets if store unavailable or content fetch fails.
+ * Results with neither full content nor a non-blank snippet are skipped and
+ * do not consume a citation number.
+ *
+ * @param deps - Generation port plus an optional store for full-content lookup.
+ * @param query - User question; inserted literally into the prompt.
+ * @param results - Search results; only the first MAX_CONTEXT_SOURCES entries
+ *   are considered.
+ * @param maxTokens - Token limit forwarded to the generation port.
+ * @returns The raw generated answer together with one citation per context
+ *   block (citations[i] belongs to block [i + 1]), or null when there is no
+ *   usable context or generation fails.
+ */
+// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sequential content processing with fallbacks
+export async function generateGroundedAnswer(
+  deps: AnswerGenerationDeps,
+  query: string,
+  results: SearchResult[],
+  maxTokens: number
+): Promise<AnswerGenerationResult | null> {
+  const { genPort, store } = deps;
+  const contextParts: string[] = [];
+  const citations: Citation[] = [];
+  let citationIndex = 0;
+  for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
+    let content: string | null = null;
+    let usedFullContent = false;
+    // Try to fetch full document content if store available
+    if (store && r.conversion?.mirrorHash) {
+      const contentResult = await store.getContent(r.conversion.mirrorHash);
+      if (contentResult.ok && contentResult.value) {
+        content = contentResult.value;
+        usedFullContent = true;
+        // Truncate to max doc chars
+        if (content.length > MAX_DOC_CHARS) {
+          content = `${content.slice(0, MAX_DOC_CHARS)}\n\n[... truncated ...]`;
+        }
+      }
+    }
+    // Fallback to snippet if full content unavailable
+    if (!content) {
+      if (!r.snippet || r.snippet.trim().length === 0) {
+        continue;
+      }
+      content =
+        r.snippet.length > MAX_SNIPPET_CHARS
+          ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
+          : r.snippet;
+    }
+    // Number blocks only after content is known, so numbering stays gap-free
+    citationIndex += 1;
+    contextParts.push(`[${citationIndex}] ${content}`);
+    // Clear line range when citing full content (not a specific snippet)
+    citations.push({
+      docid: r.docid,
+      uri: r.uri,
+      startLine: usedFullContent ? undefined : r.snippetRange?.startLine,
+      endLine: usedFullContent ? undefined : r.snippetRange?.endLine,
+    });
+  }
+  if (contextParts.length === 0) {
+    return null;
+  }
+  // Fill both placeholders in one pass using a replacer function:
+  // - a function's return value is inserted literally, so "$&", "$1" or "$$"
+  //   inside the query or document text are not treated as replacement patterns;
+  // - substituted text is never rescanned, so a query that contains the text
+  //   "{context}" cannot capture the context blocks.
+  const context = contextParts.join('\n\n');
+  const prompt = ANSWER_PROMPT.replace(
+    /\{(query|context)\}/g,
+    (_placeholder, key: string) => (key === 'query' ? query : context)
+  );
+  const result = await genPort.generate(prompt, {
+    temperature: 0,
+    maxTokens,
+  });
+  if (!result.ok) {
+    return null;
+  }
+  return { answer: result.value, citations };
+}
+/**
+ * Turn a raw generation result into the final answer.
+ * Keeps only citations the answer really references and renumbers the
+ * markers to match; when nothing valid is cited, returns the abstention
+ * message with an empty citation list.
+ *
+ * @param rawResult - Raw answer text plus the full citation list.
+ * @returns Cleaned answer and the citations it references.
+ */
+export function processAnswerResult(rawResult: AnswerGenerationResult): {
+  answer: string;
+  citations: Citation[];
+} {
+  const { answer: rawAnswer, citations: allCitations } = rawResult;
+  const usedNumbers = extractValidCitationNumbers(
+    rawAnswer,
+    allCitations.length
+  );
+  const keptCitations = filterCitationsByUse(allCitations, usedNumbers);
+  const isGrounded = usedNumbers.length > 0 && keptCitations.length > 0;
+  if (!isGrounded) {
+    return { answer: ABSTENTION_MESSAGE, citations: [] };
+  }
+  return {
+    answer: renumberAnswerCitations(rawAnswer, usedNumbers),
+    citations: keptCitations,
+  };
+}

package/src/pipeline/contextual.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Contextual embedding formatting.
+ * Prepends document context to chunks for better retrieval.
+ *
+ * Based on Anthropic Contextual Retrieval research:
+ * - Query relevance jumps from 0.1 to 0.92 for context-dependent queries
+ * - 49% reduction in retrieval failure with contextual embeddings + BM25
+ * - 67% reduction with reranking added
+ *
+ * @module src/pipeline/contextual
+ */
+// Top-level regex for performance (created once at module scope instead of on every call)
+const HEADING_REGEX = /^##?\s+(.+)$/m; // first line starting with "#" or "##" followed by whitespace; captures the heading text
+const SUBHEADING_REGEX = /^##\s+(.+)$/m; // first line starting with exactly "##" followed by whitespace; captures the heading text
+const EXT_REGEX = /\.\w+$/; // a trailing "." followed by word characters, i.e. a file extension
+/**
+ * Build the string that is embedded for a document chunk.
+ * The chunk text is prefixed with the document title; a missing or blank
+ * title is written as "none".
+ *
+ * @param text - Chunk text to embed.
+ * @param title - Optional document title.
+ * @returns `title: <title> | text: <text>`.
+ */
+export function formatDocForEmbedding(text: string, title?: string): string {
+  const trimmed = title == null ? '' : title.trim();
+  const label = trimmed.length > 0 ? trimmed : 'none';
+  return ['title: ', label, ' | text: ', text].join('');
+}
+/**
+ * Build the string that is embedded for a search query.
+ * Queries get a task prefix that differs from the document format, so
+ * queries and documents are embedded asymmetrically.
+ *
+ * @param query - Search query text.
+ * @returns `task: search result | query: <query>`.
+ */
+export function formatQueryForEmbedding(query: string): string {
+  const prefix = 'task: search result | query: ';
+  return prefix.concat(query);
+}
+/**
+ * Extract title from markdown content or filename.
+ * Prefers first heading, falls back to filename without extension.
+ *
+ * A first heading that is just "Notes" (any letter case) is treated as
+ * generic: the next "##" heading after it is used instead when one exists.
+ *
+ * @param content - Markdown text scanned for a "#" or "##" heading.
+ * @param filename - Path or file name used when no heading is found; "/" is
+ *   taken as the directory separator.
+ * @returns Trimmed heading text, or the file's base name without its
+ *   extension (the full base name for dotfiles such as ".gitignore").
+ */
+export function extractTitle(content: string, filename: string): string {
+  // Try to find first heading (# or ##)
+  const match = content.match(HEADING_REGEX);
+  if (match?.[1]) {
+    const title = match[1].trim();
+    // Skip generic titles like "Notes" and try next heading
+    if (title.toLowerCase() === 'notes') {
+      // Search only the text after the matched heading: scanning from the top
+      // would find the same line again when "Notes" is itself a "##" heading.
+      const rest = content.slice((match.index ?? 0) + match[0].length);
+      const nextMatch = rest.match(SUBHEADING_REGEX);
+      if (nextMatch?.[1]) {
+        return nextMatch[1].trim();
+      }
+    }
+    return title;
+  }
+  // Fall back to filename without extension
+  const basename = filename.split('/').pop() ?? filename;
+  // "||" on purpose: stripping the extension of a dotfile leaves an empty
+  // string, in which case the untouched base name is the better title.
+  return basename.replace(EXT_REGEX, '') || basename;
+}

package/src/pipeline/expansion.ts CHANGED Viewed

@@ -15,9 +15,10 @@ import type { ExpansionResult } from './types';
 // Constants
 // ─────────────────────────────────────────────────────────────────────────────
-const EXPANSION_PROMPT_VERSION = 'v1';
+const EXPANSION_PROMPT_VERSION = 'v2';
 const DEFAULT_TIMEOUT_MS = 5000;
-const JSON_EXTRACT_PATTERN = /\{[\s\S]*\}/;
+// Non-greedy to avoid matching from first { to last } across multiple objects
+const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
 // ─────────────────────────────────────────────────────────────────────────────
 // Cache Key Generation
@@ -40,45 +41,53 @@ export function generateCacheKey(
 // Prompt Templates
 // ─────────────────────────────────────────────────────────────────────────────
-const EXPANSION_PROMPT_EN = `You are a query expansion assistant. Given a search query, generate alternative phrasings to improve search results.
+const EXPANSION_PROMPT_EN = `You expand search queries for a hybrid search system.
-Input query: "{query}"
+Query: "{query}"
-Generate a JSON object with:
-- "lexicalQueries": array of 2-3 keyword-based variations (for BM25 search)
-- "vectorQueries": array of 2-3 semantic rephrasing (for embedding search)
-- "hyde": a short hypothetical document passage that would answer the query (optional)
+Generate JSON with:
+1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
-Respond ONLY with valid JSON, no explanation.
+Rules:
+- Keep proper nouns exactly as written
+- Be concise - each variation 3-8 words
+- HyDE should read like actual documentation, not a question
-Example:
-{
-  "lexicalQueries": ["deployment process", "how to deploy", "deploying application"],
-  "vectorQueries": ["steps to release software to production", "guide for application deployment"],
-  "hyde": "To deploy the application, first run the build command, then push to the staging environment..."
-}`;
+Respond with valid JSON only.`;
-const EXPANSION_PROMPT_DE = `Du bist ein Query-Erweiterungs-Assistent. Generiere alternative Formulierungen für die Suchanfrage.
+const EXPANSION_PROMPT_DE = `Du erweiterst Suchanfragen für ein hybrides Suchsystem.
-Suchanfrage: "{query}"
+Anfrage: "{query}"
-Generiere ein JSON-Objekt mit:
-- "lexicalQueries": Array mit 2-3 Keyword-Variationen (für BM25-Suche)
-- "vectorQueries": Array mit 2-3 semantischen Umformulierungen (für Vektor-Suche)
-- "hyde": Ein kurzer hypothetischer Dokumentenausschnitt, der die Anfrage beantworten würde (optional)
+Generiere JSON mit:
+1. "lexicalQueries": 2-3 Keyword-Variationen mit Synonymen (für BM25)
+2. "vectorQueries": 2-3 semantische Umformulierungen (für Embeddings)
+3. "hyde": Ein 50-100 Wort Abschnitt, der die Anfrage direkt beantwortet, wie aus einem relevanten Dokument
-Antworte NUR mit validem JSON, keine Erklärung.`;
+Regeln:
+- Eigennamen exakt beibehalten
+- Kurz halten - jede Variation 3-8 Wörter
+- HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
-const EXPANSION_PROMPT_MULTILINGUAL = `You are a query expansion assistant. Generate alternative phrasings for the search query in the same language as the query.
+Antworte nur mit validem JSON.`;
-Input query: "{query}"
+const EXPANSION_PROMPT_MULTILINGUAL = `You expand search queries for a hybrid search system. Respond in the same language as the query.
-Generate a JSON object with:
-- "lexicalQueries": array of 2-3 keyword-based variations
-- "vectorQueries": array of 2-3 semantic rephrasing
-- "hyde": a short hypothetical document passage (optional)
+Query: "{query}"
-Respond ONLY with valid JSON.`;
+Generate JSON with:
+1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
+Rules:
+- Keep proper nouns exactly as written
+- Be concise - each variation 3-8 words
+- HyDE should read like actual documentation, not a question
+Respond with valid JSON only.`;
 /**
  * Get prompt template for language.
@@ -178,9 +187,10 @@ export async function expandQuery(
   const template = getPromptTemplate(options.lang);
   const prompt = template.replace('{query}', query);
-  // Run with timeout
+  // Run with timeout (clear timer to avoid resource leak)
+  let timeoutId: ReturnType<typeof setTimeout> | undefined;
   const timeoutPromise = new Promise<null>((resolve) => {
-    setTimeout(() => resolve(null), timeout);
+    timeoutId = setTimeout(() => resolve(null), timeout);
   });
   try {
@@ -193,6 +203,11 @@ export async function expandQuery(
       timeoutPromise,
     ]);
+    // Clear timeout if generation completed first
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+    }
     // Timeout
     if (result === null) {
       return ok(null);
@@ -207,6 +222,9 @@ export async function expandQuery(
     const parsed = parseExpansionResult(result.value);
     return ok(parsed);
   } catch {
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+    }
     return ok(null); // Graceful degradation
   }
 }

package/src/pipeline/explain.ts CHANGED Viewed

@@ -49,15 +49,23 @@ export function formatResultExplain(results: ExplainResult[]): string {
 // Explain Line Builders
 // ─────────────────────────────────────────────────────────────────────────────
+export type ExpansionStatus = // how the expansion stage was handled; first argument of explainExpansion
+  | 'disabled' // User chose --no-expand; explained as "disabled"
+  | 'skipped_strong' // Strong BM25 signal detected; explained as "skipped (strong BM25)"
+  | 'attempted'; // Expansion was attempted (may have succeeded or timed out); a null result is explained as "skipped (timeout)"
 export function explainExpansion(
-  enabled: boolean,
+  status: ExpansionStatus,
   result: ExpansionResult | null
 ): ExplainLine {
-  if (!enabled) {
+  if (status === 'disabled') {
     return { stage: 'expansion', message: 'disabled' };
   }
+  if (status === 'skipped_strong') {
+    return { stage: 'expansion', message: 'skipped (strong BM25)' };
+  }
   if (!result) {
-    return { stage: 'expansion', message: 'skipped (strong BM25 or timeout)' };
+    return { stage: 'expansion', message: 'skipped (timeout)' };
   }
   const lex = result.lexicalQueries.length;
   const sem = result.vectorQueries.length;

package/src/pipeline/fusion.ts CHANGED Viewed

@@ -64,9 +64,12 @@ export function rrfFuse(
   );
   // Process BM25 sources
+  // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of bm25Inputs) {
     const weight =
-      input.source === 'bm25' ? config.bm25Weight : config.bm25Weight * 0.5;
+      input.source === 'bm25'
+        ? config.bm25Weight * 2.0
+        : config.bm25Weight * 0.5;
     for (const result of input.results) {
       const key = `${result.mirrorHash}:${result.seq}`;
@@ -98,8 +101,9 @@ export function rrfFuse(
   }
   // Process vector sources
+  // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of vectorInputs) {
-    let weight = config.vecWeight;
+    let weight = config.vecWeight * 2.0; // Default for original vector
     if (input.source === 'vector_variant') {
       weight = config.vecWeight * 0.5;
     } else if (input.source === 'hyde') {
@@ -135,16 +139,23 @@ export function rrfFuse(
     }
   }
-  // Apply top-rank bonus
+  // Apply tiered top-rank bonus
+  // Rewards documents ranking highly in ANY list (not requiring both)
   for (const candidate of candidates.values()) {
-    if (
-      candidate.bm25Rank !== null &&
-      candidate.bm25Rank <= config.topRankThreshold &&
-      candidate.vecRank !== null &&
-      candidate.vecRank <= config.topRankThreshold
-    ) {
+    const bm25Rank = candidate.bm25Rank;
+    const vecRank = candidate.vecRank;
+    // Tier 1: #1 in any list
+    if (bm25Rank === 1 || vecRank === 1) {
       candidate.fusionScore += config.topRankBonus;
     }
+    // Tier 2: Top-3 in any list (but not #1)
+    else if (
+      (bm25Rank !== null && bm25Rank <= config.topRankThreshold) ||
+      (vecRank !== null && vecRank <= config.topRankThreshold)
+    ) {
+      candidate.fusionScore += config.topRankBonus * 0.4; // 40% of tier 1
+    }
   }
   // Sort by fusion score (descending), then by mirrorHash:seq for determinism