npm - @gmickel/gno - Versions diffs - 0.40.1 → 0.41.0 - Mend

@gmickel/gno 0.40.1 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +54 -32
package/package.json +1 -1
package/src/cli/commands/embed.ts +35 -19
package/src/cli/commands/vsearch.ts +1 -1
package/src/embed/backlog.ts +28 -21
package/src/embed/batch.ts +126 -0
package/src/llm/embedding-compatibility.ts +82 -0
package/src/mcp/tools/vsearch.ts +1 -1
package/src/pipeline/contextual.ts +19 -2
package/src/pipeline/hybrid.ts +66 -24
package/src/pipeline/vsearch.ts +3 -1
package/src/sdk/client.ts +1 -1
package/src/sdk/embed.ts +31 -14
package/src/serve/public/components/FrontmatterDisplay.tsx +4 -2
package/src/serve/public/components/RelatedNotesSidebar.tsx +2 -2
package/src/serve/public/components/editor/MarkdownPreview.tsx +57 -2
package/src/serve/public/pages/DocView.tsx +46 -12
package/src/serve/public/pages/DocumentEditor.tsx +1 -1
package/src/serve/routes/api.ts +114 -0
package/src/serve/server.ts +10 -0

package/README.md CHANGED Viewed

@@ -87,7 +87,7 @@ gno daemon
 ## What's New
-> Latest release: [v0.39.1](./CHANGELOG.md#0391---2026-04-06)
+> Latest release: [v0.40.2](./CHANGELOG.md#0402---2026-04-06)
 > Full release history: [CHANGELOG.md](./CHANGELOG.md)
 - **Retrieval Quality Upgrade**: stronger BM25 lexical handling, code-aware chunking, terminal result hyperlinks, and per-collection model overrides
@@ -108,6 +108,26 @@ gno embed
 That regenerates embeddings for the new default model. Old vectors are kept
 until you explicitly clear stale embeddings.
+If the release also changes the embedding formatting/profile behavior for your
+active model, prefer one of these stronger migration paths:
+```bash
+gno embed --force
+```
+or per collection:
+```bash
+gno collection clear-embeddings my-collection --all
+gno embed my-collection
+```
+Model guides:
+- [Code Embeddings](./docs/guides/code-embeddings.md)
+- [Per-Collection Models](./docs/guides/per-collection-models.md)
+- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
 ### Fine-Tuned Model Quick Use
 ```yaml
@@ -672,22 +692,23 @@ graph TD
 Models auto-download on first use to `~/.cache/gno/models/`. For deterministic startup, set `GNO_NO_AUTO_DOWNLOAD=1` and use `gno models pull` explicitly. Alternatively, offload to a GPU server on your network using HTTP backends.
-| Model                | Purpose                               | Size         |
-| :------------------- | :------------------------------------ | :----------- |
-| Qwen3-Embedding-0.6B | Embeddings (multilingual)             | ~640MB       |
-| Qwen3-Reranker-0.6B  | Cross-encoder reranking (32K context) | ~700MB       |
-| Qwen/SmolLM          | Query expansion + AI answers          | ~600MB-1.2GB |
+| Model                  | Purpose                               | Size         |
+| :--------------------- | :------------------------------------ | :----------- |
+| Qwen3-Embedding-0.6B   | Embeddings (multilingual)             | ~640MB       |
+| Qwen3-Reranker-0.6B    | Cross-encoder reranking (32K context) | ~700MB       |
+| Qwen3 / Qwen2.5 family | Query expansion + AI answers          | ~600MB-2.5GB |
 ### Model Presets
-| Preset     | Disk   | Best For                     |
-| :--------- | :----- | :--------------------------- |
-| `slim`     | ~1GB   | Fast, good quality (default) |
-| `balanced` | ~2GB   | Slightly larger model        |
-| `quality`  | ~2.5GB | Best answers                 |
+| Preset       | Disk   | Best For                                                |
+| :----------- | :----- | :------------------------------------------------------ |
+| `slim-tuned` | ~1GB   | Current default, tuned retrieval in a compact footprint |
+| `slim`       | ~1GB   | Fast, good quality                                      |
+| `balanced`   | ~2GB   | Slightly larger model                                   |
+| `quality`    | ~2.5GB | Best answers                                            |
 ```bash
-gno models use slim
+gno models use slim-tuned
 gno models pull --all  # Optional: pre-download models (auto-downloads on first use)
 ```
@@ -720,7 +741,7 @@ models:
   presets:
     - id: remote-gpu
       name: Remote GPU Server
-      embed: "http://192.168.1.100:8081/v1/embeddings#bge-m3"
+      embed: "http://192.168.1.100:8081/v1/embeddings#qwen3-embedding-0.6b"
       rerank: "http://192.168.1.100:8082/v1/completions#reranker"
       expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
       gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
@@ -730,6 +751,11 @@ Works with llama-server, Ollama, LocalAI, vLLM, or any OpenAI-compatible server.
 > **Configuration**: [Model Setup](https://gno.sh/docs/CONFIGURATION/)
+Remote/BYOM guides:
+- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
+- [Per-Collection Models](./docs/guides/per-collection-models.md)
 ---
 ## Architecture
@@ -801,33 +827,29 @@ If a model turns out to be better specifically for code, the intended user story
 That lets GNO stay sane by default while still giving power users a clean path to code-specialist retrieval.
-Current code-focused recommendation:
+More model docs:
-```yaml
-collections:
-  - name: gno-code
-    path: /Users/you/work/gno/src
-    pattern: "**/*.{ts,tsx,js,jsx,go,rs,py,swift,c}"
-    models:
-      embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
-```
+- [Code Embeddings](./docs/guides/code-embeddings.md)
+- [Per-Collection Models](./docs/guides/per-collection-models.md)
+- [Bring Your Own Models](./docs/guides/bring-your-own-models.md)
-GNO treats that override like any other model URI:
+Current product stance:
-- auto-downloads on first use by default
-- manual-only if `GNO_NO_AUTO_DOWNLOAD=1`
-- offline-safe if the model is already cached
+- `Qwen3-Embedding-0.6B-GGUF` is already the global default embed model
+- you do **not** need a collection override just to get Qwen on code collections
+- use a collection override only when one collection should intentionally diverge from that default
-Why this is the current recommendation:
+Why Qwen is the current default:
-- matches `bge-m3` on the tiny canonical benchmark
+- matches or exceeds `bge-m3` on the tiny canonical benchmark
 - significantly beats `bge-m3` on the real GNO `src/serve` code slice
 - also beats `bge-m3` on a pinned public-OSS code slice
+- also beats `bge-m3` on the multilingual prose/docs benchmark lane
-Trade-off:
+Current trade-off:
 - Qwen is slower to embed than `bge-m3`
-- existing users upgrading to the new default may need to run `gno embed` again so vector and hybrid retrieval catch up
+- existing users upgrading or adopting a new embedding formatting profile may need to run `gno embed` again so stored vectors match the current formatter/runtime path
 ### General Multilingual Embedding Benchmark
@@ -841,8 +863,8 @@ bun run bench:general-embeddings --candidate qwen3-embedding-0.6b --write
 Current signal on the public multilingual FastAPI-docs fixture:
-- `bge-m3`: vector nDCG@10 `0.350`, hybrid nDCG@10 `0.642`
-- `Qwen3-Embedding-0.6B-GGUF`: vector nDCG@10 `0.859`, hybrid nDCG@10 `0.947`
+- `bge-m3`: vector nDCG@10 `0.3508`, hybrid nDCG@10 `0.6756`
+- `Qwen3-Embedding-0.6B-GGUF`: vector nDCG@10 `0.9891`, hybrid nDCG@10 `0.9891`
 Interpretation:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gmickel/gno",
-  "version": "0.40.1",
+  "version": "0.41.0",
   "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
   "keywords": [
     "embeddings",

package/src/cli/commands/embed.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import {
   isInitialized,
   loadConfig,
 } from "../../config";
+import { embedTextsWithRecovery } from "../../embed/batch";
 import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
 import { resolveDownloadPolicy } from "../../llm/policy";
 import { resolveModelUri } from "../../llm/registry";
@@ -153,8 +154,11 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
     }
     // Embed batch with contextual formatting (title prefix)
-    const batchEmbedResult = await ctx.embedPort.embedBatch(
-      batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
+    const batchEmbedResult = await embedTextsWithRecovery(
+      ctx.embedPort,
+      batch.map((b) =>
+        formatDocForEmbedding(b.text, b.title ?? undefined, ctx.modelUri)
+      )
     );
     if (!batchEmbedResult.ok) {
       if (ctx.verbose) {
@@ -178,26 +182,38 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
       continue;
     }
-    // Validate batch/embedding count match
-    const embeddings = batchEmbedResult.value;
-    if (embeddings.length !== batch.length) {
+    if (ctx.verbose && batchEmbedResult.value.batchFailed) {
+      const titles = batch
+        .slice(0, 3)
+        .map((b) => b.title ?? b.mirrorHash.slice(0, 8))
+        .join(", ");
+      process.stderr.write(
+        `\n[embed] Batch fallback (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${batchEmbedResult.value.batchError ?? "unknown batch error"}\n`
+      );
+    }
+    const vectors: VectorRow[] = [];
+    for (const [idx, item] of batch.entries()) {
+      const embedding = batchEmbedResult.value.vectors[idx];
+      if (!embedding) {
+        errors += 1;
+        continue;
+      }
+      vectors.push({
+        mirrorHash: item.mirrorHash,
+        seq: item.seq,
+        model: ctx.modelUri,
+        embedding: new Float32Array(embedding),
+      });
+    }
+    if (vectors.length === 0) {
       if (ctx.verbose) {
-        process.stderr.write(
-          `\n[embed] Count mismatch: got ${embeddings.length}, expected ${batch.length}\n`
-        );
+        process.stderr.write("\n[embed] No recoverable embeddings in batch\n");
       }
-      errors += batch.length;
       continue;
     }
-    // Store vectors (embeddedAt set by DB)
-    const vectors: VectorRow[] = batch.map((b, idx) => ({
-      mirrorHash: b.mirrorHash,
-      seq: b.seq,
-      model: ctx.modelUri,
-      embedding: new Float32Array(embeddings[idx] as number[]),
-    }));
     const storeResult = await ctx.vectorIndex.upsertVectors(vectors);
     if (!storeResult.ok) {
       if (ctx.verbose) {
@@ -205,11 +221,11 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
           `\n[embed] Store failed: ${storeResult.error.message}\n`
         );
       }
-      errors += batch.length;
+      errors += vectors.length;
       continue;
     }
-    embedded += batch.length;
+    embedded += vectors.length;
     // Progress output
     if (ctx.showProgress) {

package/src/cli/commands/vsearch.ts CHANGED Viewed

@@ -97,7 +97,7 @@ export async function vsearch(
     try {
       // Embed query with contextual formatting (also determines dimensions)
       const queryEmbedResult = await embedPort.embed(
-        formatQueryForEmbedding(query)
+        formatQueryForEmbedding(query, embedPort.modelUri)
       );
       if (!queryEmbedResult.ok) {
         return { success: false, error: queryEmbedResult.error.message };

package/src/embed/backlog.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import type {
 import { formatDocForEmbedding } from "../pipeline/contextual";
 import { err, ok } from "../store/types";
+import { embedTextsWithRecovery } from "./batch";
 // ─────────────────────────────────────────────────────────────────────────────
 // Types
@@ -85,9 +86,14 @@ export async function embedBacklog(
       }
       // Embed batch with contextual formatting (title prefix)
-      const embedResult = await embedPort.embedBatch(
+      const embedResult = await embedTextsWithRecovery(
+        embedPort,
         batch.map((b: BacklogItem) =>
-          formatDocForEmbedding(b.text, b.title ?? undefined)
+          formatDocForEmbedding(
+            b.text,
+            b.title ?? undefined,
+            embedPort.modelUri
+          )
         )
       );
@@ -96,28 +102,29 @@ export async function embedBacklog(
         continue;
       }
-      // Validate batch/embedding count match
-      const embeddings = embedResult.value;
-      if (embeddings.length !== batch.length) {
-        errors += batch.length;
-        continue;
+      const vectors: VectorRow[] = [];
+      for (const [idx, item] of batch.entries()) {
+        const embedding = embedResult.value.vectors[idx];
+        if (!embedding) {
+          errors += 1;
+          continue;
+        }
+        vectors.push({
+          mirrorHash: item.mirrorHash,
+          seq: item.seq,
+          model: modelUri,
+          embedding: new Float32Array(embedding),
+        });
       }
-      // Store vectors (embeddedAt set by DB)
-      const vectors: VectorRow[] = batch.map((b: BacklogItem, idx: number) => ({
-        mirrorHash: b.mirrorHash,
-        seq: b.seq,
-        model: modelUri,
-        embedding: new Float32Array(embeddings[idx] as number[]),
-      }));
-      const storeResult = await vectorIndex.upsertVectors(vectors);
-      if (!storeResult.ok) {
-        errors += batch.length;
-        continue;
+      if (vectors.length > 0) {
+        const storeResult = await vectorIndex.upsertVectors(vectors);
+        if (!storeResult.ok) {
+          errors += vectors.length;
+          continue;
+        }
+        embedded += vectors.length;
       }
-      embedded += batch.length;
     }
     // Sync vec index once at end if any vec0 writes failed

package/src/embed/batch.ts ADDED Viewed

@@ -0,0 +1,126 @@
+/**
+ * Shared embedding batch helpers.
+ *
+ * @module src/embed/batch
+ */
+import type { EmbeddingPort, LlmResult } from "../llm/types";
+import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
+import { inferenceFailedError } from "../llm/errors";
+export interface EmbedBatchRecoveryResult {
+  vectors: Array<number[] | null>;
+  batchFailed: boolean;
+  batchError?: string;
+  fallbackErrors: number;
+}
+function errorMessage(error: unknown): string {
+  if (
+    error &&
+    typeof error === "object" &&
+    "message" in error &&
+    typeof error.message === "string"
+  ) {
+    return error.message;
+  }
+  return String(error);
+}
+export async function embedTextsWithRecovery(
+  embedPort: EmbeddingPort,
+  texts: string[]
+): Promise<LlmResult<EmbedBatchRecoveryResult>> {
+  if (texts.length === 0) {
+    return {
+      ok: true,
+      value: {
+        vectors: [],
+        batchFailed: false,
+        fallbackErrors: 0,
+      },
+    };
+  }
+  const profile = getEmbeddingCompatibilityProfile(embedPort.modelUri);
+  if (profile.batchEmbeddingTrusted) {
+    const batchResult = await embedPort.embedBatch(texts);
+    if (batchResult.ok && batchResult.value.length === texts.length) {
+      return {
+        ok: true,
+        value: {
+          vectors: batchResult.value,
+          batchFailed: false,
+          fallbackErrors: 0,
+        },
+      };
+    }
+    const recovered = await recoverIndividually(embedPort, texts);
+    if (!recovered.ok) {
+      return recovered;
+    }
+    return {
+      ok: true,
+      value: {
+        ...recovered.value,
+        batchFailed: true,
+        batchError: batchResult.ok
+          ? `Embedding count mismatch: got ${batchResult.value.length}, expected ${texts.length}`
+          : batchResult.error.message,
+      },
+    };
+  }
+  const recovered = await recoverIndividually(embedPort, texts);
+  if (!recovered.ok) {
+    return recovered;
+  }
+  return {
+    ok: true,
+    value: {
+      ...recovered.value,
+      batchFailed: true,
+      batchError: "Batch embedding disabled for this compatibility profile",
+    },
+  };
+}
+async function recoverIndividually(
+  embedPort: EmbeddingPort,
+  texts: string[]
+): Promise<
+  LlmResult<Omit<EmbedBatchRecoveryResult, "batchFailed" | "batchError">>
+> {
+  try {
+    const vectors: Array<number[] | null> = [];
+    let fallbackErrors = 0;
+    for (const text of texts) {
+      const result = await embedPort.embed(text);
+      if (result.ok) {
+        vectors.push(result.value);
+      } else {
+        vectors.push(null);
+        fallbackErrors += 1;
+      }
+    }
+    return {
+      ok: true,
+      value: {
+        vectors,
+        fallbackErrors,
+      },
+    };
+  } catch (error) {
+    return {
+      ok: false,
+      error: inferenceFailedError(
+        embedPort.modelUri,
+        new Error(errorMessage(error))
+      ),
+    };
+  }
+}

package/src/llm/embedding-compatibility.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Embedding compatibility profiles.
+ *
+ * Encodes model-specific formatting/runtime hints for embedding models without
+ * forcing every caller to special-case URIs inline.
+ *
+ * @module src/llm/embedding-compatibility
+ */
+export type EmbeddingQueryFormat = "contextual-task" | "qwen-instruct";
+export type EmbeddingDocumentFormat = "title-prefixed" | "raw-text";
+export interface EmbeddingCompatibilityProfile {
+  id: string;
+  queryFormat: EmbeddingQueryFormat;
+  documentFormat: EmbeddingDocumentFormat;
+  /**
+   * Whether embedBatch is trusted for this model in GNO's current native path.
+   * If false, callers should use per-item embedding until compatibility is
+   * better understood.
+   */
+  batchEmbeddingTrusted: boolean;
+  notes?: string[];
+}
+const DEFAULT_PROFILE: EmbeddingCompatibilityProfile = {
+  id: "default",
+  queryFormat: "contextual-task",
+  documentFormat: "title-prefixed",
+  batchEmbeddingTrusted: true,
+};
+const QWEN_PROFILE: EmbeddingCompatibilityProfile = {
+  id: "qwen-embedding",
+  queryFormat: "qwen-instruct",
+  documentFormat: "raw-text",
+  batchEmbeddingTrusted: true,
+  notes: [
+    "Uses Qwen-style instruct query formatting.",
+    "Documents are embedded as raw text (optionally prefixed with title).",
+  ],
+};
+const JINA_PROFILE: EmbeddingCompatibilityProfile = {
+  id: "jina-embedding",
+  queryFormat: "contextual-task",
+  documentFormat: "title-prefixed",
+  batchEmbeddingTrusted: false,
+  notes: [
+    "Current native runtime path has batch-embedding issues on real fixtures.",
+    "Prefer per-item embedding fallback until compatibility improves.",
+  ],
+};
+function normalizeModelUri(modelUri?: string): string {
+  return modelUri?.toLowerCase() ?? "";
+}
+function hasAllTerms(haystack: string, terms: string[]): boolean {
+  return terms.every((term) => haystack.includes(term));
+}
+export function getEmbeddingCompatibilityProfile(
+  modelUri?: string
+): EmbeddingCompatibilityProfile {
+  const normalizedUri = normalizeModelUri(modelUri);
+  if (hasAllTerms(normalizedUri, ["qwen", "embed"])) {
+    return QWEN_PROFILE;
+  }
+  if (
+    normalizedUri.includes("jina-embeddings-v4-text-code") ||
+    normalizedUri.includes("jina-code-embeddings") ||
+    hasAllTerms(normalizedUri, ["jina", "embeddings-v4-text-code"]) ||
+    hasAllTerms(normalizedUri, ["jina", "code-embeddings"])
+  ) {
+    return JINA_PROFILE;
+  }
+  return DEFAULT_PROFILE;
+}

package/src/mcp/tools/vsearch.ts CHANGED Viewed

@@ -149,7 +149,7 @@ export function handleVsearch(
       try {
         // Embed query with contextual formatting
         const queryEmbedResult = await embedPort.embed(
-          formatQueryForEmbedding(args.query)
+          formatQueryForEmbedding(args.query, embedPort.modelUri)
         );
         if (!queryEmbedResult.ok) {
           throw new Error(queryEmbedResult.error.message);

package/src/pipeline/contextual.ts CHANGED Viewed

@@ -10,6 +10,8 @@
  * @module src/pipeline/contextual
  */
+import { getEmbeddingCompatibilityProfile } from "../llm/embedding-compatibility";
 // Top-level regex for performance
 const HEADING_REGEX = /^##?\s+(.+)$/m;
 const SUBHEADING_REGEX = /^##\s+(.+)$/m;
@@ -19,8 +21,16 @@ const EXT_REGEX = /\.\w+$/;
  * Format document text for embedding.
  * Prepends title for contextual retrieval.
  */
-export function formatDocForEmbedding(text: string, title?: string): string {
+export function formatDocForEmbedding(
+  text: string,
+  title?: string,
+  modelUri?: string
+): string {
+  const profile = getEmbeddingCompatibilityProfile(modelUri);
   const safeTitle = title?.trim() || "none";
+  if (profile.documentFormat === "raw-text") {
+    return title?.trim() ? `${title.trim()}\n${text}` : text;
+  }
   return `title: ${safeTitle} | text: ${text}`;
 }
@@ -28,7 +38,14 @@ export function formatDocForEmbedding(text: string, title?: string): string {
  * Format query for embedding.
  * Uses task-prefixed format for asymmetric retrieval.
  */
-export function formatQueryForEmbedding(query: string): string {
+export function formatQueryForEmbedding(
+  query: string,
+  modelUri?: string
+): string {
+  const profile = getEmbeddingCompatibilityProfile(modelUri);
+  if (profile.queryFormat === "qwen-instruct") {
+    return `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`;
+  }
   return `task: search result | query: ${query}`;
 }

package/src/pipeline/hybrid.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import type {
   SearchResults,
 } from "./types";
+import { embedTextsWithRecovery } from "../embed/batch";
 import { err, ok } from "../store/types";
 import { createChunkLookup } from "./chunk-lookup";
 import { formatQueryForEmbedding } from "./contextual";
@@ -213,7 +214,9 @@ async function searchVectorChunks(
   }
   // Embed query with contextual formatting
-  const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
+  const embedResult = await embedPort.embed(
+    formatQueryForEmbedding(query, embedPort.modelUri)
+  );
   if (!embedResult.ok) {
     return [];
   }
@@ -443,17 +446,6 @@ export async function searchHybrid(
   const vectorStartedAt = performance.now();
   if (vectorAvailable && vectorIndex && embedPort) {
-    // Original query (increase limit when post-filters are active).
-    const vecChunks = await searchVectorChunks(vectorIndex, embedPort, query, {
-      limit: limit * 2 * retrievalMultiplier,
-    });
-    vecCount = vecChunks.length;
-    if (vecCount > 0) {
-      rankedInputs.push(toRankedInput("vector", vecChunks));
-    }
-    // Semantic variants + HyDE (optional; run in parallel and ignore failures)
     const vectorVariantQueries = [
       ...(expansion?.vectorQueries?.map((query) => ({
         source: "vector_variant" as const,
@@ -464,22 +456,72 @@ export async function searchHybrid(
         : []),
     ];
-    if (vectorVariantQueries.length > 0) {
-      const optionalVectorResults = await Promise.allSettled(
-        vectorVariantQueries.map((variant) =>
-          searchVectorChunks(vectorIndex, embedPort, variant.query, {
-            limit: limit * retrievalMultiplier,
-          })
+    if (vectorVariantQueries.length === 0) {
+      const vecChunks = await searchVectorChunks(
+        vectorIndex,
+        embedPort,
+        query,
+        {
+          limit: limit * 2 * retrievalMultiplier,
+        }
+      );
+      vecCount = vecChunks.length;
+      if (vecCount > 0) {
+        rankedInputs.push(toRankedInput("vector", vecChunks));
+      }
+    } else {
+      const batchedQueries = [
+        {
+          source: "vector" as const,
+          query,
+          limit: limit * 2 * retrievalMultiplier,
+        },
+        ...vectorVariantQueries.map((variant) => ({
+          ...variant,
+          limit: limit * retrievalMultiplier,
+        })),
+      ];
+      const embedResult = await embedTextsWithRecovery(
+        embedPort,
+        batchedQueries.map((variant) =>
+          formatQueryForEmbedding(variant.query, embedPort.modelUri)
         )
       );
-      for (const [index, settled] of optionalVectorResults.entries()) {
-        if (settled.status !== "fulfilled" || settled.value.length === 0) {
-          continue;
+      if (!embedResult.ok) {
+        counters.fallbackEvents.push("vector_embed_error");
+      } else {
+        if (embedResult.value.batchFailed) {
+          counters.fallbackEvents.push("vector_embed_batch_fallback");
         }
-        const variant = vectorVariantQueries[index];
-        if (variant) {
-          rankedInputs.push(toRankedInput(variant.source, settled.value));
+        for (const [index, variant] of batchedQueries.entries()) {
+          const embedding = embedResult.value.vectors[index];
+          if (!embedding || !variant) {
+            continue;
+          }
+          const searchResult = await vectorIndex.searchNearest(
+            new Float32Array(embedding),
+            variant.limit
+          );
+          if (!searchResult.ok || searchResult.value.length === 0) {
+            continue;
+          }
+          const chunks = searchResult.value.map((item) => ({
+            mirrorHash: item.mirrorHash,
+            seq: item.seq,
+          }));
+          if (variant.source === "vector") {
+            vecCount = chunks.length;
+          }
+          if (chunks.length === 0) {
+            continue;
+          }
+          rankedInputs.push(toRankedInput(variant.source, chunks));
         }
       }
     }

package/src/pipeline/vsearch.ts CHANGED Viewed

@@ -353,7 +353,9 @@ export async function searchVector(
   }
   // Embed query with contextual formatting
-  const embedResult = await embedPort.embed(formatQueryForEmbedding(query));
+  const embedResult = await embedPort.embed(
+    formatQueryForEmbedding(query, embedPort.modelUri)
+  );
   if (!embedResult.ok) {
     return err(
       "QUERY_FAILED",

package/src/sdk/client.ts CHANGED Viewed

@@ -401,7 +401,7 @@ class GnoClientImpl implements GnoClient {
       }
       const queryEmbedResult = await ports.embedPort.embed(
-        formatQueryForEmbedding(query)
+        formatQueryForEmbedding(query, ports.embedPort.modelUri)
       );
       if (!queryEmbedResult.ok) {
         throw sdkError("MODEL", queryEmbedResult.error.message, {

package/src/sdk/embed.ts CHANGED Viewed

@@ -19,6 +19,7 @@ import type {
 import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
 import { embedBacklog } from "../embed";
+import { embedTextsWithRecovery } from "../embed/batch";
 import { resolveModelUri } from "../llm/registry";
 import { formatDocForEmbedding } from "../pipeline/contextual";
 import { err, ok } from "../store/types";
@@ -139,29 +140,45 @@ async function forceEmbedAll(
       cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
     }
-    const embedResult = await embedPort.embedBatch(
+    const embedResult = await embedTextsWithRecovery(
+      embedPort,
       batch.map((item) =>
-        formatDocForEmbedding(item.text, item.title ?? undefined)
+        formatDocForEmbedding(
+          item.text,
+          item.title ?? undefined,
+          embedPort.modelUri
+        )
       )
     );
-    if (!embedResult.ok || embedResult.value.length !== batch.length) {
+    if (!embedResult.ok) {
       errors += batch.length;
       continue;
     }
-    const vectors: VectorRow[] = batch.map((item, idx) => ({
-      mirrorHash: item.mirrorHash,
-      seq: item.seq,
-      model: modelUri,
-      embedding: new Float32Array(embedResult.value[idx] as number[]),
-    }));
-    const storeResult = await vectorIndex.upsertVectors(vectors);
-    if (!storeResult.ok) {
-      errors += batch.length;
-      continue;
+    const vectors: VectorRow[] = [];
+    for (const [idx, item] of batch.entries()) {
+      const embedding = embedResult.value.vectors[idx];
+      if (!embedding) {
+        errors += 1;
+        continue;
+      }
+      vectors.push({
+        mirrorHash: item.mirrorHash,
+        seq: item.seq,
+        model: modelUri,
+        embedding: new Float32Array(embedding),
+      });
     }
-    embedded += batch.length;
+    if (vectors.length > 0) {
+      const storeResult = await vectorIndex.upsertVectors(vectors);
+      if (!storeResult.ok) {
+        errors += vectors.length;
+        continue;
+      }
+      embedded += vectors.length;
+    }
   }
   if (vectorIndex.vecDirty) {

package/src/serve/public/components/FrontmatterDisplay.tsx CHANGED Viewed

@@ -303,8 +303,9 @@ const ValueDisplay: FC<ValueDisplayProps> = ({ keyName, value }) => {
         <div className="flex flex-wrap gap-1.5">
           {normalizedValues.map((item, i) => (
             <Badge
-              className="rounded-full border border-primary/20 bg-primary/10 px-2 py-0.5 font-mono text-[11px] text-primary"
+              className="max-w-full overflow-hidden rounded-full border border-primary/20 bg-primary/10 px-2 py-0.5 font-mono text-[11px] text-primary whitespace-nowrap text-ellipsis"
               key={`${item}-${i}`}
+              title={String(item)}
               variant="outline"
             >
               {String(item)}
@@ -339,8 +340,9 @@ const ValueDisplay: FC<ValueDisplayProps> = ({ keyName, value }) => {
       <div className="flex flex-wrap gap-1.5">
         {normalizedValues.map((item, i) => (
           <Badge
-            className="font-mono text-xs"
+            className="max-w-full overflow-hidden font-mono text-xs whitespace-nowrap text-ellipsis"
             key={`${item}-${i}`}
+            title={String(item)}
             variant="secondary"
           >
             {String(item)}

package/src/serve/public/components/RelatedNotesSidebar.tsx CHANGED Viewed

@@ -250,14 +250,14 @@ function RelatedNoteItem({
       <Tooltip>
         <TooltipTrigger asChild>
           <div className="min-w-0 flex-1">
-            <span className="block break-words font-medium leading-tight whitespace-normal text-foreground/90 group-hover:text-foreground">
+            <span className="line-clamp-2 block break-all font-medium leading-tight text-foreground/90 group-hover:text-foreground">
               {doc.title || "Untitled"}
             </span>
             <SimilarityBar score={doc.score} />
           </div>
         </TooltipTrigger>
         <TooltipContent side="left" className="max-w-[300px]">
-          <p className="break-words">{doc.title || "Untitled"}</p>
+          <p className="break-all">{doc.title || "Untitled"}</p>
         </TooltipContent>
       </Tooltip>
     </button>

package/src/serve/public/components/editor/MarkdownPreview.tsx CHANGED Viewed

@@ -41,9 +41,46 @@ export interface MarkdownPreviewProps {
     targetAnchor?: string;
     resolvedUri?: string;
   }>;
+  /** Current document URI for resolving note-relative assets */
+  docUri?: string;
 }
 const WIKI_LINK_REGEX = /\[\[([^\]|]+(?:\|[^\]]+)?)\]\]/g;
+const EXTERNAL_OR_APP_SCHEME_REGEX = /^(?:[a-z][a-z\d+.-]*:|\/\/)/i;
+const ABSOLUTE_FILESYSTEM_PATH_REGEX =
+  /^(?:\/(?:Users|home|var|tmp|private|Volumes)\/|[A-Za-z]:[\\/])/;
+function resolveMarkdownAssetSrc(
+  src: string | undefined,
+  docUri?: string
+): string | undefined {
+  if (!src) {
+    return src;
+  }
+  const trimmed = src.trim();
+  if (trimmed.length === 0) {
+    return trimmed;
+  }
+  if (EXTERNAL_OR_APP_SCHEME_REGEX.test(trimmed)) {
+    return trimmed;
+  }
+  if (ABSOLUTE_FILESYSTEM_PATH_REGEX.test(trimmed)) {
+    return `/api/doc-asset?path=${encodeURIComponent(trimmed)}`;
+  }
+  if (trimmed.startsWith("/")) {
+    return trimmed;
+  }
+  if (!docUri) {
+    return trimmed;
+  }
+  return `/api/doc-asset?uri=${encodeURIComponent(docUri)}&path=${encodeURIComponent(trimmed)}`;
+}
 function renderMarkdownWithWikiLinks(
   content: string,
@@ -364,6 +401,7 @@ const Image: FC<ComponentProps<"img"> & { node?: unknown }> = ({
   alt,
   className,
   node: _node,
+  src,
   ...props
 }) => (
   <img
@@ -372,6 +410,7 @@ const Image: FC<ComponentProps<"img"> & { node?: unknown }> = ({
       "my-4 max-w-full rounded-lg border border-border/40",
       className
     )}
+    src={src}
     {...props}
   />
 );
@@ -381,7 +420,13 @@ const Image: FC<ComponentProps<"img"> & { node?: unknown }> = ({
  * Sanitizes HTML to prevent XSS attacks.
  */
 export const MarkdownPreview = memo(
-  ({ content, className, collection, wikiLinks }: MarkdownPreviewProps) => {
+  ({
+    content,
+    className,
+    collection,
+    wikiLinks,
+    docUri,
+  }: MarkdownPreviewProps) => {
     if (!content) {
       return (
         <div className={cn("text-muted-foreground italic", className)}>
@@ -416,7 +461,17 @@ export const MarkdownPreview = memo(
       td: TableCell,
       th: TableHeaderCell,
       hr: Hr,
-      img: Image,
+      img: ({
+        node,
+        src,
+        ...props
+      }: ComponentProps<"img"> & { node?: unknown }) => (
+        <Image
+          {...props}
+          node={node}
+          src={resolveMarkdownAssetSrc(src, docUri)}
+        />
+      ),
     };
     return (

package/src/serve/public/pages/DocView.tsx CHANGED Viewed

@@ -50,6 +50,11 @@ import {
 } from "../components/ui/dialog";
 import { Input } from "../components/ui/input";
 import { Separator } from "../components/ui/separator";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "../components/ui/tooltip";
 import { apiFetch } from "../hooks/use-api";
 import { useDocEvents } from "../hooks/use-doc-events";
 import {
@@ -904,7 +909,10 @@ export default function DocView({ navigate }: PageProps) {
   /** Left rail — metadata + outline */
   const renderDocumentFactsRail = () => (
-    <nav aria-label="Document facts" className="space-y-0">
+    <nav
+      aria-label="Document facts"
+      className="w-full min-w-0 max-w-full space-y-0 overflow-x-hidden"
+    >
       {/* Frontmatter + tags */}
       {(hasFrontmatter || showStandaloneTags) && (
         <>
@@ -1005,19 +1013,19 @@ export default function DocView({ navigate }: PageProps) {
             <div className="mb-2 font-mono text-[10px] text-muted-foreground/50 uppercase tracking-[0.15em]">
               Outline
             </div>
-            <div className="space-y-0.5">
+            <div className="w-full min-w-0 max-w-full space-y-0.5 overflow-x-hidden">
               {sections.map((section) => (
                 <div
-                  className={`flex items-center gap-1 rounded px-1 py-0.5 ${
+                  className={`group relative w-full min-w-0 max-w-full overflow-hidden rounded px-1 py-0.5 ${
                     activeSectionAnchor === section.anchor
                       ? "bg-primary/10 text-primary"
                       : "text-muted-foreground"
                   }`}
                   key={section.anchor}
-                  style={{ paddingLeft: `${section.level * 10}px` }}
+                  style={{ paddingLeft: `${section.level * 7}px` }}
                 >
                   <button
-                    className="flex min-w-0 flex-1 cursor-pointer items-center gap-2 rounded px-1 py-0.5 text-left text-xs transition-colors hover:bg-muted/20 hover:text-foreground"
+                    className="flex w-full min-w-0 max-w-full cursor-pointer items-start gap-2 overflow-hidden rounded px-1 py-0.5 pr-7 text-left text-xs transition-colors hover:bg-muted/20 hover:text-foreground"
                     onClick={() => {
                       setShowRawView(false);
                       requestAnimationFrame(() => {
@@ -1040,10 +1048,21 @@ export default function DocView({ navigate }: PageProps) {
                     type="button"
                   >
                     <ChevronRightIcon className="size-3 shrink-0" />
-                    <span className="truncate">{section.title}</span>
+                    <Tooltip>
+                      <TooltipTrigger asChild>
+                        <div className="min-w-0 max-w-full flex-1 overflow-hidden">
+                          <span className="line-clamp-2 block break-words leading-snug">
+                            {section.title}
+                          </span>
+                        </div>
+                      </TooltipTrigger>
+                      <TooltipContent side="right" className="max-w-[320px]">
+                        <p className="break-words">{section.title}</p>
+                      </TooltipContent>
+                    </Tooltip>
                   </button>
                   <button
-                    className="cursor-pointer rounded p-1 transition-colors hover:bg-muted/20 hover:text-foreground"
+                    className="absolute top-1 right-1 cursor-pointer rounded p-1 opacity-0 transition-all hover:bg-muted/20 hover:text-foreground focus-visible:opacity-100 group-hover:opacity-100"
                     onClick={() => {
                       void navigator.clipboard.writeText(
                         `${window.location.origin}${buildDocDeepLink({
@@ -1394,9 +1413,17 @@ export default function DocView({ navigate }: PageProps) {
       <div className="mx-auto flex max-w-[1800px] gap-5 px-6 xl:px-8">
         {/* Left rail — metadata + outline */}
         {doc && (
-          <aside className="hidden w-[200px] shrink-0 border-border/15 border-r pr-2 py-6 lg:block">
-            <div className="sticky top-24 max-h-[calc(100vh-7rem)] overflow-y-auto pr-1">
-              {renderDocumentFactsRail()}
+          <aside
+            className="hidden min-w-0 flex-none border-border/15 border-r pr-2 pt-2 pb-6 lg:block"
+            style={{ width: 252, minWidth: 252, maxWidth: 252, flexBasis: 252 }}
+          >
+            <div
+              className="sticky min-w-0 max-w-full overflow-x-hidden overflow-y-auto pr-1"
+              style={{ top: 72, maxHeight: "calc(100vh - 5.5rem)" }}
+            >
+              <div className="min-w-0 max-w-full overflow-hidden">
+                {renderDocumentFactsRail()}
+              </div>
             </div>
           </aside>
         )}
@@ -1520,6 +1547,7 @@ export default function DocView({ navigate }: PageProps) {
                     <MarkdownPreview
                       collection={doc.collection}
                       content={parsedContent.body}
+                      docUri={doc.uri}
                       wikiLinks={resolvedWikiLinks}
                     />
                   </div>
@@ -1562,8 +1590,14 @@ export default function DocView({ navigate }: PageProps) {
         {/* Right rail — properties/path + relationships */}
         {doc && (
-          <aside className="hidden w-[250px] min-w-0 shrink-0 overflow-hidden border-border/15 border-l pl-2 pt-2 pb-6 lg:block">
-            <div className="sticky top-18 min-w-0 max-h-[calc(100vh-5.5rem)] space-y-1 overflow-y-auto overflow-x-hidden pr-1">
+          <aside
+            className="hidden min-w-0 flex-none overflow-hidden border-border/15 border-l pl-2 pt-2 pb-6 lg:block"
+            style={{ width: 250, minWidth: 250, maxWidth: 250, flexBasis: 250 }}
+          >
+            <div
+              className="sticky min-w-0 space-y-1 overflow-y-auto overflow-x-hidden pr-1"
+              style={{ top: 72, maxHeight: "calc(100vh - 5.5rem)" }}
+            >
               {renderPropertiesPathRail()}
               <BacklinksPanel
                 docId={doc.docid}

package/src/serve/public/pages/DocumentEditor.tsx CHANGED Viewed

@@ -1138,7 +1138,7 @@ export default function DocumentEditor({ navigate }: PageProps) {
             ref={previewRef}
           >
             <div className="mx-auto max-w-3xl">
-              <MarkdownPreview content={parsedContent.body} />
+              <MarkdownPreview content={parsedContent.body} docUri={doc?.uri} />
             </div>
           </div>
         )}

package/src/serve/routes/api.ts CHANGED Viewed

@@ -430,6 +430,24 @@ async function resolveAbsoluteDocPath(
   };
 }
+/**
+ * Report whether `pathValue` looks like an absolute local filesystem path:
+ * either it starts with one of a fixed set of POSIX/macOS top-level
+ * directories, or with a Windows drive letter followed by "/" or "\".
+ */
+function isAbsoluteFilesystemPath(pathValue: string): boolean {
+  const absolutePathPattern =
+    /^(?:\/(?:Users|home|var|tmp|private|Volumes)\/|[A-Za-z]:[\\/])/;
+  const looksAbsolute = absolutePathPattern.test(pathValue);
+  return looksAbsolute;
+}
+/**
+ * Report whether `candidate` is `root` itself or lies underneath it.
+ *
+ * The comparison is purely lexical (via `path.relative`); the filesystem is
+ * not consulted, so symlinks are not resolved here.
+ *
+ * @param root - Directory that acts as the containment boundary.
+ * @param candidate - Path to test against the boundary.
+ * @returns `true` when `candidate` does not escape `root`.
+ */
+async function isPathWithinRoot(
+  root: string,
+  candidate: string
+): Promise<boolean> {
+  const nodePath = await import("node:path"); // no bun equivalent
+  const relative = nodePath.relative(root, candidate);
+  if (relative === "") {
+    return true;
+  }
+  // An absolute result means the two paths share no common root
+  // (e.g. different Windows drives).
+  if (nodePath.isAbsolute(relative)) {
+    return false;
+  }
+  // Only a leading ".." path SEGMENT escapes the root. A plain prefix test
+  // would also reject valid entries such as "..assets/a.png".
+  const escapesRoot =
+    relative === ".." || relative.startsWith(`..${nodePath.sep}`);
+  return !escapesRoot;
+}
 async function listCollectionRelPaths(
   store: Pick<SqliteAdapter, "listDocuments">,
   collection: string
@@ -1445,6 +1463,98 @@ export async function handleDoc(
   });
 }
+/**
+ * GET /api/doc-asset
+ *
+ * Streams a file referenced from a document. An absolute filesystem path is
+ * served only when it lies inside one of the configured collection paths; a
+ * relative path is resolved against the directory of the document named by
+ * `uri` and must stay inside that document's collection path.
+ *
+ * Query params:
+ *   - path (required): relative to current doc, or absolute filesystem path
+ *   - uri (required for relative paths): current document uri
+ */
+export async function handleDocAsset(
+  store: SqliteAdapter,
+  config: Config,
+  url: URL
+): Promise<Response> {
+  const requestedPath = url.searchParams.get("path")?.trim();
+  if (!requestedPath) {
+    return errorResponse("VALIDATION", "Missing path parameter");
+  }
+  let servedPath: string;
+  if (!isAbsoluteFilesystemPath(requestedPath)) {
+    // Relative request: anchor it at the owning document's directory.
+    const docUri = url.searchParams.get("uri");
+    if (!docUri) {
+      return errorResponse(
+        "VALIDATION",
+        "uri is required for relative asset paths"
+      );
+    }
+    const lookup = await store.getDocumentByUri(docUri);
+    if (!lookup.ok) {
+      return errorResponse("RUNTIME", lookup.error.message, 500);
+    }
+    if (!lookup.value) {
+      return errorResponse("NOT_FOUND", "Document not found", 404);
+    }
+    const located = await resolveAbsoluteDocPath(
+      config.collections,
+      lookup.value
+    );
+    if (!located) {
+      return errorResponse(
+        "NOT_FOUND",
+        "Document path could not be resolved",
+        404
+      );
+    }
+    const nodePath = await import("node:path"); // no bun equivalent
+    const docDirectory = nodePath.dirname(located.fullPath);
+    const candidate = nodePath.resolve(docDirectory, requestedPath);
+    const staysInside = await isPathWithinRoot(
+      located.collection.path,
+      candidate
+    );
+    if (!staysInside) {
+      return errorResponse(
+        "FORBIDDEN",
+        "Asset path escapes collection root",
+        403
+      );
+    }
+    servedPath = candidate;
+  } else {
+    // Absolute request: accept it only if some configured collection contains it.
+    let insideSomeCollection = false;
+    for (const collection of config.collections) {
+      if (await isPathWithinRoot(collection.path, requestedPath)) {
+        insideSomeCollection = true;
+        break;
+      }
+    }
+    if (!insideSomeCollection) {
+      return errorResponse(
+        "FORBIDDEN",
+        "Absolute asset path is outside configured collections",
+        403
+      );
+    }
+    servedPath = requestedPath;
+  }
+  const asset = Bun.file(servedPath);
+  if (!(await asset.exists())) {
+    return errorResponse("NOT_FOUND", "Asset not found", 404);
+  }
+  // Fall back to a generic binary type when Bun reports no MIME type.
+  const headers = {
+    "Cache-Control": "no-store",
+    "Content-Type": asset.type || "application/octet-stream",
+  };
+  return new Response(asset, { headers });
+}
 /**
  * GET /api/tags
  * Query params: collection, prefix
@@ -3796,6 +3906,10 @@ export async function routeApi(
     return handleDoc(store, config, url);
   }
+  if (path === "/api/doc-asset") {
+    return handleDocAsset(store, config, url);
+  }
   if (path === "/api/search" && req.method === "POST") {
     return handleSearch(store, req);
   }

package/src/serve/server.ts CHANGED Viewed

@@ -27,6 +27,7 @@ import {
   handleDeactivateDoc,
   handleDeleteCollection,
   handleDoc,
+  handleDocAsset,
   handleDocSections,
   handleDocsAutocomplete,
   handleDocs,
@@ -423,6 +424,15 @@ export async function startServer(
             );
           },
         },
+        "/api/doc-asset": {
+          GET: async (req: Request) => {
+            const url = new URL(req.url);
+            return withSecurityHeaders(
+              await handleDocAsset(store, ctxHolder.config, url),
+              isDev
+            );
+          },
+        },
         "/api/events": {
           GET: () =>
             withSecurityHeaders(