npm - @gmickel/gno - Versions diffs - 1.2.0 → 1.3.0 - Mend

@gmickel/gno 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/README.md +1 -1
package/assets/skill/SKILL.md +3 -0
package/assets/skill/cli-reference.md +5 -0
package/assets/skill/examples.md +2 -0
package/package.json +1 -1
package/src/app/constants.ts +64 -8
package/src/cli/commands/embed.ts +6 -2
package/src/cli/commands/get.ts +15 -5
package/src/cli/commands/index-cmd.ts +4 -0
package/src/cli/commands/multi-get.ts +62 -1
package/src/cli/commands/query.ts +8 -2
package/src/cli/commands/search.ts +8 -2
package/src/cli/commands/shared.ts +18 -1
package/src/cli/commands/status.ts +4 -2
package/src/cli/commands/update.ts +6 -1
package/src/cli/commands/vsearch.ts +8 -2
package/src/cli/format/search-results.ts +1 -1
package/src/cli/program.ts +22 -1
package/src/ingestion/chunker.ts +6 -0
package/src/llm/cache.ts +162 -28
package/src/llm/errors.ts +32 -0
package/src/llm/lockfile.ts +49 -4
package/src/llm/nodeLlamaCpp/embedding.ts +69 -3
package/src/llm/nodeLlamaCpp/lifecycle.ts +60 -4
package/src/mcp/resources/index.ts +13 -4
package/src/mcp/server.ts +2 -0
package/src/mcp/tools/get.ts +7 -2
package/src/mcp/tools/multi-get.ts +2 -2
package/src/mcp/tools/query.ts +2 -1
package/src/mcp/tools/search.ts +2 -1
package/src/mcp/tools/vsearch.ts +2 -1
package/src/pipeline/explain.ts +12 -2
package/src/pipeline/hybrid.ts +9 -1
package/src/pipeline/search.ts +16 -7
package/src/pipeline/types.ts +2 -0
package/src/pipeline/vsearch.ts +29 -15
package/src/publish/export-service.ts +27 -2
package/src/sdk/client.ts +83 -28
package/src/store/content-batch.ts +38 -0
package/src/store/sqlite/adapter.ts +38 -2
package/src/store/types.ts +8 -0
package/src/store/vector/sqlite-vec.ts +10 -4
package/src/store/vector/types.ts +2 -0

package/src/cli/program.ts CHANGED

@@ -311,6 +311,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.search, format);
+      const globals = getGlobals();
       // Validate empty query
       if (!queryText.trim()) {
@@ -348,6 +349,8 @@ function wireSearchCommands(program: Command): void {
       const { search, formatSearch } = await import("./commands/search");
       const result = await search(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -425,6 +428,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.vsearch, format);
+      const globals = getGlobals();
       // Validate empty query
       if (!queryText.trim()) {
@@ -462,6 +466,8 @@ function wireSearchCommands(program: Command): void {
       const { vsearch, formatVsearch } = await import("./commands/vsearch");
       const result = await vsearch(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -631,6 +637,8 @@ function wireSearchCommands(program: Command): void {
       const { query, formatQuery } = await import("./commands/query");
       const result = await query(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -885,6 +893,8 @@ function wireOnboardingCommands(program: Command): void {
         const globals = getGlobals();
         const { index, formatIndex } = await import("./commands/index-cmd");
         const opts = {
+          configPath: globals.config,
+          indexName: globals.index,
           collection,
           noEmbed: cmdOpts.embed === false,
           gitPull: Boolean(cmdOpts.gitPull),
@@ -911,7 +921,12 @@ function wireOnboardingCommands(program: Command): void {
       assertFormatSupported(CMD.status, format);
       const { status, formatStatus } = await import("./commands/status");
-      const result = await status({ json: format === "json" });
+      const globals = getGlobals();
+      const result = await status({
+        configPath: globals.config,
+        indexName: globals.index,
+        json: format === "json",
+      });
       if (!result.success) {
         throw new CliError("RUNTIME", result.error ?? "Status failed");
@@ -969,6 +984,7 @@ function wireRetrievalCommands(program: Command): void {
       const { get, formatGet } = await import("./commands/get");
       const result = await get(ref, {
         configPath: globals.config,
+        indexName: globals.index,
         from: cmdOpts.from as number | undefined,
         limit: cmdOpts.limit as number | undefined,
         lineNumbers: Boolean(cmdOpts.lineNumbers),
@@ -1014,6 +1030,7 @@ function wireRetrievalCommands(program: Command): void {
       const { multiGet, formatMultiGet } = await import("./commands/multi-get");
       const result = await multiGet(refs, {
         configPath: globals.config,
+        indexName: globals.index,
         maxBytes: cmdOpts.maxBytes as number | undefined,
         lineNumbers: Boolean(cmdOpts.lineNumbers),
         json: format === "json",
@@ -1515,6 +1532,8 @@ function wireManagementCommands(program: Command): void {
       const globals = getGlobals();
       const { update, formatUpdate } = await import("./commands/update");
       const opts = {
+        configPath: globals.config,
+        indexName: globals.index,
         gitPull: Boolean(cmdOpts.gitPull),
         verbose: globals.verbose,
       };
@@ -1548,6 +1567,8 @@ function wireManagementCommands(program: Command): void {
         const collection =
           collectionArg ?? (cmdOpts.collection as string | undefined);
         const opts = {
+          configPath: globals.config,
+          indexName: globals.index,
           collection,
           model: cmdOpts.model as string | undefined,
           batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),

package/src/ingestion/chunker.ts CHANGED

@@ -372,6 +372,12 @@ export class MarkdownChunker implements ChunkerPort {
           // Find a good prose break point
           findBreakPoint(markdown, targetEnd, windowSize);
       }
+      if (endPos <= pos) {
+        endPos = Math.min(markdown.length, pos + maxChars);
+      }
+      if (endPos - pos > maxChars + windowSize) {
+        endPos = Math.min(markdown.length, pos + maxChars);
+      }
       // Extract chunk text - preserve exactly (no trim!)
       // This maintains accurate pos/line mappings and Markdown semantics

package/src/llm/cache.ts CHANGED

@@ -13,6 +13,7 @@ import { isAbsolute, join } from "node:path";
 // node:url: fileURLToPath for proper file:// URL handling
 import { fileURLToPath } from "node:url";
+import type { LlmError } from "./errors";
 import type { DownloadPolicy } from "./policy";
 import type {
   DownloadProgress,
@@ -26,8 +27,10 @@ import { getModelsCachePath } from "../app/constants";
 import {
   autoDownloadDisabledError,
   downloadFailedError,
+  invalidModelFileError,
   invalidUriError,
   lockFailedError,
+  modelDownloadInterceptedError,
   modelNotCachedError,
   modelNotFoundError,
 } from "./errors";
@@ -40,6 +43,88 @@ import { getLockPath, getManifestLockPath, withLock } from "./lockfile";
 // Regex patterns for URI parsing (top-level for performance)
 const HF_QUANT_PATTERN = /^([^/]+)\/([^/:]+):(\w+)$/;
 const HF_PATH_PATTERN = /^([^/]+)\/([^/]+)\/(.+\.gguf)$/;
+const GGUF_MAGIC = new Uint8Array([0x47, 0x47, 0x55, 0x46]);
+type ModelFileOwner = "cache" | "user";
+type ValidatedCachedPath =
+  | { ok: true; path: string }
+  | { ok: false; kind: "missing" }
+  | { ok: false; kind: "invalid"; error: LlmError };
+function looksLikeHtml(bytes: Uint8Array): boolean {
+  const text = new TextDecoder("utf-8", { fatal: false })
+    .decode(bytes)
+    .toLowerCase();
+  return (
+    text.includes("<!doctype") ||
+    text.includes("<html") ||
+    text.includes("<head") ||
+    text.includes("<body") ||
+    (text.includes("huggingface") && text.includes("<"))
+  );
+}
+function hasGgufMagic(bytes: Uint8Array): boolean {
+  return GGUF_MAGIC.every((value, index) => bytes[index] === value);
+}
+export async function validateGgufFile(
+  path: string,
+  uri: string,
+  owner: ModelFileOwner
+): Promise<LlmResult<void>> {
+  const file = Bun.file(path);
+  const exists = await file.exists();
+  if (!exists) {
+    return {
+      ok: false,
+      error: modelNotFoundError(uri, `File not found: ${path}`),
+    };
+  }
+  const bytes = new Uint8Array(await file.slice(0, 512).arrayBuffer());
+  if (hasGgufMagic(bytes)) {
+    return { ok: true, value: undefined };
+  }
+  if (looksLikeHtml(bytes)) {
+    return {
+      ok: false,
+      error: modelDownloadInterceptedError(uri, path, owner),
+    };
+  }
+  return {
+    ok: false,
+    error: invalidModelFileError(
+      uri,
+      path,
+      bytes.length === 0 ? "empty file" : "missing GGUF magic header"
+    ),
+  };
+}
+async function computeSha256(path: string): Promise<string> {
+  const hasher = new Bun.CryptoHasher("sha256");
+  const reader = Bun.file(path).stream().getReader();
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        break;
+      }
+      if (value) {
+        hasher.update(value);
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+  return hasher.digest("hex");
+}
 export type ParsedModelUri =
   | {
@@ -212,13 +297,20 @@ export class ModelCache {
           ),
         };
       }
+      const validation = await validateGgufFile(parsed.value.file, uri, "user");
+      if (!validation.ok) {
+        return validation;
+      }
       return { ok: true, value: parsed.value.file };
     }
     // HF models: check cache
-    const cached = await this.getCachedPath(uri);
-    if (cached) {
-      return { ok: true, value: cached };
+    const cached = await this.getValidatedCachedPath(uri);
+    if (cached.ok) {
+      return { ok: true, value: cached.path };
+    }
+    if (cached.kind === "invalid") {
+      return { ok: false, error: cached.error };
     }
     return { ok: false, error: modelNotCachedError(uri, type) };
@@ -251,6 +343,10 @@ export class ModelCache {
           ),
         };
       }
+      const validation = await validateGgufFile(parsed.value.file, uri, "user");
+      if (!validation.ok) {
+        return validation;
+      }
       return { ok: true, value: parsed.value.file };
     }
@@ -305,6 +401,14 @@ export class ModelCache {
           : undefined,
       });
+      const validation = await validateGgufFile(resolvedPath, uri, "cache");
+      if (!validation.ok) {
+        await rm(resolvedPath, { force: true }).catch(() => {
+          // Ignore deletion errors
+        });
+        return validation;
+      }
       // Update manifest
       await this.addToManifest(uri, type, resolvedPath);
@@ -330,9 +434,12 @@ export class ModelCache {
     onProgress?: ProgressCallback
   ): Promise<LlmResult<string>> {
     // Fast path: check if already cached
-    const cached = await this.getCachedPath(uri);
-    if (cached) {
-      return { ok: true, value: cached };
+    const cached = await this.getValidatedCachedPath(uri);
+    if (cached.ok) {
+      return { ok: true, value: cached.path };
+    }
+    if (cached.kind === "invalid") {
+      return { ok: false, error: cached.error };
     }
     // Parse and validate URI
@@ -353,6 +460,10 @@ export class ModelCache {
           ),
         };
       }
+      const validation = await validateGgufFile(parsed.value.file, uri, "user");
+      if (!validation.ok) {
+        return validation;
+      }
       return { ok: true, value: parsed.value.file };
     }
@@ -376,9 +487,12 @@ export class ModelCache {
     const result = await withLock(lockPath, async () => {
       // Double-check: another process may have downloaded while we waited
-      const cachedNow = await this.getCachedPath(uri);
-      if (cachedNow) {
-        return { ok: true as const, value: cachedNow };
+      const cachedNow = await this.getValidatedCachedPath(uri);
+      if (cachedNow.ok) {
+        return { ok: true as const, value: cachedNow.path };
+      }
+      if (cachedNow.kind === "invalid") {
+        return { ok: false as const, error: cachedNow.error };
       }
       // Download with progress
@@ -412,26 +526,12 @@ export class ModelCache {
     // Handle file: URIs directly (check filesystem, not manifest)
     const parsed = parseModelUri(uri);
     if (parsed.ok && parsed.value.scheme === "file") {
-      const exists = await this.fileExists(parsed.value.file);
-      return exists ? parsed.value.file : null;
-    }
-    // HF URIs: check manifest
-    const manifest = await this.loadManifest();
-    const entry = manifest.models.find((m) => m.uri === uri);
-    if (!entry) {
-      return null;
+      const validation = await validateGgufFile(parsed.value.file, uri, "user");
+      return validation.ok ? parsed.value.file : null;
     }
-    // Verify file still exists
-    const exists = await this.fileExists(entry.path);
-    if (!exists) {
-      // Remove stale entry
-      await this.removeFromManifest(uri);
-      return null;
-    }
-    return entry.path;
+    const cached = await this.getValidatedCachedPath(uri);
+    return cached.ok ? cached.path : null;
   }
   /**
@@ -493,6 +593,33 @@ export class ModelCache {
     }
   }
+  private async getValidatedCachedPath(
+    uri: string
+  ): Promise<ValidatedCachedPath> {
+    const manifest = await this.loadManifest();
+    const entry = manifest.models.find((m) => m.uri === uri);
+    if (!entry) {
+      return { ok: false, kind: "missing" };
+    }
+    const exists = await this.fileExists(entry.path);
+    if (!exists) {
+      await this.removeFromManifest(uri);
+      return { ok: false, kind: "missing" };
+    }
+    const validation = await validateGgufFile(entry.path, uri, "cache");
+    if (validation.ok) {
+      return { ok: true, path: entry.path };
+    }
+    await rm(entry.path, { force: true }).catch(() => {
+      // Ignore deletion errors
+    });
+    await this.removeFromManifest(uri);
+    return { ok: false, kind: "invalid", error: validation.error };
+  }
   private async loadManifest(): Promise<Manifest> {
     if (this.manifest) {
       return this.manifest;
@@ -588,6 +715,7 @@ export class ModelCache {
   ): Promise<void> {
     // Get file size outside lock (IO-bound, doesn't need protection)
     let size = 0;
+    let checksum = "";
     try {
       const stats = await stat(modelPath);
       size = stats.size;
@@ -595,6 +723,12 @@ export class ModelCache {
       // Ignore
     }
+    try {
+      checksum = await computeSha256(modelPath);
+    } catch {
+      // Best-effort metadata only
+    }
     await this.updateManifest((manifest) => {
       // Remove existing entry if present
       manifest.models = manifest.models.filter((m) => m.uri !== uri);
@@ -605,7 +739,7 @@ export class ModelCache {
         type,
         path: modelPath,
         size,
-        checksum: "", // TODO: compute SHA-256 for large files
+        checksum,
         cachedAt: new Date().toISOString(),
       });
     });

package/src/llm/errors.ts CHANGED

@@ -15,6 +15,8 @@ export type LlmErrorCode =
   | "MODEL_DOWNLOAD_FAILED"
   | "MODEL_LOAD_FAILED"
   | "MODEL_CORRUPTED"
+  | "INVALID_MODEL_FILE"
+  | "MODEL_DOWNLOAD_INTERCEPTED"
   | "INFERENCE_FAILED"
   | "TIMEOUT"
   | "OUT_OF_MEMORY"
@@ -160,6 +162,36 @@ export function corruptedError(uri: string, cause?: unknown): LlmError {
   });
 }
+export function invalidModelFileError(
+  uri: string,
+  path: string,
+  details?: string
+): LlmError {
+  return llmError("INVALID_MODEL_FILE", {
+    message: `Model file is not a GGUF file: ${path}${details ? ` (${details})` : ""}`,
+    modelUri: uri,
+    retryable: false,
+    suggestion: "Remove the file or run: gno models pull --force",
+  });
+}
+export function modelDownloadInterceptedError(
+  uri: string,
+  path: string,
+  owner: "cache" | "user"
+): LlmError {
+  return llmError("MODEL_DOWNLOAD_INTERCEPTED", {
+    message:
+      `Model file looks like HTML instead of GGUF: ${path}. ` +
+      `A proxy, firewall, or captive portal likely intercepted the download.` +
+      (owner === "cache" ? " The cached file was removed." : ""),
+    modelUri: uri,
+    retryable: false,
+    suggestion:
+      "Check network access to Hugging Face, then run: gno models pull --force",
+  });
+}
 export function inferenceFailedError(uri: string, cause?: unknown): LlmError {
   return llmError("INFERENCE_FAILED", {
     message: `Inference failed for model: ${uri}`,

package/src/llm/lockfile.ts CHANGED

@@ -5,7 +5,7 @@
  * @module src/llm/lockfile
  */
-import { open, rename, rm, stat } from "node:fs/promises";
+import { open, readFile, rename, rm, stat } from "node:fs/promises";
 // node:os: hostname and user for lock ownership
 import { hostname, userInfo } from "node:os";
 // node:path: join for manifest lock path
@@ -68,6 +68,48 @@ function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
+async function readLockMeta(lockPath: string): Promise<LockMeta | null> {
+  try {
+    const parsed = JSON.parse(
+      await readFile(lockPath, "utf-8")
+    ) as Partial<LockMeta>;
+    if (
+      typeof parsed.pid !== "number" ||
+      typeof parsed.hostname !== "string" ||
+      typeof parsed.user !== "string" ||
+      typeof parsed.createdAt !== "string"
+    ) {
+      return null;
+    }
+    return {
+      pid: parsed.pid,
+      hostname: parsed.hostname,
+      user: parsed.user,
+      createdAt: parsed.createdAt,
+    };
+  } catch {
+    return null;
+  }
+}
+function isProcessAlive(pid: number): boolean {
+  if (!Number.isInteger(pid) || pid <= 0) {
+    return false;
+  }
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (error) {
+    return (
+      error !== null &&
+      typeof error === "object" &&
+      "code" in error &&
+      error.code === "EPERM"
+    );
+  }
+}
 /**
  * Check if a lockfile is stale (older than TTL or owner process dead).
  */
@@ -81,9 +123,12 @@ async function isLockStale(lockPath: string, ttlMs: number): Promise<boolean> {
       return true;
     }
-    // TODO: Could also check if PID is alive on same hostname
-    // For now, just use TTL-based staleness
-    return false;
+    const meta = await readLockMeta(lockPath);
+    if (!meta || meta.hostname !== hostname()) {
+      return false;
+    }
+    return !isProcessAlive(meta.pid);
   } catch {
     // Lock doesn't exist or can't be read
     return true;

package/src/llm/nodeLlamaCpp/embedding.ts CHANGED

@@ -31,6 +31,12 @@ interface EmbeddingWorker {
   pending: number;
 }
+interface TokenizingModel {
+  trainContextSize?: number;
+  tokenize(text: string): readonly number[];
+  detokenize(tokens: readonly number[]): string;
+}
 // ─────────────────────────────────────────────────────────────────────────────
 // Constants
 // ─────────────────────────────────────────────────────────────────────────────
@@ -51,6 +57,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     null;
   private lifecycleVersion = 0;
   private dims: number | null = null;
+  private llamaModel: TokenizingModel | null = null;
+  private warnedSingleTruncation = false;
+  private warnedBatchTruncation = false;
   private readonly manager: ModelManager;
   readonly modelUri: string;
   private readonly modelPath: string;
@@ -76,8 +85,12 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     }
     try {
+      const prepared = this.truncateForEmbedding(text, "single");
+      if (!prepared.ok) {
+        return { ok: false, error: prepared.error };
+      }
       const embedding = await this.runOnWorker((worker) =>
-        worker.context.getEmbeddingFor(text)
+        worker.context.getEmbeddingFor(prepared.value.text)
       );
       const vector = Array.from(embedding.vector) as number[];
@@ -103,6 +116,15 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     }
     try {
+      const preparedTexts: string[] = [];
+      for (const text of texts) {
+        const prepared = this.truncateForEmbedding(text, "batch");
+        if (!prepared.ok) {
+          return { ok: false, error: prepared.error };
+        }
+        preparedTexts.push(prepared.value.text);
+      }
       const allResults = Array.from(
         { length: texts.length },
         () => [] as number[]
@@ -114,14 +136,14 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
           while (true) {
             const index = nextIndex;
             nextIndex += 1;
-            if (index >= texts.length) {
+            if (index >= preparedTexts.length) {
               return;
             }
             const embedding = await this.runOnSpecificWorker(
               worker,
               (current) =>
-                current.context.getEmbeddingFor(texts[index] as string)
+                current.context.getEmbeddingFor(preparedTexts[index] as string)
             );
             allResults[index] = Array.from(embedding.vector) as number[];
           }
@@ -263,6 +285,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     try {
       const llamaModel = model.value.model as LlamaModel;
+      this.llamaModel = llamaModel as TokenizingModel;
       const llama = await this.manager.getLlama();
       const lifecycleVersion = this.lifecycleVersion;
       const targetPoolSize = this.resolveTargetPoolSize(llama);
@@ -321,4 +344,47 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
       return { ok: false, error: inferenceFailedError(this.modelUri, e) };
     }
   }
+  private truncateForEmbedding(
+    text: string,
+    mode: "single" | "batch"
+  ): LlmResult<{ text: string }> {
+    const model = this.llamaModel;
+    const rawLimit =
+      typeof model?.trainContextSize === "number" &&
+      Number.isFinite(model.trainContextSize) &&
+      model.trainContextSize > 0
+        ? Math.floor(model.trainContextSize)
+        : undefined;
+    if (!model || rawLimit === undefined) {
+      return { ok: true, value: { text } };
+    }
+    const limit = Math.max(1, rawLimit - 4);
+    try {
+      const tokens = model.tokenize(text);
+      if (tokens.length <= limit) {
+        return { ok: true, value: { text } };
+      }
+      const truncatedText = model.detokenize(tokens.slice(0, limit));
+      const shouldWarn =
+        mode === "single"
+          ? !this.warnedSingleTruncation
+          : !this.warnedBatchTruncation;
+      if (shouldWarn) {
+        if (mode === "single") {
+          this.warnedSingleTruncation = true;
+        } else {
+          this.warnedBatchTruncation = true;
+        }
+        console.warn(
+          `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
+        );
+      }
+      return { ok: true, value: { text: truncatedText } };
+    } catch (error) {
+      return { ok: false, error: inferenceFailedError(this.modelUri, error) };
+    }
+  }
 }