@gmickel/gno 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +1 -1
  2. package/assets/skill/SKILL.md +3 -0
  3. package/assets/skill/cli-reference.md +5 -0
  4. package/assets/skill/examples.md +2 -0
  5. package/package.json +1 -1
  6. package/src/app/constants.ts +64 -8
  7. package/src/cli/commands/embed.ts +6 -2
  8. package/src/cli/commands/get.ts +15 -5
  9. package/src/cli/commands/index-cmd.ts +4 -0
  10. package/src/cli/commands/multi-get.ts +62 -1
  11. package/src/cli/commands/query.ts +8 -2
  12. package/src/cli/commands/search.ts +8 -2
  13. package/src/cli/commands/shared.ts +18 -1
  14. package/src/cli/commands/status.ts +4 -2
  15. package/src/cli/commands/update.ts +6 -1
  16. package/src/cli/commands/vsearch.ts +8 -2
  17. package/src/cli/format/search-results.ts +1 -1
  18. package/src/cli/program.ts +22 -1
  19. package/src/ingestion/chunker.ts +6 -0
  20. package/src/llm/cache.ts +133 -27
  21. package/src/llm/errors.ts +32 -0
  22. package/src/llm/nodeLlamaCpp/embedding.ts +69 -3
  23. package/src/llm/nodeLlamaCpp/lifecycle.ts +60 -4
  24. package/src/mcp/resources/index.ts +13 -4
  25. package/src/mcp/server.ts +2 -0
  26. package/src/mcp/tools/get.ts +7 -2
  27. package/src/mcp/tools/multi-get.ts +2 -2
  28. package/src/mcp/tools/query.ts +2 -1
  29. package/src/mcp/tools/search.ts +2 -1
  30. package/src/mcp/tools/vsearch.ts +2 -1
  31. package/src/pipeline/explain.ts +12 -2
  32. package/src/pipeline/hybrid.ts +9 -1
  33. package/src/pipeline/search.ts +1 -0
  34. package/src/pipeline/types.ts +2 -0
  35. package/src/pipeline/vsearch.ts +14 -8
  36. package/src/sdk/client.ts +83 -28
  37. package/src/store/sqlite/adapter.ts +3 -2
  38. package/src/store/vector/sqlite-vec.ts +10 -4
  39. package/src/store/vector/types.ts +2 -0
package/src/cli/program.ts CHANGED
@@ -311,6 +311,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.search, format);
+      const globals = getGlobals();

       // Validate empty query
       if (!queryText.trim()) {
@@ -348,6 +349,8 @@ function wireSearchCommands(program: Command): void {

       const { search, formatSearch } = await import("./commands/search");
       const result = await search(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -425,6 +428,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.vsearch, format);
+      const globals = getGlobals();

       // Validate empty query
       if (!queryText.trim()) {
@@ -462,6 +466,8 @@ function wireSearchCommands(program: Command): void {

       const { vsearch, formatVsearch } = await import("./commands/vsearch");
       const result = await vsearch(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -631,6 +637,8 @@ function wireSearchCommands(program: Command): void {

       const { query, formatQuery } = await import("./commands/query");
       const result = await query(queryText, {
+        configPath: globals.config,
+        indexName: globals.index,
         limit,
         minScore,
         collection: cmdOpts.collection as string | undefined,
@@ -885,6 +893,8 @@ function wireOnboardingCommands(program: Command): void {
       const globals = getGlobals();
       const { index, formatIndex } = await import("./commands/index-cmd");
       const opts = {
+        configPath: globals.config,
+        indexName: globals.index,
         collection,
         noEmbed: cmdOpts.embed === false,
         gitPull: Boolean(cmdOpts.gitPull),
@@ -911,7 +921,12 @@ function wireOnboardingCommands(program: Command): void {
       assertFormatSupported(CMD.status, format);

       const { status, formatStatus } = await import("./commands/status");
-      const result = await status({ json: format === "json" });
+      const globals = getGlobals();
+      const result = await status({
+        configPath: globals.config,
+        indexName: globals.index,
+        json: format === "json",
+      });

       if (!result.success) {
         throw new CliError("RUNTIME", result.error ?? "Status failed");
@@ -969,6 +984,7 @@ function wireRetrievalCommands(program: Command): void {
       const { get, formatGet } = await import("./commands/get");
       const result = await get(ref, {
         configPath: globals.config,
+        indexName: globals.index,
         from: cmdOpts.from as number | undefined,
         limit: cmdOpts.limit as number | undefined,
         lineNumbers: Boolean(cmdOpts.lineNumbers),
@@ -1014,6 +1030,7 @@ function wireRetrievalCommands(program: Command): void {
       const { multiGet, formatMultiGet } = await import("./commands/multi-get");
       const result = await multiGet(refs, {
         configPath: globals.config,
+        indexName: globals.index,
         maxBytes: cmdOpts.maxBytes as number | undefined,
         lineNumbers: Boolean(cmdOpts.lineNumbers),
         json: format === "json",
@@ -1515,6 +1532,8 @@ function wireManagementCommands(program: Command): void {
       const globals = getGlobals();
       const { update, formatUpdate } = await import("./commands/update");
       const opts = {
+        configPath: globals.config,
+        indexName: globals.index,
         gitPull: Boolean(cmdOpts.gitPull),
         verbose: globals.verbose,
       };
@@ -1548,6 +1567,8 @@ function wireManagementCommands(program: Command): void {
       const collection =
         collectionArg ?? (cmdOpts.collection as string | undefined);
       const opts = {
+        configPath: globals.config,
+        indexName: globals.index,
         collection,
         model: cmdOpts.model as string | undefined,
         batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),
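Taken together, these program.ts hunks thread the global `--config` and `--index` options (read via `getGlobals()` as `globals.config` / `globals.index`) into every command, so search, retrieval, and management all target the same named index. A minimal sketch of the pattern, assuming Commander-style globals; the flag names are inferred from the `globals` object and are not confirmed by this diff:

```ts
// Hypothetical sketch of the global-option threading pattern used above.
import { Command } from "commander";

interface GlobalOpts {
  config?: string; // path to a config file (assumed flag: --config)
  index?: string; // named index to operate on (assumed flag: --index)
}

const program = new Command("gno-sketch")
  .option("--config <path>", "config file path")
  .option("--index <name>", "index name");

program.command("search <query>").action(async (query: string) => {
  // Root-level opts() exposes the global flags to every subcommand action.
  const globals = program.opts<GlobalOpts>();
  await search(query, { configPath: globals.config, indexName: globals.index });
});

async function search(
  query: string,
  opts: { configPath?: string; indexName?: string }
): Promise<void> {
  console.log(`searching "${query}" in index ${opts.indexName ?? "default"}`);
}

await program.parseAsync(process.argv);
```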
package/src/ingestion/chunker.ts CHANGED
@@ -372,6 +372,12 @@ export class MarkdownChunker implements ChunkerPort {
       // Find a good prose break point
       findBreakPoint(markdown, targetEnd, windowSize);
     }
+    if (endPos <= pos) {
+      endPos = Math.min(markdown.length, pos + maxChars);
+    }
+    if (endPos - pos > maxChars + windowSize) {
+      endPos = Math.min(markdown.length, pos + maxChars);
+    }

     // Extract chunk text - preserve exactly (no trim!)
     // This maintains accurate pos/line mappings and Markdown semantics
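The two new chunker guards enforce a simple invariant: the chosen break point must advance past `pos`, and a chunk must never grow beyond `maxChars + windowSize`; in either failure case the chunker falls back to a hard cut at `pos + maxChars`. A standalone restatement of that clamp (hypothetical helper, not the real chunker code):

```ts
// Clamp a proposed chunk end so the chunker always makes progress and
// never emits a chunk larger than maxChars + windowSize.
function clampChunkEnd(
  textLength: number,
  pos: number,
  proposedEnd: number,
  maxChars: number,
  windowSize: number
): number {
  let endPos = proposedEnd;
  if (endPos <= pos) {
    // Break point did not advance: hard cut at maxChars.
    endPos = Math.min(textLength, pos + maxChars);
  }
  if (endPos - pos > maxChars + windowSize) {
    // Break point overshot the search window: hard cut again.
    endPos = Math.min(textLength, pos + maxChars);
  }
  return endPos;
}

console.log(clampChunkEnd(10_000, 100, 100, 512, 64)); // 612 (no progress)
console.log(clampChunkEnd(10_000, 100, 5_000, 512, 64)); // 612 (overshoot)
console.log(clampChunkEnd(10_000, 100, 500, 512, 64)); // 500 (accepted)
```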
package/src/llm/cache.ts CHANGED
@@ -13,6 +13,7 @@ import { isAbsolute, join } from "node:path";
 // node:url: fileURLToPath for proper file:// URL handling
 import { fileURLToPath } from "node:url";

+import type { LlmError } from "./errors";
 import type { DownloadPolicy } from "./policy";
 import type {
   DownloadProgress,
@@ -26,8 +27,10 @@ import { getModelsCachePath } from "../app/constants";
 import {
   autoDownloadDisabledError,
   downloadFailedError,
+  invalidModelFileError,
   invalidUriError,
   lockFailedError,
+  modelDownloadInterceptedError,
   modelNotCachedError,
   modelNotFoundError,
 } from "./errors";
@@ -40,6 +43,67 @@ import { getLockPath, getManifestLockPath, withLock } from "./lockfile";
 // Regex patterns for URI parsing (top-level for performance)
 const HF_QUANT_PATTERN = /^([^/]+)\/([^/:]+):(\w+)$/;
 const HF_PATH_PATTERN = /^([^/]+)\/([^/]+)\/(.+\.gguf)$/;
+const GGUF_MAGIC = new Uint8Array([0x47, 0x47, 0x55, 0x46]);
+
+type ModelFileOwner = "cache" | "user";
+
+type ValidatedCachedPath =
+  | { ok: true; path: string }
+  | { ok: false; kind: "missing" }
+  | { ok: false; kind: "invalid"; error: LlmError };
+
+function looksLikeHtml(bytes: Uint8Array): boolean {
+  const text = new TextDecoder("utf-8", { fatal: false })
+    .decode(bytes)
+    .toLowerCase();
+  return (
+    text.includes("<!doctype") ||
+    text.includes("<html") ||
+    text.includes("<head") ||
+    text.includes("<body") ||
+    (text.includes("huggingface") && text.includes("<"))
+  );
+}
+
+function hasGgufMagic(bytes: Uint8Array): boolean {
+  return GGUF_MAGIC.every((value, index) => bytes[index] === value);
+}
+
+export async function validateGgufFile(
+  path: string,
+  uri: string,
+  owner: ModelFileOwner
+): Promise<LlmResult<void>> {
+  const file = Bun.file(path);
+  const exists = await file.exists();
+  if (!exists) {
+    return {
+      ok: false,
+      error: modelNotFoundError(uri, `File not found: ${path}`),
+    };
+  }
+
+  const bytes = new Uint8Array(await file.slice(0, 512).arrayBuffer());
+  if (hasGgufMagic(bytes)) {
+    return { ok: true, value: undefined };
+  }
+
+  if (looksLikeHtml(bytes)) {
+    return {
+      ok: false,
+      error: modelDownloadInterceptedError(uri, path, owner),
+    };
+  }
+
+  return {
+    ok: false,
+    error: invalidModelFileError(
+      uri,
+      path,
+      bytes.length === 0 ? "empty file" : "missing GGUF magic header"
+    ),
+  };
+}

 async function computeSha256(path: string): Promise<string> {
   const hasher = new Bun.CryptoHasher("sha256");
@@ -233,13 +297,20 @@ export class ModelCache {
         ),
       };
     }
+    const validation = await validateGgufFile(parsed.value.file, uri, "user");
+    if (!validation.ok) {
+      return validation;
+    }
     return { ok: true, value: parsed.value.file };
   }

   // HF models: check cache
-  const cached = await this.getCachedPath(uri);
-  if (cached) {
-    return { ok: true, value: cached };
+  const cached = await this.getValidatedCachedPath(uri);
+  if (cached.ok) {
+    return { ok: true, value: cached.path };
+  }
+  if (cached.kind === "invalid") {
+    return { ok: false, error: cached.error };
   }

   return { ok: false, error: modelNotCachedError(uri, type) };
@@ -272,6 +343,10 @@ export class ModelCache {
         ),
       };
     }
+    const validation = await validateGgufFile(parsed.value.file, uri, "user");
+    if (!validation.ok) {
+      return validation;
+    }
     return { ok: true, value: parsed.value.file };
   }

@@ -326,6 +401,14 @@ export class ModelCache {
         : undefined,
     });

+    const validation = await validateGgufFile(resolvedPath, uri, "cache");
+    if (!validation.ok) {
+      await rm(resolvedPath, { force: true }).catch(() => {
+        // Ignore deletion errors
+      });
+      return validation;
+    }
+
     // Update manifest
     await this.addToManifest(uri, type, resolvedPath);

@@ -351,9 +434,12 @@ export class ModelCache {
     onProgress?: ProgressCallback
   ): Promise<LlmResult<string>> {
     // Fast path: check if already cached
-    const cached = await this.getCachedPath(uri);
-    if (cached) {
-      return { ok: true, value: cached };
+    const cached = await this.getValidatedCachedPath(uri);
+    if (cached.ok) {
+      return { ok: true, value: cached.path };
+    }
+    if (cached.kind === "invalid") {
+      return { ok: false, error: cached.error };
     }

     // Parse and validate URI
@@ -374,6 +460,10 @@ export class ModelCache {
         ),
       };
     }
+    const validation = await validateGgufFile(parsed.value.file, uri, "user");
+    if (!validation.ok) {
+      return validation;
+    }
     return { ok: true, value: parsed.value.file };
   }

@@ -397,9 +487,12 @@ export class ModelCache {

     const result = await withLock(lockPath, async () => {
       // Double-check: another process may have downloaded while we waited
-      const cachedNow = await this.getCachedPath(uri);
-      if (cachedNow) {
-        return { ok: true as const, value: cachedNow };
+      const cachedNow = await this.getValidatedCachedPath(uri);
+      if (cachedNow.ok) {
+        return { ok: true as const, value: cachedNow.path };
+      }
+      if (cachedNow.kind === "invalid") {
+        return { ok: false as const, error: cachedNow.error };
       }

       // Download with progress
@@ -433,26 +526,12 @@ export class ModelCache {
     // Handle file: URIs directly (check filesystem, not manifest)
     const parsed = parseModelUri(uri);
     if (parsed.ok && parsed.value.scheme === "file") {
-      const exists = await this.fileExists(parsed.value.file);
-      return exists ? parsed.value.file : null;
+      const validation = await validateGgufFile(parsed.value.file, uri, "user");
+      return validation.ok ? parsed.value.file : null;
     }

-    // HF URIs: check manifest
-    const manifest = await this.loadManifest();
-    const entry = manifest.models.find((m) => m.uri === uri);
-    if (!entry) {
-      return null;
-    }
-
-    // Verify file still exists
-    const exists = await this.fileExists(entry.path);
-    if (!exists) {
-      // Remove stale entry
-      await this.removeFromManifest(uri);
-      return null;
-    }
-
-    return entry.path;
+    const cached = await this.getValidatedCachedPath(uri);
+    return cached.ok ? cached.path : null;
   }

   /**
@@ -514,6 +593,33 @@ export class ModelCache {
     }
   }

+  private async getValidatedCachedPath(
+    uri: string
+  ): Promise<ValidatedCachedPath> {
+    const manifest = await this.loadManifest();
+    const entry = manifest.models.find((m) => m.uri === uri);
+    if (!entry) {
+      return { ok: false, kind: "missing" };
+    }
+
+    const exists = await this.fileExists(entry.path);
+    if (!exists) {
+      await this.removeFromManifest(uri);
+      return { ok: false, kind: "missing" };
+    }
+
+    const validation = await validateGgufFile(entry.path, uri, "cache");
+    if (validation.ok) {
+      return { ok: true, path: entry.path };
+    }
+
+    await rm(entry.path, { force: true }).catch(() => {
+      // Ignore deletion errors
+    });
+    await this.removeFromManifest(uri);
+    return { ok: false, kind: "invalid", error: validation.error };
+  }
+
   private async loadManifest(): Promise<Manifest> {
     if (this.manifest) {
       return this.manifest;
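All of the cache.ts changes hang off one idea: sniff the first bytes of the file before trusting it. A genuine GGUF model starts with the four ASCII bytes `GGUF` (0x47 0x47 0x55 0x46), while a download intercepted by a proxy or captive portal is usually an HTML error page. A self-contained restatement of the sniff using node:fs instead of Bun.file, for illustration:

```ts
import { open } from "node:fs/promises";

const GGUF_MAGIC = [0x47, 0x47, 0x55, 0x46]; // ASCII "GGUF"

// Classify a model file by its first 512 bytes, mirroring validateGgufFile.
async function sniffModelFile(
  path: string
): Promise<"gguf" | "html" | "unknown"> {
  const handle = await open(path, "r");
  try {
    const buf = Buffer.alloc(512);
    const { bytesRead } = await handle.read(buf, 0, 512, 0);
    const head = buf.subarray(0, bytesRead);
    if (GGUF_MAGIC.every((byte, i) => head[i] === byte)) {
      return "gguf"; // real model file
    }
    const text = head.toString("utf8").toLowerCase();
    if (text.includes("<!doctype") || text.includes("<html")) {
      return "html"; // intercepted download (proxy, firewall, captive portal)
    }
    return "unknown"; // empty, truncated, or some other format
  } finally {
    await handle.close();
  }
}
```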
package/src/llm/errors.ts CHANGED
@@ -15,6 +15,8 @@ export type LlmErrorCode =
   | "MODEL_DOWNLOAD_FAILED"
   | "MODEL_LOAD_FAILED"
   | "MODEL_CORRUPTED"
+  | "INVALID_MODEL_FILE"
+  | "MODEL_DOWNLOAD_INTERCEPTED"
   | "INFERENCE_FAILED"
   | "TIMEOUT"
   | "OUT_OF_MEMORY"
@@ -160,6 +162,36 @@ export function corruptedError(uri: string, cause?: unknown): LlmError {
   });
 }

+export function invalidModelFileError(
+  uri: string,
+  path: string,
+  details?: string
+): LlmError {
+  return llmError("INVALID_MODEL_FILE", {
+    message: `Model file is not a GGUF file: ${path}${details ? ` (${details})` : ""}`,
+    modelUri: uri,
+    retryable: false,
+    suggestion: "Remove the file or run: gno models pull --force",
+  });
+}
+
+export function modelDownloadInterceptedError(
+  uri: string,
+  path: string,
+  owner: "cache" | "user"
+): LlmError {
+  return llmError("MODEL_DOWNLOAD_INTERCEPTED", {
+    message:
+      `Model file looks like HTML instead of GGUF: ${path}. ` +
+      `A proxy, firewall, or captive portal likely intercepted the download.` +
+      (owner === "cache" ? " The cached file was removed." : ""),
+    modelUri: uri,
+    retryable: false,
+    suggestion:
+      "Check network access to Hugging Face, then run: gno models pull --force",
+  });
+}
+
 export function inferenceFailedError(uri: string, cause?: unknown): LlmError {
   return llmError("INFERENCE_FAILED", {
     message: `Inference failed for model: ${uri}`,
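Both new constructors mark their errors as non-retryable and attach a recovery suggestion, so callers can branch on the code and surface the hint. A hypothetical consumer, with the shape inferred from the constructors above:

```ts
// Hypothetical caller-side handling; LlmErrorLike approximates the LlmError
// fields visible in the constructors above.
interface LlmErrorLike {
  code: string;
  message: string;
  suggestion?: string;
  retryable: boolean;
}

function reportModelError(error: LlmErrorLike): void {
  console.error(error.message);
  if (error.suggestion) {
    console.error(`hint: ${error.suggestion}`);
  }
  if (!error.retryable) {
    // e.g. INVALID_MODEL_FILE, MODEL_DOWNLOAD_INTERCEPTED: retrying the same
    // operation on the same network/file will fail the same way.
    console.error("this error is not retryable; follow the hint above");
  }
}
```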
package/src/llm/nodeLlamaCpp/embedding.ts CHANGED
@@ -31,6 +31,12 @@ interface EmbeddingWorker {
   pending: number;
 }

+interface TokenizingModel {
+  trainContextSize?: number;
+  tokenize(text: string): readonly number[];
+  detokenize(tokens: readonly number[]): string;
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Constants
 // ─────────────────────────────────────────────────────────────────────────────
@@ -51,6 +57,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     null;
   private lifecycleVersion = 0;
   private dims: number | null = null;
+  private llamaModel: TokenizingModel | null = null;
+  private warnedSingleTruncation = false;
+  private warnedBatchTruncation = false;
   private readonly manager: ModelManager;
   readonly modelUri: string;
   private readonly modelPath: string;
@@ -76,8 +85,12 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     }

     try {
+      const prepared = this.truncateForEmbedding(text, "single");
+      if (!prepared.ok) {
+        return { ok: false, error: prepared.error };
+      }
       const embedding = await this.runOnWorker((worker) =>
-        worker.context.getEmbeddingFor(text)
+        worker.context.getEmbeddingFor(prepared.value.text)
       );
       const vector = Array.from(embedding.vector) as number[];

@@ -103,6 +116,15 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
     }

     try {
+      const preparedTexts: string[] = [];
+      for (const text of texts) {
+        const prepared = this.truncateForEmbedding(text, "batch");
+        if (!prepared.ok) {
+          return { ok: false, error: prepared.error };
+        }
+        preparedTexts.push(prepared.value.text);
+      }
+
       const allResults = Array.from(
         { length: texts.length },
         () => [] as number[]
@@ -114,14 +136,14 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
       while (true) {
         const index = nextIndex;
         nextIndex += 1;
-        if (index >= texts.length) {
+        if (index >= preparedTexts.length) {
           return;
         }

         const embedding = await this.runOnSpecificWorker(
           worker,
           (current) =>
-            current.context.getEmbeddingFor(texts[index] as string)
+            current.context.getEmbeddingFor(preparedTexts[index] as string)
         );
         allResults[index] = Array.from(embedding.vector) as number[];
       }
@@ -263,6 +285,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {

     try {
       const llamaModel = model.value.model as LlamaModel;
+      this.llamaModel = llamaModel as TokenizingModel;
       const llama = await this.manager.getLlama();
       const lifecycleVersion = this.lifecycleVersion;
       const targetPoolSize = this.resolveTargetPoolSize(llama);
@@ -321,4 +344,47 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
       return { ok: false, error: inferenceFailedError(this.modelUri, e) };
     }
   }
+
+  private truncateForEmbedding(
+    text: string,
+    mode: "single" | "batch"
+  ): LlmResult<{ text: string }> {
+    const model = this.llamaModel;
+    const rawLimit =
+      typeof model?.trainContextSize === "number" &&
+      Number.isFinite(model.trainContextSize) &&
+      model.trainContextSize > 0
+        ? Math.floor(model.trainContextSize)
+        : undefined;
+    if (!model || rawLimit === undefined) {
+      return { ok: true, value: { text } };
+    }
+
+    const limit = Math.max(1, rawLimit - 4);
+    try {
+      const tokens = model.tokenize(text);
+      if (tokens.length <= limit) {
+        return { ok: true, value: { text } };
+      }

+      const truncatedText = model.detokenize(tokens.slice(0, limit));
+      const shouldWarn =
+        mode === "single"
+          ? !this.warnedSingleTruncation
+          : !this.warnedBatchTruncation;
+      if (shouldWarn) {
+        if (mode === "single") {
+          this.warnedSingleTruncation = true;
+        } else {
+          this.warnedBatchTruncation = true;
+        }
+        console.warn(
+          `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
+        );
+      }
+      return { ok: true, value: { text: truncatedText } };
+    } catch (error) {
+      return { ok: false, error: inferenceFailedError(this.modelUri, error) };
+    }
+  }
 }
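`truncateForEmbedding` keeps a 4-token safety margin under the model's training context and round-trips through the model's own tokenizer, so truncation happens on token boundaries rather than bytes. A toy illustration of the same rule with a fake whitespace tokenizer; the margin of 4 matches the code above, while the stated reason for it (room for BOS/EOS-style overhead) is an assumption:

```ts
// Fake "tokenizer" standing in for the llama model's tokenize/detokenize.
interface FakeTokenizer {
  trainContextSize: number;
  tokenize(text: string): string[];
  detokenize(tokens: string[]): string;
}

const fake: FakeTokenizer = {
  trainContextSize: 8, // pretend the embedding model was trained on 8 tokens
  tokenize: (text) => text.split(/\s+/).filter(Boolean),
  detokenize: (tokens) => tokens.join(" "),
};

// Same rule as truncateForEmbedding: limit = trainContextSize - 4, floor 1.
function truncate(model: FakeTokenizer, text: string): string {
  const limit = Math.max(1, Math.floor(model.trainContextSize) - 4);
  const tokens = model.tokenize(text);
  return tokens.length <= limit
    ? text
    : model.detokenize(tokens.slice(0, limit));
}

console.log(truncate(fake, "one two three")); // unchanged (3 tokens <= 4)
console.log(truncate(fake, "one two three four five")); // "one two three four"
```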
package/src/llm/nodeLlamaCpp/lifecycle.ts CHANGED
@@ -16,6 +16,7 @@ import { loadFailedError, outOfMemoryError, timeoutError } from "../errors";

 type Llama = Awaited<ReturnType<typeof import("node-llama-cpp").getLlama>>;
 type LlamaModel = Awaited<ReturnType<Llama["loadModel"]>>;
+export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;

 interface CachedModel {
   uri: string;
@@ -24,6 +25,40 @@ interface CachedModel {
   loadedAt: number;
 }

+let invalidGpuModeWarned = false;
+let gpuFallbackWarned = false;
+
+export function resolveLlamaGpuMode(
+  env: NodeJS.ProcessEnv = process.env
+): LlamaGpuMode {
+  const raw = (env.GNO_LLAMA_GPU ?? env.NODE_LLAMA_CPP_GPU ?? "auto")
+    .trim()
+    .toLowerCase();
+  if (!raw || raw === "auto") {
+    return "auto";
+  }
+  if (raw === "metal" || raw === "vulkan" || raw === "cuda") {
+    return raw;
+  }
+  if (
+    raw === "false" ||
+    raw === "off" ||
+    raw === "none" ||
+    raw === "disable" ||
+    raw === "disabled" ||
+    raw === "0"
+  ) {
+    return false;
+  }
+  if (!invalidGpuModeWarned) {
+    invalidGpuModeWarned = true;
+    console.warn(
+      `[llama] Invalid GNO_LLAMA_GPU/NODE_LLAMA_CPP_GPU value "${raw}", using auto`
+    );
+  }
+  return "auto";
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // ModelManager
 // ─────────────────────────────────────────────────────────────────────────────
@@ -48,11 +83,32 @@ export class ModelManager {
   async getLlama(): Promise<Llama> {
     if (!this.llama) {
       const { getLlama, LlamaLogLevel } = await import("node-llama-cpp");
+      const gpu = resolveLlamaGpuMode();
       // Suppress model loading warnings (vocab tokens, pooling type)
-      this.llama = await getLlama({
-        build: "autoAttempt",
-        logLevel: LlamaLogLevel.error,
-      });
+      try {
+        this.llama = await getLlama({
+          build: "autoAttempt",
+          gpu,
+          logLevel: LlamaLogLevel.error,
+        });
+      } catch (error) {
+        if (gpu === "auto" || gpu === false) {
+          throw error;
+        }
+        if (!gpuFallbackWarned) {
+          gpuFallbackWarned = true;
+          console.warn(
+            `[llama] GPU backend "${gpu}" failed, retrying with CPU: ${
+              error instanceof Error ? error.message : String(error)
+            }`
+          );
+        }
+        this.llama = await getLlama({
+          build: "autoAttempt",
+          gpu: false,
+          logLevel: LlamaLogLevel.error,
+        });
+      }
     }
     return this.llama;
   }
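Because `resolveLlamaGpuMode` takes an injectable env (defaulting to `process.env`), its mappings can be exercised directly. Expected results based on the parsing above; the relative import path is assumed:

```ts
import { resolveLlamaGpuMode } from "./lifecycle"; // path assumed

console.log(resolveLlamaGpuMode({ GNO_LLAMA_GPU: "metal" })); // "metal"
console.log(resolveLlamaGpuMode({ GNO_LLAMA_GPU: "off" })); // false
console.log(resolveLlamaGpuMode({ NODE_LLAMA_CPP_GPU: "0" })); // false
console.log(resolveLlamaGpuMode({ GNO_LLAMA_GPU: "bogus" })); // "auto" (one-time warning)
console.log(resolveLlamaGpuMode({})); // "auto"
```

Note that `GNO_LLAMA_GPU` takes precedence over `NODE_LLAMA_CPP_GPU`, and that the CPU retry in `getLlama` only triggers for an explicit backend choice: in `auto` or disabled mode a load failure is rethrown as-is.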
package/src/mcp/resources/index.ts CHANGED
@@ -13,7 +13,12 @@ import { join as pathJoin } from "node:path";
 import type { DocumentRow, TagCount } from "../../store/types";
 import type { ToolContext } from "../server";

-import { buildUri, parseUri, URI_PREFIX } from "../../app/constants";
+import {
+  buildUri,
+  decorateUriForIndex,
+  parseUri,
+  URI_PREFIX,
+} from "../../app/constants";
 import { MCP_ERRORS } from "../../core/errors";
 import { normalizeTag, validateTag } from "../../core/tags";
 import { normalizeCollectionName } from "../../core/validation";
@@ -64,7 +69,8 @@ function formatResourceContent(
   const langLine = doc.languageHint
     ? `\n  language: ${doc.languageHint}`
     : "";
-  const header = `<!-- ${doc.uri}
+  const displayUri = decorateUriForIndex(doc.uri, ctx.indexName);
+  const header = `<!-- ${displayUri}
   docid: ${doc.docid}
   source: ${absPath}
   mime: ${doc.sourceMime}${langLine}
@@ -94,7 +100,7 @@ export function registerResources(server: McpServer, ctx: ToolContext): void {

     return {
       resources: listResult.value.map((doc) => ({
-        uri: doc.uri,
+        uri: decorateUriForIndex(doc.uri, ctx.indexName),
         name: doc.relPath,
         mimeType: doc.sourceMime || "text/markdown",
         description: doc.title ?? undefined,
@@ -160,7 +166,10 @@ export function registerResources(server: McpServer, ctx: ToolContext): void {
     const formattedContent = formatResourceContent(doc, content, ctx);

     // Build canonical URI
-    const canonicalUri = buildUri(collection, path);
+    const canonicalUri = decorateUriForIndex(
+      buildUri(collection, path),
+      parsed.indexName ?? ctx.indexName
+    );

     return {
       contents: [
package/src/mcp/server.ts CHANGED
@@ -57,6 +57,7 @@ export interface ToolContext {
   config: Config;
   collections: Collection[];
   actualConfigPath: string;
+  indexName?: string;
   toolMutex: Mutex;
   jobManager: JobManager;
   serverInstanceId: string;
@@ -164,6 +165,7 @@ export async function startMcpServer(options: McpServerOptions): Promise<void> {
     config,
     collections,
     actualConfigPath,
+    indexName: options.indexName,
     toolMutex,
     jobManager,
     serverInstanceId,
package/src/mcp/tools/get.ts CHANGED
@@ -9,7 +9,7 @@ import { join as pathJoin } from "node:path";
 import type { DocumentRow, StorePort } from "../../store/types";
 import type { ToolContext } from "../server";

-import { parseUri } from "../../app/constants";
+import { decorateUriForIndex, parseUri } from "../../app/constants";
 import { parseRef } from "../../cli/commands/ref-parser";
 import {
   getDocumentCapabilities,
@@ -196,7 +196,12 @@ export function handleGet(

   const response: GetResponse = {
     docid: doc.docid,
-    uri: doc.uri,
+    uri: decorateUriForIndex(
+      doc.uri,
+      parsed.type === "uri"
+        ? (parseUri(parsed.value)?.indexName ?? ctx.indexName)
+        : ctx.indexName
+    ),
     title: doc.title ?? undefined,
     content,
     totalLines,
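`decorateUriForIndex` itself is defined in src/app/constants.ts, which this diff only lists in summary (+64 -8), so its exact encoding is not visible here. A purely illustrative shape, assuming it embeds the active index name in the resource URI so MCP clients can route follow-up requests back to the right index:

```ts
// Hypothetical sketch only — the real encoding lives in src/app/constants.ts
// and may differ. The idea: the default index leaves canonical URIs
// untouched, while a named index is carried inside the URI itself.
function decorateUriForIndexSketch(uri: string, indexName?: string): string {
  if (!indexName) {
    return uri;
  }
  return uri.replace("gno://", `gno://${indexName}@`); // scheme assumed
}
```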