@gmickel/gno 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +1 -1
  2. package/assets/skill/SKILL.md +3 -0
  3. package/assets/skill/cli-reference.md +5 -0
  4. package/assets/skill/examples.md +2 -0
  5. package/package.json +1 -1
  6. package/src/app/constants.ts +64 -8
  7. package/src/cli/commands/embed.ts +6 -2
  8. package/src/cli/commands/get.ts +15 -5
  9. package/src/cli/commands/index-cmd.ts +4 -0
  10. package/src/cli/commands/multi-get.ts +62 -1
  11. package/src/cli/commands/query.ts +8 -2
  12. package/src/cli/commands/search.ts +8 -2
  13. package/src/cli/commands/shared.ts +18 -1
  14. package/src/cli/commands/status.ts +4 -2
  15. package/src/cli/commands/update.ts +6 -1
  16. package/src/cli/commands/vsearch.ts +8 -2
  17. package/src/cli/format/search-results.ts +1 -1
  18. package/src/cli/program.ts +22 -1
  19. package/src/ingestion/chunker.ts +6 -0
  20. package/src/llm/cache.ts +162 -28
  21. package/src/llm/errors.ts +32 -0
  22. package/src/llm/lockfile.ts +49 -4
  23. package/src/llm/nodeLlamaCpp/embedding.ts +69 -3
  24. package/src/llm/nodeLlamaCpp/lifecycle.ts +60 -4
  25. package/src/mcp/resources/index.ts +13 -4
  26. package/src/mcp/server.ts +2 -0
  27. package/src/mcp/tools/get.ts +7 -2
  28. package/src/mcp/tools/multi-get.ts +2 -2
  29. package/src/mcp/tools/query.ts +2 -1
  30. package/src/mcp/tools/search.ts +2 -1
  31. package/src/mcp/tools/vsearch.ts +2 -1
  32. package/src/pipeline/explain.ts +12 -2
  33. package/src/pipeline/hybrid.ts +9 -1
  34. package/src/pipeline/search.ts +16 -7
  35. package/src/pipeline/types.ts +2 -0
  36. package/src/pipeline/vsearch.ts +29 -15
  37. package/src/publish/export-service.ts +27 -2
  38. package/src/sdk/client.ts +83 -28
  39. package/src/store/content-batch.ts +38 -0
  40. package/src/store/sqlite/adapter.ts +38 -2
  41. package/src/store/types.ts +8 -0
  42. package/src/store/vector/sqlite-vec.ts +10 -4
  43. package/src/store/vector/types.ts +2 -0
package/src/cli/program.ts CHANGED
@@ -311,6 +311,7 @@ function wireSearchCommands(program: Command): void {
311
311
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
312
312
  const format = getFormat(cmdOpts);
313
313
  assertFormatSupported(CMD.search, format);
314
+ const globals = getGlobals();
314
315
 
315
316
  // Validate empty query
316
317
  if (!queryText.trim()) {
@@ -348,6 +349,8 @@ function wireSearchCommands(program: Command): void {
348
349
 
349
350
  const { search, formatSearch } = await import("./commands/search");
350
351
  const result = await search(queryText, {
352
+ configPath: globals.config,
353
+ indexName: globals.index,
351
354
  limit,
352
355
  minScore,
353
356
  collection: cmdOpts.collection as string | undefined,
@@ -425,6 +428,7 @@ function wireSearchCommands(program: Command): void {
425
428
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
426
429
  const format = getFormat(cmdOpts);
427
430
  assertFormatSupported(CMD.vsearch, format);
431
+ const globals = getGlobals();
428
432
 
429
433
  // Validate empty query
430
434
  if (!queryText.trim()) {
@@ -462,6 +466,8 @@ function wireSearchCommands(program: Command): void {
462
466
 
463
467
  const { vsearch, formatVsearch } = await import("./commands/vsearch");
464
468
  const result = await vsearch(queryText, {
469
+ configPath: globals.config,
470
+ indexName: globals.index,
465
471
  limit,
466
472
  minScore,
467
473
  collection: cmdOpts.collection as string | undefined,
@@ -631,6 +637,8 @@ function wireSearchCommands(program: Command): void {
631
637
 
632
638
  const { query, formatQuery } = await import("./commands/query");
633
639
  const result = await query(queryText, {
640
+ configPath: globals.config,
641
+ indexName: globals.index,
634
642
  limit,
635
643
  minScore,
636
644
  collection: cmdOpts.collection as string | undefined,
@@ -885,6 +893,8 @@ function wireOnboardingCommands(program: Command): void {
885
893
  const globals = getGlobals();
886
894
  const { index, formatIndex } = await import("./commands/index-cmd");
887
895
  const opts = {
896
+ configPath: globals.config,
897
+ indexName: globals.index,
888
898
  collection,
889
899
  noEmbed: cmdOpts.embed === false,
890
900
  gitPull: Boolean(cmdOpts.gitPull),
@@ -911,7 +921,12 @@ function wireOnboardingCommands(program: Command): void {
911
921
  assertFormatSupported(CMD.status, format);
912
922
 
913
923
  const { status, formatStatus } = await import("./commands/status");
914
- const result = await status({ json: format === "json" });
924
+ const globals = getGlobals();
925
+ const result = await status({
926
+ configPath: globals.config,
927
+ indexName: globals.index,
928
+ json: format === "json",
929
+ });
915
930
 
916
931
  if (!result.success) {
917
932
  throw new CliError("RUNTIME", result.error ?? "Status failed");
@@ -969,6 +984,7 @@ function wireRetrievalCommands(program: Command): void {
969
984
  const { get, formatGet } = await import("./commands/get");
970
985
  const result = await get(ref, {
971
986
  configPath: globals.config,
987
+ indexName: globals.index,
972
988
  from: cmdOpts.from as number | undefined,
973
989
  limit: cmdOpts.limit as number | undefined,
974
990
  lineNumbers: Boolean(cmdOpts.lineNumbers),
@@ -1014,6 +1030,7 @@ function wireRetrievalCommands(program: Command): void {
1014
1030
  const { multiGet, formatMultiGet } = await import("./commands/multi-get");
1015
1031
  const result = await multiGet(refs, {
1016
1032
  configPath: globals.config,
1033
+ indexName: globals.index,
1017
1034
  maxBytes: cmdOpts.maxBytes as number | undefined,
1018
1035
  lineNumbers: Boolean(cmdOpts.lineNumbers),
1019
1036
  json: format === "json",
@@ -1515,6 +1532,8 @@ function wireManagementCommands(program: Command): void {
1515
1532
  const globals = getGlobals();
1516
1533
  const { update, formatUpdate } = await import("./commands/update");
1517
1534
  const opts = {
1535
+ configPath: globals.config,
1536
+ indexName: globals.index,
1518
1537
  gitPull: Boolean(cmdOpts.gitPull),
1519
1538
  verbose: globals.verbose,
1520
1539
  };
@@ -1548,6 +1567,8 @@ function wireManagementCommands(program: Command): void {
1548
1567
  const collection =
1549
1568
  collectionArg ?? (cmdOpts.collection as string | undefined);
1550
1569
  const opts = {
1570
+ configPath: globals.config,
1571
+ indexName: globals.index,
1551
1572
  collection,
1552
1573
  model: cmdOpts.model as string | undefined,
1553
1574
  batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),
package/src/ingestion/chunker.ts CHANGED
@@ -372,6 +372,12 @@ export class MarkdownChunker implements ChunkerPort {
372
372
  // Find a good prose break point
373
373
  findBreakPoint(markdown, targetEnd, windowSize);
374
374
  }
375
+ if (endPos <= pos) {
376
+ endPos = Math.min(markdown.length, pos + maxChars);
377
+ }
378
+ if (endPos - pos > maxChars + windowSize) {
379
+ endPos = Math.min(markdown.length, pos + maxChars);
380
+ }
375
381
 
376
382
  // Extract chunk text - preserve exactly (no trim!)
377
383
  // This maintains accurate pos/line mappings and Markdown semantics
package/src/llm/cache.ts CHANGED
@@ -13,6 +13,7 @@ import { isAbsolute, join } from "node:path";
13
13
  // node:url: fileURLToPath for proper file:// URL handling
14
14
  import { fileURLToPath } from "node:url";
15
15
 
16
+ import type { LlmError } from "./errors";
16
17
  import type { DownloadPolicy } from "./policy";
17
18
  import type {
18
19
  DownloadProgress,
@@ -26,8 +27,10 @@ import { getModelsCachePath } from "../app/constants";
26
27
  import {
27
28
  autoDownloadDisabledError,
28
29
  downloadFailedError,
30
+ invalidModelFileError,
29
31
  invalidUriError,
30
32
  lockFailedError,
33
+ modelDownloadInterceptedError,
31
34
  modelNotCachedError,
32
35
  modelNotFoundError,
33
36
  } from "./errors";
@@ -40,6 +43,88 @@ import { getLockPath, getManifestLockPath, withLock } from "./lockfile";
40
43
  // Regex patterns for URI parsing (top-level for performance)
41
44
  const HF_QUANT_PATTERN = /^([^/]+)\/([^/:]+):(\w+)$/;
42
45
  const HF_PATH_PATTERN = /^([^/]+)\/([^/]+)\/(.+\.gguf)$/;
46
+ const GGUF_MAGIC = new Uint8Array([0x47, 0x47, 0x55, 0x46]);
47
+
48
+ type ModelFileOwner = "cache" | "user";
49
+
50
+ type ValidatedCachedPath =
51
+ | { ok: true; path: string }
52
+ | { ok: false; kind: "missing" }
53
+ | { ok: false; kind: "invalid"; error: LlmError };
54
+
55
+ function looksLikeHtml(bytes: Uint8Array): boolean {
56
+ const text = new TextDecoder("utf-8", { fatal: false })
57
+ .decode(bytes)
58
+ .toLowerCase();
59
+ return (
60
+ text.includes("<!doctype") ||
61
+ text.includes("<html") ||
62
+ text.includes("<head") ||
63
+ text.includes("<body") ||
64
+ (text.includes("huggingface") && text.includes("<"))
65
+ );
66
+ }
67
+
68
+ function hasGgufMagic(bytes: Uint8Array): boolean {
69
+ return GGUF_MAGIC.every((value, index) => bytes[index] === value);
70
+ }
71
+
72
+ export async function validateGgufFile(
73
+ path: string,
74
+ uri: string,
75
+ owner: ModelFileOwner
76
+ ): Promise<LlmResult<void>> {
77
+ const file = Bun.file(path);
78
+ const exists = await file.exists();
79
+ if (!exists) {
80
+ return {
81
+ ok: false,
82
+ error: modelNotFoundError(uri, `File not found: ${path}`),
83
+ };
84
+ }
85
+
86
+ const bytes = new Uint8Array(await file.slice(0, 512).arrayBuffer());
87
+ if (hasGgufMagic(bytes)) {
88
+ return { ok: true, value: undefined };
89
+ }
90
+
91
+ if (looksLikeHtml(bytes)) {
92
+ return {
93
+ ok: false,
94
+ error: modelDownloadInterceptedError(uri, path, owner),
95
+ };
96
+ }
97
+
98
+ return {
99
+ ok: false,
100
+ error: invalidModelFileError(
101
+ uri,
102
+ path,
103
+ bytes.length === 0 ? "empty file" : "missing GGUF magic header"
104
+ ),
105
+ };
106
+ }
107
+
108
+ async function computeSha256(path: string): Promise<string> {
109
+ const hasher = new Bun.CryptoHasher("sha256");
110
+ const reader = Bun.file(path).stream().getReader();
111
+
112
+ try {
113
+ while (true) {
114
+ const { done, value } = await reader.read();
115
+ if (done) {
116
+ break;
117
+ }
118
+ if (value) {
119
+ hasher.update(value);
120
+ }
121
+ }
122
+ } finally {
123
+ reader.releaseLock();
124
+ }
125
+
126
+ return hasher.digest("hex");
127
+ }
43
128
 
44
129
  export type ParsedModelUri =
45
130
  | {
@@ -212,13 +297,20 @@ export class ModelCache {
212
297
  ),
213
298
  };
214
299
  }
300
+ const validation = await validateGgufFile(parsed.value.file, uri, "user");
301
+ if (!validation.ok) {
302
+ return validation;
303
+ }
215
304
  return { ok: true, value: parsed.value.file };
216
305
  }
217
306
 
218
307
  // HF models: check cache
219
- const cached = await this.getCachedPath(uri);
220
- if (cached) {
221
- return { ok: true, value: cached };
308
+ const cached = await this.getValidatedCachedPath(uri);
309
+ if (cached.ok) {
310
+ return { ok: true, value: cached.path };
311
+ }
312
+ if (cached.kind === "invalid") {
313
+ return { ok: false, error: cached.error };
222
314
  }
223
315
 
224
316
  return { ok: false, error: modelNotCachedError(uri, type) };
@@ -251,6 +343,10 @@ export class ModelCache {
251
343
  ),
252
344
  };
253
345
  }
346
+ const validation = await validateGgufFile(parsed.value.file, uri, "user");
347
+ if (!validation.ok) {
348
+ return validation;
349
+ }
254
350
  return { ok: true, value: parsed.value.file };
255
351
  }
256
352
 
@@ -305,6 +401,14 @@ export class ModelCache {
305
401
  : undefined,
306
402
  });
307
403
 
404
+ const validation = await validateGgufFile(resolvedPath, uri, "cache");
405
+ if (!validation.ok) {
406
+ await rm(resolvedPath, { force: true }).catch(() => {
407
+ // Ignore deletion errors
408
+ });
409
+ return validation;
410
+ }
411
+
308
412
  // Update manifest
309
413
  await this.addToManifest(uri, type, resolvedPath);
310
414
 
@@ -330,9 +434,12 @@ export class ModelCache {
330
434
  onProgress?: ProgressCallback
331
435
  ): Promise<LlmResult<string>> {
332
436
  // Fast path: check if already cached
333
- const cached = await this.getCachedPath(uri);
334
- if (cached) {
335
- return { ok: true, value: cached };
437
+ const cached = await this.getValidatedCachedPath(uri);
438
+ if (cached.ok) {
439
+ return { ok: true, value: cached.path };
440
+ }
441
+ if (cached.kind === "invalid") {
442
+ return { ok: false, error: cached.error };
336
443
  }
337
444
 
338
445
  // Parse and validate URI
@@ -353,6 +460,10 @@ export class ModelCache {
353
460
  ),
354
461
  };
355
462
  }
463
+ const validation = await validateGgufFile(parsed.value.file, uri, "user");
464
+ if (!validation.ok) {
465
+ return validation;
466
+ }
356
467
  return { ok: true, value: parsed.value.file };
357
468
  }
358
469
 
@@ -376,9 +487,12 @@ export class ModelCache {
376
487
 
377
488
  const result = await withLock(lockPath, async () => {
378
489
  // Double-check: another process may have downloaded while we waited
379
- const cachedNow = await this.getCachedPath(uri);
380
- if (cachedNow) {
381
- return { ok: true as const, value: cachedNow };
490
+ const cachedNow = await this.getValidatedCachedPath(uri);
491
+ if (cachedNow.ok) {
492
+ return { ok: true as const, value: cachedNow.path };
493
+ }
494
+ if (cachedNow.kind === "invalid") {
495
+ return { ok: false as const, error: cachedNow.error };
382
496
  }
383
497
 
384
498
  // Download with progress
@@ -412,26 +526,12 @@ export class ModelCache {
412
526
  // Handle file: URIs directly (check filesystem, not manifest)
413
527
  const parsed = parseModelUri(uri);
414
528
  if (parsed.ok && parsed.value.scheme === "file") {
415
- const exists = await this.fileExists(parsed.value.file);
416
- return exists ? parsed.value.file : null;
417
- }
418
-
419
- // HF URIs: check manifest
420
- const manifest = await this.loadManifest();
421
- const entry = manifest.models.find((m) => m.uri === uri);
422
- if (!entry) {
423
- return null;
529
+ const validation = await validateGgufFile(parsed.value.file, uri, "user");
530
+ return validation.ok ? parsed.value.file : null;
424
531
  }
425
532
 
426
- // Verify file still exists
427
- const exists = await this.fileExists(entry.path);
428
- if (!exists) {
429
- // Remove stale entry
430
- await this.removeFromManifest(uri);
431
- return null;
432
- }
433
-
434
- return entry.path;
533
+ const cached = await this.getValidatedCachedPath(uri);
534
+ return cached.ok ? cached.path : null;
435
535
  }
436
536
 
437
537
  /**
@@ -493,6 +593,33 @@ export class ModelCache {
493
593
  }
494
594
  }
495
595
 
596
+ private async getValidatedCachedPath(
597
+ uri: string
598
+ ): Promise<ValidatedCachedPath> {
599
+ const manifest = await this.loadManifest();
600
+ const entry = manifest.models.find((m) => m.uri === uri);
601
+ if (!entry) {
602
+ return { ok: false, kind: "missing" };
603
+ }
604
+
605
+ const exists = await this.fileExists(entry.path);
606
+ if (!exists) {
607
+ await this.removeFromManifest(uri);
608
+ return { ok: false, kind: "missing" };
609
+ }
610
+
611
+ const validation = await validateGgufFile(entry.path, uri, "cache");
612
+ if (validation.ok) {
613
+ return { ok: true, path: entry.path };
614
+ }
615
+
616
+ await rm(entry.path, { force: true }).catch(() => {
617
+ // Ignore deletion errors
618
+ });
619
+ await this.removeFromManifest(uri);
620
+ return { ok: false, kind: "invalid", error: validation.error };
621
+ }
622
+
496
623
  private async loadManifest(): Promise<Manifest> {
497
624
  if (this.manifest) {
498
625
  return this.manifest;
@@ -588,6 +715,7 @@ export class ModelCache {
588
715
  ): Promise<void> {
589
716
  // Get file size outside lock (IO-bound, doesn't need protection)
590
717
  let size = 0;
718
+ let checksum = "";
591
719
  try {
592
720
  const stats = await stat(modelPath);
593
721
  size = stats.size;
@@ -595,6 +723,12 @@ export class ModelCache {
595
723
  // Ignore
596
724
  }
597
725
 
726
+ try {
727
+ checksum = await computeSha256(modelPath);
728
+ } catch {
729
+ // Best-effort metadata only
730
+ }
731
+
598
732
  await this.updateManifest((manifest) => {
599
733
  // Remove existing entry if present
600
734
  manifest.models = manifest.models.filter((m) => m.uri !== uri);
@@ -605,7 +739,7 @@ export class ModelCache {
605
739
  type,
606
740
  path: modelPath,
607
741
  size,
608
- checksum: "", // TODO: compute SHA-256 for large files
742
+ checksum,
609
743
  cachedAt: new Date().toISOString(),
610
744
  });
611
745
  });
package/src/llm/errors.ts CHANGED
@@ -15,6 +15,8 @@ export type LlmErrorCode =
15
15
  | "MODEL_DOWNLOAD_FAILED"
16
16
  | "MODEL_LOAD_FAILED"
17
17
  | "MODEL_CORRUPTED"
18
+ | "INVALID_MODEL_FILE"
19
+ | "MODEL_DOWNLOAD_INTERCEPTED"
18
20
  | "INFERENCE_FAILED"
19
21
  | "TIMEOUT"
20
22
  | "OUT_OF_MEMORY"
@@ -160,6 +162,36 @@ export function corruptedError(uri: string, cause?: unknown): LlmError {
160
162
  });
161
163
  }
162
164
 
165
+ export function invalidModelFileError(
166
+ uri: string,
167
+ path: string,
168
+ details?: string
169
+ ): LlmError {
170
+ return llmError("INVALID_MODEL_FILE", {
171
+ message: `Model file is not a GGUF file: ${path}${details ? ` (${details})` : ""}`,
172
+ modelUri: uri,
173
+ retryable: false,
174
+ suggestion: "Remove the file or run: gno models pull --force",
175
+ });
176
+ }
177
+
178
+ export function modelDownloadInterceptedError(
179
+ uri: string,
180
+ path: string,
181
+ owner: "cache" | "user"
182
+ ): LlmError {
183
+ return llmError("MODEL_DOWNLOAD_INTERCEPTED", {
184
+ message:
185
+ `Model file looks like HTML instead of GGUF: ${path}. ` +
186
+ `A proxy, firewall, or captive portal likely intercepted the download.` +
187
+ (owner === "cache" ? " The cached file was removed." : ""),
188
+ modelUri: uri,
189
+ retryable: false,
190
+ suggestion:
191
+ "Check network access to Hugging Face, then run: gno models pull --force",
192
+ });
193
+ }
194
+
163
195
  export function inferenceFailedError(uri: string, cause?: unknown): LlmError {
164
196
  return llmError("INFERENCE_FAILED", {
165
197
  message: `Inference failed for model: ${uri}`,
package/src/llm/lockfile.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  * @module src/llm/lockfile
6
6
  */
7
7
 
8
- import { open, rename, rm, stat } from "node:fs/promises";
8
+ import { open, readFile, rename, rm, stat } from "node:fs/promises";
9
9
  // node:os: hostname and user for lock ownership
10
10
  import { hostname, userInfo } from "node:os";
11
11
  // node:path: join for manifest lock path
@@ -68,6 +68,48 @@ function sleep(ms: number): Promise<void> {
68
68
  return new Promise((resolve) => setTimeout(resolve, ms));
69
69
  }
70
70
 
71
+ async function readLockMeta(lockPath: string): Promise<LockMeta | null> {
72
+ try {
73
+ const parsed = JSON.parse(
74
+ await readFile(lockPath, "utf-8")
75
+ ) as Partial<LockMeta>;
76
+ if (
77
+ typeof parsed.pid !== "number" ||
78
+ typeof parsed.hostname !== "string" ||
79
+ typeof parsed.user !== "string" ||
80
+ typeof parsed.createdAt !== "string"
81
+ ) {
82
+ return null;
83
+ }
84
+ return {
85
+ pid: parsed.pid,
86
+ hostname: parsed.hostname,
87
+ user: parsed.user,
88
+ createdAt: parsed.createdAt,
89
+ };
90
+ } catch {
91
+ return null;
92
+ }
93
+ }
94
+
95
+ function isProcessAlive(pid: number): boolean {
96
+ if (!Number.isInteger(pid) || pid <= 0) {
97
+ return false;
98
+ }
99
+
100
+ try {
101
+ process.kill(pid, 0);
102
+ return true;
103
+ } catch (error) {
104
+ return (
105
+ error !== null &&
106
+ typeof error === "object" &&
107
+ "code" in error &&
108
+ error.code === "EPERM"
109
+ );
110
+ }
111
+ }
112
+
71
113
  /**
72
114
  * Check if a lockfile is stale (older than TTL or owner process dead).
73
115
  */
@@ -81,9 +123,12 @@ async function isLockStale(lockPath: string, ttlMs: number): Promise<boolean> {
81
123
  return true;
82
124
  }
83
125
 
84
- // TODO: Could also check if PID is alive on same hostname
85
- // For now, just use TTL-based staleness
86
- return false;
126
+ const meta = await readLockMeta(lockPath);
127
+ if (!meta || meta.hostname !== hostname()) {
128
+ return false;
129
+ }
130
+
131
+ return !isProcessAlive(meta.pid);
87
132
  } catch {
88
133
  // Lock doesn't exist or can't be read
89
134
  return true;
package/src/llm/nodeLlamaCpp/embedding.ts CHANGED
@@ -31,6 +31,12 @@ interface EmbeddingWorker {
31
31
  pending: number;
32
32
  }
33
33
 
34
+ interface TokenizingModel {
35
+ trainContextSize?: number;
36
+ tokenize(text: string): readonly number[];
37
+ detokenize(tokens: readonly number[]): string;
38
+ }
39
+
34
40
  // ─────────────────────────────────────────────────────────────────────────────
35
41
  // Constants
36
42
  // ─────────────────────────────────────────────────────────────────────────────
@@ -51,6 +57,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
51
57
  null;
52
58
  private lifecycleVersion = 0;
53
59
  private dims: number | null = null;
60
+ private llamaModel: TokenizingModel | null = null;
61
+ private warnedSingleTruncation = false;
62
+ private warnedBatchTruncation = false;
54
63
  private readonly manager: ModelManager;
55
64
  readonly modelUri: string;
56
65
  private readonly modelPath: string;
@@ -76,8 +85,12 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
76
85
  }
77
86
 
78
87
  try {
88
+ const prepared = this.truncateForEmbedding(text, "single");
89
+ if (!prepared.ok) {
90
+ return { ok: false, error: prepared.error };
91
+ }
79
92
  const embedding = await this.runOnWorker((worker) =>
80
- worker.context.getEmbeddingFor(text)
93
+ worker.context.getEmbeddingFor(prepared.value.text)
81
94
  );
82
95
  const vector = Array.from(embedding.vector) as number[];
83
96
 
@@ -103,6 +116,15 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
103
116
  }
104
117
 
105
118
  try {
119
+ const preparedTexts: string[] = [];
120
+ for (const text of texts) {
121
+ const prepared = this.truncateForEmbedding(text, "batch");
122
+ if (!prepared.ok) {
123
+ return { ok: false, error: prepared.error };
124
+ }
125
+ preparedTexts.push(prepared.value.text);
126
+ }
127
+
106
128
  const allResults = Array.from(
107
129
  { length: texts.length },
108
130
  () => [] as number[]
@@ -114,14 +136,14 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
114
136
  while (true) {
115
137
  const index = nextIndex;
116
138
  nextIndex += 1;
117
- if (index >= texts.length) {
139
+ if (index >= preparedTexts.length) {
118
140
  return;
119
141
  }
120
142
 
121
143
  const embedding = await this.runOnSpecificWorker(
122
144
  worker,
123
145
  (current) =>
124
- current.context.getEmbeddingFor(texts[index] as string)
146
+ current.context.getEmbeddingFor(preparedTexts[index] as string)
125
147
  );
126
148
  allResults[index] = Array.from(embedding.vector) as number[];
127
149
  }
@@ -263,6 +285,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
263
285
 
264
286
  try {
265
287
  const llamaModel = model.value.model as LlamaModel;
288
+ this.llamaModel = llamaModel as TokenizingModel;
266
289
  const llama = await this.manager.getLlama();
267
290
  const lifecycleVersion = this.lifecycleVersion;
268
291
  const targetPoolSize = this.resolveTargetPoolSize(llama);
@@ -321,4 +344,47 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
321
344
  return { ok: false, error: inferenceFailedError(this.modelUri, e) };
322
345
  }
323
346
  }
347
+
348
+ private truncateForEmbedding(
349
+ text: string,
350
+ mode: "single" | "batch"
351
+ ): LlmResult<{ text: string }> {
352
+ const model = this.llamaModel;
353
+ const rawLimit =
354
+ typeof model?.trainContextSize === "number" &&
355
+ Number.isFinite(model.trainContextSize) &&
356
+ model.trainContextSize > 0
357
+ ? Math.floor(model.trainContextSize)
358
+ : undefined;
359
+ if (!model || rawLimit === undefined) {
360
+ return { ok: true, value: { text } };
361
+ }
362
+
363
+ const limit = Math.max(1, rawLimit - 4);
364
+ try {
365
+ const tokens = model.tokenize(text);
366
+ if (tokens.length <= limit) {
367
+ return { ok: true, value: { text } };
368
+ }
369
+
370
+ const truncatedText = model.detokenize(tokens.slice(0, limit));
371
+ const shouldWarn =
372
+ mode === "single"
373
+ ? !this.warnedSingleTruncation
374
+ : !this.warnedBatchTruncation;
375
+ if (shouldWarn) {
376
+ if (mode === "single") {
377
+ this.warnedSingleTruncation = true;
378
+ } else {
379
+ this.warnedBatchTruncation = true;
380
+ }
381
+ console.warn(
382
+ `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
383
+ );
384
+ }
385
+ return { ok: true, value: { text: truncatedText } };
386
+ } catch (error) {
387
+ return { ok: false, error: inferenceFailedError(this.modelUri, error) };
388
+ }
389
+ }
324
390
  }