@gmickel/gno 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/assets/skill/SKILL.md +3 -0
- package/assets/skill/cli-reference.md +5 -0
- package/assets/skill/examples.md +2 -0
- package/package.json +1 -1
- package/src/app/constants.ts +64 -8
- package/src/cli/commands/embed.ts +6 -2
- package/src/cli/commands/get.ts +15 -5
- package/src/cli/commands/index-cmd.ts +4 -0
- package/src/cli/commands/multi-get.ts +62 -1
- package/src/cli/commands/query.ts +8 -2
- package/src/cli/commands/search.ts +8 -2
- package/src/cli/commands/shared.ts +18 -1
- package/src/cli/commands/status.ts +4 -2
- package/src/cli/commands/update.ts +6 -1
- package/src/cli/commands/vsearch.ts +8 -2
- package/src/cli/format/search-results.ts +1 -1
- package/src/cli/program.ts +22 -1
- package/src/ingestion/chunker.ts +6 -0
- package/src/llm/cache.ts +162 -28
- package/src/llm/errors.ts +32 -0
- package/src/llm/lockfile.ts +49 -4
- package/src/llm/nodeLlamaCpp/embedding.ts +69 -3
- package/src/llm/nodeLlamaCpp/lifecycle.ts +60 -4
- package/src/mcp/resources/index.ts +13 -4
- package/src/mcp/server.ts +2 -0
- package/src/mcp/tools/get.ts +7 -2
- package/src/mcp/tools/multi-get.ts +2 -2
- package/src/mcp/tools/query.ts +2 -1
- package/src/mcp/tools/search.ts +2 -1
- package/src/mcp/tools/vsearch.ts +2 -1
- package/src/pipeline/explain.ts +12 -2
- package/src/pipeline/hybrid.ts +9 -1
- package/src/pipeline/search.ts +16 -7
- package/src/pipeline/types.ts +2 -0
- package/src/pipeline/vsearch.ts +29 -15
- package/src/publish/export-service.ts +27 -2
- package/src/sdk/client.ts +83 -28
- package/src/store/content-batch.ts +38 -0
- package/src/store/sqlite/adapter.ts +38 -2
- package/src/store/types.ts +8 -0
- package/src/store/vector/sqlite-vec.ts +10 -4
- package/src/store/vector/types.ts +2 -0
package/src/cli/program.ts
CHANGED
|
@@ -311,6 +311,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
311
311
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
312
312
|
const format = getFormat(cmdOpts);
|
|
313
313
|
assertFormatSupported(CMD.search, format);
|
|
314
|
+
const globals = getGlobals();
|
|
314
315
|
|
|
315
316
|
// Validate empty query
|
|
316
317
|
if (!queryText.trim()) {
|
|
@@ -348,6 +349,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
348
349
|
|
|
349
350
|
const { search, formatSearch } = await import("./commands/search");
|
|
350
351
|
const result = await search(queryText, {
|
|
352
|
+
configPath: globals.config,
|
|
353
|
+
indexName: globals.index,
|
|
351
354
|
limit,
|
|
352
355
|
minScore,
|
|
353
356
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -425,6 +428,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
425
428
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
426
429
|
const format = getFormat(cmdOpts);
|
|
427
430
|
assertFormatSupported(CMD.vsearch, format);
|
|
431
|
+
const globals = getGlobals();
|
|
428
432
|
|
|
429
433
|
// Validate empty query
|
|
430
434
|
if (!queryText.trim()) {
|
|
@@ -462,6 +466,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
462
466
|
|
|
463
467
|
const { vsearch, formatVsearch } = await import("./commands/vsearch");
|
|
464
468
|
const result = await vsearch(queryText, {
|
|
469
|
+
configPath: globals.config,
|
|
470
|
+
indexName: globals.index,
|
|
465
471
|
limit,
|
|
466
472
|
minScore,
|
|
467
473
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -631,6 +637,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
631
637
|
|
|
632
638
|
const { query, formatQuery } = await import("./commands/query");
|
|
633
639
|
const result = await query(queryText, {
|
|
640
|
+
configPath: globals.config,
|
|
641
|
+
indexName: globals.index,
|
|
634
642
|
limit,
|
|
635
643
|
minScore,
|
|
636
644
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -885,6 +893,8 @@ function wireOnboardingCommands(program: Command): void {
|
|
|
885
893
|
const globals = getGlobals();
|
|
886
894
|
const { index, formatIndex } = await import("./commands/index-cmd");
|
|
887
895
|
const opts = {
|
|
896
|
+
configPath: globals.config,
|
|
897
|
+
indexName: globals.index,
|
|
888
898
|
collection,
|
|
889
899
|
noEmbed: cmdOpts.embed === false,
|
|
890
900
|
gitPull: Boolean(cmdOpts.gitPull),
|
|
@@ -911,7 +921,12 @@ function wireOnboardingCommands(program: Command): void {
|
|
|
911
921
|
assertFormatSupported(CMD.status, format);
|
|
912
922
|
|
|
913
923
|
const { status, formatStatus } = await import("./commands/status");
|
|
914
|
-
const
|
|
924
|
+
const globals = getGlobals();
|
|
925
|
+
const result = await status({
|
|
926
|
+
configPath: globals.config,
|
|
927
|
+
indexName: globals.index,
|
|
928
|
+
json: format === "json",
|
|
929
|
+
});
|
|
915
930
|
|
|
916
931
|
if (!result.success) {
|
|
917
932
|
throw new CliError("RUNTIME", result.error ?? "Status failed");
|
|
@@ -969,6 +984,7 @@ function wireRetrievalCommands(program: Command): void {
|
|
|
969
984
|
const { get, formatGet } = await import("./commands/get");
|
|
970
985
|
const result = await get(ref, {
|
|
971
986
|
configPath: globals.config,
|
|
987
|
+
indexName: globals.index,
|
|
972
988
|
from: cmdOpts.from as number | undefined,
|
|
973
989
|
limit: cmdOpts.limit as number | undefined,
|
|
974
990
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
@@ -1014,6 +1030,7 @@ function wireRetrievalCommands(program: Command): void {
|
|
|
1014
1030
|
const { multiGet, formatMultiGet } = await import("./commands/multi-get");
|
|
1015
1031
|
const result = await multiGet(refs, {
|
|
1016
1032
|
configPath: globals.config,
|
|
1033
|
+
indexName: globals.index,
|
|
1017
1034
|
maxBytes: cmdOpts.maxBytes as number | undefined,
|
|
1018
1035
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
1019
1036
|
json: format === "json",
|
|
@@ -1515,6 +1532,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1515
1532
|
const globals = getGlobals();
|
|
1516
1533
|
const { update, formatUpdate } = await import("./commands/update");
|
|
1517
1534
|
const opts = {
|
|
1535
|
+
configPath: globals.config,
|
|
1536
|
+
indexName: globals.index,
|
|
1518
1537
|
gitPull: Boolean(cmdOpts.gitPull),
|
|
1519
1538
|
verbose: globals.verbose,
|
|
1520
1539
|
};
|
|
@@ -1548,6 +1567,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1548
1567
|
const collection =
|
|
1549
1568
|
collectionArg ?? (cmdOpts.collection as string | undefined);
|
|
1550
1569
|
const opts = {
|
|
1570
|
+
configPath: globals.config,
|
|
1571
|
+
indexName: globals.index,
|
|
1551
1572
|
collection,
|
|
1552
1573
|
model: cmdOpts.model as string | undefined,
|
|
1553
1574
|
batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),
|
package/src/ingestion/chunker.ts
CHANGED
|
@@ -372,6 +372,12 @@ export class MarkdownChunker implements ChunkerPort {
|
|
|
372
372
|
// Find a good prose break point
|
|
373
373
|
findBreakPoint(markdown, targetEnd, windowSize);
|
|
374
374
|
}
|
|
375
|
+
if (endPos <= pos) {
|
|
376
|
+
endPos = Math.min(markdown.length, pos + maxChars);
|
|
377
|
+
}
|
|
378
|
+
if (endPos - pos > maxChars + windowSize) {
|
|
379
|
+
endPos = Math.min(markdown.length, pos + maxChars);
|
|
380
|
+
}
|
|
375
381
|
|
|
376
382
|
// Extract chunk text - preserve exactly (no trim!)
|
|
377
383
|
// This maintains accurate pos/line mappings and Markdown semantics
|
package/src/llm/cache.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { isAbsolute, join } from "node:path";
|
|
|
13
13
|
// node:url: fileURLToPath for proper file:// URL handling
|
|
14
14
|
import { fileURLToPath } from "node:url";
|
|
15
15
|
|
|
16
|
+
import type { LlmError } from "./errors";
|
|
16
17
|
import type { DownloadPolicy } from "./policy";
|
|
17
18
|
import type {
|
|
18
19
|
DownloadProgress,
|
|
@@ -26,8 +27,10 @@ import { getModelsCachePath } from "../app/constants";
|
|
|
26
27
|
import {
|
|
27
28
|
autoDownloadDisabledError,
|
|
28
29
|
downloadFailedError,
|
|
30
|
+
invalidModelFileError,
|
|
29
31
|
invalidUriError,
|
|
30
32
|
lockFailedError,
|
|
33
|
+
modelDownloadInterceptedError,
|
|
31
34
|
modelNotCachedError,
|
|
32
35
|
modelNotFoundError,
|
|
33
36
|
} from "./errors";
|
|
@@ -40,6 +43,88 @@ import { getLockPath, getManifestLockPath, withLock } from "./lockfile";
|
|
|
40
43
|
// Regex patterns for URI parsing (top-level for performance)
|
|
41
44
|
const HF_QUANT_PATTERN = /^([^/]+)\/([^/:]+):(\w+)$/;
|
|
42
45
|
const HF_PATH_PATTERN = /^([^/]+)\/([^/]+)\/(.+\.gguf)$/;
|
|
46
|
+
const GGUF_MAGIC = new Uint8Array([0x47, 0x47, 0x55, 0x46]);
|
|
47
|
+
|
|
48
|
+
type ModelFileOwner = "cache" | "user";
|
|
49
|
+
|
|
50
|
+
type ValidatedCachedPath =
|
|
51
|
+
| { ok: true; path: string }
|
|
52
|
+
| { ok: false; kind: "missing" }
|
|
53
|
+
| { ok: false; kind: "invalid"; error: LlmError };
|
|
54
|
+
|
|
55
|
+
/**
 * Heuristically decides whether a byte buffer holds HTML instead of model data.
 *
 * The bytes are decoded leniently as UTF-8 (malformed sequences do not throw),
 * lower-cased, and scanned for common HTML markers. A buffer that mentions
 * "huggingface" together with any "<" character is also treated as HTML.
 *
 * @param bytes - Bytes to inspect.
 * @returns `true` when any of the HTML markers is present.
 */
function looksLikeHtml(bytes: Uint8Array): boolean {
  const decoder = new TextDecoder("utf-8", { fatal: false });
  const lowered = decoder.decode(bytes).toLowerCase();

  const htmlMarkers = ["<!doctype", "<html", "<head", "<body"];
  if (htmlMarkers.some((marker) => lowered.includes(marker))) {
    return true;
  }

  // Fallback: any markup at all on a page that names Hugging Face.
  return lowered.includes("huggingface") && lowered.includes("<");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Reports whether a buffer begins with the GGUF magic bytes.
 *
 * Every position of `GGUF_MAGIC` is compared with the same position in
 * `bytes`; a buffer shorter than the magic therefore fails the comparison.
 *
 * @param bytes - Bytes to inspect.
 * @returns `true` when all magic bytes match.
 */
function hasGgufMagic(bytes: Uint8Array): boolean {
  for (let offset = 0; offset < GGUF_MAGIC.length; offset += 1) {
    if (bytes[offset] !== GGUF_MAGIC[offset]) {
      return false;
    }
  }
  return true;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Checks that the file at `path` starts with the GGUF magic header.
 *
 * Only the first 512 bytes are read. Failures are reported through the
 * result value rather than by throwing:
 * - missing file: `modelNotFoundError`
 * - content that looks like HTML: `modelDownloadInterceptedError`
 * - anything else (including an empty file): `invalidModelFileError`
 *
 * @param path - Filesystem path of the model file.
 * @param uri - Model URI, attached to any error that is produced.
 * @param owner - Passed through to the "intercepted download" error.
 * @returns `{ ok: true }` for a GGUF file, otherwise `{ ok: false, error }`.
 */
export async function validateGgufFile(
  path: string,
  uri: string,
  owner: ModelFileOwner
): Promise<LlmResult<void>> {
  const handle = Bun.file(path);
  if (!(await handle.exists())) {
    return {
      ok: false,
      error: modelNotFoundError(uri, `File not found: ${path}`),
    };
  }

  // The magic header and the HTML heuristic both work on the leading bytes.
  const head = new Uint8Array(await handle.slice(0, 512).arrayBuffer());

  if (hasGgufMagic(head)) {
    return { ok: true, value: undefined };
  }

  // Not GGUF: distinguish an HTML page from any other kind of bad file.
  const error = looksLikeHtml(head)
    ? modelDownloadInterceptedError(uri, path, owner)
    : invalidModelFileError(
        uri,
        path,
        head.length === 0 ? "empty file" : "missing GGUF magic header"
      );

  return { ok: false, error };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Computes the SHA-256 digest of a file by streaming it chunk by chunk.
 *
 * @param path - Filesystem path of the file to hash.
 * @returns The digest as a hex string.
 */
async function computeSha256(path: string): Promise<string> {
  const digest = new Bun.CryptoHasher("sha256");
  const source = Bun.file(path).stream().getReader();

  try {
    for (
      let chunk = await source.read();
      !chunk.done;
      chunk = await source.read()
    ) {
      if (chunk.value) {
        digest.update(chunk.value);
      }
    }
  } finally {
    // Release the reader whether or not reading succeeded.
    source.releaseLock();
  }

  return digest.digest("hex");
}
|
|
43
128
|
|
|
44
129
|
export type ParsedModelUri =
|
|
45
130
|
| {
|
|
@@ -212,13 +297,20 @@ export class ModelCache {
|
|
|
212
297
|
),
|
|
213
298
|
};
|
|
214
299
|
}
|
|
300
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
301
|
+
if (!validation.ok) {
|
|
302
|
+
return validation;
|
|
303
|
+
}
|
|
215
304
|
return { ok: true, value: parsed.value.file };
|
|
216
305
|
}
|
|
217
306
|
|
|
218
307
|
// HF models: check cache
|
|
219
|
-
const cached = await this.
|
|
220
|
-
if (cached) {
|
|
221
|
-
return { ok: true, value: cached };
|
|
308
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
309
|
+
if (cached.ok) {
|
|
310
|
+
return { ok: true, value: cached.path };
|
|
311
|
+
}
|
|
312
|
+
if (cached.kind === "invalid") {
|
|
313
|
+
return { ok: false, error: cached.error };
|
|
222
314
|
}
|
|
223
315
|
|
|
224
316
|
return { ok: false, error: modelNotCachedError(uri, type) };
|
|
@@ -251,6 +343,10 @@ export class ModelCache {
|
|
|
251
343
|
),
|
|
252
344
|
};
|
|
253
345
|
}
|
|
346
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
347
|
+
if (!validation.ok) {
|
|
348
|
+
return validation;
|
|
349
|
+
}
|
|
254
350
|
return { ok: true, value: parsed.value.file };
|
|
255
351
|
}
|
|
256
352
|
|
|
@@ -305,6 +401,14 @@ export class ModelCache {
|
|
|
305
401
|
: undefined,
|
|
306
402
|
});
|
|
307
403
|
|
|
404
|
+
const validation = await validateGgufFile(resolvedPath, uri, "cache");
|
|
405
|
+
if (!validation.ok) {
|
|
406
|
+
await rm(resolvedPath, { force: true }).catch(() => {
|
|
407
|
+
// Ignore deletion errors
|
|
408
|
+
});
|
|
409
|
+
return validation;
|
|
410
|
+
}
|
|
411
|
+
|
|
308
412
|
// Update manifest
|
|
309
413
|
await this.addToManifest(uri, type, resolvedPath);
|
|
310
414
|
|
|
@@ -330,9 +434,12 @@ export class ModelCache {
|
|
|
330
434
|
onProgress?: ProgressCallback
|
|
331
435
|
): Promise<LlmResult<string>> {
|
|
332
436
|
// Fast path: check if already cached
|
|
333
|
-
const cached = await this.
|
|
334
|
-
if (cached) {
|
|
335
|
-
return { ok: true, value: cached };
|
|
437
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
438
|
+
if (cached.ok) {
|
|
439
|
+
return { ok: true, value: cached.path };
|
|
440
|
+
}
|
|
441
|
+
if (cached.kind === "invalid") {
|
|
442
|
+
return { ok: false, error: cached.error };
|
|
336
443
|
}
|
|
337
444
|
|
|
338
445
|
// Parse and validate URI
|
|
@@ -353,6 +460,10 @@ export class ModelCache {
|
|
|
353
460
|
),
|
|
354
461
|
};
|
|
355
462
|
}
|
|
463
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
464
|
+
if (!validation.ok) {
|
|
465
|
+
return validation;
|
|
466
|
+
}
|
|
356
467
|
return { ok: true, value: parsed.value.file };
|
|
357
468
|
}
|
|
358
469
|
|
|
@@ -376,9 +487,12 @@ export class ModelCache {
|
|
|
376
487
|
|
|
377
488
|
const result = await withLock(lockPath, async () => {
|
|
378
489
|
// Double-check: another process may have downloaded while we waited
|
|
379
|
-
const cachedNow = await this.
|
|
380
|
-
if (cachedNow) {
|
|
381
|
-
return { ok: true as const, value: cachedNow };
|
|
490
|
+
const cachedNow = await this.getValidatedCachedPath(uri);
|
|
491
|
+
if (cachedNow.ok) {
|
|
492
|
+
return { ok: true as const, value: cachedNow.path };
|
|
493
|
+
}
|
|
494
|
+
if (cachedNow.kind === "invalid") {
|
|
495
|
+
return { ok: false as const, error: cachedNow.error };
|
|
382
496
|
}
|
|
383
497
|
|
|
384
498
|
// Download with progress
|
|
@@ -412,26 +526,12 @@ export class ModelCache {
|
|
|
412
526
|
// Handle file: URIs directly (check filesystem, not manifest)
|
|
413
527
|
const parsed = parseModelUri(uri);
|
|
414
528
|
if (parsed.ok && parsed.value.scheme === "file") {
|
|
415
|
-
const
|
|
416
|
-
return
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
// HF URIs: check manifest
|
|
420
|
-
const manifest = await this.loadManifest();
|
|
421
|
-
const entry = manifest.models.find((m) => m.uri === uri);
|
|
422
|
-
if (!entry) {
|
|
423
|
-
return null;
|
|
529
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
530
|
+
return validation.ok ? parsed.value.file : null;
|
|
424
531
|
}
|
|
425
532
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
if (!exists) {
|
|
429
|
-
// Remove stale entry
|
|
430
|
-
await this.removeFromManifest(uri);
|
|
431
|
-
return null;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
return entry.path;
|
|
533
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
534
|
+
return cached.ok ? cached.path : null;
|
|
435
535
|
}
|
|
436
536
|
|
|
437
537
|
/**
|
|
@@ -493,6 +593,33 @@ export class ModelCache {
|
|
|
493
593
|
}
|
|
494
594
|
}
|
|
495
595
|
|
|
596
|
+
/**
 * Resolves the cached file for `uri` from the manifest and validates it.
 *
 * Outcomes:
 * - no manifest entry: `{ ok: false, kind: "missing" }`
 * - entry whose file no longer exists: the entry is removed from the
 *   manifest and `{ ok: false, kind: "missing" }` is returned
 * - file fails GGUF validation: the file is deleted (deletion errors are
 *   ignored), the entry is removed, and `{ ok: false, kind: "invalid" }`
 *   is returned with the validation error
 * - otherwise: `{ ok: true, path }`
 */
private async getValidatedCachedPath(
  uri: string
): Promise<ValidatedCachedPath> {
  const { models } = await this.loadManifest();
  const record = models.find((candidate) => candidate.uri === uri);
  if (!record) {
    return { ok: false, kind: "missing" };
  }

  if (!(await this.fileExists(record.path))) {
    // Stale manifest entry: the file is gone.
    await this.removeFromManifest(uri);
    return { ok: false, kind: "missing" };
  }

  const check = await validateGgufFile(record.path, uri, "cache");
  if (!check.ok) {
    await rm(record.path, { force: true }).catch(() => {
      // Ignore deletion errors
    });
    await this.removeFromManifest(uri);
    return { ok: false, kind: "invalid", error: check.error };
  }

  return { ok: true, path: record.path };
}
|
|
622
|
+
|
|
496
623
|
private async loadManifest(): Promise<Manifest> {
|
|
497
624
|
if (this.manifest) {
|
|
498
625
|
return this.manifest;
|
|
@@ -588,6 +715,7 @@ export class ModelCache {
|
|
|
588
715
|
): Promise<void> {
|
|
589
716
|
// Get file size outside lock (IO-bound, doesn't need protection)
|
|
590
717
|
let size = 0;
|
|
718
|
+
let checksum = "";
|
|
591
719
|
try {
|
|
592
720
|
const stats = await stat(modelPath);
|
|
593
721
|
size = stats.size;
|
|
@@ -595,6 +723,12 @@ export class ModelCache {
|
|
|
595
723
|
// Ignore
|
|
596
724
|
}
|
|
597
725
|
|
|
726
|
+
try {
|
|
727
|
+
checksum = await computeSha256(modelPath);
|
|
728
|
+
} catch {
|
|
729
|
+
// Best-effort metadata only
|
|
730
|
+
}
|
|
731
|
+
|
|
598
732
|
await this.updateManifest((manifest) => {
|
|
599
733
|
// Remove existing entry if present
|
|
600
734
|
manifest.models = manifest.models.filter((m) => m.uri !== uri);
|
|
@@ -605,7 +739,7 @@ export class ModelCache {
|
|
|
605
739
|
type,
|
|
606
740
|
path: modelPath,
|
|
607
741
|
size,
|
|
608
|
-
checksum
|
|
742
|
+
checksum,
|
|
609
743
|
cachedAt: new Date().toISOString(),
|
|
610
744
|
});
|
|
611
745
|
});
|
package/src/llm/errors.ts
CHANGED
|
@@ -15,6 +15,8 @@ export type LlmErrorCode =
|
|
|
15
15
|
| "MODEL_DOWNLOAD_FAILED"
|
|
16
16
|
| "MODEL_LOAD_FAILED"
|
|
17
17
|
| "MODEL_CORRUPTED"
|
|
18
|
+
| "INVALID_MODEL_FILE"
|
|
19
|
+
| "MODEL_DOWNLOAD_INTERCEPTED"
|
|
18
20
|
| "INFERENCE_FAILED"
|
|
19
21
|
| "TIMEOUT"
|
|
20
22
|
| "OUT_OF_MEMORY"
|
|
@@ -160,6 +162,36 @@ export function corruptedError(uri: string, cause?: unknown): LlmError {
|
|
|
160
162
|
});
|
|
161
163
|
}
|
|
162
164
|
|
|
165
|
+
/**
 * Builds the `INVALID_MODEL_FILE` error for a file that is not GGUF.
 *
 * @param uri - Model URI recorded on the error.
 * @param path - Path of the offending file, included in the message.
 * @param details - Optional reason appended to the message in parentheses.
 * @returns A non-retryable `LlmError`.
 */
export function invalidModelFileError(
  uri: string,
  path: string,
  details?: string
): LlmError {
  const reason = details ? ` (${details})` : "";
  const message = `Model file is not a GGUF file: ${path}${reason}`;

  return llmError("INVALID_MODEL_FILE", {
    message,
    modelUri: uri,
    retryable: false,
    suggestion: "Remove the file or run: gno models pull --force",
  });
}
|
|
177
|
+
|
|
178
|
+
/**
 * Builds the `MODEL_DOWNLOAD_INTERCEPTED` error for a model file whose
 * content looks like HTML.
 *
 * @param uri - Model URI recorded on the error.
 * @param path - Path of the offending file, included in the message.
 * @param owner - When `"cache"`, the message also states that the cached
 *   file was removed.
 * @returns A non-retryable `LlmError`.
 */
export function modelDownloadInterceptedError(
  uri: string,
  path: string,
  owner: "cache" | "user"
): LlmError {
  const removalNote =
    owner === "cache" ? " The cached file was removed." : "";
  const message =
    `Model file looks like HTML instead of GGUF: ${path}. ` +
    `A proxy, firewall, or captive portal likely intercepted the download.` +
    removalNote;

  return llmError("MODEL_DOWNLOAD_INTERCEPTED", {
    message,
    modelUri: uri,
    retryable: false,
    suggestion:
      "Check network access to Hugging Face, then run: gno models pull --force",
  });
}
|
|
194
|
+
|
|
163
195
|
export function inferenceFailedError(uri: string, cause?: unknown): LlmError {
|
|
164
196
|
return llmError("INFERENCE_FAILED", {
|
|
165
197
|
message: `Inference failed for model: ${uri}`,
|
package/src/llm/lockfile.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* @module src/llm/lockfile
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { open, rename, rm, stat } from "node:fs/promises";
|
|
8
|
+
import { open, readFile, rename, rm, stat } from "node:fs/promises";
|
|
9
9
|
// node:os: hostname and user for lock ownership
|
|
10
10
|
import { hostname, userInfo } from "node:os";
|
|
11
11
|
// node:path: join for manifest lock path
|
|
@@ -68,6 +68,48 @@ function sleep(ms: number): Promise<void> {
|
|
|
68
68
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
/**
 * Reads and validates the JSON metadata stored in a lockfile.
 *
 * @param lockPath - Path of the lockfile.
 * @returns The `pid`, `hostname`, `user` and `createdAt` fields when all four
 *   have the expected primitive types; `null` when the file cannot be read,
 *   is not valid JSON, or any field is missing or of the wrong type.
 */
async function readLockMeta(lockPath: string): Promise<LockMeta | null> {
  try {
    const raw = await readFile(lockPath, "utf-8");
    // Destructuring stays inside the try block so a JSON `null` payload is
    // reported as `null` instead of throwing.
    const {
      pid,
      hostname: host,
      user,
      createdAt,
    } = JSON.parse(raw) as Partial<LockMeta>;

    if (
      typeof pid === "number" &&
      typeof host === "string" &&
      typeof user === "string" &&
      typeof createdAt === "string"
    ) {
      // Return only the known fields; extra keys in the file are dropped.
      return { pid, hostname: host, user, createdAt };
    }
    return null;
  } catch {
    return null;
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Probes whether a process with the given pid exists, using signal 0.
 *
 * @param pid - Process id to probe.
 * @returns `false` for non-integer or non-positive pids; `true` when the
 *   probe succeeds or fails with `EPERM`; `false` for any other failure.
 */
function isProcessAlive(pid: number): boolean {
  const isValidPid = Number.isInteger(pid) && pid > 0;
  if (!isValidPid) {
    return false;
  }

  try {
    process.kill(pid, 0);
  } catch (error) {
    if (error === null || typeof error !== "object") {
      return false;
    }
    // EPERM is treated as "alive": the probe was refused, not unanswered.
    return "code" in error && error.code === "EPERM";
  }

  return true;
}
|
|
112
|
+
|
|
71
113
|
/**
|
|
72
114
|
* Check if a lockfile is stale (older than TTL or owner process dead).
|
|
73
115
|
*/
|
|
@@ -81,9 +123,12 @@ async function isLockStale(lockPath: string, ttlMs: number): Promise<boolean> {
|
|
|
81
123
|
return true;
|
|
82
124
|
}
|
|
83
125
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
126
|
+
const meta = await readLockMeta(lockPath);
|
|
127
|
+
if (!meta || meta.hostname !== hostname()) {
|
|
128
|
+
return false;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
return !isProcessAlive(meta.pid);
|
|
87
132
|
} catch {
|
|
88
133
|
// Lock doesn't exist or can't be read
|
|
89
134
|
return true;
|
|
@@ -31,6 +31,12 @@ interface EmbeddingWorker {
|
|
|
31
31
|
pending: number;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
/**
 * Minimal structural view of a loaded model: just the members needed to
 * measure and truncate embedding input.
 */
interface TokenizingModel {
  /** Converts text into token ids. */
  tokenize(text: string): readonly number[];
  /** Converts token ids back into text. */
  detokenize(tokens: readonly number[]): string;
  /** Context size reported by the model, when available. */
  trainContextSize?: number;
}
|
|
39
|
+
|
|
34
40
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
35
41
|
// Constants
|
|
36
42
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -51,6 +57,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
51
57
|
null;
|
|
52
58
|
private lifecycleVersion = 0;
|
|
53
59
|
private dims: number | null = null;
|
|
60
|
+
private llamaModel: TokenizingModel | null = null;
|
|
61
|
+
private warnedSingleTruncation = false;
|
|
62
|
+
private warnedBatchTruncation = false;
|
|
54
63
|
private readonly manager: ModelManager;
|
|
55
64
|
readonly modelUri: string;
|
|
56
65
|
private readonly modelPath: string;
|
|
@@ -76,8 +85,12 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
76
85
|
}
|
|
77
86
|
|
|
78
87
|
try {
|
|
88
|
+
const prepared = this.truncateForEmbedding(text, "single");
|
|
89
|
+
if (!prepared.ok) {
|
|
90
|
+
return { ok: false, error: prepared.error };
|
|
91
|
+
}
|
|
79
92
|
const embedding = await this.runOnWorker((worker) =>
|
|
80
|
-
worker.context.getEmbeddingFor(text)
|
|
93
|
+
worker.context.getEmbeddingFor(prepared.value.text)
|
|
81
94
|
);
|
|
82
95
|
const vector = Array.from(embedding.vector) as number[];
|
|
83
96
|
|
|
@@ -103,6 +116,15 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
103
116
|
}
|
|
104
117
|
|
|
105
118
|
try {
|
|
119
|
+
const preparedTexts: string[] = [];
|
|
120
|
+
for (const text of texts) {
|
|
121
|
+
const prepared = this.truncateForEmbedding(text, "batch");
|
|
122
|
+
if (!prepared.ok) {
|
|
123
|
+
return { ok: false, error: prepared.error };
|
|
124
|
+
}
|
|
125
|
+
preparedTexts.push(prepared.value.text);
|
|
126
|
+
}
|
|
127
|
+
|
|
106
128
|
const allResults = Array.from(
|
|
107
129
|
{ length: texts.length },
|
|
108
130
|
() => [] as number[]
|
|
@@ -114,14 +136,14 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
114
136
|
while (true) {
|
|
115
137
|
const index = nextIndex;
|
|
116
138
|
nextIndex += 1;
|
|
117
|
-
if (index >=
|
|
139
|
+
if (index >= preparedTexts.length) {
|
|
118
140
|
return;
|
|
119
141
|
}
|
|
120
142
|
|
|
121
143
|
const embedding = await this.runOnSpecificWorker(
|
|
122
144
|
worker,
|
|
123
145
|
(current) =>
|
|
124
|
-
current.context.getEmbeddingFor(
|
|
146
|
+
current.context.getEmbeddingFor(preparedTexts[index] as string)
|
|
125
147
|
);
|
|
126
148
|
allResults[index] = Array.from(embedding.vector) as number[];
|
|
127
149
|
}
|
|
@@ -263,6 +285,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
263
285
|
|
|
264
286
|
try {
|
|
265
287
|
const llamaModel = model.value.model as LlamaModel;
|
|
288
|
+
this.llamaModel = llamaModel as TokenizingModel;
|
|
266
289
|
const llama = await this.manager.getLlama();
|
|
267
290
|
const lifecycleVersion = this.lifecycleVersion;
|
|
268
291
|
const targetPoolSize = this.resolveTargetPoolSize(llama);
|
|
@@ -321,4 +344,47 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
321
344
|
return { ok: false, error: inferenceFailedError(this.modelUri, e) };
|
|
322
345
|
}
|
|
323
346
|
}
|
|
347
|
+
|
|
348
|
+
/**
 * Truncates `text` so its token count fits the model's context size.
 *
 * The text is returned unchanged when no model is loaded, when the model
 * reports no usable `trainContextSize`, or when the text already fits.
 * Otherwise the leading tokens are detokenized back into text. A warning is
 * logged at most once per mode ("single" / "batch") per instance.
 *
 * @param text - Input text about to be embedded.
 * @param mode - Selects which once-only warning flag is consulted and set.
 * @returns The text to embed (possibly truncated), or an inference error if
 *   tokenizing or detokenizing throws.
 */
private truncateForEmbedding(
  text: string,
  mode: "single" | "batch"
): LlmResult<{ text: string }> {
  const passThrough: LlmResult<{ text: string }> = {
    ok: true,
    value: { text },
  };

  const model = this.llamaModel;
  if (!model) {
    return passThrough;
  }

  const contextSize = model.trainContextSize;
  if (
    typeof contextSize !== "number" ||
    !Number.isFinite(contextSize) ||
    contextSize <= 0
  ) {
    return passThrough;
  }

  // Keep 4 tokens of headroom below the context size, but never drop below 1.
  // NOTE(review): presumably room for tokens the embedding context adds — confirm.
  const limit = Math.max(1, Math.floor(contextSize) - 4);

  try {
    const tokens = model.tokenize(text);
    if (tokens.length <= limit) {
      return passThrough;
    }

    const truncatedText = model.detokenize(tokens.slice(0, limit));

    const alreadyWarned =
      mode === "single"
        ? this.warnedSingleTruncation
        : this.warnedBatchTruncation;
    if (!alreadyWarned) {
      if (mode === "single") {
        this.warnedSingleTruncation = true;
      } else {
        this.warnedBatchTruncation = true;
      }
      console.warn(
        `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
      );
    }

    return { ok: true, value: { text: truncatedText } };
  } catch (error) {
    return { ok: false, error: inferenceFailedError(this.modelUri, error) };
  }
}
|
|
324
390
|
}
|