@gmickel/gno 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/assets/skill/SKILL.md +3 -0
- package/assets/skill/cli-reference.md +5 -0
- package/assets/skill/examples.md +2 -0
- package/package.json +1 -1
- package/src/app/constants.ts +64 -8
- package/src/cli/commands/embed.ts +6 -2
- package/src/cli/commands/get.ts +15 -5
- package/src/cli/commands/index-cmd.ts +4 -0
- package/src/cli/commands/multi-get.ts +62 -1
- package/src/cli/commands/query.ts +8 -2
- package/src/cli/commands/search.ts +8 -2
- package/src/cli/commands/shared.ts +18 -1
- package/src/cli/commands/status.ts +4 -2
- package/src/cli/commands/update.ts +6 -1
- package/src/cli/commands/vsearch.ts +8 -2
- package/src/cli/format/search-results.ts +1 -1
- package/src/cli/program.ts +22 -1
- package/src/ingestion/chunker.ts +6 -0
- package/src/llm/cache.ts +133 -27
- package/src/llm/errors.ts +32 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +69 -3
- package/src/llm/nodeLlamaCpp/lifecycle.ts +60 -4
- package/src/mcp/resources/index.ts +13 -4
- package/src/mcp/server.ts +2 -0
- package/src/mcp/tools/get.ts +7 -2
- package/src/mcp/tools/multi-get.ts +2 -2
- package/src/mcp/tools/query.ts +2 -1
- package/src/mcp/tools/search.ts +2 -1
- package/src/mcp/tools/vsearch.ts +2 -1
- package/src/pipeline/explain.ts +12 -2
- package/src/pipeline/hybrid.ts +9 -1
- package/src/pipeline/search.ts +1 -0
- package/src/pipeline/types.ts +2 -0
- package/src/pipeline/vsearch.ts +14 -8
- package/src/sdk/client.ts +83 -28
- package/src/store/sqlite/adapter.ts +3 -2
- package/src/store/vector/sqlite-vec.ts +10 -4
- package/src/store/vector/types.ts +2 -0
package/src/cli/program.ts
CHANGED
|
@@ -311,6 +311,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
311
311
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
312
312
|
const format = getFormat(cmdOpts);
|
|
313
313
|
assertFormatSupported(CMD.search, format);
|
|
314
|
+
const globals = getGlobals();
|
|
314
315
|
|
|
315
316
|
// Validate empty query
|
|
316
317
|
if (!queryText.trim()) {
|
|
@@ -348,6 +349,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
348
349
|
|
|
349
350
|
const { search, formatSearch } = await import("./commands/search");
|
|
350
351
|
const result = await search(queryText, {
|
|
352
|
+
configPath: globals.config,
|
|
353
|
+
indexName: globals.index,
|
|
351
354
|
limit,
|
|
352
355
|
minScore,
|
|
353
356
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -425,6 +428,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
425
428
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
426
429
|
const format = getFormat(cmdOpts);
|
|
427
430
|
assertFormatSupported(CMD.vsearch, format);
|
|
431
|
+
const globals = getGlobals();
|
|
428
432
|
|
|
429
433
|
// Validate empty query
|
|
430
434
|
if (!queryText.trim()) {
|
|
@@ -462,6 +466,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
462
466
|
|
|
463
467
|
const { vsearch, formatVsearch } = await import("./commands/vsearch");
|
|
464
468
|
const result = await vsearch(queryText, {
|
|
469
|
+
configPath: globals.config,
|
|
470
|
+
indexName: globals.index,
|
|
465
471
|
limit,
|
|
466
472
|
minScore,
|
|
467
473
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -631,6 +637,8 @@ function wireSearchCommands(program: Command): void {
|
|
|
631
637
|
|
|
632
638
|
const { query, formatQuery } = await import("./commands/query");
|
|
633
639
|
const result = await query(queryText, {
|
|
640
|
+
configPath: globals.config,
|
|
641
|
+
indexName: globals.index,
|
|
634
642
|
limit,
|
|
635
643
|
minScore,
|
|
636
644
|
collection: cmdOpts.collection as string | undefined,
|
|
@@ -885,6 +893,8 @@ function wireOnboardingCommands(program: Command): void {
|
|
|
885
893
|
const globals = getGlobals();
|
|
886
894
|
const { index, formatIndex } = await import("./commands/index-cmd");
|
|
887
895
|
const opts = {
|
|
896
|
+
configPath: globals.config,
|
|
897
|
+
indexName: globals.index,
|
|
888
898
|
collection,
|
|
889
899
|
noEmbed: cmdOpts.embed === false,
|
|
890
900
|
gitPull: Boolean(cmdOpts.gitPull),
|
|
@@ -911,7 +921,12 @@ function wireOnboardingCommands(program: Command): void {
|
|
|
911
921
|
assertFormatSupported(CMD.status, format);
|
|
912
922
|
|
|
913
923
|
const { status, formatStatus } = await import("./commands/status");
|
|
914
|
-
const
|
|
924
|
+
const globals = getGlobals();
|
|
925
|
+
const result = await status({
|
|
926
|
+
configPath: globals.config,
|
|
927
|
+
indexName: globals.index,
|
|
928
|
+
json: format === "json",
|
|
929
|
+
});
|
|
915
930
|
|
|
916
931
|
if (!result.success) {
|
|
917
932
|
throw new CliError("RUNTIME", result.error ?? "Status failed");
|
|
@@ -969,6 +984,7 @@ function wireRetrievalCommands(program: Command): void {
|
|
|
969
984
|
const { get, formatGet } = await import("./commands/get");
|
|
970
985
|
const result = await get(ref, {
|
|
971
986
|
configPath: globals.config,
|
|
987
|
+
indexName: globals.index,
|
|
972
988
|
from: cmdOpts.from as number | undefined,
|
|
973
989
|
limit: cmdOpts.limit as number | undefined,
|
|
974
990
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
@@ -1014,6 +1030,7 @@ function wireRetrievalCommands(program: Command): void {
|
|
|
1014
1030
|
const { multiGet, formatMultiGet } = await import("./commands/multi-get");
|
|
1015
1031
|
const result = await multiGet(refs, {
|
|
1016
1032
|
configPath: globals.config,
|
|
1033
|
+
indexName: globals.index,
|
|
1017
1034
|
maxBytes: cmdOpts.maxBytes as number | undefined,
|
|
1018
1035
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
1019
1036
|
json: format === "json",
|
|
@@ -1515,6 +1532,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1515
1532
|
const globals = getGlobals();
|
|
1516
1533
|
const { update, formatUpdate } = await import("./commands/update");
|
|
1517
1534
|
const opts = {
|
|
1535
|
+
configPath: globals.config,
|
|
1536
|
+
indexName: globals.index,
|
|
1518
1537
|
gitPull: Boolean(cmdOpts.gitPull),
|
|
1519
1538
|
verbose: globals.verbose,
|
|
1520
1539
|
};
|
|
@@ -1548,6 +1567,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1548
1567
|
const collection =
|
|
1549
1568
|
collectionArg ?? (cmdOpts.collection as string | undefined);
|
|
1550
1569
|
const opts = {
|
|
1570
|
+
configPath: globals.config,
|
|
1571
|
+
indexName: globals.index,
|
|
1551
1572
|
collection,
|
|
1552
1573
|
model: cmdOpts.model as string | undefined,
|
|
1553
1574
|
batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),
|
package/src/ingestion/chunker.ts
CHANGED
|
@@ -372,6 +372,12 @@ export class MarkdownChunker implements ChunkerPort {
|
|
|
372
372
|
// Find a good prose break point
|
|
373
373
|
findBreakPoint(markdown, targetEnd, windowSize);
|
|
374
374
|
}
|
|
375
|
+
if (endPos <= pos) {
|
|
376
|
+
endPos = Math.min(markdown.length, pos + maxChars);
|
|
377
|
+
}
|
|
378
|
+
if (endPos - pos > maxChars + windowSize) {
|
|
379
|
+
endPos = Math.min(markdown.length, pos + maxChars);
|
|
380
|
+
}
|
|
375
381
|
|
|
376
382
|
// Extract chunk text - preserve exactly (no trim!)
|
|
377
383
|
// This maintains accurate pos/line mappings and Markdown semantics
|
package/src/llm/cache.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { isAbsolute, join } from "node:path";
|
|
|
13
13
|
// node:url: fileURLToPath for proper file:// URL handling
|
|
14
14
|
import { fileURLToPath } from "node:url";
|
|
15
15
|
|
|
16
|
+
import type { LlmError } from "./errors";
|
|
16
17
|
import type { DownloadPolicy } from "./policy";
|
|
17
18
|
import type {
|
|
18
19
|
DownloadProgress,
|
|
@@ -26,8 +27,10 @@ import { getModelsCachePath } from "../app/constants";
|
|
|
26
27
|
import {
|
|
27
28
|
autoDownloadDisabledError,
|
|
28
29
|
downloadFailedError,
|
|
30
|
+
invalidModelFileError,
|
|
29
31
|
invalidUriError,
|
|
30
32
|
lockFailedError,
|
|
33
|
+
modelDownloadInterceptedError,
|
|
31
34
|
modelNotCachedError,
|
|
32
35
|
modelNotFoundError,
|
|
33
36
|
} from "./errors";
|
|
@@ -40,6 +43,67 @@ import { getLockPath, getManifestLockPath, withLock } from "./lockfile";
|
|
|
40
43
|
// Regex patterns for URI parsing (top-level for performance)
|
|
41
44
|
const HF_QUANT_PATTERN = /^([^/]+)\/([^/:]+):(\w+)$/;
|
|
42
45
|
const HF_PATH_PATTERN = /^([^/]+)\/([^/]+)\/(.+\.gguf)$/;
|
|
46
|
+
const GGUF_MAGIC = new Uint8Array([0x47, 0x47, 0x55, 0x46]);
|
|
47
|
+
|
|
48
|
+
type ModelFileOwner = "cache" | "user";
|
|
49
|
+
|
|
50
|
+
type ValidatedCachedPath =
|
|
51
|
+
| { ok: true; path: string }
|
|
52
|
+
| { ok: false; kind: "missing" }
|
|
53
|
+
| { ok: false; kind: "invalid"; error: LlmError };
|
|
54
|
+
|
|
55
|
+
/**
 * Heuristic check for whether a byte prefix is an HTML page rather than model data.
 *
 * The bytes are decoded leniently as UTF-8 (invalid sequences do not throw),
 * lower-cased, and searched for common HTML markers. A prefix that mentions
 * "huggingface" and contains any `<` is also treated as HTML.
 *
 * @param bytes - Leading bytes of the file under inspection.
 * @returns `true` when the prefix looks like HTML.
 */
function looksLikeHtml(bytes: Uint8Array): boolean {
  const decoder = new TextDecoder("utf-8", { fatal: false });
  const lowered = decoder.decode(bytes).toLowerCase();

  const htmlMarkers = ["<!doctype", "<html", "<head", "<body"];
  if (htmlMarkers.some((marker) => lowered.includes(marker))) {
    return true;
  }

  // Fallback: any markup at all on a page that names Hugging Face.
  return lowered.includes("huggingface") && lowered.includes("<");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Report whether `bytes` begins with the GGUF magic number held in `GGUF_MAGIC`.
 *
 * A buffer shorter than the magic yields `false`, because reading past its end
 * produces `undefined`, which never equals a magic byte.
 */
function hasGgufMagic(bytes: Uint8Array): boolean {
  for (let offset = 0; offset < GGUF_MAGIC.length; offset += 1) {
    if (bytes[offset] !== GGUF_MAGIC[offset]) {
      return false;
    }
  }
  return true;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Check that the file at `path` exists and starts with the GGUF magic header.
 *
 * Only the first 512 bytes are read. This function never deletes or modifies
 * the file; any cleanup is left to the caller.
 *
 * @param path - Filesystem path of the model file to inspect.
 * @param uri - Model URI the file belongs to; used only for error reporting.
 * @param owner - Who owns the file ("cache" or "user"); forwarded to the
 *   intercepted-download error so its message can be worded accordingly.
 * @returns `ok: true` for a GGUF file. Otherwise a not-found error (file
 *   missing), an intercepted-download error (prefix looks like HTML), or an
 *   invalid-model-file error (empty file or any other non-GGUF content).
 */
export async function validateGgufFile(
  path: string,
  uri: string,
  owner: ModelFileOwner
): Promise<LlmResult<void>> {
  const handle = Bun.file(path);
  if (!(await handle.exists())) {
    return {
      ok: false,
      error: modelNotFoundError(uri, `File not found: ${path}`),
    };
  }

  // The magic number and any HTML markers both sit at the start of the file.
  const header = new Uint8Array(await handle.slice(0, 512).arrayBuffer());
  if (hasGgufMagic(header)) {
    return { ok: true, value: undefined };
  }

  // Not GGUF: distinguish an intercepted download (HTML) from other bad content.
  const error = looksLikeHtml(header)
    ? modelDownloadInterceptedError(uri, path, owner)
    : invalidModelFileError(
        uri,
        path,
        header.length === 0 ? "empty file" : "missing GGUF magic header"
      );
  return { ok: false, error };
}
|
|
43
107
|
|
|
44
108
|
async function computeSha256(path: string): Promise<string> {
|
|
45
109
|
const hasher = new Bun.CryptoHasher("sha256");
|
|
@@ -233,13 +297,20 @@ export class ModelCache {
|
|
|
233
297
|
),
|
|
234
298
|
};
|
|
235
299
|
}
|
|
300
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
301
|
+
if (!validation.ok) {
|
|
302
|
+
return validation;
|
|
303
|
+
}
|
|
236
304
|
return { ok: true, value: parsed.value.file };
|
|
237
305
|
}
|
|
238
306
|
|
|
239
307
|
// HF models: check cache
|
|
240
|
-
const cached = await this.
|
|
241
|
-
if (cached) {
|
|
242
|
-
return { ok: true, value: cached };
|
|
308
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
309
|
+
if (cached.ok) {
|
|
310
|
+
return { ok: true, value: cached.path };
|
|
311
|
+
}
|
|
312
|
+
if (cached.kind === "invalid") {
|
|
313
|
+
return { ok: false, error: cached.error };
|
|
243
314
|
}
|
|
244
315
|
|
|
245
316
|
return { ok: false, error: modelNotCachedError(uri, type) };
|
|
@@ -272,6 +343,10 @@ export class ModelCache {
|
|
|
272
343
|
),
|
|
273
344
|
};
|
|
274
345
|
}
|
|
346
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
347
|
+
if (!validation.ok) {
|
|
348
|
+
return validation;
|
|
349
|
+
}
|
|
275
350
|
return { ok: true, value: parsed.value.file };
|
|
276
351
|
}
|
|
277
352
|
|
|
@@ -326,6 +401,14 @@ export class ModelCache {
|
|
|
326
401
|
: undefined,
|
|
327
402
|
});
|
|
328
403
|
|
|
404
|
+
const validation = await validateGgufFile(resolvedPath, uri, "cache");
|
|
405
|
+
if (!validation.ok) {
|
|
406
|
+
await rm(resolvedPath, { force: true }).catch(() => {
|
|
407
|
+
// Ignore deletion errors
|
|
408
|
+
});
|
|
409
|
+
return validation;
|
|
410
|
+
}
|
|
411
|
+
|
|
329
412
|
// Update manifest
|
|
330
413
|
await this.addToManifest(uri, type, resolvedPath);
|
|
331
414
|
|
|
@@ -351,9 +434,12 @@ export class ModelCache {
|
|
|
351
434
|
onProgress?: ProgressCallback
|
|
352
435
|
): Promise<LlmResult<string>> {
|
|
353
436
|
// Fast path: check if already cached
|
|
354
|
-
const cached = await this.
|
|
355
|
-
if (cached) {
|
|
356
|
-
return { ok: true, value: cached };
|
|
437
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
438
|
+
if (cached.ok) {
|
|
439
|
+
return { ok: true, value: cached.path };
|
|
440
|
+
}
|
|
441
|
+
if (cached.kind === "invalid") {
|
|
442
|
+
return { ok: false, error: cached.error };
|
|
357
443
|
}
|
|
358
444
|
|
|
359
445
|
// Parse and validate URI
|
|
@@ -374,6 +460,10 @@ export class ModelCache {
|
|
|
374
460
|
),
|
|
375
461
|
};
|
|
376
462
|
}
|
|
463
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
464
|
+
if (!validation.ok) {
|
|
465
|
+
return validation;
|
|
466
|
+
}
|
|
377
467
|
return { ok: true, value: parsed.value.file };
|
|
378
468
|
}
|
|
379
469
|
|
|
@@ -397,9 +487,12 @@ export class ModelCache {
|
|
|
397
487
|
|
|
398
488
|
const result = await withLock(lockPath, async () => {
|
|
399
489
|
// Double-check: another process may have downloaded while we waited
|
|
400
|
-
const cachedNow = await this.
|
|
401
|
-
if (cachedNow) {
|
|
402
|
-
return { ok: true as const, value: cachedNow };
|
|
490
|
+
const cachedNow = await this.getValidatedCachedPath(uri);
|
|
491
|
+
if (cachedNow.ok) {
|
|
492
|
+
return { ok: true as const, value: cachedNow.path };
|
|
493
|
+
}
|
|
494
|
+
if (cachedNow.kind === "invalid") {
|
|
495
|
+
return { ok: false as const, error: cachedNow.error };
|
|
403
496
|
}
|
|
404
497
|
|
|
405
498
|
// Download with progress
|
|
@@ -433,26 +526,12 @@ export class ModelCache {
|
|
|
433
526
|
// Handle file: URIs directly (check filesystem, not manifest)
|
|
434
527
|
const parsed = parseModelUri(uri);
|
|
435
528
|
if (parsed.ok && parsed.value.scheme === "file") {
|
|
436
|
-
const
|
|
437
|
-
return
|
|
529
|
+
const validation = await validateGgufFile(parsed.value.file, uri, "user");
|
|
530
|
+
return validation.ok ? parsed.value.file : null;
|
|
438
531
|
}
|
|
439
532
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
const entry = manifest.models.find((m) => m.uri === uri);
|
|
443
|
-
if (!entry) {
|
|
444
|
-
return null;
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
// Verify file still exists
|
|
448
|
-
const exists = await this.fileExists(entry.path);
|
|
449
|
-
if (!exists) {
|
|
450
|
-
// Remove stale entry
|
|
451
|
-
await this.removeFromManifest(uri);
|
|
452
|
-
return null;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
return entry.path;
|
|
533
|
+
const cached = await this.getValidatedCachedPath(uri);
|
|
534
|
+
return cached.ok ? cached.path : null;
|
|
456
535
|
}
|
|
457
536
|
|
|
458
537
|
/**
|
|
@@ -514,6 +593,33 @@ export class ModelCache {
|
|
|
514
593
|
}
|
|
515
594
|
}
|
|
516
595
|
|
|
596
|
+
/**
 * Look up `uri` in the manifest and confirm the recorded file is a usable GGUF file.
 *
 * Outcomes:
 * - no manifest entry → `{ ok: false, kind: "missing" }`
 * - entry present but file gone → the stale entry is removed, then "missing"
 * - file fails GGUF validation → the file is deleted (deletion errors are
 *   ignored), the entry is removed, and the validation error is returned
 *   as `kind: "invalid"`
 * - otherwise → `{ ok: true, path }`
 */
private async getValidatedCachedPath(
  uri: string
): Promise<ValidatedCachedPath> {
  const { models } = await this.loadManifest();
  const record = models.find((candidate) => candidate.uri === uri);
  if (!record) {
    return { ok: false, kind: "missing" };
  }

  const cachedPath = record.path;
  if (!(await this.fileExists(cachedPath))) {
    // Manifest points at a file that no longer exists: drop the stale entry.
    await this.removeFromManifest(uri);
    return { ok: false, kind: "missing" };
  }

  const check = await validateGgufFile(cachedPath, uri, "cache");
  if (!check.ok) {
    // Purge the bad file so it is not picked up again from the cache.
    await rm(cachedPath, { force: true }).catch(() => {
      // Ignore deletion errors
    });
    await this.removeFromManifest(uri);
    return { ok: false, kind: "invalid", error: check.error };
  }

  return { ok: true, path: cachedPath };
}
|
|
622
|
+
|
|
517
623
|
private async loadManifest(): Promise<Manifest> {
|
|
518
624
|
if (this.manifest) {
|
|
519
625
|
return this.manifest;
|
package/src/llm/errors.ts
CHANGED
|
@@ -15,6 +15,8 @@ export type LlmErrorCode =
|
|
|
15
15
|
| "MODEL_DOWNLOAD_FAILED"
|
|
16
16
|
| "MODEL_LOAD_FAILED"
|
|
17
17
|
| "MODEL_CORRUPTED"
|
|
18
|
+
| "INVALID_MODEL_FILE"
|
|
19
|
+
| "MODEL_DOWNLOAD_INTERCEPTED"
|
|
18
20
|
| "INFERENCE_FAILED"
|
|
19
21
|
| "TIMEOUT"
|
|
20
22
|
| "OUT_OF_MEMORY"
|
|
@@ -160,6 +162,36 @@ export function corruptedError(uri: string, cause?: unknown): LlmError {
|
|
|
160
162
|
});
|
|
161
163
|
}
|
|
162
164
|
|
|
165
|
+
/**
 * Build the non-retryable `INVALID_MODEL_FILE` error for a file that is not GGUF.
 *
 * @param uri - Model URI the file was resolved from.
 * @param path - Filesystem path of the offending file.
 * @param details - Optional short reason, appended to the message in parentheses.
 */
export function invalidModelFileError(
  uri: string,
  path: string,
  details?: string
): LlmError {
  const reason = details ? ` (${details})` : "";
  return llmError("INVALID_MODEL_FILE", {
    message: `Model file is not a GGUF file: ${path}${reason}`,
    modelUri: uri,
    retryable: false,
    suggestion: "Remove the file or run: gno models pull --force",
  });
}
|
|
177
|
+
|
|
178
|
+
/**
 * Build the non-retryable `MODEL_DOWNLOAD_INTERCEPTED` error for a model file
 * whose content looks like HTML instead of GGUF.
 *
 * @param uri - Model URI the file was resolved from.
 * @param path - Filesystem path of the offending file.
 * @param owner - "cache" adds a note that the cached file was removed; this
 *   function does not delete anything, so the caller must perform that removal.
 */
export function modelDownloadInterceptedError(
  uri: string,
  path: string,
  owner: "cache" | "user"
): LlmError {
  const sentences = [
    `Model file looks like HTML instead of GGUF: ${path}. `,
    `A proxy, firewall, or captive portal likely intercepted the download.`,
  ];
  if (owner === "cache") {
    sentences.push(" The cached file was removed.");
  }

  return llmError("MODEL_DOWNLOAD_INTERCEPTED", {
    message: sentences.join(""),
    modelUri: uri,
    retryable: false,
    suggestion:
      "Check network access to Hugging Face, then run: gno models pull --force",
  });
}
|
|
194
|
+
|
|
163
195
|
export function inferenceFailedError(uri: string, cause?: unknown): LlmError {
|
|
164
196
|
return llmError("INFERENCE_FAILED", {
|
|
165
197
|
message: `Inference failed for model: ${uri}`,
|
|
@@ -31,6 +31,12 @@ interface EmbeddingWorker {
|
|
|
31
31
|
pending: number;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
interface TokenizingModel {
|
|
35
|
+
trainContextSize?: number;
|
|
36
|
+
tokenize(text: string): readonly number[];
|
|
37
|
+
detokenize(tokens: readonly number[]): string;
|
|
38
|
+
}
|
|
39
|
+
|
|
34
40
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
35
41
|
// Constants
|
|
36
42
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -51,6 +57,9 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
51
57
|
null;
|
|
52
58
|
private lifecycleVersion = 0;
|
|
53
59
|
private dims: number | null = null;
|
|
60
|
+
private llamaModel: TokenizingModel | null = null;
|
|
61
|
+
private warnedSingleTruncation = false;
|
|
62
|
+
private warnedBatchTruncation = false;
|
|
54
63
|
private readonly manager: ModelManager;
|
|
55
64
|
readonly modelUri: string;
|
|
56
65
|
private readonly modelPath: string;
|
|
@@ -76,8 +85,12 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
76
85
|
}
|
|
77
86
|
|
|
78
87
|
try {
|
|
88
|
+
const prepared = this.truncateForEmbedding(text, "single");
|
|
89
|
+
if (!prepared.ok) {
|
|
90
|
+
return { ok: false, error: prepared.error };
|
|
91
|
+
}
|
|
79
92
|
const embedding = await this.runOnWorker((worker) =>
|
|
80
|
-
worker.context.getEmbeddingFor(text)
|
|
93
|
+
worker.context.getEmbeddingFor(prepared.value.text)
|
|
81
94
|
);
|
|
82
95
|
const vector = Array.from(embedding.vector) as number[];
|
|
83
96
|
|
|
@@ -103,6 +116,15 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
103
116
|
}
|
|
104
117
|
|
|
105
118
|
try {
|
|
119
|
+
const preparedTexts: string[] = [];
|
|
120
|
+
for (const text of texts) {
|
|
121
|
+
const prepared = this.truncateForEmbedding(text, "batch");
|
|
122
|
+
if (!prepared.ok) {
|
|
123
|
+
return { ok: false, error: prepared.error };
|
|
124
|
+
}
|
|
125
|
+
preparedTexts.push(prepared.value.text);
|
|
126
|
+
}
|
|
127
|
+
|
|
106
128
|
const allResults = Array.from(
|
|
107
129
|
{ length: texts.length },
|
|
108
130
|
() => [] as number[]
|
|
@@ -114,14 +136,14 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
114
136
|
while (true) {
|
|
115
137
|
const index = nextIndex;
|
|
116
138
|
nextIndex += 1;
|
|
117
|
-
if (index >=
|
|
139
|
+
if (index >= preparedTexts.length) {
|
|
118
140
|
return;
|
|
119
141
|
}
|
|
120
142
|
|
|
121
143
|
const embedding = await this.runOnSpecificWorker(
|
|
122
144
|
worker,
|
|
123
145
|
(current) =>
|
|
124
|
-
current.context.getEmbeddingFor(
|
|
146
|
+
current.context.getEmbeddingFor(preparedTexts[index] as string)
|
|
125
147
|
);
|
|
126
148
|
allResults[index] = Array.from(embedding.vector) as number[];
|
|
127
149
|
}
|
|
@@ -263,6 +285,7 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
263
285
|
|
|
264
286
|
try {
|
|
265
287
|
const llamaModel = model.value.model as LlamaModel;
|
|
288
|
+
this.llamaModel = llamaModel as TokenizingModel;
|
|
266
289
|
const llama = await this.manager.getLlama();
|
|
267
290
|
const lifecycleVersion = this.lifecycleVersion;
|
|
268
291
|
const targetPoolSize = this.resolveTargetPoolSize(llama);
|
|
@@ -321,4 +344,47 @@ export class NodeLlamaCppEmbedding implements EmbeddingPort {
|
|
|
321
344
|
return { ok: false, error: inferenceFailedError(this.modelUri, e) };
|
|
322
345
|
}
|
|
323
346
|
}
|
|
347
|
+
|
|
348
|
+
/**
 * Clip `text` so its token count fits the loaded model's training context size.
 *
 * The text is returned unchanged when no model is loaded, when the model does
 * not report a positive finite `trainContextSize`, or when the text already
 * fits. Otherwise it is tokenized, cut to the limit, and detokenized back.
 * A console warning is emitted at most once per mode for this instance.
 *
 * @param text - Input to embed.
 * @param mode - Which warn-once flag to use ("single" or "batch").
 * @returns The possibly shortened text, or an inference error if tokenizing
 *   or detokenizing throws.
 */
private truncateForEmbedding(
  text: string,
  mode: "single" | "batch"
): LlmResult<{ text: string }> {
  const tokenizer = this.llamaModel;
  const contextSize = tokenizer?.trainContextSize;
  if (
    !tokenizer ||
    typeof contextSize !== "number" ||
    !Number.isFinite(contextSize) ||
    contextSize <= 0
  ) {
    return { ok: true, value: { text } };
  }

  // Keep 4 tokens of headroom below the context size, never dropping below 1.
  // NOTE(review): presumably reserved for special tokens — confirm.
  const limit = Math.max(1, Math.floor(contextSize) - 4);
  try {
    const tokens = tokenizer.tokenize(text);
    if (tokens.length <= limit) {
      return { ok: true, value: { text } };
    }

    const truncatedText = tokenizer.detokenize(tokens.slice(0, limit));
    const alreadyWarned =
      mode === "single"
        ? this.warnedSingleTruncation
        : this.warnedBatchTruncation;
    if (!alreadyWarned) {
      if (mode === "single") {
        this.warnedSingleTruncation = true;
      } else {
        this.warnedBatchTruncation = true;
      }
      console.warn(
        `[llama] Truncated embedding input from ${tokens.length} to ${limit} tokens`
      );
    }
    return { ok: true, value: { text: truncatedText } };
  } catch (error) {
    return { ok: false, error: inferenceFailedError(this.modelUri, error) };
  }
}
|
|
324
390
|
}
|
|
@@ -16,6 +16,7 @@ import { loadFailedError, outOfMemoryError, timeoutError } from "../errors";
|
|
|
16
16
|
|
|
17
17
|
type Llama = Awaited<ReturnType<typeof import("node-llama-cpp").getLlama>>;
|
|
18
18
|
type LlamaModel = Awaited<ReturnType<Llama["loadModel"]>>;
|
|
19
|
+
export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
|
|
19
20
|
|
|
20
21
|
interface CachedModel {
|
|
21
22
|
uri: string;
|
|
@@ -24,6 +25,40 @@ interface CachedModel {
|
|
|
24
25
|
loadedAt: number;
|
|
25
26
|
}
|
|
26
27
|
|
|
28
|
+
// Module-level warn-once latches so each diagnostic prints at most once per process.
// `invalidGpuModeWarned` guards the "invalid value" warning below;
// `gpuFallbackWarned` guards the CPU-fallback warning in ModelManager.getLlama.
let invalidGpuModeWarned = false;
let gpuFallbackWarned = false;

/**
 * Resolve the llama.cpp GPU backend from environment variables.
 *
 * `GNO_LLAMA_GPU` takes precedence over `NODE_LLAMA_CPP_GPU`. A variable that
 * is unset, empty, or whitespace-only is treated as absent, so a blank
 * `GNO_LLAMA_GPU` no longer hides a valid `NODE_LLAMA_CPP_GPU`. Values are
 * trimmed and compared case-insensitively.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns
 *   - `"metal"`, `"vulkan"` or `"cuda"` when that backend is named;
 *   - `false` for `false`, `off`, `none`, `disable`, `disabled` or `0`;
 *   - `"auto"` when nothing is configured, when `auto` is given, or when the
 *     value is unrecognized (a warning is then printed once per process).
 */
export function resolveLlamaGpuMode(
  env: NodeJS.ProcessEnv = process.env
): LlamaGpuMode {
  // First non-blank candidate wins. `??` alone would stop at an empty string.
  const raw =
    [env.GNO_LLAMA_GPU, env.NODE_LLAMA_CPP_GPU]
      .map((value) => (value ?? "").trim().toLowerCase())
      .find((value) => value !== "") ?? "auto";

  if (raw === "auto") {
    return "auto";
  }
  if (raw === "metal" || raw === "vulkan" || raw === "cuda") {
    return raw;
  }
  if (
    raw === "false" ||
    raw === "off" ||
    raw === "none" ||
    raw === "disable" ||
    raw === "disabled" ||
    raw === "0"
  ) {
    return false;
  }

  if (!invalidGpuModeWarned) {
    invalidGpuModeWarned = true;
    console.warn(
      `[llama] Invalid GNO_LLAMA_GPU/NODE_LLAMA_CPP_GPU value "${raw}", using auto`
    );
  }
  return "auto";
}
|
|
61
|
+
|
|
27
62
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
28
63
|
// ModelManager
|
|
29
64
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -48,11 +83,32 @@ export class ModelManager {
|
|
|
48
83
|
/**
 * Return the llama runtime, creating and caching it on first use.
 *
 * The GPU backend comes from `resolveLlamaGpuMode()`. If initialization fails
 * while an explicit backend ("metal", "vulkan", "cuda") was requested, a
 * warning is printed once per process and initialization is retried with the
 * GPU disabled. Failures in "auto" or GPU-disabled mode are rethrown.
 */
async getLlama(): Promise<Llama> {
  if (this.llama) {
    return this.llama;
  }

  const { getLlama, LlamaLogLevel } = await import("node-llama-cpp");
  const gpu = resolveLlamaGpuMode();
  // Suppress model loading warnings (vocab tokens, pooling type)
  const logLevel = LlamaLogLevel.error;

  try {
    this.llama = await getLlama({ build: "autoAttempt", gpu, logLevel });
  } catch (error) {
    // There is no explicit backend to fall back from in these modes.
    if (gpu === "auto" || gpu === false) {
      throw error;
    }
    if (!gpuFallbackWarned) {
      gpuFallbackWarned = true;
      console.warn(
        `[llama] GPU backend "${gpu}" failed, retrying with CPU: ${
          error instanceof Error ? error.message : String(error)
        }`
      );
    }
    this.llama = await getLlama({
      build: "autoAttempt",
      gpu: false,
      logLevel,
    });
  }
  return this.llama;
}
|
|
@@ -13,7 +13,12 @@ import { join as pathJoin } from "node:path";
|
|
|
13
13
|
import type { DocumentRow, TagCount } from "../../store/types";
|
|
14
14
|
import type { ToolContext } from "../server";
|
|
15
15
|
|
|
16
|
-
import {
|
|
16
|
+
import {
|
|
17
|
+
buildUri,
|
|
18
|
+
decorateUriForIndex,
|
|
19
|
+
parseUri,
|
|
20
|
+
URI_PREFIX,
|
|
21
|
+
} from "../../app/constants";
|
|
17
22
|
import { MCP_ERRORS } from "../../core/errors";
|
|
18
23
|
import { normalizeTag, validateTag } from "../../core/tags";
|
|
19
24
|
import { normalizeCollectionName } from "../../core/validation";
|
|
@@ -64,7 +69,8 @@ function formatResourceContent(
|
|
|
64
69
|
const langLine = doc.languageHint
|
|
65
70
|
? `\n language: ${doc.languageHint}`
|
|
66
71
|
: "";
|
|
67
|
-
const
|
|
72
|
+
const displayUri = decorateUriForIndex(doc.uri, ctx.indexName);
|
|
73
|
+
const header = `<!-- ${displayUri}
|
|
68
74
|
docid: ${doc.docid}
|
|
69
75
|
source: ${absPath}
|
|
70
76
|
mime: ${doc.sourceMime}${langLine}
|
|
@@ -94,7 +100,7 @@ export function registerResources(server: McpServer, ctx: ToolContext): void {
|
|
|
94
100
|
|
|
95
101
|
return {
|
|
96
102
|
resources: listResult.value.map((doc) => ({
|
|
97
|
-
uri: doc.uri,
|
|
103
|
+
uri: decorateUriForIndex(doc.uri, ctx.indexName),
|
|
98
104
|
name: doc.relPath,
|
|
99
105
|
mimeType: doc.sourceMime || "text/markdown",
|
|
100
106
|
description: doc.title ?? undefined,
|
|
@@ -160,7 +166,10 @@ export function registerResources(server: McpServer, ctx: ToolContext): void {
|
|
|
160
166
|
const formattedContent = formatResourceContent(doc, content, ctx);
|
|
161
167
|
|
|
162
168
|
// Build canonical URI
|
|
163
|
-
const canonicalUri =
|
|
169
|
+
const canonicalUri = decorateUriForIndex(
|
|
170
|
+
buildUri(collection, path),
|
|
171
|
+
parsed.indexName ?? ctx.indexName
|
|
172
|
+
);
|
|
164
173
|
|
|
165
174
|
return {
|
|
166
175
|
contents: [
|
package/src/mcp/server.ts
CHANGED
|
@@ -57,6 +57,7 @@ export interface ToolContext {
|
|
|
57
57
|
config: Config;
|
|
58
58
|
collections: Collection[];
|
|
59
59
|
actualConfigPath: string;
|
|
60
|
+
indexName?: string;
|
|
60
61
|
toolMutex: Mutex;
|
|
61
62
|
jobManager: JobManager;
|
|
62
63
|
serverInstanceId: string;
|
|
@@ -164,6 +165,7 @@ export async function startMcpServer(options: McpServerOptions): Promise<void> {
|
|
|
164
165
|
config,
|
|
165
166
|
collections,
|
|
166
167
|
actualConfigPath,
|
|
168
|
+
indexName: options.indexName,
|
|
167
169
|
toolMutex,
|
|
168
170
|
jobManager,
|
|
169
171
|
serverInstanceId,
|
package/src/mcp/tools/get.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { join as pathJoin } from "node:path";
|
|
|
9
9
|
import type { DocumentRow, StorePort } from "../../store/types";
|
|
10
10
|
import type { ToolContext } from "../server";
|
|
11
11
|
|
|
12
|
-
import { parseUri } from "../../app/constants";
|
|
12
|
+
import { decorateUriForIndex, parseUri } from "../../app/constants";
|
|
13
13
|
import { parseRef } from "../../cli/commands/ref-parser";
|
|
14
14
|
import {
|
|
15
15
|
getDocumentCapabilities,
|
|
@@ -196,7 +196,12 @@ export function handleGet(
|
|
|
196
196
|
|
|
197
197
|
const response: GetResponse = {
|
|
198
198
|
docid: doc.docid,
|
|
199
|
-
uri:
|
|
199
|
+
uri: decorateUriForIndex(
|
|
200
|
+
doc.uri,
|
|
201
|
+
parsed.type === "uri"
|
|
202
|
+
? (parseUri(parsed.value)?.indexName ?? ctx.indexName)
|
|
203
|
+
: ctx.indexName
|
|
204
|
+
),
|
|
200
205
|
title: doc.title ?? undefined,
|
|
201
206
|
content,
|
|
202
207
|
totalLines,
|