@gmickel/gno 0.22.6 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -3
- package/package.json +14 -2
- package/src/cli/AGENTS.md +2 -1
- package/src/cli/CLAUDE.md +2 -1
- package/src/cli/commands/ask.ts +33 -14
- package/src/cli/commands/models/clear.ts +10 -3
- package/src/cli/commands/models/list.ts +17 -4
- package/src/cli/commands/models/pull.ts +15 -7
- package/src/cli/commands/query.ts +13 -10
- package/src/cli/program.ts +76 -43
- package/src/config/types.ts +8 -1
- package/src/core/depth-policy.ts +78 -0
- package/src/core/structured-query.ts +198 -0
- package/src/llm/errors.ts +1 -1
- package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
- package/src/llm/registry.ts +21 -0
- package/src/llm/types.ts +1 -1
- package/src/mcp/tools/index.ts +2 -2
- package/src/mcp/tools/query.ts +42 -40
- package/src/pipeline/hybrid.ts +4 -4
- package/src/pipeline/query-modes.ts +17 -12
- package/src/sdk/client.ts +624 -0
- package/src/sdk/documents.ts +348 -0
- package/src/sdk/embed.ts +287 -0
- package/src/sdk/errors.ts +42 -0
- package/src/sdk/index.ts +51 -0
- package/src/sdk/types.ts +138 -0
- package/src/serve/AGENTS.md +2 -1
- package/src/serve/CLAUDE.md +2 -1
- package/src/serve/context.ts +23 -9
- package/src/serve/public/app.tsx +8 -0
- package/src/serve/public/components/AIModelSelector.tsx +48 -10
- package/src/serve/public/globals.built.css +1 -1
- package/src/serve/public/pages/Ask.tsx +109 -41
- package/src/serve/public/pages/Browse.tsx +141 -5
- package/src/serve/public/pages/Collections.tsx +135 -38
- package/src/serve/public/pages/Dashboard.tsx +31 -4
- package/src/serve/public/pages/GraphView.tsx +24 -0
- package/src/serve/public/pages/Search.tsx +125 -36
- package/src/serve/routes/api.ts +73 -20
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SDK document retrieval helpers.
|
|
3
|
+
*
|
|
4
|
+
* @module src/sdk/documents
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { minimatch } from "minimatch";
|
|
8
|
+
|
|
9
|
+
import type { GetResponse } from "../cli/commands/get";
|
|
10
|
+
import type { LsResponse } from "../cli/commands/ls";
|
|
11
|
+
import type {
|
|
12
|
+
MultiGetDocument,
|
|
13
|
+
MultiGetResponse,
|
|
14
|
+
SkippedDoc,
|
|
15
|
+
} from "../cli/commands/multi-get";
|
|
16
|
+
import type { ParsedRef } from "../cli/commands/ref-parser";
|
|
17
|
+
import type { Config } from "../config/types";
|
|
18
|
+
import type { DocumentRow, StorePort } from "../store/types";
|
|
19
|
+
import type {
|
|
20
|
+
GnoGetOptions,
|
|
21
|
+
GnoListOptions,
|
|
22
|
+
GnoMultiGetOptions,
|
|
23
|
+
} from "./types";
|
|
24
|
+
|
|
25
|
+
import { isGlobPattern, parseRef, splitRefs } from "../cli/commands/ref-parser";
|
|
26
|
+
import { sdkError } from "./errors";
|
|
27
|
+
|
|
28
|
+
// Matches a `gno://` URI that names a collection and is followed by a slash,
// e.g. `gno://notes/`; used to validate URI-style list scopes.
const URI_PREFIX_PATTERN = /^gno:\/\/[^/]+\//;
// Shared encoder used to measure content size in encoded bytes.
const encoder = new TextEncoder();
|
|
30
|
+
|
|
31
|
+
/**
 * Resolve a parsed reference to a store lookup.
 *
 * - `docid` references go through `store.getDocumentByDocid`.
 * - `uri` references go through `store.getDocumentByUri`.
 * - `collPath` references go through `store.getDocument`; when either the
 *   collection or the relative path is missing, the store is not queried and
 *   a successful result carrying `null` is returned instead.
 *
 * @param store - Store used for the lookup.
 * @param parsed - Reference already parsed by `parseRef`.
 * @returns Whatever the selected store method returns.
 */
function lookupDocument(store: StorePort, parsed: ParsedRef) {
  switch (parsed.type) {
    case "docid": {
      return store.getDocumentByDocid(parsed.value);
    }
    case "uri": {
      return store.getDocumentByUri(parsed.value);
    }
    case "collPath": {
      const hasBothParts = Boolean(parsed.collection && parsed.relPath);
      if (hasBothParts && parsed.collection && parsed.relPath) {
        return store.getDocument(parsed.collection, parsed.relPath);
      }
      // Incomplete collection/path pair: report "no document" without a query.
      return Promise.resolve({ ok: true as const, value: null });
    }
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build the `source` section of a get response from a document row.
 *
 * The absolute path is the owning collection's `path` joined to the document's
 * relative path with `/`; it is `undefined` when no configured collection has
 * the document's collection name.
 *
 * @param doc - Document row supplying path, mime, extension, size and hash.
 * @param config - Configuration whose `collections` are searched by name.
 * @returns Source metadata for the response.
 */
function buildSourceMeta(
  doc: DocumentRow,
  config: Config
): GetResponse["source"] {
  const owner = config.collections.find(
    (candidate) => candidate.name === doc.collection
  );
  let absPath: string | undefined;
  if (owner) {
    absPath = `${owner.path}/${doc.relPath}`;
  }

  return {
    absPath,
    relPath: doc.relPath,
    mime: doc.sourceMime,
    ext: doc.sourceExt,
    sizeBytes: doc.sourceSize,
    sourceHash: doc.sourceHash,
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Build the optional `conversion` section of a get response.
 *
 * @param doc - Document row to read converter details from.
 * @returns `undefined` when the row has no converter id; otherwise the
 *   converter id, converter version and mirror hash, with `null` values
 *   mapped to `undefined`.
 */
function buildConversionMeta(
  doc: DocumentRow
): GetResponse["conversion"] | undefined {
  const { converterId, converterVersion, mirrorHash } = doc;
  if (converterId) {
    return {
      converterId,
      converterVersion: converterVersion ?? undefined,
      mirrorHash: mirrorHash ?? undefined,
    };
  }
  return undefined;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Fetch a single document's mirror content by reference, optionally limited
 * to a window of lines.
 *
 * @param store - Store used to resolve the reference and load mirror content.
 * @param config - Configuration used to derive the absolute source path.
 * @param ref - Document reference string, parsed with `parseRef`.
 * @param options - `from` is the 1-based first line to return (falls back to
 *   the line carried by the parsed reference, then to 1). `limit` is the
 *   maximum number of lines; `0` returns metadata with empty content.
 * @returns The document's identifiers, selected content, total line count and
 *   source/conversion metadata. `returnedLines` is set only when the selected
 *   window does not cover the whole document.
 * @throws `VALIDATION` when `from` is not a positive integer, `limit` is
 *   negative, or the reference cannot be parsed; `STORE` when the lookup
 *   fails; `NOT_FOUND` when no active document matches; `RUNTIME` when the
 *   mirror content is unavailable.
 */
export async function getDocumentByRef(
  store: StorePort,
  config: Config,
  ref: string,
  options: GnoGetOptions = {}
): Promise<GetResponse> {
  const { from, limit } = options;
  // Reject NaN and fractional values too: they would otherwise flow into the
  // slice arithmetic below and yield an empty or misreported window.
  if (from !== undefined && (!Number.isInteger(from) || from <= 0)) {
    throw sdkError("VALIDATION", "--from must be a positive integer");
  }
  if (limit !== undefined && limit < 0) {
    throw sdkError("VALIDATION", "limit cannot be negative");
  }

  const parsed = parseRef(ref);
  if ("error" in parsed) {
    throw sdkError("VALIDATION", parsed.error);
  }

  const docResult = await lookupDocument(store, parsed);
  if (!docResult.ok) {
    throw sdkError("STORE", docResult.error.message, {
      cause: docResult.error.cause,
    });
  }

  const doc = docResult.value;
  // Inactive rows are treated the same as missing ones.
  if (!doc?.active) {
    throw sdkError("NOT_FOUND", "Document not found");
  }
  if (!doc.mirrorHash) {
    throw sdkError("RUNTIME", "Mirror content unavailable (conversion error)");
  }

  const contentResult = await store.getContent(doc.mirrorHash);
  if (!contentResult.ok || contentResult.value === null) {
    throw sdkError("RUNTIME", "Mirror content unavailable", {
      cause: !contentResult.ok ? contentResult.error.cause : undefined,
    });
  }

  const lines = contentResult.value.split("\n");
  const totalLines = lines.length;

  // Fields shared by the metadata-only and the content responses.
  const title = doc.title ?? undefined;
  const language = doc.languageHint ?? undefined;
  const source = buildSourceMeta(doc, config);
  const conversion = buildConversionMeta(doc);

  if (limit === 0) {
    // Metadata-only request: no content and no returnedLines window.
    return {
      docid: doc.docid,
      uri: doc.uri,
      title,
      content: "",
      totalLines,
      language,
      source,
      conversion,
    };
  }

  const startLine = from ?? parsed.line ?? 1;
  const maxLines = limit ?? totalLines;
  // Clamp the 1-based, inclusive window to the document bounds.
  const clampedStart = Math.max(1, Math.min(startLine, totalLines));
  const clampedEnd = Math.min(clampedStart + maxLines - 1, totalLines);
  const selectedLines = lines.slice(clampedStart - 1, clampedEnd);
  const isPartial = clampedStart > 1 || clampedEnd < totalLines;

  return {
    docid: doc.docid,
    uri: doc.uri,
    title,
    content: selectedLines.join("\n"),
    totalLines,
    returnedLines: isPartial
      ? { start: clampedStart, end: clampedEnd }
      : undefined,
    language,
    source,
    conversion,
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Cut content down to a byte budget on line boundaries.
 *
 * Content that already fits is returned untouched. Otherwise whole lines are
 * kept, each counted with its trailing newline, until the next line would
 * exceed the budget; the kept text has trailing whitespace trimmed.
 *
 * @param content - Text to limit.
 * @param maxBytes - Maximum encoded size in bytes.
 * @returns The (possibly shortened) content and whether it was truncated.
 */
function truncateContent(
  content: string,
  maxBytes: number
): { content: string; truncated: boolean } {
  const byteLength = (text: string): number => encoder.encode(text).length;

  if (byteLength(content) <= maxBytes) {
    return { content, truncated: false };
  }

  const kept: string[] = [];
  let usedBytes = 0;
  for (const line of content.split("\n")) {
    const cost = byteLength(`${line}\n`);
    if (usedBytes + cost > maxBytes) {
      return { content: kept.join("\n").trimEnd(), truncated: true };
    }
    kept.push(line);
    usedBytes += cost;
  }

  return { content: kept.join("\n").trimEnd(), truncated: false };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Expand glob references into concrete `collection/relPath` references.
 *
 * References that are not glob patterns pass through unchanged. A glob must
 * have the form `collection/pattern`: the text before the first `/` selects
 * the collection to list and the remainder is matched against each active
 * document's relative path with `minimatch`.
 *
 * @param refs - References to expand, in order.
 * @param store - Store used to list a collection's documents.
 * @returns `expanded` references in encounter order, plus `invalidRefs`
 *   holding globs that have no `/` or whose collection listing failed.
 */
async function expandGlobs(
  refs: string[],
  store: StorePort
): Promise<{ expanded: string[]; invalidRefs: string[] }> {
  const expanded: string[] = [];
  const invalidRefs: string[] = [];

  for (const ref of refs) {
    if (!isGlobPattern(ref)) {
      expanded.push(ref);
      continue;
    }

    const separatorAt = ref.indexOf("/");
    if (separatorAt === -1) {
      invalidRefs.push(ref);
      continue;
    }

    const collection = ref.slice(0, separatorAt);
    const pattern = ref.slice(separatorAt + 1);
    const listing = await store.listDocuments(collection);
    if (listing.ok) {
      const matches = listing.value.filter(
        (candidate) => candidate.active && minimatch(candidate.relPath, pattern)
      );
      for (const match of matches) {
        expanded.push(`${collection}/${match.relPath}`);
      }
    } else {
      invalidRefs.push(ref);
    }
  }

  return { expanded, invalidRefs };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Fetch several documents at once, tolerating per-reference failures.
 *
 * References are split with `splitRefs`, globs are expanded, and duplicate
 * references are processed only once. Each reference either yields a document
 * (content limited to `maxBytes` via `truncateContent`) or an entry in
 * `skipped` with one of these reasons:
 * - `invalid_ref`: unusable glob or unparseable reference;
 * - `not_found`: lookup failed or no active document matched;
 * - `conversion_error`: no mirror hash or mirror content could not be loaded.
 *
 * @param store - Store used for lookups and content loading.
 * @param config - Configuration used to derive absolute source paths.
 * @param refs - Raw reference strings.
 * @param options - `maxBytes` per-document content budget (default 10240).
 * @returns Documents, skipped references and summary counts. `totalLines` is
 *   counted on the returned (possibly truncated) content, and `requested`
 *   counts expanded references before de-duplication.
 */
export async function multiGetDocuments(
  store: StorePort,
  config: Config,
  refs: string[],
  options: GnoMultiGetOptions = {}
): Promise<MultiGetResponse> {
  const maxBytes = options.maxBytes ?? 10_240;
  const { expanded, invalidRefs } = await expandGlobs(splitRefs(refs), store);

  const documents: MultiGetDocument[] = [];
  const skipped: SkippedDoc[] = [];
  for (const badRef of invalidRefs) {
    skipped.push({ ref: badRef, reason: "invalid_ref" });
  }

  // A Set iterates in insertion order, so repeats are dropped while the first
  // occurrence keeps its position.
  for (const ref of new Set(expanded)) {
    const parsed = parseRef(ref);
    if ("error" in parsed) {
      skipped.push({ ref, reason: "invalid_ref" });
      continue;
    }

    const found = await lookupDocument(store, parsed);
    if (!found.ok || !found.value?.active) {
      skipped.push({ ref, reason: "not_found" });
      continue;
    }

    const row = found.value;
    if (!row.mirrorHash) {
      skipped.push({ ref, reason: "conversion_error" });
      continue;
    }

    const loaded = await store.getContent(row.mirrorHash);
    if (!loaded.ok || loaded.value === null) {
      skipped.push({ ref, reason: "conversion_error" });
      continue;
    }

    const limited = truncateContent(loaded.value, maxBytes);
    const owner = config.collections.find(
      (candidate) => candidate.name === row.collection
    );
    documents.push({
      docid: row.docid,
      uri: row.uri,
      title: row.title ?? undefined,
      content: limited.content,
      // Only present (as `true`) when the content was actually cut.
      truncated: limited.truncated || undefined,
      totalLines: limited.content.split("\n").length,
      source: {
        absPath: owner ? `${owner.path}/${row.relPath}` : undefined,
        relPath: row.relPath,
        mime: row.sourceMime,
        ext: row.sourceExt,
      },
    });
  }

  const requested = expanded.length + invalidRefs.length;
  return {
    documents,
    skipped,
    meta: {
      requested,
      returned: documents.length,
      skipped: skipped.length,
      maxBytes,
    },
  };
}
|
|
289
|
+
|
|
290
|
+
/**
 * List active documents, optionally restricted to a scope, one page at a time.
 *
 * A scope starting with `gno://` is treated as a URI prefix: it must name a
 * collection followed by `/`, every document is loaded, and only those whose
 * URI starts with the scope are kept. Any other non-empty scope is passed to
 * the store as a collection name. Results are sorted by URI before paging.
 *
 * @param store - Store used to list documents.
 * @param options - `scope`, `offset` (default 0) and `limit` (default 20).
 * @returns The requested page plus the total match count, page size and
 *   offset.
 * @throws `VALIDATION` for a malformed `gno://` scope; `STORE` when listing
 *   fails.
 */
export async function listDocuments(
  store: StorePort,
  options: GnoListOptions = {}
): Promise<LsResponse> {
  const { scope } = options;
  // Holds the scope only when it is URI-style; undefined otherwise.
  const uriScope = scope?.startsWith("gno://") ? scope : undefined;

  if (uriScope !== undefined) {
    if (uriScope === "gno://") {
      throw sdkError("VALIDATION", "Invalid scope: missing collection");
    }
    if (!URI_PREFIX_PATTERN.test(uriScope)) {
      throw sdkError(
        "VALIDATION",
        "Invalid scope: missing trailing path (use gno://collection/)"
      );
    }
  }

  // Only a plain (non-URI, non-empty) scope is forwarded to the store.
  const docsResult =
    scope && uriScope === undefined
      ? await store.listDocuments(scope)
      : await store.listDocuments();

  if (!docsResult.ok) {
    throw sdkError("STORE", docsResult.error.message, {
      cause: docsResult.error.cause,
    });
  }

  const matching = docsResult.value.filter(
    (row) =>
      row.active && (uriScope === undefined || row.uri.startsWith(uriScope))
  );
  const allActive = matching.map((row) => ({
    docid: row.docid,
    uri: row.uri,
    title: row.title ?? undefined,
    source: {
      relPath: row.relPath,
      mime: row.sourceMime,
      ext: row.sourceExt,
    },
  }));
  allActive.sort((left, right) => left.uri.localeCompare(right.uri));

  const offset = options.offset ?? 0;
  const pageSize = options.limit ?? 20;
  const paged = allActive.slice(offset, offset + pageSize);

  return {
    documents: paged,
    meta: {
      total: allActive.length,
      returned: paged.length,
      offset,
    },
  };
}
|
package/src/sdk/embed.ts
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SDK embedding helpers.
|
|
3
|
+
*
|
|
4
|
+
* @module src/sdk/embed
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Database } from "bun:sqlite";
|
|
8
|
+
|
|
9
|
+
import type { Config } from "../config/types";
|
|
10
|
+
import type { LlmAdapter } from "../llm/nodeLlamaCpp/adapter";
|
|
11
|
+
import type { EmbeddingPort } from "../llm/types";
|
|
12
|
+
import type { SqliteAdapter } from "../store/sqlite/adapter";
|
|
13
|
+
import type { StoreResult } from "../store/types";
|
|
14
|
+
import type {
|
|
15
|
+
BacklogItem,
|
|
16
|
+
VectorIndexPort,
|
|
17
|
+
VectorStatsPort,
|
|
18
|
+
} from "../store/vector";
|
|
19
|
+
import type { GnoEmbedOptions, GnoEmbedResult } from "./types";
|
|
20
|
+
|
|
21
|
+
import { embedBacklog } from "../embed";
|
|
22
|
+
import { getActivePreset } from "../llm/registry";
|
|
23
|
+
import { formatDocForEmbedding } from "../pipeline/contextual";
|
|
24
|
+
import { err, ok } from "../store/types";
|
|
25
|
+
import {
|
|
26
|
+
createVectorIndexPort,
|
|
27
|
+
createVectorStatsPort,
|
|
28
|
+
type VectorRow,
|
|
29
|
+
} from "../store/vector";
|
|
30
|
+
import { sdkError } from "./errors";
|
|
31
|
+
|
|
32
|
+
/** Runtime dependencies needed to run an embedding pass. */
interface EmbedRuntimeOptions {
  /** Configuration from which the active model preset is resolved. */
  config: Config;
  /** SQLite store; its raw database handle is used for stats and vectors. */
  store: SqliteAdapter;
  /** LLM adapter used to create the embedding port. */
  llm: LlmAdapter;
  /** Optional download policy forwarded when the embedding port is created. */
  downloadPolicy?: import("../llm/policy").DownloadPolicy;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Count content chunks that belong to at least one active document.
 *
 * @param db - Raw SQLite database handle.
 * @returns A successful result with the count, or a `QUERY_FAILED` error
 *   result when preparing or running the statement throws.
 */
async function getActiveChunkCount(db: Database): Promise<StoreResult<number>> {
  const sql = `
        SELECT COUNT(*) as count FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
      `;

  try {
    const row = db.prepare(sql).get() as { count: number };
    return ok(row.count);
  } catch (cause) {
    const message =
      cause instanceof Error ? cause.message : "Failed to count chunks";
    return err("QUERY_FAILED", message, cause);
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Load one page of chunks that belong to active documents, ordered by
 * `(mirror_hash, seq)`.
 *
 * Every row is tagged with the reason `'force'` and carries the title of one
 * active document sharing the chunk's mirror hash.
 *
 * @param db - Raw SQLite database handle.
 * @param limit - Maximum number of rows to return.
 * @param after - Optional cursor; when given, only rows strictly after this
 *   `(mirrorHash, seq)` pair in the sort order are returned.
 * @returns A successful result with the page of rows, or a `QUERY_FAILED`
 *   error result when preparing or running the statement throws.
 */
async function getActiveChunks(
  db: Database,
  limit: number,
  after?: { mirrorHash: string; seq: number }
): Promise<StoreResult<BacklogItem[]>> {
  try {
    // Keyset-pagination predicate; left out for the first page so the
    // statement is written once instead of as two near-identical copies.
    const cursorClause = after
      ? "AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))"
      : "";
    const sql = `
        SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
          (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
          'force' as reason
        FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
        ${cursorClause}
        ORDER BY c.mirror_hash, c.seq
        LIMIT ?
      `;

    // Parameter order must mirror the placeholder order in the statement.
    const params = after
      ? [after.mirrorHash, after.mirrorHash, after.seq, limit]
      : [limit];
    const rows = db.prepare(sql).all(...params) as BacklogItem[];
    return ok(rows);
  } catch (cause) {
    return err(
      "QUERY_FAILED",
      cause instanceof Error ? cause.message : "Failed to get chunks",
      cause
    );
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Re-embed every chunk of every active document, page by page.
 *
 * Pages are read with `getActiveChunks` using a `(mirrorHash, seq)` cursor
 * that advances before embedding, so a failed page is counted as errors and
 * skipped rather than retried. After the last page the vector index is
 * synced if it reports itself dirty.
 *
 * @param db - Raw SQLite database handle.
 * @param embedPort - Port used to embed a batch of texts.
 * @param vectorIndex - Index that stores the resulting vectors.
 * @param modelUri - Model identifier recorded with each vector.
 * @param batchSize - Number of chunks per page.
 * @returns Counts of chunks embedded and chunks that failed.
 * @throws `STORE` when a page of chunks cannot be read.
 */
async function forceEmbedAll(
  db: Database,
  embedPort: EmbeddingPort,
  vectorIndex: VectorIndexPort,
  modelUri: string,
  batchSize: number
): Promise<{ embedded: number; errors: number }> {
  const tally = { embedded: 0, errors: 0 };
  let cursor: { mirrorHash: string; seq: number } | undefined;

  for (;;) {
    const page = await getActiveChunks(db, batchSize, cursor);
    if (!page.ok) {
      throw sdkError("STORE", page.error.message, {
        cause: page.error.cause,
      });
    }

    const chunks = page.value;
    const tail = chunks.at(-1);
    if (chunks.length === 0) {
      break;
    }
    if (tail) {
      // Move the cursor first so a failing page cannot be fetched again.
      cursor = { mirrorHash: tail.mirrorHash, seq: tail.seq };
    }

    const texts = chunks.map((chunk) =>
      formatDocForEmbedding(chunk.text, chunk.title ?? undefined)
    );
    const embeddings = await embedPort.embedBatch(texts);
    if (embeddings.ok && embeddings.value.length === chunks.length) {
      const rows: VectorRow[] = chunks.map((chunk, position) => ({
        mirrorHash: chunk.mirrorHash,
        seq: chunk.seq,
        model: modelUri,
        embedding: new Float32Array(embeddings.value[position] as number[]),
      }));
      const stored = await vectorIndex.upsertVectors(rows);
      if (stored.ok) {
        tally.embedded += chunks.length;
        continue;
      }
    }

    // Embedding failed, came back with the wrong size, or could not be stored.
    tally.errors += chunks.length;
  }

  if (vectorIndex.vecDirty) {
    const synced = await vectorIndex.syncVecIndex();
    if (synced.ok) {
      vectorIndex.vecDirty = false;
    }
  }

  return { embedded: tally.embedded, errors: tally.errors };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Probe whether the `sqlite-vec` extension can be used with this database.
 *
 * The module is imported lazily and loaded into `db`; any failure along the
 * way is reported as "not available" instead of being thrown.
 *
 * @param db - Raw SQLite database handle to load the extension into.
 * @returns `true` when import and load both succeed, otherwise `false`.
 */
async function checkVecAvailable(db: Database): Promise<boolean> {
  let available = false;
  try {
    const extension = await import("sqlite-vec");
    extension.load(db);
    available = true;
  } catch {
    available = false;
  }
  return available;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Run an embedding pass for the SDK.
 *
 * The model is `options.model`, falling back to the active preset's embed
 * model. With `force` every chunk of an active document is re-embedded;
 * otherwise only the backlog reported for the model is processed. When there
 * is nothing to do, or on a dry run, no model is loaded and the pending count
 * is returned in `embedded` with zero duration.
 *
 * @param runtime - Config, store, LLM adapter and optional download policy.
 * @param options - `batchSize` (default 32), `force`, `dryRun` and `model`.
 * @returns Embedded/error counts, duration in seconds, the model used and
 *   whether vector search is available.
 * @throws `STORE` when counting, index creation or backlog processing fails;
 *   `MODEL` when the embedding port cannot be created or the probe embedding
 *   fails.
 */
export async function runEmbed(
  runtime: EmbedRuntimeOptions,
  options: GnoEmbedOptions = {}
): Promise<GnoEmbedResult> {
  const batchSize = options.batchSize ?? 32;
  const isForced = options.force ?? false;
  const isDryRun = options.dryRun ?? false;
  const activePreset = getActivePreset(runtime.config);
  const modelUri = options.model ?? activePreset.embed;
  const db = runtime.store.getRawDb();
  const stats: VectorStatsPort = createVectorStatsPort(db);

  const pending = isForced
    ? await getActiveChunkCount(db)
    : await stats.countBacklog(modelUri);
  if (!pending.ok) {
    throw sdkError("STORE", pending.error.message, {
      cause: pending.error.cause,
    });
  }

  const totalToEmbed = pending.value;
  if (isDryRun || totalToEmbed === 0) {
    // Nothing is embedded here; `embedded` reports the pending count.
    return {
      embedded: totalToEmbed,
      errors: 0,
      duration: 0,
      model: modelUri,
      searchAvailable: await checkVecAvailable(db),
    };
  }

  const portResult = await runtime.llm.createEmbeddingPort(modelUri, {
    policy: runtime.downloadPolicy,
  });
  if (!portResult.ok) {
    throw sdkError("MODEL", portResult.error.message, {
      cause: portResult.error.cause,
    });
  }

  const embedPort = portResult.value;
  try {
    // Embed a throwaway string to learn the model's vector dimensions.
    const probe = await embedPort.embed("dimension probe");
    if (!probe.ok) {
      throw sdkError("MODEL", probe.error.message, {
        cause: probe.error.cause,
      });
    }

    const indexResult = await createVectorIndexPort(db, {
      model: modelUri,
      dimensions: probe.value.length,
    });
    if (!indexResult.ok) {
      throw sdkError("STORE", indexResult.error.message, {
        cause: indexResult.error.cause,
      });
    }
    const vectorIndex = indexResult.value;

    // Incremental path: embed only what the stats port reports as backlog.
    const embedPending = async (): Promise<{
      embedded: number;
      errors: number;
    }> => {
      const processed = await embedBacklog({
        statsPort: stats,
        embedPort,
        vectorIndex,
        modelUri,
        batchSize,
      });
      if (!processed.ok) {
        throw sdkError("STORE", processed.error.message, {
          cause: processed.error.cause,
        });
      }
      return {
        embedded: processed.value.embedded,
        errors: processed.value.errors,
      };
    };

    const startedAt = Date.now();
    const outcome = isForced
      ? await forceEmbedAll(db, embedPort, vectorIndex, modelUri, batchSize)
      : await embedPending();

    return {
      embedded: outcome.embedded,
      errors: outcome.errors,
      duration: (Date.now() - startedAt) / 1000,
      model: modelUri,
      searchAvailable: vectorIndex.searchAvailable,
    };
  } finally {
    // Release the embedding port on success and on every failure path.
    await embedPort.dispose();
  }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SDK error types.
|
|
3
|
+
*
|
|
4
|
+
* @module src/sdk/errors
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** Category code carried by every `GnoSdkError`. */
export type GnoSdkErrorCode =
  // NOTE(review): presumably configuration problems; not raised in the code visible here.
  | "CONFIG"
  // Invalid caller input, e.g. bad options or an unparseable reference.
  | "VALIDATION"
  // Operation could not complete, e.g. mirror content unavailable.
  | "RUNTIME"
  // The underlying store reported a failure.
  | "STORE"
  // Embedding model could not be created or used.
  | "MODEL"
  // No active document matched the request.
  | "NOT_FOUND";
|
|
14
|
+
|
|
15
|
+
/** Optional extras accepted when creating a `GnoSdkError`. */
export interface GnoSdkErrorOptions {
  /** Underlying error or value, passed to the `Error` constructor as `cause`. */
  cause?: unknown;
  /** Extra structured context exposed on the error as `details`. */
  details?: Record<string, unknown>;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Error type thrown by the SDK.
 *
 * Adds a category `code` and optional structured `details` on top of the
 * standard `Error` fields; the `cause` option is forwarded to `Error`.
 */
export class GnoSdkError extends Error {
  /** Category of the failure. */
  readonly code: GnoSdkErrorCode;
  /** Optional structured context supplied by the thrower. */
  readonly details?: Record<string, unknown>;

  /**
   * @param code - Category of the failure.
   * @param message - Human-readable description.
   * @param options - Optional `cause` and `details`.
   */
  constructor(
    code: GnoSdkErrorCode,
    message: string,
    options: GnoSdkErrorOptions = {}
  ) {
    const { cause, details } = options;
    super(message, { cause });
    this.name = "GnoSdkError";
    this.code = code;
    this.details = details;
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Convenience factory for `GnoSdkError`, intended for `throw sdkError(...)`.
 *
 * @param code - Category of the failure.
 * @param message - Human-readable description.
 * @param options - Optional `cause` and `details`.
 * @returns A new `GnoSdkError`; the caller is responsible for throwing it.
 */
export function sdkError(
  code: GnoSdkErrorCode,
  message: string,
  options: GnoSdkErrorOptions = {}
): GnoSdkError {
  const error = new GnoSdkError(code, message, options);
  return error;
}
|