@gmickel/gno 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/assets/skill/SKILL.md +112 -0
- package/assets/skill/cli-reference.md +327 -0
- package/assets/skill/examples.md +234 -0
- package/assets/skill/mcp-reference.md +159 -0
- package/package.json +90 -0
- package/src/app/constants.ts +313 -0
- package/src/cli/colors.ts +65 -0
- package/src/cli/commands/ask.ts +545 -0
- package/src/cli/commands/cleanup.ts +105 -0
- package/src/cli/commands/collection/add.ts +120 -0
- package/src/cli/commands/collection/index.ts +10 -0
- package/src/cli/commands/collection/list.ts +108 -0
- package/src/cli/commands/collection/remove.ts +64 -0
- package/src/cli/commands/collection/rename.ts +95 -0
- package/src/cli/commands/context/add.ts +67 -0
- package/src/cli/commands/context/check.ts +153 -0
- package/src/cli/commands/context/index.ts +10 -0
- package/src/cli/commands/context/list.ts +109 -0
- package/src/cli/commands/context/rm.ts +52 -0
- package/src/cli/commands/doctor.ts +393 -0
- package/src/cli/commands/embed.ts +462 -0
- package/src/cli/commands/get.ts +356 -0
- package/src/cli/commands/index-cmd.ts +119 -0
- package/src/cli/commands/index.ts +102 -0
- package/src/cli/commands/init.ts +328 -0
- package/src/cli/commands/ls.ts +217 -0
- package/src/cli/commands/mcp/config.ts +300 -0
- package/src/cli/commands/mcp/index.ts +24 -0
- package/src/cli/commands/mcp/install.ts +203 -0
- package/src/cli/commands/mcp/paths.ts +470 -0
- package/src/cli/commands/mcp/status.ts +222 -0
- package/src/cli/commands/mcp/uninstall.ts +158 -0
- package/src/cli/commands/mcp.ts +20 -0
- package/src/cli/commands/models/clear.ts +103 -0
- package/src/cli/commands/models/index.ts +32 -0
- package/src/cli/commands/models/list.ts +214 -0
- package/src/cli/commands/models/path.ts +51 -0
- package/src/cli/commands/models/pull.ts +199 -0
- package/src/cli/commands/models/use.ts +85 -0
- package/src/cli/commands/multi-get.ts +400 -0
- package/src/cli/commands/query.ts +220 -0
- package/src/cli/commands/ref-parser.ts +108 -0
- package/src/cli/commands/reset.ts +191 -0
- package/src/cli/commands/search.ts +136 -0
- package/src/cli/commands/shared.ts +156 -0
- package/src/cli/commands/skill/index.ts +19 -0
- package/src/cli/commands/skill/install.ts +197 -0
- package/src/cli/commands/skill/paths-cmd.ts +81 -0
- package/src/cli/commands/skill/paths.ts +191 -0
- package/src/cli/commands/skill/show.ts +73 -0
- package/src/cli/commands/skill/uninstall.ts +141 -0
- package/src/cli/commands/status.ts +205 -0
- package/src/cli/commands/update.ts +68 -0
- package/src/cli/commands/vsearch.ts +188 -0
- package/src/cli/context.ts +64 -0
- package/src/cli/errors.ts +64 -0
- package/src/cli/format/search-results.ts +211 -0
- package/src/cli/options.ts +183 -0
- package/src/cli/program.ts +1330 -0
- package/src/cli/run.ts +213 -0
- package/src/cli/ui.ts +92 -0
- package/src/config/defaults.ts +20 -0
- package/src/config/index.ts +55 -0
- package/src/config/loader.ts +161 -0
- package/src/config/paths.ts +87 -0
- package/src/config/saver.ts +153 -0
- package/src/config/types.ts +280 -0
- package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
- package/src/converters/adapters/officeparser/adapter.ts +126 -0
- package/src/converters/canonicalize.ts +89 -0
- package/src/converters/errors.ts +218 -0
- package/src/converters/index.ts +51 -0
- package/src/converters/mime.ts +163 -0
- package/src/converters/native/markdown.ts +115 -0
- package/src/converters/native/plaintext.ts +56 -0
- package/src/converters/path.ts +48 -0
- package/src/converters/pipeline.ts +159 -0
- package/src/converters/registry.ts +74 -0
- package/src/converters/types.ts +123 -0
- package/src/converters/versions.ts +24 -0
- package/src/index.ts +27 -0
- package/src/ingestion/chunker.ts +238 -0
- package/src/ingestion/index.ts +32 -0
- package/src/ingestion/language.ts +276 -0
- package/src/ingestion/sync.ts +671 -0
- package/src/ingestion/types.ts +219 -0
- package/src/ingestion/walker.ts +235 -0
- package/src/llm/cache.ts +467 -0
- package/src/llm/errors.ts +191 -0
- package/src/llm/index.ts +58 -0
- package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
- package/src/llm/nodeLlamaCpp/generation.ts +88 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
- package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
- package/src/llm/registry.ts +86 -0
- package/src/llm/types.ts +129 -0
- package/src/mcp/resources/index.ts +151 -0
- package/src/mcp/server.ts +229 -0
- package/src/mcp/tools/get.ts +220 -0
- package/src/mcp/tools/index.ts +160 -0
- package/src/mcp/tools/multi-get.ts +263 -0
- package/src/mcp/tools/query.ts +226 -0
- package/src/mcp/tools/search.ts +119 -0
- package/src/mcp/tools/status.ts +81 -0
- package/src/mcp/tools/vsearch.ts +198 -0
- package/src/pipeline/chunk-lookup.ts +44 -0
- package/src/pipeline/expansion.ts +256 -0
- package/src/pipeline/explain.ts +115 -0
- package/src/pipeline/fusion.ts +185 -0
- package/src/pipeline/hybrid.ts +535 -0
- package/src/pipeline/index.ts +64 -0
- package/src/pipeline/query-language.ts +118 -0
- package/src/pipeline/rerank.ts +223 -0
- package/src/pipeline/search.ts +261 -0
- package/src/pipeline/types.ts +328 -0
- package/src/pipeline/vsearch.ts +348 -0
- package/src/store/index.ts +41 -0
- package/src/store/migrations/001-initial.ts +196 -0
- package/src/store/migrations/index.ts +20 -0
- package/src/store/migrations/runner.ts +187 -0
- package/src/store/sqlite/adapter.ts +1242 -0
- package/src/store/sqlite/index.ts +7 -0
- package/src/store/sqlite/setup.ts +129 -0
- package/src/store/sqlite/types.ts +28 -0
- package/src/store/types.ts +506 -0
- package/src/store/vector/index.ts +13 -0
- package/src/store/vector/sqlite-vec.ts +373 -0
- package/src/store/vector/stats.ts +152 -0
- package/src/store/vector/types.ts +115 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* gno embed command implementation.
|
|
3
|
+
* Batch embed chunks into vector storage.
|
|
4
|
+
*
|
|
5
|
+
* @module src/cli/commands/embed
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Database } from 'bun:sqlite';
|
|
9
|
+
import { getIndexDbPath } from '../../app/constants';
|
|
10
|
+
import { getConfigPaths, isInitialized, loadConfig } from '../../config';
|
|
11
|
+
import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
|
|
12
|
+
import { getActivePreset } from '../../llm/registry';
|
|
13
|
+
import type { EmbeddingPort } from '../../llm/types';
|
|
14
|
+
import { SqliteAdapter } from '../../store/sqlite/adapter';
|
|
15
|
+
import type { StoreResult } from '../../store/types';
|
|
16
|
+
import { err, ok } from '../../store/types';
|
|
17
|
+
import {
|
|
18
|
+
type BacklogItem,
|
|
19
|
+
createVectorIndexPort,
|
|
20
|
+
createVectorStatsPort,
|
|
21
|
+
type VectorIndexPort,
|
|
22
|
+
type VectorRow,
|
|
23
|
+
type VectorStatsPort,
|
|
24
|
+
} from '../../store/vector';
|
|
25
|
+
|
|
26
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
27
|
+
// Types
|
|
28
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/**
 * Options accepted by the `gno embed` command.
 *
 * Every field is optional; `embed()` falls back to a batch size of 32 and
 * treats the boolean flags as `false` when they are omitted.
 */
export interface EmbedOptions {
  /** Path to a config file to use instead of the default one. */
  configPath?: string;

  /** Model URI to embed with, instead of the active preset's embed model. */
  model?: string;

  /** Number of chunks fetched and embedded per batch. */
  batchSize?: number;

  /** When set, every active chunk is re-embedded rather than only the backlog. */
  force?: boolean;

  /** When set, only report how many chunks would be embedded. */
  dryRun?: boolean;

  /** Skip confirmation prompts. */
  yes?: boolean;

  /** Emit machine-readable JSON (also suppresses the progress line). */
  json?: boolean;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Outcome of `embed()`, discriminated on `success`.
 *
 * On failure only a human-readable `error` is carried. On success the counts
 * describe the run; for a dry run `embedded` is the number of chunks that
 * would be embedded.
 */
export type EmbedResult =
  | { success: false; error: string }
  | {
      success: true;
      /** Chunks embedded and stored (or that would be, for a dry run). */
      embedded: number;
      /** Chunks whose batch failed to embed or store. */
      errors: number;
      /** Elapsed time in seconds. */
      duration: number;
      /** Model URI used for the run. */
      model: string;
      /** Whether sqlite-vec backed search is available. */
      searchAvailable: boolean;
    };
|
|
57
|
+
|
|
58
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
59
|
+
// Helpers
|
|
60
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Render a duration in seconds as a short human-readable string.
 *
 * Durations under a minute keep one decimal place (`"12.3s"`); longer ones
 * are shown as whole minutes and seconds (`"1m 30s"`).
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
  if (seconds < 60) {
    return `${seconds.toFixed(1)}s`;
  }
  // Round the total before splitting it. Rounding the remainder on its own
  // turns e.g. 119.6s into "1m 60s" instead of carrying over to "2m 0s".
  const totalSeconds = Math.round(seconds);
  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return `${mins}m ${secs}s`;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Probe whether the sqlite-vec extension can be loaded into `db`.
 *
 * The module is imported lazily; any failure while importing or loading it is
 * reported as "not available" rather than thrown.
 *
 * @param db - Open database handle to load the extension into.
 * @returns `true` when the extension loaded, `false` otherwise.
 */
async function checkVecAvailable(
  db: import('bun:sqlite').Database
): Promise<boolean> {
  let loaded = false;
  try {
    const vecModule = await import('sqlite-vec');
    vecModule.load(db);
    loaded = true;
  } catch {
    // Missing module or failed extension load: treated as unavailable.
    loaded = false;
  }
  return loaded;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Everything `processBatches` needs for one embedding run.
 */
interface BatchContext {
  /** Raw database handle, used to read active chunks in force mode. */
  db: import('bun:sqlite').Database;
  /** Source of backlog items when not in force mode. */
  stats: VectorStatsPort;
  /** Produces embeddings for a batch of chunk texts. */
  embedPort: EmbeddingPort;
  /** Destination the embedded vectors are upserted into. */
  vectorIndex: VectorIndexPort;
  /** Model URI recorded on every stored vector and used to query the backlog. */
  modelUri: string;
  /** Maximum number of chunks fetched per batch. */
  batchSize: number;
  /** Re-embed all active chunks instead of only the backlog. */
  force: boolean;
  /** Whether to draw the progress line on stdout. */
  showProgress: boolean;
  /** Number of chunks expected to be processed; bounds the loop. */
  totalToEmbed: number;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Result of `processBatches`: either the failure message, or the run's
 * embedded/error counts and its duration in seconds.
 */
type BatchResult =
  | { ok: false; error: string }
  | { ok: true; embedded: number; errors: number; duration: number };
|
|
98
|
+
|
|
99
|
+
/**
 * Seek-pagination position: the `(mirrorHash, seq)` of the last chunk in the
 * previous batch. The next batch starts strictly after it.
 */
interface Cursor {
  /** Mirror hash of the last chunk seen. */
  mirrorHash: string;
  /** Sequence number of the last chunk seen within that mirror hash. */
  seq: number;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Embed chunks batch by batch and store the resulting vectors.
 *
 * Each iteration fetches up to `ctx.batchSize` items (every active chunk when
 * `ctx.force` is set, otherwise the backlog for `ctx.modelUri`), embeds their
 * text and upserts the vectors. A batch that fails to embed, returns a
 * mismatched number of embeddings, or fails to store is counted entirely as
 * errors and the run moves on. Only a failed fetch aborts the run.
 *
 * @param ctx - Ports, model URI and sizing for this run.
 * @returns Embedded/error counts and elapsed seconds, or the fetch error
 *   message when a batch could not be read.
 */
async function processBatches(ctx: BatchContext): Promise<BatchResult> {
  const startTime = Date.now();
  let embedded = 0;
  let errors = 0;
  let cursor: Cursor | undefined;

  // Redraws the single-line progress display. Called after every batch,
  // including failed ones, so the percentage and ETA keep moving while
  // errors accumulate.
  const reportProgress = (): void => {
    if (!ctx.showProgress) {
      return;
    }
    const pct = ((embedded + errors) / ctx.totalToEmbed) * 100;
    const elapsed = (Date.now() - startTime) / 1000;
    // Clamp the divisors so the first update never divides by zero.
    const rate = embedded / Math.max(elapsed, 0.001);
    const eta =
      (ctx.totalToEmbed - embedded - errors) / Math.max(rate, 0.001);
    process.stdout.write(
      `\rEmbedding: ${embedded.toLocaleString()}/${ctx.totalToEmbed.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
    );
  };

  while (embedded + errors < ctx.totalToEmbed) {
    // Get next batch using seek pagination (cursor-based)
    const batchResult = ctx.force
      ? await getActiveChunks(ctx.db, ctx.batchSize, cursor)
      : await ctx.stats.getBacklog(ctx.modelUri, {
          limit: ctx.batchSize,
          after: cursor,
        });

    if (!batchResult.ok) {
      return { ok: false, error: batchResult.error.message };
    }

    const batch = batchResult.value;
    const lastItem = batch.at(-1);
    if (!lastItem) {
      // Empty batch: nothing left to read even if the count said otherwise.
      break;
    }

    // Advance the cursor before embedding so a failing batch is skipped on
    // the next iteration instead of being fetched again forever.
    cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };

    let stored = false;
    const batchEmbedResult = await ctx.embedPort.embedBatch(
      batch.map((b) => b.text)
    );
    if (batchEmbedResult.ok) {
      const embeddings = batchEmbedResult.value;
      // A count mismatch means rows cannot be paired with their vectors.
      if (embeddings.length === batch.length) {
        const embeddedAt = new Date().toISOString();
        const vectors: VectorRow[] = batch.map((b, idx) => ({
          mirrorHash: b.mirrorHash,
          seq: b.seq,
          model: ctx.modelUri,
          embedding: new Float32Array(embeddings[idx] as number[]),
          embeddedAt,
        }));
        const storeResult = await ctx.vectorIndex.upsertVectors(vectors);
        stored = storeResult.ok;
      }
    }

    if (stored) {
      embedded += batch.length;
    } else {
      errors += batch.length;
    }

    reportProgress();
  }

  if (ctx.showProgress) {
    // Terminate the carriage-return progress line.
    process.stdout.write('\n');
  }

  return {
    ok: true,
    embedded,
    errors,
    duration: (Date.now() - startTime) / 1000,
  };
}
|
|
191
|
+
|
|
192
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
193
|
+
// Main Command
|
|
194
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
/**
 * Execute gno embed command.
 *
 * Steps, in order: validate the batch size, check that GNO is initialized,
 * load the config, resolve the model URI (`options.model` or the active
 * preset's embed model), open the store, and count the work (all active
 * chunks with `force`, otherwise the backlog for the model). When there is
 * nothing to do, or for a dry run, it returns before any model is loaded.
 * Otherwise it creates the embedding port, discovers the vector dimensions
 * with a probe embedding, creates the vector index and processes all batches.
 *
 * Failures are reported through the returned `{ success: false, error }`
 * value. The embedding port (if created) is disposed and the store is closed
 * on every path once the store has been opened.
 *
 * @param options - Command options; see {@link EmbedOptions}.
 * @returns The run summary, or the error message of the step that failed.
 */
export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
  const batchSize = options.batchSize ?? 32;
  const force = options.force ?? false;
  const dryRun = options.dryRun ?? false;

  // A batch size of 0 yields an empty first batch (reported as "up to date"),
  // and a negative value becomes an unbounded SQLite LIMIT in force mode.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    return {
      success: false,
      error: `Invalid batch size: ${batchSize}. Must be a positive integer.`,
    };
  }

  // Check initialization
  const initialized = await isInitialized(options.configPath);
  if (!initialized) {
    return { success: false, error: 'GNO not initialized. Run: gno init' };
  }

  // Load config
  const configResult = await loadConfig(options.configPath);
  if (!configResult.ok) {
    return { success: false, error: configResult.error.message };
  }
  const config = configResult.value;

  // Get model URI
  const preset = getActivePreset(config);
  const modelUri = options.model ?? preset.embed;

  // Open store
  const store = new SqliteAdapter();
  const dbPath = getIndexDbPath();
  // NOTE(review): options.configPath is not forwarded here, unlike the
  // isInitialized/loadConfig calls above. Confirm this is intended.
  const paths = getConfigPaths();
  store.setConfigPath(paths.configFile);

  const openResult = await store.open(dbPath, config.ftsTokenizer);
  if (!openResult.ok) {
    return { success: false, error: openResult.error.message };
  }

  // Get raw DB for vector ops (SqliteAdapter always implements SqliteDbProvider)
  const db = store.getRawDb();
  let embedPort: EmbeddingPort | null = null;

  try {
    // Create stats port for backlog detection
    const stats: VectorStatsPort = createVectorStatsPort(db);

    // Get backlog count first (before loading model)
    const backlogResult = force
      ? await getActiveChunkCount(db)
      : await stats.countBacklog(modelUri);

    if (!backlogResult.ok) {
      return { success: false, error: backlogResult.error.message };
    }

    const totalToEmbed = backlogResult.value;

    if (totalToEmbed === 0) {
      const vecAvailable = await checkVecAvailable(db);
      return {
        success: true,
        embedded: 0,
        errors: 0,
        duration: 0,
        model: modelUri,
        searchAvailable: vecAvailable,
      };
    }

    if (dryRun) {
      // Report the pending count as `embedded` without loading the model.
      const vecAvailable = await checkVecAvailable(db);
      return {
        success: true,
        embedded: totalToEmbed,
        errors: 0,
        duration: 0,
        model: modelUri,
        searchAvailable: vecAvailable,
      };
    }

    // Create LLM adapter and embedding port
    const llm = new LlmAdapter(config);
    const embedResult = await llm.createEmbeddingPort(modelUri);
    if (!embedResult.ok) {
      return { success: false, error: embedResult.error.message };
    }
    embedPort = embedResult.value;

    // Discover dimensions via probe embedding
    const probeResult = await embedPort.embed('dimension probe');
    if (!probeResult.ok) {
      return { success: false, error: probeResult.error.message };
    }
    const dimensions = probeResult.value.length;

    // Create vector index port
    const vectorResult = await createVectorIndexPort(db, {
      model: modelUri,
      dimensions,
    });
    if (!vectorResult.ok) {
      return { success: false, error: vectorResult.error.message };
    }
    const vectorIndex: VectorIndexPort = vectorResult.value;

    // Process batches
    const result = await processBatches({
      db,
      stats,
      embedPort,
      vectorIndex,
      modelUri,
      batchSize,
      force,
      showProgress: !options.json,
      totalToEmbed,
    });

    if (!result.ok) {
      return { success: false, error: result.error };
    }

    return {
      success: true,
      embedded: result.embedded,
      errors: result.errors,
      duration: result.duration,
      model: modelUri,
      searchAvailable: vectorIndex.searchAvailable,
    };
  } finally {
    // Close the store even if disposing the embedding port throws, so the
    // database handle is never leaked.
    try {
      if (embedPort) {
        await embedPort.dispose();
      }
    } finally {
      await store.close();
    }
  }
}
|
|
333
|
+
|
|
334
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
335
|
+
// Helper: Get all active chunks (for --force mode)
|
|
336
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
337
|
+
|
|
338
|
+
/**
 * Count the chunks whose mirror hash belongs to at least one active document.
 *
 * @param db - Open database handle.
 * @returns The count, or a `QUERY_FAILED` error if the query throws.
 */
async function getActiveChunkCount(
  db: Database
): Promise<StoreResult<number>> {
  const countSql = `
        SELECT COUNT(*) as count FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
      `;

  try {
    const row = db.prepare(countSql).get() as { count: number };
    return ok(row.count);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return err('QUERY_FAILED', `Failed to count chunks: ${reason}`);
  }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Fetch one page of chunks belonging to active documents, ordered by
 * `(mirror_hash, seq)`. Every row is tagged with reason `'force'`.
 *
 * @param db - Open database handle.
 * @param limit - Maximum number of rows to return.
 * @param after - Optional position to resume after (exclusive); omitted for
 *   the first page.
 * @returns The page of items, or a `QUERY_FAILED` error if the query throws.
 */
async function getActiveChunks(
  db: Database,
  limit: number,
  after?: { mirrorHash: string; seq: number }
): Promise<StoreResult<BacklogItem[]>> {
  try {
    let query: string;
    let bindings: (string | number)[];

    if (after) {
      // Resume strictly after the previous page's last (mirror_hash, seq).
      query = `
        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
        FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
          AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))
        ORDER BY c.mirror_hash, c.seq
        LIMIT ?
      `;
      bindings = [after.mirrorHash, after.mirrorHash, after.seq, limit];
    } else {
      query = `
        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
        FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
        ORDER BY c.mirror_hash, c.seq
        LIMIT ?
      `;
      bindings = [limit];
    }

    const rows = db.prepare(query).all(...bindings) as BacklogItem[];
    return ok(rows);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return err('QUERY_FAILED', `Failed to get chunks: ${reason}`);
  }
}
|
|
406
|
+
|
|
407
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
408
|
+
// Format
|
|
409
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
410
|
+
|
|
411
|
+
/**
 * Turn an embed result into the text printed by the CLI.
 *
 * With `options.json` the output is a JSON document (an `error` envelope on
 * failure, the run summary otherwise). Without it the output is a plain-text
 * message: the error, the dry-run notice, the "nothing to do" notice, or a
 * summary with optional failure and sqlite-vec warning lines.
 *
 * @param result - Result returned by `embed()`.
 * @param options - Options the command ran with; `json` and `dryRun` are read.
 * @returns The formatted output.
 */
export function formatEmbed(
  result: EmbedResult,
  options: EmbedOptions
): string {
  if (!result.success) {
    if (options.json) {
      return JSON.stringify({
        error: { code: 'RUNTIME', message: result.error },
      });
    }
    return `Error: ${result.error}`;
  }

  const { embedded, errors, duration, model, searchAvailable } = result;

  if (options.json) {
    const payload = { embedded, errors, duration, model, searchAvailable };
    return JSON.stringify(payload, null, 2);
  }

  if (options.dryRun) {
    return `Dry run: would embed ${embedded.toLocaleString()} chunks with model ${model}`;
  }

  const nothingDone = embedded === 0 && errors === 0;
  if (nothingDone) {
    return 'No chunks need embedding. All up to date.';
  }

  const report = [
    `Embedded ${embedded.toLocaleString()} chunks in ${formatDuration(duration)}`,
  ];

  if (errors > 0) {
    report.push(`${errors} chunks failed to embed.`);
  }

  if (!searchAvailable) {
    report.push(
      'Warning: sqlite-vec not available. Embeddings stored but KNN search disabled.'
    );
  }

  return report.join('\n');
}
|