@gmickel/gno 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
@@ -0,0 +1,462 @@
1
+ /**
2
+ * gno embed command implementation.
3
+ * Batch embed chunks into vector storage.
4
+ *
5
+ * @module src/cli/commands/embed
6
+ */
7
+
8
+ import type { Database } from 'bun:sqlite';
9
+ import { getIndexDbPath } from '../../app/constants';
10
+ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
11
+ import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
12
+ import { getActivePreset } from '../../llm/registry';
13
+ import type { EmbeddingPort } from '../../llm/types';
14
+ import { SqliteAdapter } from '../../store/sqlite/adapter';
15
+ import type { StoreResult } from '../../store/types';
16
+ import { err, ok } from '../../store/types';
17
+ import {
18
+ type BacklogItem,
19
+ createVectorIndexPort,
20
+ createVectorStatsPort,
21
+ type VectorIndexPort,
22
+ type VectorRow,
23
+ type VectorStatsPort,
24
+ } from '../../store/vector';
25
+
26
+ // ─────────────────────────────────────────────────────────────────────────────
27
+ // Types
28
+ // ─────────────────────────────────────────────────────────────────────────────
29
+
30
/**
 * Options accepted by the `gno embed` command.
 */
export interface EmbedOptions {
  /** Config file location; passed to the initialization check and config loader. */
  configPath?: string;
  /** Embedding model URI; takes precedence over the active preset's embed model. */
  model?: string;
  /** Number of chunks fetched and embedded per batch (32 when omitted). */
  batchSize?: number;
  /** Embed every chunk of every active document instead of only the backlog. */
  force?: boolean;
  /** Report how many chunks would be embedded without loading the model. */
  dryRun?: boolean;
  /** Skip confirmation prompts (not read by the embed implementation itself). */
  yes?: boolean;
  /** Emit JSON output; also turns off the interactive progress line. */
  json?: boolean;
}
46
+
47
/**
 * Outcome of the embed command: either run statistics or an error message.
 */
export type EmbedResult =
  | {
      success: true;
      /** Chunks embedded and stored; for a dry run, the number that would be embedded. */
      embedded: number;
      /** Chunks that belonged to a failed batch. */
      errors: number;
      /** Wall-clock time of the batch loop in seconds (0 when no embedding ran). */
      duration: number;
      /** Model URI the embeddings were (or would be) produced with. */
      model: string;
      /** Whether vector search is usable against the stored embeddings. */
      searchAvailable: boolean;
    }
  | { success: false; error: string };
57
+
58
+ // ─────────────────────────────────────────────────────────────────────────────
59
+ // Helpers
60
+ // ─────────────────────────────────────────────────────────────────────────────
61
+
62
/**
 * Render a duration in seconds as a short human-readable string.
 *
 * Durations that display below one minute use one decimal place (`12.3s`);
 * everything else is shown as whole minutes and seconds (`2m 5s`).
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
  if (seconds < 60) {
    const short = seconds.toFixed(1);
    // Values just under a minute (e.g. 59.97) round up to "60.0"; show those
    // in the minute format instead of as "60.0s".
    if (short !== '60.0') {
      return `${short}s`;
    }
  }

  // Round the total BEFORE splitting so the seconds part can never display
  // as 60 (previously 119.6 rendered as "1m 60s").
  const total = Math.round(seconds);
  const mins = Math.floor(total / 60);
  const secs = total % 60;
  return `${mins}m ${secs}s`;
}
70
+
71
/**
 * Probe whether the sqlite-vec extension can be used with this database.
 *
 * Dynamically imports `sqlite-vec` and calls its `load` on `db`. Any failure
 * in either step is swallowed and reported as "not available".
 *
 * @param db - Open database handle to load the extension into.
 * @returns `true` when import and load both succeed, otherwise `false`.
 */
async function checkVecAvailable(db: Database): Promise<boolean> {
  let available = false;
  try {
    const vecModule = await import('sqlite-vec');
    vecModule.load(db);
    available = true;
  } catch {
    // Missing package or failed extension load: leave `available` false.
  }
  return available;
}
82
+
83
/** Everything the batch loop needs to fetch, embed, and store chunks. */
interface BatchContext {
  /** Raw database handle; used to read active chunks in force mode. */
  db: Database;
  /** Source of backlog batches when not in force mode. */
  stats: VectorStatsPort;
  /** Turns batches of chunk text into embeddings. */
  embedPort: EmbeddingPort;
  /** Destination for the resulting vector rows. */
  vectorIndex: VectorIndexPort;
  /** Model URI recorded on each stored vector and used for backlog lookup. */
  modelUri: string;
  /** Maximum number of chunks requested per batch. */
  batchSize: number;
  /** When true, iterate all active chunks rather than the backlog. */
  force: boolean;
  /** When true, write a live progress line to stdout. */
  showProgress: boolean;
  /** Expected number of chunks; bounds the loop and scales the progress display. */
  totalToEmbed: number;
}
94
+
95
/**
 * Result of the batch loop: counts and elapsed seconds on completion, or the
 * message of the error that aborted fetching batches.
 */
type BatchResult =
  | {
      ok: true;
      embedded: number;
      errors: number;
      duration: number;
    }
  | {
      ok: false;
      error: string;
    };
98
+
99
/**
 * Seek-pagination position: identifies the last chunk of the previous batch
 * so the next fetch can continue after it.
 */
interface Cursor {
  /** `mirrorHash` of the last chunk already fetched. */
  mirrorHash: string;
  /** `seq` of the last chunk already fetched. */
  seq: number;
}
103
+
104
/**
 * Embed one batch of chunks and persist the resulting vectors.
 *
 * @returns `true` when the whole batch was embedded and stored; `false` when
 * embedding failed, the embedding count did not match the batch, or the
 * upsert failed.
 */
async function embedAndStoreBatch(
  ctx: BatchContext,
  batch: ReadonlyArray<Pick<BacklogItem, 'mirrorHash' | 'seq' | 'text'>>
): Promise<boolean> {
  const embedResult = await ctx.embedPort.embedBatch(
    batch.map((item) => item.text)
  );
  if (!embedResult.ok) {
    return false;
  }

  // A count mismatch would pair chunks with the wrong vectors; reject the batch.
  const embeddings = embedResult.value;
  if (embeddings.length !== batch.length) {
    return false;
  }

  // One timestamp per batch instead of one Date allocation per row.
  const embeddedAt = new Date().toISOString();
  const vectors: VectorRow[] = batch.map((item, idx) => ({
    mirrorHash: item.mirrorHash,
    seq: item.seq,
    model: ctx.modelUri,
    embedding: new Float32Array(embeddings[idx] as number[]),
    embeddedAt,
  }));

  const storeResult = await ctx.vectorIndex.upsertVectors(vectors);
  return storeResult.ok;
}

/**
 * Rewrite the single-line progress indicator on stdout.
 *
 * The displayed rate counts successfully embedded chunks only, while the ETA
 * is derived from the overall processing rate (successes and failures), since
 * failed chunks also reduce the remaining work.
 */
function writeEmbedProgress(
  embedded: number,
  errors: number,
  total: number,
  startTime: number
): void {
  const processed = embedded + errors;
  const pct = (processed / total) * 100;
  const elapsed = Math.max((Date.now() - startTime) / 1000, 0.001);
  const rate = embedded / elapsed;
  const eta = (total - processed) / Math.max(processed / elapsed, 0.001);
  process.stdout.write(
    `\rEmbedding: ${embedded.toLocaleString()}/${total.toLocaleString()} (${pct.toFixed(1)}%) | ${rate.toFixed(1)} chunks/s | ETA ${formatDuration(eta)}`
  );
}

/**
 * Fetch, embed, and store chunks batch by batch.
 *
 * Batches come from all active chunks (`ctx.force`) or from the model's
 * backlog, using seek pagination. The loop ends once `ctx.totalToEmbed`
 * chunks have been attempted or a fetch returns no rows. A failed batch is
 * counted in `errors` and skipped; only a failure to fetch a batch aborts.
 *
 * @param ctx - Ports, model, and sizing for the run.
 * @returns Counts and elapsed seconds, or the fetch error message.
 */
async function processBatches(ctx: BatchContext): Promise<BatchResult> {
  const startTime = Date.now();
  let embedded = 0;
  let errors = 0;
  let cursor: Cursor | undefined;

  while (embedded + errors < ctx.totalToEmbed) {
    // Get next batch using seek pagination (cursor-based)
    const batchResult = ctx.force
      ? await getActiveChunks(ctx.db, ctx.batchSize, cursor)
      : await ctx.stats.getBacklog(ctx.modelUri, {
          limit: ctx.batchSize,
          after: cursor,
        });

    if (!batchResult.ok) {
      return { ok: false, error: batchResult.error.message };
    }

    const batch = batchResult.value;
    const lastItem = batch.at(-1);
    if (!lastItem) {
      // Empty batch: the source has run dry.
      break;
    }

    // Advance the cursor before embedding so a failing batch is skipped on
    // the next iteration rather than retried forever.
    cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };

    if (await embedAndStoreBatch(ctx, batch)) {
      embedded += batch.length;
    } else {
      errors += batch.length;
    }

    // Report after every batch, including failed ones, so the display does
    // not stall while batches are failing.
    if (ctx.showProgress) {
      writeEmbedProgress(embedded, errors, ctx.totalToEmbed, startTime);
    }
  }

  if (ctx.showProgress) {
    process.stdout.write('\n');
  }

  return {
    ok: true,
    embedded,
    errors,
    duration: (Date.now() - startTime) / 1000,
  };
}
191
+
192
+ // ─────────────────────────────────────────────────────────────────────────────
193
+ // Main Command
194
+ // ─────────────────────────────────────────────────────────────────────────────
195
+
196
/**
 * Execute gno embed command.
 *
 * Loads the config, opens the index database, determines how many chunks
 * need embedding (all active chunks with `force`, otherwise the model's
 * backlog), then embeds and stores them in batches. Nothing is embedded when
 * the count is zero or `dryRun` is set; a dry run reports the pending count
 * in `embedded`.
 *
 * @param options - Command options; see {@link EmbedOptions}.
 * @returns Run statistics on success, or `{ success: false, error }`.
 */
export async function embed(options: EmbedOptions = {}): Promise<EmbedResult> {
  const batchSize = options.batchSize ?? 32;
  const force = options.force ?? false;
  const dryRun = options.dryRun ?? false;

  // Reject unusable batch sizes up front: a size of 0 fetches nothing and the
  // run would end looking "up to date", and a negative value used as an SQL
  // LIMIT removes the bound entirely.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    return {
      success: false,
      error: `Invalid batch size: ${batchSize}. Must be a positive integer.`,
    };
  }

  // Check initialization
  const initialized = await isInitialized(options.configPath);
  if (!initialized) {
    return { success: false, error: 'GNO not initialized. Run: gno init' };
  }

  // Load config
  const configResult = await loadConfig(options.configPath);
  if (!configResult.ok) {
    return { success: false, error: configResult.error.message };
  }
  const config = configResult.value;

  // Get model URI
  const preset = getActivePreset(config);
  const modelUri = options.model ?? preset.embed;

  // Open store
  const store = new SqliteAdapter();
  const dbPath = getIndexDbPath();
  const paths = getConfigPaths();
  store.setConfigPath(paths.configFile);

  const openResult = await store.open(dbPath, config.ftsTokenizer);
  if (!openResult.ok) {
    return { success: false, error: openResult.error.message };
  }

  // Get raw DB for vector ops (SqliteAdapter always implements SqliteDbProvider)
  const db = store.getRawDb();
  let embedPort: EmbeddingPort | null = null;

  try {
    // Create stats port for backlog detection
    const stats: VectorStatsPort = createVectorStatsPort(db);

    // Get backlog count first (before loading model)
    const backlogResult = force
      ? await getActiveChunkCount(db)
      : await stats.countBacklog(modelUri);

    if (!backlogResult.ok) {
      return { success: false, error: backlogResult.error.message };
    }

    const totalToEmbed = backlogResult.value;

    // Nothing to do, or dry run: report without loading the model.
    if (totalToEmbed === 0 || dryRun) {
      return {
        success: true,
        embedded: dryRun ? totalToEmbed : 0,
        errors: 0,
        duration: 0,
        model: modelUri,
        searchAvailable: await checkVecAvailable(db),
      };
    }

    // Create LLM adapter and embedding port
    const llm = new LlmAdapter(config);
    const embedResult = await llm.createEmbeddingPort(modelUri);
    if (!embedResult.ok) {
      return { success: false, error: embedResult.error.message };
    }
    embedPort = embedResult.value;

    // Discover dimensions via probe embedding
    const probeResult = await embedPort.embed('dimension probe');
    if (!probeResult.ok) {
      return { success: false, error: probeResult.error.message };
    }
    const dimensions = probeResult.value.length;

    // Create vector index port
    const vectorResult = await createVectorIndexPort(db, {
      model: modelUri,
      dimensions,
    });
    if (!vectorResult.ok) {
      return { success: false, error: vectorResult.error.message };
    }
    const vectorIndex = vectorResult.value;

    // Process batches
    const result = await processBatches({
      db,
      stats,
      embedPort,
      vectorIndex,
      modelUri,
      batchSize,
      force,
      showProgress: !options.json,
      totalToEmbed,
    });

    if (!result.ok) {
      return { success: false, error: result.error };
    }

    return {
      success: true,
      embedded: result.embedded,
      errors: result.errors,
      duration: result.duration,
      model: modelUri,
      searchAvailable: vectorIndex.searchAvailable,
    };
  } finally {
    // Close the store even if disposing the embedding port throws.
    try {
      if (embedPort) {
        await embedPort.dispose();
      }
    } finally {
      await store.close();
    }
  }
}
333
+
334
+ // ─────────────────────────────────────────────────────────────────────────────
335
+ // Helper: Get all active chunks (for --force mode)
336
+ // ─────────────────────────────────────────────────────────────────────────────
337
+
338
/**
 * Count the content chunks that belong to at least one active document.
 *
 * @param db - Open database handle.
 * @returns The chunk count, or a `QUERY_FAILED` error if the query throws.
 */
function getActiveChunkCount(db: Database): Promise<StoreResult<number>> {
  let outcome: StoreResult<number>;
  try {
    const statement = db.prepare(
      `
      SELECT COUNT(*) as count FROM content_chunks c
      WHERE EXISTS (
        SELECT 1 FROM documents d
        WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
      )
      `
    );
    const row = statement.get() as { count: number };
    outcome = ok(row.count);
  } catch (e) {
    outcome = err(
      'QUERY_FAILED',
      `Failed to count chunks: ${e instanceof Error ? e.message : String(e)}`
    );
  }
  // The query is synchronous; wrap the result to satisfy the async signature.
  return Promise.resolve(outcome);
}
361
+
362
/**
 * Fetch one page of chunks belonging to active documents, ordered by
 * `(mirror_hash, seq)`.
 *
 * @param db - Open database handle.
 * @param limit - Maximum number of rows to return.
 * @param after - Optional position; only rows strictly after it are returned.
 * @returns The page of rows (each tagged with reason `'force'`), or a
 * `QUERY_FAILED` error if the query throws.
 */
function getActiveChunks(
  db: Database,
  limit: number,
  after?: { mirrorHash: string; seq: number }
): Promise<StoreResult<BacklogItem[]>> {
  let outcome: StoreResult<BacklogItem[]>;
  try {
    let sql: string;
    let params: Array<string | number>;

    if (after) {
      // Continue strictly after the given (mirror_hash, seq) position.
      sql = `
        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
        FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
        AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))
        ORDER BY c.mirror_hash, c.seq
        LIMIT ?
      `;
      params = [after.mirrorHash, after.mirrorHash, after.seq, limit];
    } else {
      // First page: start from the beginning of the ordering.
      sql = `
        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
        FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
        ORDER BY c.mirror_hash, c.seq
        LIMIT ?
      `;
      params = [limit];
    }

    const rows = db.prepare(sql).all(...params) as BacklogItem[];
    outcome = ok(rows);
  } catch (e) {
    outcome = err(
      'QUERY_FAILED',
      `Failed to get chunks: ${e instanceof Error ? e.message : String(e)}`
    );
  }
  return Promise.resolve(outcome);
}
406
+
407
+ // ─────────────────────────────────────────────────────────────────────────────
408
+ // Format
409
+ // ─────────────────────────────────────────────────────────────────────────────
410
+
411
/**
 * Turn an embed result into the text printed by the CLI.
 *
 * Failures become an `Error: …` line, or a JSON error object when
 * `options.json` is set. Successes become pretty-printed JSON, a dry-run
 * notice, an "up to date" notice, or a multi-line summary, in that order of
 * precedence.
 *
 * @param result - Outcome returned by the embed command.
 * @param options - Options the command ran with (`json` and `dryRun` are read).
 * @returns The formatted output.
 */
export function formatEmbed(
  result: EmbedResult,
  options: EmbedOptions
): string {
  if (!result.success) {
    if (options.json) {
      return JSON.stringify({
        error: { code: 'RUNTIME', message: result.error },
      });
    }
    return `Error: ${result.error}`;
  }

  const { embedded, errors, duration, model, searchAvailable } = result;

  if (options.json) {
    return JSON.stringify(
      { embedded, errors, duration, model, searchAvailable },
      null,
      2
    );
  }

  if (options.dryRun) {
    return `Dry run: would embed ${embedded.toLocaleString()} chunks with model ${model}`;
  }

  const nothingHappened = embedded === 0 && errors === 0;
  if (nothingHappened) {
    return 'No chunks need embedding. All up to date.';
  }

  const summary = [
    `Embedded ${embedded.toLocaleString()} chunks in ${formatDuration(duration)}`,
  ];
  if (errors > 0) {
    summary.push(`${errors} chunks failed to embed.`);
  }
  if (!searchAvailable) {
    summary.push(
      'Warning: sqlite-vec not available. Embeddings stored but KNN search disabled.'
    );
  }
  return summary.join('\n');
}