openprompt-lang 1.2.7 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +62 -8
  2. package/bin/cli.js +2 -0
  3. package/docs/00-ARCHITECTURE/OPL-BOOST-MULTI-AGENT.md +406 -0
  4. package/docs/02-STANDARDS/AGENTS.template.md +89 -0
  5. package/docs/02-STANDARDS/ticket-driven-development.md +99 -0
  6. package/docs/04-TICKETS/BOOST-001-profile-registry.md +66 -0
  7. package/docs/04-TICKETS/BOOST-002-context-compression.md +58 -0
  8. package/docs/04-TICKETS/BOOST-003-template-hydration.md +69 -0
  9. package/docs/04-TICKETS/BOOST-004-fewshot-engine.md +58 -0
  10. package/docs/04-TICKETS/BOOST-005-agent-pool.md +69 -0
  11. package/docs/04-TICKETS/BOOST-006-specialized-agents.md +53 -0
  12. package/docs/04-TICKETS/BOOST-007-validation-loop.md +56 -0
  13. package/docs/04-TICKETS/BOOST-008-orchestrator.md +71 -0
  14. package/docs/04-TICKETS/BOOST-009-cache-system.md +56 -0
  15. package/docs/04-TICKETS/BOOST-010-cli-mcp.md +67 -0
  16. package/docs/04-TICKETS/BOOST-011-self-learning.md +50 -0
  17. package/docs/04-TICKETS/BOOST-012-prompt-preamble.md +109 -0
  18. package/docs/04-TICKETS/BOOST-013-hydrator-duplicate-code.md +132 -0
  19. package/docs/04-TICKETS/BOOST-014-multiagent-missing-parts.md +87 -0
  20. package/docs/04-TICKETS/BOOST-015-skeleton-type-missing.md +76 -0
  21. package/docs/04-TICKETS/BOOST-016-output-path-duplicate.md +68 -0
  22. package/docs/04-TICKETS/INDEX.md +89 -0
  23. package/docs/04-TICKETS/_archive/BOOST-005-micro-tasking.md +67 -0
  24. package/docs/04-TICKETS/_archive/BOOST-006-validation-loop.md +66 -0
  25. package/docs/04-TICKETS/_archive/BOOST-007-progressive-pipeline.md +69 -0
  26. package/docs/04-TICKETS/_archive/BOOST-008-cli-mcp-integration.md +74 -0
  27. package/docs/AI_CONTEXT.md +16 -0
  28. package/docs/EMBEDDINGS.md +214 -0
  29. package/docs/ONBOARDING_WORKFLOW.md +151 -0
  30. package/docs/OPL_ACADEMIC_ISSUES.md +158 -0
  31. package/docs/WEB_SCRAPER_PLAN.md +454 -0
  32. package/package.json +9 -2
  33. package/scripts/postinstall.js +37 -0
  34. package/src/boost/agent-pool.js +442 -0
  35. package/src/boost/agents/index.js +79 -0
  36. package/src/boost/cache.js +241 -0
  37. package/src/boost/context-compressor.js +354 -0
  38. package/src/boost/fewshot-retriever.js +332 -0
  39. package/src/boost/hardware-detector.js +486 -0
  40. package/src/boost/hydrator.js +398 -0
  41. package/src/boost/index.js +60 -0
  42. package/src/boost/orchestrator.js +615 -0
  43. package/src/boost/preamble.js +217 -0
  44. package/src/boost/profile-registry.js +264 -0
  45. package/src/boost/self-learn.js +247 -0
  46. package/src/boost/skeletons/component.skeleton.js +24 -0
  47. package/src/boost/skeletons/hook.skeleton.js +27 -0
  48. package/src/boost/skeletons/index.js +67 -0
  49. package/src/boost/skeletons/page.skeleton.js +22 -0
  50. package/src/boost/skeletons/service.skeleton.js +20 -0
  51. package/src/boost/skeletons/store.skeleton.js +18 -0
  52. package/src/boost/skeletons/type.skeleton.js +11 -0
  53. package/src/boost/task-dispatcher.js +142 -0
  54. package/src/boost/validation-loop.js +495 -0
  55. package/src/cli/commands-boost.js +394 -0
  56. package/src/cli/commands-knowledge.js +1 -0
  57. package/src/cli/commands-opl.js +79 -1
  58. package/src/cli/commands-workflow.js +125 -6
  59. package/src/commands/init-core.js +169 -5
  60. package/src/commands/knowledge-ops.js +52 -0
  61. package/src/commands/opl-embeddings.js +556 -0
  62. package/src/commands/opl-help.js +26 -2
  63. package/src/commands/opl-search.js +106 -2
  64. package/src/commands/opl-webscrape.js +390 -0
  65. package/src/commands/workflow/epic-cli.js +192 -0
  66. package/src/commands/workflow/select.js +146 -0
  67. package/src/commands/workflow/sprint-cli.js +174 -0
  68. package/src/core/webscrape/analyzer.js +481 -0
  69. package/src/core/webscrape/deep-scraper.js +1027 -0
  70. package/src/core/workflow/epic-manager.js +845 -0
  71. package/src/core/workflow/gates.js +180 -1
  72. package/src/core/workflow/selector.js +707 -0
  73. package/src/embeddings/chunker.js +450 -0
  74. package/src/embeddings/embedder.js +431 -0
  75. package/src/embeddings/index-pipeline.js +320 -0
  76. package/src/embeddings/vector-store.js +505 -0
  77. package/src/mcp-refactor/handlers/boost.js +295 -0
  78. package/src/mcp-refactor/router.js +19 -0
  79. package/src/mcp-refactor/tools.js +113 -0
@@ -0,0 +1,505 @@
1
+ // @use(kind, contract, limit, deps, pattern)
2
+ // @kind(module)
3
+ // @contract(in: ChunkWithVector -> out: void, sideEffect: SQLite writes)
4
+ // @limit(lines: 380)
5
+ // @deps(better-sqlite3, ../persistence/sqlite/connection)
6
+ // @pattern(repository)
7
+
8
+ /**
9
+ * Almacenamiento vectorial en SQLite con búsqueda por similitud de coseno.
10
+ *
11
+ * Para ~5000 chunks, la búsqueda lineal por coseno es suficiente.
12
+ * Si el volumen supera 100k chunks, considerar índice HNSW.
13
+ *
14
+ * Serialización de vectores:
15
+ * - Almacenamiento: Float32Array → Buffer (4 bytes por float)
16
+ * - Recuperación: Buffer → Float32Array
17
+ */
18
+
19
+ import { open } from "../persistence/sqlite/connection.js"
20
+ import { ensureSchema } from "../persistence/sqlite/schema.js"
21
+
22
+ // ─── Constantes ────────────────────────────────────────────────────
23
+
24
/** SQLite database file used when the caller does not pass `options.dbPath`. */
const DEFAULT_DB_PATH = "./.opencode/opl.db"
/** Number of results returned by the search functions when `options.topK` is not set. */
const DEFAULT_TOP_K = 10
/** Minimum cosine score kept by `searchSimilar` when `options.minScore` is not set. */
const DEFAULT_MIN_SCORE = 0
27
+
28
+ // ─── Schema ────────────────────────────────────────────────────────
29
+
30
/**
 * Idempotent DDL for the vector store, executed by `getDb` on every call.
 *
 * - `embeddings`: one row per chunk; `vector` holds the embedding as a BLOB
 *   and `metadata` holds a JSON string.
 * - Two secondary indexes, on `doc_id` and `created_at`.
 * - `embeddings_fts`: FTS5 external-content index over `embeddings.content`,
 *   keyed by the implicit rowid of `embeddings`.
 */
const VECTOR_STORE_SQL = `
  CREATE TABLE IF NOT EXISTS embeddings (
    id TEXT PRIMARY KEY,
    doc_id TEXT NOT NULL,
    doc_title TEXT NOT NULL DEFAULT '',
    chapter_idx INTEGER NOT NULL DEFAULT 0,
    chapter_title TEXT NOT NULL DEFAULT '',
    chunk_idx INTEGER NOT NULL DEFAULT 0,
    content TEXT NOT NULL,
    vector BLOB NOT NULL,
    dimension INTEGER NOT NULL DEFAULT 768,
    tokens INTEGER NOT NULL DEFAULT 0,
    model TEXT NOT NULL DEFAULT '',
    strategy TEXT NOT NULL DEFAULT 'section',
    metadata TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_embeddings_doc_id ON embeddings(doc_id);
  CREATE INDEX IF NOT EXISTS idx_embeddings_created ON embeddings(created_at);

  CREATE VIRTUAL TABLE IF NOT EXISTS embeddings_fts USING fts5(
    content,
    content=embeddings,
    content_rowid=rowid
  );
`
57
+
58
+ // ─── Serialización de vectores ─────────────────────────────────────
59
+
60
/**
 * Packs a numeric vector into a Buffer of 32-bit floats (4 bytes per value)
 * so it can be written to the `vector` BLOB column.
 *
 * @param {number[]} vector - Values to serialize; each one is narrowed to float32.
 * @returns {Buffer} Buffer viewing the freshly created float32 storage.
 */
function vectorToBuffer(vector) {
  const packed = new Float32Array(vector)
  const { buffer: rawBytes } = packed
  return Buffer.from(rawBytes)
}
71
+
72
/**
 * Decodes a Buffer of packed 32-bit floats back into a plain number array.
 *
 * A Float32Array view can only be created at a byte offset that is a multiple
 * of 4. Node Buffers that are slices of a larger allocation (pooled buffers,
 * `subarray` results) may start at any offset, so unaligned input is copied
 * into fresh, aligned storage before being reinterpreted.
 *
 * @param {Buffer} buffer - Bytes previously produced by `vectorToBuffer`.
 * @returns {number[]} Decoded values; trailing bytes that do not form a whole
 *   float are ignored.
 */
function bufferToVector(buffer) {
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT
  const count = Math.floor(buffer.byteLength / bytesPerFloat)

  if (buffer.byteOffset % bytesPerFloat === 0) {
    // Fast path: view the existing memory directly, no copy needed.
    return Array.from(new Float32Array(buffer.buffer, buffer.byteOffset, count))
  }

  // Unaligned slice: copy the payload so the float view starts at offset 0.
  const aligned = new Uint8Array(count * bytesPerFloat)
  aligned.set(buffer.subarray(0, count * bytesPerFloat))
  return Array.from(new Float32Array(aligned.buffer))
}
82
+
83
+ // ─── Similitud de coseno (re-exportada aquí para conveniencia) ──────────
84
+
85
/**
 * Cosine similarity between two equally sized vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} `dot(a, b) / (|a| * |b|)`; 0 when the vectors are empty
 *   or when either one has zero magnitude.
 * @throws {Error} When the two vectors have different lengths.
 */
export function cosineSimilarity(a, b) {
  const size = a.length
  if (size !== b.length) {
    throw new Error(`Dimensión incorrecta: vector A tiene ${a.length}, vector B tiene ${b.length}.`)
  }
  if (size === 0) return 0

  let dot = 0
  let sumSquaresA = 0
  let sumSquaresB = 0
  let idx = 0
  while (idx < size) {
    const x = a[idx]
    const y = b[idx]
    dot += x * y
    sumSquaresA += x * x
    sumSquaresB += y * y
    idx += 1
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB)
  // A zero-magnitude vector has no direction; report "no similarity" instead of NaN.
  return denominator === 0 ? 0 : dot / denominator
}
112
+
113
+ // ─── Helpers de BD ─────────────────────────────────────────────────
114
+
115
/**
 * Opens the vector database and makes sure its schema exists.
 *
 * Runs the shared `ensureSchema` step and then the vector-store DDL
 * (`VECTOR_STORE_SQL`) against the connection on every call.
 *
 * @param {string} dbPath - Database file; falls back to `DEFAULT_DB_PATH` when falsy.
 * @returns {{ db: import('better-sqlite3').Database }}
 */
function getDb(dbPath) {
  const target = dbPath || DEFAULT_DB_PATH
  const connection = open(target)
  ensureSchema(connection.db)
  connection.db.exec(VECTOR_STORE_SQL)
  return { db: connection.db }
}
127
+
128
/**
 * Produces the JSON text stored in the `metadata` column for a chunk.
 *
 * @param {Object} chunk - Chunk whose `metadata` property is serialized.
 * @returns {string} JSON for `chunk.metadata`, or `"{}"` when it is missing or falsy.
 */
function serializeMetadata(chunk) {
  const { metadata } = chunk
  return JSON.stringify(metadata ? metadata : {})
}
137
+
138
+ // ─── API Pública ───────────────────────────────────────────────────
139
+
140
/**
 * Stores one chunk together with its embedding vector, replacing any existing
 * row that has the same `chunk.id`, and keeps the FTS5 index in step.
 *
 * Arguments are validated before the database is opened, so invalid input
 * never creates or touches the database file.
 *
 * `INSERT OR REPLACE` removes the previous row and inserts a new one, so the
 * full-text entry of the previous row has to be removed explicitly; otherwise
 * the external-content FTS index keeps tokens for content that no longer exists.
 *
 * @param {Object} chunk - Document chunk; `id` is required, other fields default
 *   to empty string / 0 / "section" when falsy.
 * @param {number[]} vector - Embedding vector; must be non-empty.
 * @param {string} model - Name of the model that produced the embedding.
 * @param {Object} [options]
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {{ id: string, success: boolean }}
 * @throws {Error} When `chunk` has no `id` or `vector` is empty.
 */
export function storeEmbedding(chunk, vector, model, options = {}) {
  if (!chunk || !chunk.id) {
    throw new Error("Chunk inválido: se requiere un objeto con id")
  }
  if (!vector || vector.length === 0) {
    throw new Error("Vector inválido: se requiere un array no vacío")
  }

  const { db } = getDb(options.dbPath)
  const content = chunk.content || ""
  const vectorBuffer = vectorToBuffer(vector)
  const metadata = serializeMetadata(chunk)

  const insert = db.transaction(() => {
    // Remember the row about to be replaced so its FTS entry can be removed.
    const previous = db
      .prepare(`SELECT rowid, content FROM embeddings WHERE id = ?`)
      .get(chunk.id)

    db.prepare(`
      INSERT OR REPLACE INTO embeddings
      (id, doc_id, doc_title, chapter_idx, chapter_title, chunk_idx,
      content, vector, dimension, tokens, model, strategy, metadata)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      chunk.id,
      chunk.docId || "",
      chunk.docTitle || "",
      chunk.chapterIdx || 0,
      chunk.chapterTitle || "",
      chunk.chunkIndex || 0,
      content,
      vectorBuffer,
      vector.length,
      chunk.tokens || 0,
      model || "",
      chunk.strategy || "section",
      metadata
    )

    // Synchronize FTS5: drop the replaced entry, then index the new row.
    try {
      if (previous) {
        // External-content tables need the special 'delete' command, which
        // must be given the values that were originally indexed.
        db.prepare(
          `INSERT INTO embeddings_fts(embeddings_fts, rowid, content) VALUES('delete', ?, ?)`
        ).run(previous.rowid, previous.content)
      }

      const row = db.prepare(`SELECT rowid FROM embeddings WHERE id = ?`).get(chunk.id)
      if (row) {
        db.prepare(`INSERT INTO embeddings_fts(rowid, content) VALUES (?, ?)`).run(
          row.rowid,
          content
        )
      }
    } catch {
      // Incremental sync failed: rebuild the index from the content table.
      // If the rebuild itself throws, the whole transaction is rolled back.
      db.exec(`INSERT INTO embeddings_fts(embeddings_fts) VALUES('rebuild')`)
    }
  })

  insert()

  return { id: chunk.id, success: true }
}
205
+
206
/**
 * Stores many embeddings inside a single SQLite transaction and then rebuilds
 * the FTS5 index.
 *
 * Every entry is validated up front with the same rules as `storeEmbedding`
 * (chunk with an `id`, non-empty vector). Nothing is written and the database
 * is not opened when the batch is empty or any entry is invalid.
 *
 * @param {Array<{ chunk: Object, vector: number[], model: string }>} entries
 * @param {Object} [options]
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {{ count: number, durationMs: number }} Number of entries written and
 *   the elapsed wall-clock time in milliseconds.
 * @throws {Error} When an entry has no chunk id or an empty vector.
 */
export function storeEmbeddingsBatch(entries, options = {}) {
  if (!Array.isArray(entries) || entries.length === 0) {
    return { count: 0, durationMs: 0 }
  }

  // Fail before touching the database so a bad entry cannot leave rows with a
  // missing id or a zero-length vector behind.
  for (const entry of entries) {
    if (!entry || !entry.chunk || !entry.chunk.id) {
      throw new Error("Chunk inválido: se requiere un objeto con id")
    }
    if (!entry.vector || entry.vector.length === 0) {
      throw new Error("Vector inválido: se requiere un array no vacío")
    }
  }

  const { db } = getDb(options.dbPath)
  const start = Date.now()

  const stmt = db.prepare(`
    INSERT OR REPLACE INTO embeddings
    (id, doc_id, doc_title, chapter_idx, chapter_title, chunk_idx,
    content, vector, dimension, tokens, model, strategy, metadata)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `)

  const insertMany = db.transaction((items) => {
    for (const { chunk, vector, model } of items) {
      stmt.run(
        chunk.id,
        chunk.docId || "",
        chunk.docTitle || "",
        chunk.chapterIdx || 0,
        chunk.chapterTitle || "",
        chunk.chunkIndex || 0,
        chunk.content || "",
        vectorToBuffer(vector),
        vector.length,
        chunk.tokens || 0,
        model || "",
        chunk.strategy || "section",
        serializeMetadata(chunk)
      )
    }
  })

  insertMany(entries)

  // Rebuild the FTS5 index from the content table. This is deliberately
  // best-effort: the stored embeddings remain usable for vector search even
  // when the full-text index cannot be rebuilt.
  try {
    db.exec(`INSERT INTO embeddings_fts(embeddings_fts) VALUES('rebuild')`)
  } catch {
    // FTS5 may be unavailable; not critical for vector search.
  }

  const durationMs = Date.now() - start
  return { count: entries.length, durationMs }
}
265
+
266
/**
 * Returns the top-K stored chunks ranked by cosine similarity to a query vector.
 *
 * Strategy: load every candidate row (optionally restricted to one document)
 * and score it linearly.
 *
 * Stored vectors whose length differs from the query vector (for example rows
 * indexed with a different embedding model) are skipped, so one incompatible
 * row cannot make the whole search throw.
 *
 * @param {number[]} queryVector - Query embedding.
 * @param {Object} [options]
 * @param {number} [options.topK=10] - Maximum number of results (falsy values use the default).
 * @param {number} [options.minScore=0.0] - Minimum score a row needs to be included.
 * @param {string} [options.docFilter] - Only consider rows with this `doc_id`.
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {Array<{
 *   id: string, docId: string, docTitle: string,
 *   chapterIdx: number, chapterTitle: string,
 *   content: string, score: number,
 *   tokens: number, model: string, metadata: string
 * }>} Results sorted by descending score; `metadata` is the raw JSON string as stored.
 *   An empty array is returned for an empty or missing query vector.
 */
export function searchSimilar(queryVector, options = {}) {
  // Nothing to compare against: answer without opening the database.
  if (!queryVector || queryVector.length === 0) {
    return []
  }

  const { db } = getDb(options.dbPath)
  const topK = options.topK || DEFAULT_TOP_K
  const minScore = options.minScore ?? DEFAULT_MIN_SCORE
  const docFilter = options.docFilter || null

  // Build the base query.
  let sql = `SELECT id, doc_id, doc_title, chapter_idx, chapter_title,
    content, vector, dimension, tokens, model, metadata
    FROM embeddings`
  const params = []

  if (docFilter) {
    sql += ` WHERE doc_id = ?`
    params.push(docFilter)
  }

  // The sort below is stable, so among equal scores the newest rows come first.
  sql += ` ORDER BY created_at DESC`

  const rows = db.prepare(sql).all(...params)
  if (rows.length === 0) return []

  const scored = []
  for (const row of rows) {
    const vector = bufferToVector(row.vector)
    // Incompatible dimension: cosine similarity is undefined, skip the row.
    if (vector.length !== queryVector.length) continue

    const score = cosineSimilarity(queryVector, vector)
    if (score >= minScore) {
      scored.push({
        id: row.id,
        docId: row.doc_id,
        docTitle: row.doc_title,
        chapterIdx: row.chapter_idx,
        chapterTitle: row.chapter_title,
        content: row.content,
        score,
        tokens: row.tokens,
        model: row.model,
        metadata: row.metadata,
      })
    }
  }

  // Highest score first, then keep the top-K.
  scored.sort((a, b) => b.score - a.score)
  return scored.slice(0, topK)
}
338
+
339
/**
 * Builds an FTS5 MATCH expression from free text.
 *
 * Words are extracted as runs of letters, digits and underscores, words of
 * two characters or fewer are dropped, and each remaining word becomes a
 * quoted prefix term. Quoting keeps FTS5 keywords (AND, OR, NOT, NEAR) and
 * punctuation in user input from being parsed as query syntax.
 *
 * @param {string} queryText
 * @returns {string} Terms joined with OR, or an empty string when no usable word remains.
 */
function buildFtsQuery(queryText) {
  const words = queryText.match(/[\p{L}\p{N}_]+/gu) || []
  return words
    .filter((word) => word.length > 2)
    .map((word) => `"${word}"*`)
    .join(" OR ")
}

/**
 * Keyword search (FTS5) followed by semantic re-ranking with cosine similarity.
 *
 * 1. Query FTS5 for up to `topK * 3` keyword candidates.
 * 2. Load those rows and score each against `queryVector`.
 * 3. Sort by score and return the best `topK`.
 *
 * Falls back to `searchSimilar` (pure cosine search) when the text is blank,
 * yields no usable search term, no candidate survives, or the FTS query fails.
 * The function is synchronous.
 *
 * @param {string} queryText - Text for the FTS5 keyword search.
 * @param {number[]} queryVector - Vector used for semantic re-ranking.
 * @param {Object} [options]
 * @param {number} [options.topK=10] - Maximum number of results.
 * @param {number} [options.minScore] - When given, candidates scoring below it are dropped.
 * @param {string} [options.docFilter] - When given, only rows with this `doc_id` are returned.
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {Array<{
 *   id: string, docId: string, docTitle: string,
 *   chapterIdx: number, chapterTitle: string,
 *   content: string, score: number,
 *   tokens: number, model: string, metadata: string
 * }>} Results sorted by descending score.
 */
export function hybridSearch(queryText, queryVector, options = {}) {
  if (!queryText || queryText.trim().length === 0) {
    return searchSimilar(queryVector, options)
  }

  const ftsQuery = buildFtsQuery(queryText)
  if (!ftsQuery) return searchSimilar(queryVector, options)

  // Without a query vector there is nothing to re-rank with.
  if (!queryVector || queryVector.length === 0) {
    return searchSimilar(queryVector, options)
  }

  const { db } = getDb(options.dbPath)
  const topK = options.topK || DEFAULT_TOP_K
  const docFilter = options.docFilter || null
  // Only filter by score when the caller asked for it.
  const minScore = options.minScore ?? null

  try {
    const ftsResults = db
      .prepare(
        `SELECT rank, rowid FROM embeddings_fts WHERE embeddings_fts MATCH ? ORDER BY rank LIMIT ?`
      )
      .all(ftsQuery, topK * 3)

    if (ftsResults.length === 0) {
      return searchSimilar(queryVector, options)
    }

    // Load the full rows (including vectors) of the FTS candidates by rowid.
    const rowids = ftsResults.map((r) => r.rowid)
    const placeholders = rowids.map(() => "?").join(",")

    let sql = `SELECT id, doc_id, doc_title, chapter_idx, chapter_title,
      content, vector, dimension, tokens, model, metadata
      FROM embeddings WHERE rowid IN (${placeholders})`
    const params = [...rowids]

    if (docFilter) {
      sql += ` AND doc_id = ?`
      params.push(docFilter)
    }

    const rows = db.prepare(sql).all(...params)

    // No usable candidate (stale index entries or all filtered out).
    if (rows.length === 0) return searchSimilar(queryVector, options)

    // Re-rank by cosine similarity.
    const scored = []
    for (const row of rows) {
      const vector = bufferToVector(row.vector)
      // Skip vectors produced with a different dimension instead of throwing.
      if (vector.length !== queryVector.length) continue

      const score = cosineSimilarity(queryVector, vector)
      if (minScore !== null && score < minScore) continue

      scored.push({
        id: row.id,
        docId: row.doc_id,
        docTitle: row.doc_title,
        chapterIdx: row.chapter_idx,
        chapterTitle: row.chapter_title,
        content: row.content,
        score,
        tokens: row.tokens,
        model: row.model,
        metadata: row.metadata,
      })
    }

    scored.sort((a, b) => b.score - a.score)
    return scored.slice(0, topK)
  } catch {
    // Deliberate best-effort: if the FTS5 path fails, use pure cosine search.
    return searchSimilar(queryVector, options)
  }
}
420
+
421
/**
 * Deletes every embedding that belongs to one document.
 *
 * The full-text entries and the rows themselves are removed inside a single
 * transaction, so a failure while deleting the rows cannot leave the FTS
 * index and the `embeddings` table out of step. `docId` is validated before
 * the database is opened.
 *
 * @param {string} docId - `doc_id` of the document to remove.
 * @param {Object} [options]
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {{ deleted: number }} Number of rows removed from `embeddings`.
 * @throws {Error} When `docId` is missing.
 */
export function deleteDocumentEmbeddings(docId, options = {}) {
  if (!docId) {
    throw new Error("docId es requerido para eliminar embeddings")
  }

  const { db } = getDb(options.dbPath)

  const removeDocument = db.transaction(() => {
    // FTS entries go first, while the rows they refer to still exist.
    try {
      db.prepare(
        `DELETE FROM embeddings_fts WHERE rowid IN (SELECT rowid FROM embeddings WHERE doc_id = ?)`
      ).run(docId)
    } catch {
      // Best-effort: the FTS table may not exist; the rows are still deleted.
    }

    return db.prepare(`DELETE FROM embeddings WHERE doc_id = ?`).run(docId).changes
  })

  return { deleted: removeDocument() }
}
449
+
450
/**
 * Summarizes the contents of the embeddings table.
 *
 * @param {Object} [options]
 * @param {string} [options.dbPath] - Database file; defaults to the module default.
 * @returns {{
 *   totalEmbeddings: number,
 *   totalDocs: number,
 *   dimension: number,
 *   model: string,
 *   lastIndexed: string|null,
 *   storageBytes: number
 * }} `dimension` and `model` are the most frequent values in the table;
 *   `storageBytes` is an estimate (vector bytes plus roughly 512 bytes per row).
 *   All fields are zero/empty/null when the table has no rows.
 */
export function getEmbeddingStats(options = {}) {
  const { db } = getDb(options.dbPath)
  const fetchRow = (sql) => db.prepare(sql).get()

  const { count: totalEmbeddings } = fetchRow(`SELECT COUNT(*) as count FROM embeddings`)

  if (totalEmbeddings === 0) {
    return {
      totalEmbeddings: 0,
      totalDocs: 0,
      dimension: 0,
      model: "",
      lastIndexed: null,
      storageBytes: 0,
    }
  }

  const docs = fetchRow(`SELECT COUNT(DISTINCT doc_id) as count FROM embeddings`)
  const newest = fetchRow(`SELECT MAX(created_at) as last FROM embeddings`)
  const topModel = fetchRow(
    `SELECT model, COUNT(*) as count FROM embeddings GROUP BY model ORDER BY count DESC LIMIT 1`
  )
  const topDimension = fetchRow(
    `SELECT dimension, COUNT(*) as count FROM embeddings GROUP BY dimension ORDER BY count DESC LIMIT 1`
  )
  const vectorBytes = fetchRow(`SELECT SUM(LENGTH(vector)) as total FROM embeddings`)

  // Size estimate: raw vector bytes plus about 512 bytes per row for the other columns.
  const storageBytes = vectorBytes ? vectorBytes.total + totalEmbeddings * 512 : 0

  return {
    totalEmbeddings,
    totalDocs: docs.count,
    dimension: topDimension ? topDimension.dimension : 0,
    model: topModel ? topModel.model : "",
    lastIndexed: newest ? newest.last : null,
    storageBytes,
  }
}