openprompt-lang 1.2.7 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -8
- package/docs/EMBEDDINGS.md +214 -0
- package/docs/ONBOARDING_WORKFLOW.md +151 -0
- package/docs/OPL_ACADEMIC_ISSUES.md +158 -0
- package/docs/WEB_SCRAPER_PLAN.md +454 -0
- package/package.json +7 -1
- package/scripts/postinstall.js +37 -0
- package/src/cli/commands-knowledge.js +1 -0
- package/src/cli/commands-opl.js +79 -1
- package/src/cli/commands-workflow.js +125 -6
- package/src/commands/init-core.js +169 -5
- package/src/commands/knowledge-ops.js +52 -0
- package/src/commands/opl-embeddings.js +556 -0
- package/src/commands/opl-help.js +26 -2
- package/src/commands/opl-search.js +106 -2
- package/src/commands/opl-webscrape.js +390 -0
- package/src/commands/workflow/epic-cli.js +192 -0
- package/src/commands/workflow/select.js +146 -0
- package/src/commands/workflow/sprint-cli.js +174 -0
- package/src/core/webscrape/analyzer.js +481 -0
- package/src/core/webscrape/deep-scraper.js +1027 -0
- package/src/core/workflow/epic-manager.js +845 -0
- package/src/core/workflow/gates.js +180 -1
- package/src/core/workflow/selector.js +707 -0
- package/src/embeddings/chunker.js +450 -0
- package/src/embeddings/embedder.js +431 -0
- package/src/embeddings/index-pipeline.js +320 -0
- package/src/embeddings/vector-store.js +505 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
// @use(kind, contract, limit, deps)
|
|
2
|
+
// @kind(feature)
|
|
3
|
+
// @contract(in: subcommand, options -> out: void, sideEffect: indexing/status/remove operations)
|
|
4
|
+
// @limit(lines: 300)
|
|
5
|
+
// @deps(../embeddings/index-pipeline, ../embeddings/vector-store, ../embeddings/embedder)
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Comando CLI para gestionar embeddings vectoriales.
|
|
9
|
+
*
|
|
10
|
+
* Subcomandos:
|
|
11
|
+
* opl embeddings index <docId> → Indexar un documento
|
|
12
|
+
* opl embeddings status → Mostrar estado del índice
|
|
13
|
+
* opl embeddings remove <docId> → Eliminar embeddings de un documento
|
|
14
|
+
* opl embeddings config → Mostrar/configurar proveedor de embeddings
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import chalk from "chalk"
|
|
18
|
+
import { indexDocument, reindexDocument } from "../embeddings/index-pipeline.js"
|
|
19
|
+
import { getEmbeddingStats, deleteDocumentEmbeddings } from "../embeddings/vector-store.js"
|
|
20
|
+
import { setActiveProvider, getActiveProvider, checkProvider } from "../embeddings/embedder.js"
|
|
21
|
+
|
|
22
|
+
// ─── Constantes ────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
const CHUNK_STRATEGIES = ["section", "paragraph", "fixed"]
|
|
25
|
+
const PROVIDERS = ["ollama", "transformers"]
|
|
26
|
+
|
|
27
|
+
// ─── Handler principal ─────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
/**
 * Entry point of the `opl embeddings` command: routes a subcommand name to
 * its handler.
 *
 * Recognized subcommands are "index", "status", "remove" and "config". Any
 * other value (including a missing one) prints the command help instead.
 *
 * @param {string} subcommand - Name of the subcommand to run
 * @param {Object} options - Extra options forwarded to the selected handler
 * @returns {Promise<void>} Settles once the selected handler has finished
 */
export async function embeddings(subcommand, options = {}) {
  if (subcommand === "index") return cmdIndex(options)
  if (subcommand === "status") return cmdStatus()
  if (subcommand === "remove") return cmdRemove(options)
  if (subcommand === "config") return cmdConfig(options)

  // Unknown or missing subcommand: fall back to the usage text.
  showHelp()
}
|
|
49
|
+
|
|
50
|
+
// ─── Subcomandos ───────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Indexes one document into the vector store and prints a summary.
 *
 * Steps: resolve the document id, validate the chunking strategy and (when
 * given) the provider, locate the document on disk, verify that the requested
 * provider is available, run the indexing pipeline while echoing its progress,
 * and finally print the counters and the first few errors it reported.
 *
 * @param {Object} options - CLI options
 * @param {string} [options.docId] - Document id; falls back to `options.args[0]`
 * @param {string[]} [options.args] - Positional arguments
 * @param {string} [options.strategy="section"] - One of CHUNK_STRATEGIES
 * @param {string} [options.provider] - Embedding provider; one of PROVIDERS
 * @param {string} [options.model] - Model name forwarded to the pipeline
 * @param {boolean} [options.dryRun] - Forwarded to the pipeline as `dryRun`
 * @param {boolean} [options.resume] - Forwarded to the pipeline as `resume`
 * @returns {Promise<void>}
 */
async function cmdIndex(options) {
  const docId = options.docId || options.args?.[0]
  const strategy = options.strategy || "section"
  const provider = options.provider || null
  const model = options.model || null
  const dryRun = !!options.dryRun
  const resume = !!options.resume

  if (!docId) {
    console.log(
      chalk.yellow("\n⚠️ Especifica un documento a indexar: opl embeddings index <docId>\n")
    )
    return
  }

  if (!CHUNK_STRATEGIES.includes(strategy)) {
    console.log(
      chalk.yellow(
        `\n⚠️ Estrategia inválida: "${strategy}". Usa: ${CHUNK_STRATEGIES.join(", ")}\n`
      )
    )
    return
  }

  // Reject unknown provider names up front, with the same message that
  // `opl embeddings config --provider` prints, instead of relying on the
  // availability probe to fail for them.
  if (provider && !PROVIDERS.includes(provider)) {
    console.log(
      chalk.yellow(`\n⚠️ Proveedor inválido: "${provider}". Usa: ${PROVIDERS.join(", ")}\n`)
    )
    return
  }

  console.log("")
  console.log(chalk.cyan(`📦 Indexando documento: "${chalk.bold(docId)}"`))
  console.log(chalk.gray(` Estrategia: ${strategy}`))
  if (provider) console.log(chalk.gray(` Proveedor: ${provider}`))
  if (model) console.log(chalk.gray(` Modelo: ${model}`))
  if (dryRun) console.log(chalk.gray(` Modo: ${chalk.yellow("dry-run")} (no persiste)`))
  if (resume) console.log(chalk.gray(` Modo: ${chalk.yellow("resume")} (salta chunks existentes)`))
  console.log("")

  // Locate the document inside the project's knowledge directory.
  const doc = await findDocument(docId)
  if (!doc) {
    console.log(chalk.red(` ✗ Documento "${docId}" no encontrado en el conocimiento.`))
    console.log(chalk.gray(" Usa el comando: opl index para ver los documentos disponibles."))
    console.log("")
    return
  }

  // Only an explicitly requested provider is probed here; without one the
  // pipeline receives `provider: null`.
  if (provider) {
    const status = await checkProvider(provider)
    if (!status.available) {
      console.log(chalk.yellow(` ⚠ Proveedor "${provider}" no disponible: ${status.error}`))
      console.log(chalk.gray(" Usa: opl embeddings config --provider transformers"))
      console.log("")
      return
    }
  }

  const phaseIcons = new Map([
    ["chunking", "📄"],
    ["embedding", "🧠"],
    ["storing", "💾"],
  ])

  const indexOptions = {
    strategy,
    provider,
    model,
    dryRun,
    resume,
    onProgress: (p) => {
      // The final summary below covers the "done" phase.
      if (p.phase === "done") return
      const icon = phaseIcons.get(p.phase) ?? "•"
      const percent = p.percent !== undefined ? ` ${p.percent}%` : ""
      console.log(
        ` ${icon} ${p.phase.charAt(0).toUpperCase() + p.phase.slice(1)}: ${p.current}/${p.total}${percent}${p.currentChunk ? ` (${p.currentChunk})` : ""}`
      )
    },
  }

  const result = await indexDocument(doc, indexOptions)

  console.log("")
  if (result.success) {
    console.log(chalk.green(` ✅ Documento indexado: ${result.docId}`))
  } else {
    console.log(chalk.yellow(` ⚠️ Indexación parcial: ${result.docId}`))
  }
  console.log(chalk.gray(` ─────────────────────────────────`))
  console.log(chalk.gray(` Total chunks: ${result.totalChunks}`))
  console.log(chalk.gray(` Indexados: ${result.indexedChunks}`))
  console.log(chalk.gray(` Saltados: ${result.skippedChunks}`))
  console.log(chalk.gray(` Fallos: ${result.failedChunks}`))
  console.log(chalk.gray(` Duración: ${result.durationMs}ms`))
  console.log(chalk.gray(` Estrategia: ${result.strategy}`))
  console.log(chalk.gray(` Modelo: ${result.model}`))
  console.log(chalk.gray(` Tokens totales: ${result.totalTokens}`))
  console.log("")

  // A result without an `errors` array is treated as "no errors" rather than
  // crashing after the summary has already been printed.
  const errors = Array.isArray(result.errors) ? result.errors : []
  if (errors.length > 0) {
    console.log(chalk.yellow(" Errores:"))
    // Show at most five entries so the summary stays readable.
    for (const err of errors.slice(0, 5)) {
      console.log(chalk.gray(` ${err.id}: ${err.error}`))
    }
    if (errors.length > 5) {
      console.log(chalk.gray(` ... y ${errors.length - 5} más`))
    }
    console.log("")
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Prints the current state of the embeddings index.
 *
 * Shows a hint when nothing is indexed yet; otherwise prints a boxed table
 * with the chunk count, document count, vector dimension, model, last
 * indexing time and storage size, followed by the active provider.
 *
 * @returns {Promise<void>}
 */
async function cmdStatus() {
  console.log("")
  console.log(chalk.cyan("📊 Estado del índice de embeddings"))
  console.log("")

  const stats = getEmbeddingStats()

  if (stats.totalEmbeddings === 0) {
    console.log(chalk.yellow(" No hay documentos indexados."))
    console.log(chalk.gray(" Usa: opl embeddings index <docId>"))
    console.log("")
    return
  }

  // One table row. `String(value)` keeps non-string values (for example a
  // numeric timestamp) from throwing on `.padStart`.
  const printRow = (label, value) => {
    console.log(
      chalk.gray(" │ ") +
        chalk.white(label.padEnd(25)) +
        chalk.bold(String(value).padStart(18)) +
        chalk.gray(" │")
    )
  }

  console.log(chalk.gray(` ┌─────────────────────────────────────────────┐`))
  printRow("Chunks indexados:", stats.totalEmbeddings)
  printRow("Documentos:", stats.totalDocs)
  printRow("Dimensión:", stats.dimension)
  printRow("Modelo:", stats.model || "—")
  printRow("Último indexado:", stats.lastIndexed || "—")
  printRow("Almacenamiento:", formatBytes(stats.storageBytes))
  console.log(chalk.gray(` └─────────────────────────────────────────────┘`))

  // Report the active provider; a falsy value is shown as "auto-detect".
  const activeProvider = getActiveProvider()
  console.log("")
  console.log(chalk.gray(` Proveedor activo: ${activeProvider || "auto-detect"}`))
  console.log("")
}
|
|
224
|
+
|
|
225
|
+
/**
 * Deletes the stored embeddings of one document and reports how many chunks
 * were removed.
 *
 * @param {Object} options - CLI options
 * @param {string} [options.docId] - Document id; falls back to `options.args[0]`
 * @param {string[]} [options.args] - Positional arguments
 * @returns {Promise<void>}
 */
async function cmdRemove(options) {
  const target = options.docId || options.args?.[0]

  if (!target) {
    console.log(chalk.yellow("\n⚠️ Especifica un documento: opl embeddings remove <docId>\n"))
    return
  }

  const { deleted } = deleteDocumentEmbeddings(target)

  // Green confirmation when something was removed, yellow notice otherwise.
  const message =
    deleted > 0
      ? chalk.green(` ✅ Eliminados ${deleted} chunks del documento "${target}".`)
      : chalk.yellow(` ⚠️ No se encontraron embeddings del documento "${target}".`)

  console.log("")
  console.log(message)
  console.log("")
}
|
|
245
|
+
|
|
246
|
+
/**
 * Shows the embeddings configuration or, when `options.provider` is given,
 * switches the active provider.
 *
 * Switching requires the name to be listed in PROVIDERS and the provider to
 * report itself as available. Without `options.provider` the active provider
 * and the availability of every known provider are printed.
 *
 * @param {Object} options - CLI options
 * @param {string} [options.provider] - Provider to activate
 * @returns {Promise<void>}
 */
async function cmdConfig(options) {
  const requested = options.provider || null

  if (!requested) {
    // Read-only mode: print the current configuration.
    console.log("")
    console.log(chalk.cyan("⚙️ Configuración de embeddings"))
    console.log("")

    const current = getActiveProvider()
    console.log(chalk.gray(` Proveedor activo: ${current || "auto-detect"}`))
    console.log("")

    // Probe each known provider in turn and list the outcome.
    for (const name of PROVIDERS) {
      const probe = await checkProvider(name)
      const mark = probe.available ? "✅" : "❌"
      const reason = probe.available ? "" : chalk.gray(` (${probe.error})`)
      console.log(` ${mark} ${name.padEnd(14)}${reason}`)
    }

    console.log("")
    console.log(
      chalk.gray(" Para cambiar: opl embeddings config --provider <ollama|transformers>")
    )
    console.log(chalk.gray(" Para indexar: opl embeddings index <docId>"))
    console.log("")
    return
  }

  if (!PROVIDERS.includes(requested)) {
    console.log(
      chalk.yellow(`\n⚠️ Proveedor inválido: "${requested}". Usa: ${PROVIDERS.join(", ")}\n`)
    )
    return
  }

  // Only activate a provider that reports itself as usable.
  const probe = await checkProvider(requested)
  if (!probe.available) {
    console.log(chalk.yellow(`\n⚠️ Proveedor "${requested}" no disponible: ${probe.error}\n`))
    return
  }

  setActiveProvider(requested)
  console.log(chalk.green(`\n✅ Proveedor cambiado a: ${requested}\n`))
}
|
|
295
|
+
|
|
296
|
+
// ─── Helpers ───────────────────────────────────────────────────────
|
|
297
|
+
|
|
298
|
+
/**
 * Looks up a document in the project's knowledge directory and loads it in
 * the `{ id, title, chapters }` shape passed to the indexing pipeline.
 *
 * Directories whose name contains `docId` are considered matches; within each
 * search location exact name matches are tried before partial ones. Locations
 * are searched in this order:
 *   1. `<knowledge>/<domain>/<doc>/`           (JSON first, then markdown files)
 *   2. `<knowledge>/processed/extractions/<doc>/` (`full.md`, then markdown files)
 *   3. `<knowledge>/sources/chapters/<doc>/`   (markdown files as chapters)
 *
 * The returned document id is always the matched directory name (or the id
 * stored inside a JSON document), never the possibly partial search term.
 *
 * @param {string} docId - Document id, or a fragment of it, to search for
 * @returns {Promise<Object|null>} The loaded document, or null when not found
 */
async function findDocument(docId) {
  const { existsSync, readdirSync, readFileSync } = await import("fs")
  const { join } = await import("path")
  const { getProjectKnowledgeDir } = await import("./knowledge-helpers.js")

  const knowledgeDir = getProjectKnowledgeDir()
  if (!knowledgeDir || !existsSync(knowledgeDir)) return null

  // Names of the immediate sub-directories of `dir`.
  const subdirNames = (dir) =>
    readdirSync(dir, { withFileTypes: true })
      .filter((d) => d.isDirectory())
      .map((d) => d.name)

  // Candidates named exactly `docId` first, partial matches after, each group
  // keeping its original relative order.
  const exactFirst = (candidates) => [
    ...candidates.filter((c) => c.name === docId),
    ...candidates.filter((c) => c.name !== docId),
  ]

  // Matching sub-directories of `dir` as `{ name, path }`, exact matches first.
  const matchesIn = (dir) =>
    exactFirst(
      subdirNames(dir)
        .filter((name) => name.includes(docId))
        .map((name) => ({ name, path: join(dir, name) }))
    )

  // Document built from a directory's markdown files, or null when it has none.
  const fromMarkdown = (name, docPath, files) => {
    const chapters = buildChaptersFromMarkdown(docPath, files, readFileSync, join)
    if (chapters.length === 0) return null
    return { id: name, title: name.replace(/-/g, " "), chapters }
  }

  // 1. Domains: knowledge/<domain>/<doc>/
  const domainMatches = subdirNames(knowledgeDir).flatMap((domain) =>
    matchesIn(join(knowledgeDir, domain))
  )

  for (const { name, path: docPath } of exactFirst(domainMatches)) {
    const docFiles = readdirSync(docPath).filter((f) => f.endsWith(".json") || f.endsWith(".md"))

    for (const file of docFiles) {
      if (!file.endsWith(".json")) continue
      try {
        const data = JSON.parse(readFileSync(join(docPath, file), "utf-8"))
        const normalized = normalizeDocument(data, name)
        // A JSON file that is not a document (no chapters/content) must not
        // end the search: keep looking at the remaining files.
        if (normalized) return normalized
      } catch {
        /* unreadable or invalid JSON: try the next file */
      }
    }

    const doc = fromMarkdown(name, docPath, docFiles)
    if (doc) return doc
  }

  // 2. Extractions: knowledge/processed/extractions/<doc>/ (documents extracted from PDFs)
  const extractionsDir = join(knowledgeDir, "processed", "extractions")
  if (existsSync(extractionsDir)) {
    for (const { name, path: docPath } of matchesIn(extractionsDir)) {
      const files = readdirSync(docPath).filter((f) => f.endsWith(".md") || f.endsWith(".json"))

      // Prefer the single-file extraction when present.
      if (files.includes("full.md")) {
        const content = readFileSync(join(docPath, "full.md"), "utf-8")
        return buildDocumentFromFullMd(name, content)
      }

      // Fallback: every markdown file becomes a chapter.
      const doc = fromMarkdown(name, docPath, files)
      if (doc) return doc
    }
  }

  // 3. Chapters: knowledge/sources/chapters/<doc>/ (chapters of processed PDFs)
  const chaptersDir = join(knowledgeDir, "sources", "chapters")
  if (existsSync(chaptersDir)) {
    for (const { name, path: docPath } of matchesIn(chaptersDir)) {
      const files = readdirSync(docPath)
        .filter((f) => f.endsWith(".md"))
        .sort()
      const doc = fromMarkdown(name, docPath, files)
      if (doc) return doc
    }
  }

  return null
}
|
|
399
|
+
|
|
400
|
+
/**
 * Builds a chapter list from the markdown files of a directory.
 *
 * Files are processed in sorted (default string order) sequence; entries that
 * do not end in ".md" are ignored. The chapter title is derived from the file
 * name: the ".md" extension and a leading "<digits>-" prefix are removed,
 * dashes become spaces and the first character is upper-cased.
 *
 * The `files` array passed by the caller is not modified.
 *
 * @param {string} dirPath - Directory that contains the files
 * @param {string[]} files - File names inside `dirPath`
 * @param {Function} readFileSync - fs.readFileSync (called with the path and "utf-8")
 * @param {Function} pathJoin - path.join
 * @returns {Array<{ index: number, title: string, content: string }>}
 */
function buildChaptersFromMarkdown(dirPath, files, readFileSync, pathJoin) {
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array.
  return [...files]
    .sort()
    .filter((file) => file.endsWith(".md"))
    .map((file, index) => {
      const name = file.replace(/\.md$/, "").replace(/^\d+-/, "").replace(/-/g, " ")
      return {
        index,
        title: name.charAt(0).toUpperCase() + name.slice(1),
        content: readFileSync(pathJoin(dirPath, file), "utf-8"),
      }
    })
}
|
|
424
|
+
|
|
425
|
+
/**
 * Builds a document from the text of a `full.md` file (a complete extracted
 * document).
 *
 * The first line starting with "# " becomes the document title. Every line
 * starting with "## " opens a new chapter; text found before the first such
 * heading is collected under the chapter title "Contenido". Sections whose
 * text is empty after trimming are skipped, so blank lines between headings
 * never produce empty chapters. When no chapter results at all, the whole
 * trimmed content is returned as a single chapter.
 *
 * @param {string} docId - Id for the document; also the fallback title
 * @param {string} content - Raw markdown text
 * @returns {{ id: string, title: string, chapters: Array<{ index: number, title: string, content: string }> }}
 */
function buildDocumentFromFullMd(docId, content) {
  const chapters = []
  let title = ""
  let currentTitle = "Contenido"
  let currentContent = []

  // Close the section being collected; whitespace-only sections are dropped.
  const flushChapter = () => {
    const body = currentContent.join("\n").trim()
    currentContent = []
    if (body === "") return
    chapters.push({ index: chapters.length, title: currentTitle, content: body })
  }

  for (const line of content.split("\n")) {
    // Only the first "# " heading is the title; later ones stay in the text.
    const h1Match = line.match(/^# (.+)/)
    if (h1Match && !title) {
      title = h1Match[1]
      continue
    }

    const h2Match = line.match(/^## (.+)/)
    if (h2Match) {
      flushChapter()
      currentTitle = h2Match[1]
    } else {
      currentContent.push(line)
    }
  }

  // Last open section.
  flushChapter()

  if (chapters.length === 0) {
    chapters.push({ index: 0, title: title || docId, content: content.trim() })
  }

  return {
    id: docId,
    title: title || docId,
    chapters,
  }
}
|
|
484
|
+
|
|
485
|
+
/**
 * Normalizes a parsed JSON document into `{ id, title, chapters }`.
 *
 * Two input shapes are accepted:
 *   - `{ chapters: [...] }` where each entry is either a string (used as the
 *     chapter content) or an object with optional `index`, `title`, `content`;
 *   - `{ content: "..." }`, which becomes a single chapter.
 * Null or undefined chapter entries yield a chapter with empty title and
 * content instead of throwing. Anything else yields null.
 *
 * @param {Object} data - Parsed JSON value
 * @param {string} docId - Fallback for a missing `id` or `title`
 * @returns {Object|null} The normalized document, or null when `data` has
 *   neither a `chapters` array nor a truthy `content`
 */
function normalizeDocument(data, docId) {
  if (!data) return null

  const id = data.id || docId
  const title = data.title || docId

  // Already in chapter form.
  if (Array.isArray(data.chapters)) {
    const chapters = data.chapters.map((ch, i) => {
      if (typeof ch === "string") return { index: i, title: "", content: ch }
      // Optional chaining keeps a null/undefined entry from throwing.
      return {
        index: ch?.index ?? i,
        title: ch?.title || "",
        content: ch?.content || "",
      }
    })
    return { id, title, chapters }
  }

  // Bare content: wrap it in a single chapter.
  if (data.content) {
    return { id, title, chapters: [{ index: 0, title, content: data.content }] }
  }

  return null
}
|
|
519
|
+
|
|
520
|
+
/**
 * Formats a byte count as a short human-readable string with one decimal,
 * using 1024-based units from B up to GB.
 *
 * Values of 1024 GB or more stay expressed in GB, values below 1 stay in B,
 * and anything that is not a positive finite number is shown as "0 B".
 *
 * @param {number} bytes - Size in bytes
 * @returns {string} For example "1.5 KB"
 */
function formatBytes(bytes) {
  const units = ["B", "KB", "MB", "GB"]

  let value = Number(bytes)
  if (!Number.isFinite(value) || value <= 0) return "0 B"

  // Step up one unit at a time; stopping at the last unit keeps the index
  // inside `units` for very large values.
  let unitIndex = 0
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024
    unitIndex += 1
  }

  return `${value.toFixed(1)} ${units[unitIndex]}`
}
|
|
532
|
+
|
|
533
|
+
/**
 * Prints the usage text of the `opl embeddings` command: the available
 * subcommands followed by the options accepted by `index`.
 */
function showHelp() {
  const lines = [
    "",
    chalk.cyan("🧠 Comando: opl embeddings"),
    "",
    chalk.white(" Subcomandos:"),
    chalk.gray(" index <docId> Indexar un documento (chunk → embed → store)"),
    chalk.gray(" status Mostrar estado del índice de embeddings"),
    chalk.gray(" remove <docId> Eliminar embeddings de un documento"),
    chalk.gray(" config Ver/configurar proveedor de embeddings"),
    "",
    chalk.gray(" Opciones para index:"),
    chalk.gray(" --strategy <s> Estrategia: section, paragraph, fixed (default: section)"),
    chalk.gray(" --provider <p> Proveedor: ollama, transformers (default: auto-detect)"),
    chalk.gray(" --dry-run Simular sin persistir"),
    chalk.gray(" --resume Saltar chunks ya indexados"),
    "",
  ]

  // One console.log per entry, in order.
  for (const line of lines) {
    console.log(line)
  }
}
|
package/src/commands/opl-help.js
CHANGED
|
@@ -108,7 +108,25 @@ export async function showWelcome(options = {}) {
|
|
|
108
108
|
` ${chalk.cyan("opl validate")} ${chalk.gray("→")} ${chalk.white("Validar anotaciones del proyecto")}`
|
|
109
109
|
)
|
|
110
110
|
console.log(
|
|
111
|
-
` ${chalk.cyan("opl workflow")} ${chalk.gray("→")} ${chalk.white("Workflow
|
|
111
|
+
` ${chalk.cyan("opl workflow")} ${chalk.gray("→")} ${chalk.white("Workflow select | discovery | spec | delivery | close")}`
|
|
112
|
+
)
|
|
113
|
+
console.log(
|
|
114
|
+
` ${chalk.cyan("opl workflow select")} ${chalk.gray("→")} ${chalk.white("IA selecciona workflow óptimo según descripción de la tarea")}`
|
|
115
|
+
)
|
|
116
|
+
console.log(
|
|
117
|
+
` ${chalk.cyan("opl epic")} ${chalk.gray("→")} ${chalk.white("Épicas: create | list | show | close | status")}`
|
|
118
|
+
)
|
|
119
|
+
console.log(
|
|
120
|
+
` ${chalk.cyan("opl sprint")} ${chalk.gray("→")} ${chalk.white("Sprints: create | list | plan | close")}`
|
|
121
|
+
)
|
|
122
|
+
console.log(
|
|
123
|
+
` ${chalk.cyan("opl ticket")} ${chalk.gray("→")} ${chalk.white("Tickets: create | list | close (bugs y tareas)")}`
|
|
124
|
+
)
|
|
125
|
+
console.log(
|
|
126
|
+
` ${chalk.cyan("opl embeddings")} ${chalk.gray("→")} ${chalk.white("Indexar / status / remove / config (vectores semánticos)")}`
|
|
127
|
+
)
|
|
128
|
+
console.log(
|
|
129
|
+
` ${chalk.cyan("opl webscrape")} ${chalk.gray("→")} ${chalk.white("Extraer contenido web → knowledge + embeddings")}`
|
|
112
130
|
)
|
|
113
131
|
console.log(
|
|
114
132
|
` ${chalk.cyan("opl teach")} ${chalk.gray("→")} ${chalk.white("Sistema de enseñanza: assess, study, project-guide, template")}`
|
|
@@ -137,13 +155,19 @@ export async function showWelcome(options = {}) {
|
|
|
137
155
|
` ${chalk.green("$")} ${chalk.cyan("opl system list")} ${chalk.gray("—")} ${chalk.white("Ver sistemas de conocimiento (agrupaciones)")}`
|
|
138
156
|
)
|
|
139
157
|
console.log(
|
|
140
|
-
` ${chalk.green("$")} ${chalk.cyan("opl workflow
|
|
158
|
+
` ${chalk.green("$")} ${chalk.cyan("opl workflow select")} ${chalk.gray("—")} ${chalk.white("Seleccionar workflow óptimo para tu tarea")}`
|
|
159
|
+
)
|
|
160
|
+
console.log(
|
|
161
|
+
` ${chalk.green("$")} ${chalk.cyan("opl workflow delivery")} ${chalk.gray("—")} ${chalk.white("Iniciar desarrollo con tickets")}`
|
|
141
162
|
)
|
|
142
163
|
console.log(` ${chalk.green("$")} ${chalk.cyan("opl teach progress")} ${chalk.gray("—")} ${chalk.white("Ver tu progreso de aprendizaje consolidado")}
|
|
143
164
|
${chalk.green("$")} ${chalk.cyan("opl teach assess")} ${chalk.gray("—")} ${chalk.white("Diagnosticar tu nivel en un concepto")}`)
|
|
144
165
|
console.log(
|
|
145
166
|
` ${chalk.green("$")} ${chalk.cyan("opl assess")} ${chalk.gray("—")} ${chalk.white("Evaluar qué tan listo para producción estás")}`
|
|
146
167
|
)
|
|
168
|
+
console.log(
|
|
169
|
+
` ${chalk.green("$")} ${chalk.cyan("opl embeddings status")} ${chalk.gray("—")} ${chalk.white("Ver estado de los vectores semánticos")}`
|
|
170
|
+
)
|
|
147
171
|
console.log("")
|
|
148
172
|
|
|
149
173
|
// Ayuda adicional
|