openprompt-lang 1.2.6 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -8
- package/docs/EMBEDDINGS.md +214 -0
- package/docs/FRAMEWORK.md +52 -0
- package/docs/ONBOARDING_WORKFLOW.md +151 -0
- package/docs/OPL-ERRORES.md +504 -0
- package/docs/OPL_ACADEMIC_ISSUES.md +158 -0
- package/docs/WEB_SCRAPER_PLAN.md +454 -0
- package/package.json +7 -1
- package/scripts/postinstall.js +37 -0
- package/src/cli/commands-knowledge.js +1 -0
- package/src/cli/commands-opl.js +79 -1
- package/src/cli/commands-work.js +3 -1
- package/src/cli/commands-workflow.js +125 -6
- package/src/commands/init-core.js +188 -12
- package/src/commands/init-existing.js +13 -6
- package/src/commands/init-helpers.js +20 -14
- package/src/commands/knowledge-ops.js +52 -0
- package/src/commands/opl-embeddings.js +556 -0
- package/src/commands/opl-help.js +26 -2
- package/src/commands/opl-search.js +106 -2
- package/src/commands/opl-webscrape.js +390 -0
- package/src/commands/work-context.js +17 -0
- package/src/commands/workflow/close/index.js +2 -1
- package/src/commands/workflow/delivery/index.js +4 -0
- package/src/commands/workflow/discovery/index.js +4 -0
- package/src/commands/workflow/epic-cli.js +192 -0
- package/src/commands/workflow/select.js +146 -0
- package/src/commands/workflow/specification/index.js +4 -0
- package/src/commands/workflow/sprint-cli.js +174 -0
- package/src/core/engine/sandbox.js +7 -3
- package/src/core/webscrape/analyzer.js +481 -0
- package/src/core/webscrape/deep-scraper.js +1027 -0
- package/src/core/workflow/epic-manager.js +845 -0
- package/src/core/workflow/gates.js +180 -1
- package/src/core/workflow/selector.js +707 -0
- package/src/embeddings/chunker.js +450 -0
- package/src/embeddings/embedder.js +431 -0
- package/src/embeddings/index-pipeline.js +320 -0
- package/src/embeddings/vector-store.js +505 -0
- package/src/mcp-plan-server.js +12 -5
- package/src/mcp-shared-state.js +25 -0
- package/src/mcp-refactor/mcp-server.js +0 -171
- package/src/mcp-server-backup.js +0 -1913
|
@@ -8,6 +8,8 @@ import { join } from "path"
|
|
|
8
8
|
import chalk from "chalk"
|
|
9
9
|
import { getProjectKnowledgeDir, getDomainsFromTaxonomy, getBookMeta } from "./knowledge-helpers.js"
|
|
10
10
|
import { systemDetect, getSystemTags } from "./opl-system.js"
|
|
11
|
+
import { hybridSearch, getEmbeddingStats } from "../embeddings/vector-store.js"
|
|
12
|
+
import { embed } from "../embeddings/embedder.js"
|
|
11
13
|
|
|
12
14
|
// ──────────────────────────────────────────────
|
|
13
15
|
// Búsqueda por tags (rápida)
|
|
@@ -183,6 +185,66 @@ function searchSemantic(query, domains) {
|
|
|
183
185
|
return results.sort((a, b) => b.matchCount - a.matchCount).slice(0, 15)
|
|
184
186
|
}
|
|
185
187
|
|
|
188
|
+
// ──────────────────────────────────────────────
|
|
189
|
+
// Búsqueda vectorial (embeddings)
|
|
190
|
+
// ──────────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
/**
 * Runs an embedding-based search and groups the matching chunks by document.
 *
 * The query is embedded with `embed`, matched through `hybridSearch`
 * (top 10 chunks) and the hits are folded into one entry per `docId`.
 * Any failure (embedder unavailable, store error) degrades to an empty
 * result instead of throwing, so the other search modes keep working.
 *
 * @param {string} query - Free-text query.
 * @param {*} [domains] - Accepted for signature parity with the sibling
 *   search helpers; not used by the vector search.
 * @returns {Promise<{ vectorResults: Array<Object>, stats: Object|null }>}
 *   Up to 10 document entries sorted by best chunk score (descending) and
 *   whatever `getEmbeddingStats()` returned (null when unavailable).
 */
async function searchVector(query, domains) {
  const emptyResult = () => ({ vectorResults: [], stats: null })

  try {
    // 1. Embed the query; a failing embedder means "no vector results".
    let queryVector
    try {
      queryVector = await embed(query)
    } catch {
      return emptyResult()
    }

    if (!queryVector || queryVector.length === 0) return emptyResult()

    // 2. Look up the nearest chunks in the vector store.
    const hits = await hybridSearch(query, queryVector, { topK: 10 })

    // 3. Fold chunks into one entry per document.
    const docMap = new Map()
    for (const hit of hits) {
      let entry = docMap.get(hit.docId)
      if (!entry) {
        entry = {
          domain: "knowledge",
          id: hit.docId,
          title: hit.docTitle || hit.docId,
          type: "vector",
          score: hit.score,
          // Starts at 0: the increment below counts every chunk, including the first.
          matchCount: 0,
          chunks: [],
        }
        docMap.set(hit.docId, entry)
      }

      entry.matchCount++
      entry.chunks.push({
        // A chunk without text must not abort the whole search.
        content: String(hit.content ?? "").slice(0, 120),
        score: hit.score,
        chapterTitle: hit.chapterTitle,
      })

      // The document score is the best score among its chunks.
      if (hit.score > entry.score) entry.score = hit.score
    }

    // Callers show chunks[0] as the best chunk, so keep chunks ordered by score.
    for (const entry of docMap.values()) {
      entry.chunks.sort((a, b) => b.score - a.score)
    }

    // 4. Stats are informational only; losing them must not discard results.
    let stats = null
    try {
      stats = getEmbeddingStats()
    } catch {
      stats = null
    }

    return {
      vectorResults: Array.from(docMap.values())
        .sort((a, b) => b.score - a.score)
        .slice(0, 10),
      stats,
    }
  } catch {
    return emptyResult()
  }
}
|
|
247
|
+
|
|
186
248
|
// ──────────────────────────────────────────────
|
|
187
249
|
// Main search function
|
|
188
250
|
// ──────────────────────────────────────────────
|
|
@@ -196,6 +258,9 @@ export async function search(query, options = {}) {
|
|
|
196
258
|
console.log(chalk.gray(" --mode tags → Búsqueda rápida por etiquetas"))
|
|
197
259
|
console.log(chalk.gray(" --mode fulltext → Búsqueda literal en texto"))
|
|
198
260
|
console.log(chalk.gray(" --mode semantic → Búsqueda por concepto/significado"))
|
|
261
|
+
console.log(
|
|
262
|
+
chalk.gray(" --mode vector → Búsqueda semántica con embeddings (requiere Ollama)")
|
|
263
|
+
)
|
|
199
264
|
console.log(chalk.gray(" --mode hybrid → Combinación de todos (default)"))
|
|
200
265
|
console.log("")
|
|
201
266
|
return
|
|
@@ -208,6 +273,7 @@ export async function search(query, options = {}) {
|
|
|
208
273
|
|
|
209
274
|
const domains = getDomainsFromTaxonomy()
|
|
210
275
|
const results = []
|
|
276
|
+
let vectorStats = null
|
|
211
277
|
|
|
212
278
|
if (mode === "tags" || mode === "hybrid") {
|
|
213
279
|
const tagResults = searchByTags(query, domains)
|
|
@@ -224,6 +290,12 @@ export async function search(query, options = {}) {
|
|
|
224
290
|
results.push(...semResults.map((r) => ({ ...r, mode: "semantic" })))
|
|
225
291
|
}
|
|
226
292
|
|
|
293
|
+
if (mode === "vector" || mode === "hybrid") {
|
|
294
|
+
const { vectorResults, stats } = await searchVector(query, domains)
|
|
295
|
+
results.push(...vectorResults.map((r) => ({ ...r, mode: "vector" })))
|
|
296
|
+
vectorStats = stats
|
|
297
|
+
}
|
|
298
|
+
|
|
227
299
|
// Deduplicar y ordenar
|
|
228
300
|
const seen = new Set()
|
|
229
301
|
const unique = results
|
|
@@ -240,14 +312,34 @@ export async function search(query, options = {}) {
|
|
|
240
312
|
console.log(chalk.gray(" Sugerencias:"))
|
|
241
313
|
console.log(chalk.gray(" - Prueba con términos más generales"))
|
|
242
314
|
console.log(chalk.gray(" - Usa --mode semantic para buscar por concepto"))
|
|
315
|
+
console.log(chalk.gray(" - Usa --mode vector para búsqueda semántica con IA"))
|
|
243
316
|
console.log(chalk.gray(" - Usa opl index para navegar manualmente"))
|
|
317
|
+
|
|
318
|
+
// Mostrar estado de embeddings
|
|
319
|
+
if (vectorStats) {
|
|
320
|
+
console.log("")
|
|
321
|
+
console.log(chalk.gray(" Estado de embeddings:"))
|
|
322
|
+
console.log(
|
|
323
|
+
chalk.gray(
|
|
324
|
+
` 📊 ${vectorStats.totalEmbeddings} chunks indexados de ${vectorStats.totalDocs} documentos`
|
|
325
|
+
)
|
|
326
|
+
)
|
|
327
|
+
}
|
|
328
|
+
|
|
244
329
|
console.log("")
|
|
245
330
|
return
|
|
246
331
|
}
|
|
247
332
|
|
|
248
|
-
const modeIcons = { tags: "🏷️", fulltext: "📄", semantic: "🧠", hybrid: "🔀" }
|
|
333
|
+
const modeIcons = { tags: "🏷️", fulltext: "📄", semantic: "🧠", vector: "🔬", hybrid: "🔀" }
|
|
249
334
|
|
|
250
335
|
console.log(chalk.bold(` ${unique.length} resultado(s):`))
|
|
336
|
+
if (vectorStats && vectorStats.totalEmbeddings > 0) {
|
|
337
|
+
console.log(
|
|
338
|
+
chalk.gray(
|
|
339
|
+
` 📊 ${vectorStats.totalEmbeddings} chunks · ${vectorStats.totalDocs} docs · modelo: ${vectorStats.model || "—"}`
|
|
340
|
+
)
|
|
341
|
+
)
|
|
342
|
+
}
|
|
251
343
|
console.log("")
|
|
252
344
|
|
|
253
345
|
for (let i = 0; i < unique.length; i++) {
|
|
@@ -257,9 +349,14 @@ export async function search(query, options = {}) {
|
|
|
257
349
|
const title = (r.title || "").length > 50 ? `${(r.title || "").slice(0, 47)}...` : r.title || ""
|
|
258
350
|
const chapter = r.chapter ? chalk.gray(` → ${r.chapter.slice(0, 50)}`) : ""
|
|
259
351
|
const terms = r.terms ? chalk.gray(` [${r.terms.join(", ")}]`) : ""
|
|
352
|
+
const score = r.score !== undefined ? chalk.gray(` sim: ${(r.score * 100).toFixed(0)}%`) : ""
|
|
260
353
|
|
|
261
|
-
console.log(` ${chalk.cyan(`${num}.`)} ${icon} ${chalk.white(title)}`)
|
|
354
|
+
console.log(` ${chalk.cyan(`${num}.`)} ${icon} ${chalk.white(title)}${score}`)
|
|
262
355
|
console.log(` ${chalk.gray("Dominio:")} ${r.domain}${chapter}${terms}`)
|
|
356
|
+
if (r.chunks && r.chunks.length > 0) {
|
|
357
|
+
const topChunk = r.chunks[0]
|
|
358
|
+
console.log(` ${chalk.gray("Chunk:")} "${topChunk.content.slice(0, 80)}..."`)
|
|
359
|
+
}
|
|
263
360
|
console.log(` ${chalk.cyan(`opl read ${r.domain}/${r.id}`)}`)
|
|
264
361
|
console.log("")
|
|
265
362
|
}
|
|
@@ -303,6 +400,13 @@ export async function search(query, options = {}) {
|
|
|
303
400
|
chalk.italic("Para navegar:") +
|
|
304
401
|
chalk.gray(` opl index <dominio> │`)
|
|
305
402
|
)
|
|
403
|
+
if (vectorStats) {
|
|
404
|
+
console.log(
|
|
405
|
+
chalk.gray("│ ") +
|
|
406
|
+
chalk.italic("Embeddings:") +
|
|
407
|
+
chalk.gray(` ${vectorStats.totalEmbeddings} chunks · ${vectorStats.model} │`)
|
|
408
|
+
)
|
|
409
|
+
}
|
|
306
410
|
console.log(chalk.gray("└─────────────────────────────────────────────────────────────┘"))
|
|
307
411
|
console.log("")
|
|
308
412
|
}
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
// @use(kind, contract, limit, deps)
|
|
2
|
+
// @kind(feature)
|
|
3
|
+
// @contract(in: url:string -> out: void, sideEffect: fetch web + crea archivo knowledge + indexa embeddings)
|
|
4
|
+
// @limit(lines: 350)
|
|
5
|
+
// @deps(@mozilla/readability, turndown, node-fetch|undici, ../embeddings/index-pipeline)
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Web Scraper OPL — Extrae contenido web, lo limpia con Ollama y lo indexa.
|
|
9
|
+
*
|
|
10
|
+
* Flujo:
|
|
11
|
+
* 1. GET url → HTML
|
|
12
|
+
* 2. Readability.js → contenido limpio (título + body HTML)
|
|
13
|
+
* 3. Turndown → Markdown
|
|
14
|
+
* 4. (opcional) Ollama → limpieza + estructuración
|
|
15
|
+
* 5. Guardar en knowledge/<dominio>/<id>/
|
|
16
|
+
* 6. Indexar embeddings automáticamente
|
|
17
|
+
*
|
|
18
|
+
* Uso:
|
|
19
|
+
* opl webscrape <url>
|
|
20
|
+
* opl webscrape <url> --domain frontend
|
|
21
|
+
* opl webscrape <url> --no-embed
|
|
22
|
+
* opl webscrape <url> --clean (usa Ollama para limpiar)
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import chalk from "chalk"
|
|
26
|
+
|
|
27
|
+
// ─── Constantes ────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
const DEFAULT_DOMAIN = "web"
|
|
30
|
+
|
|
31
|
+
// ─── Handler principal ─────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * Scrapes a single web page into the project knowledge base.
 *
 * Steps: download the page with `fetch` (15 s timeout), extract the main
 * article with jsdom + Readability (falling back to the raw body / raw HTML),
 * convert it to Markdown with Turndown (falling back to a tag strip),
 * optionally pass it through `cleanWithOllama`, store it with
 * `saveToKnowledge`, and finally index it with `indexDocument` unless
 * embeddings were disabled. Progress and errors are reported on stdout;
 * the function never throws.
 *
 * @param {string} url - URL to scrape; only http(s) URLs are accepted.
 * @param {Object} [options]
 * @param {string} [options.domain='web'] - Target knowledge domain.
 * @param {boolean} [options.noEmbed] - Skip embedding indexation.
 * @param {boolean} [options.clean] - Clean the content with Ollama.
 * @returns {Promise<void>}
 */
export async function webscrape(url, options = {}) {
  if (!url) {
    showHelp()
    return
  }

  const domain = options.domain || DEFAULT_DOMAIN
  const noEmbed = options.noEmbed === true
  const doClean = options.clean === true

  // Validate the URL
  let parsedUrl
  try {
    parsedUrl = new URL(url)
  } catch {
    console.log(chalk.red(`\n❌ URL inválida: "${url}"\n`))
    return
  }

  // `new URL` also accepts file:, ftp:, data:, ... which are not web pages.
  if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
    console.log(
      chalk.red(`\n❌ Protocolo no soportado: "${parsedUrl.protocol}" (usa http o https)\n`)
    )
    return
  }

  console.log("")
  console.log(chalk.cyan(`🌐 Web Scraper: "${chalk.bold(url)}"`))
  console.log(chalk.gray(` Dominio destino: ${domain}`))
  console.log("")

  try {
    // ── Phase 1: Fetch ─────────────────────────────────────────
    console.log(chalk.blue(" 📥 Descargando página..."))

    const response = await fetch(url, {
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; OPL-WebScraper/1.0; +https://openprompt-lang.dev)",
        Accept: "text/html,application/xhtml+xml",
      },
      signal: AbortSignal.timeout(15000),
    })

    if (!response.ok) {
      console.log(chalk.red(` ❌ Error HTTP: ${response.status} ${response.statusText}`))
      console.log("")
      return
    }

    const html = await response.text()
    console.log(chalk.green(` ✅ Descargado: ${(html.length / 1024).toFixed(1)} KB`))

    // ── Phase 2: Extract with Readability ──────────────────────
    console.log(chalk.blue(" 📄 Extrayendo contenido..."))

    let title, content
    try {
      const { JSDOM } = await import("jsdom")
      const dom = new JSDOM(html, { url })
      const reader = new (await import("@mozilla/readability")).Readability(dom.window.document)
      const article = reader.parse()

      if (!article || !article.content) {
        console.log(chalk.yellow(" ⚠️ Readability no pudo extraer contenido estructurado."))
        console.log(chalk.gray(" Usando raw <body> como fallback..."))
        const body = dom.window.document.querySelector("body")
        title = dom.window.document.title || parsedUrl.hostname
        content = body ? body.innerHTML : html
      } else {
        title = article.title
        content = article.content
      }
    } catch (err) {
      console.log(chalk.yellow(` ⚠️ Readability falló: ${err.message}`))
      console.log(chalk.gray(" Usando contenido raw HTML..."))
      title = parsedUrl.hostname
      content = html
    }

    // Guard against a missing/blank title: it is shown to the user and
    // is the source of the document id below.
    title = (typeof title === "string" ? title.trim() : "") || parsedUrl.hostname

    console.log(chalk.green(` ✅ Extraído: "${title}"`))

    // ── Phase 3: Convert to Markdown ───────────────────────────
    console.log(chalk.blue(" 📝 Convirtiendo a Markdown..."))

    let markdown
    try {
      const TurndownService = (await import("turndown")).default
      const turndown = new TurndownService({
        headingStyle: "atx",
        codeBlockStyle: "fenced",
        emDelimiter: "*",
      })
      markdown = turndown.turndown(content)
    } catch (err) {
      console.log(chalk.yellow(` ⚠️ Turndown falló: ${err.message}`))
      // Last resort: strip tags and keep the text.
      markdown = content.replace(/<[^>]+>/g, "").trim()
    }

    const wordCount = markdown.split(/\s+/).filter(Boolean).length
    console.log(chalk.green(` ✅ Markdown: ${wordCount} palabras`))

    // ── Phase 4 (optional): Clean with Ollama ──────────────────
    if (doClean) {
      console.log(chalk.blue(" 🧹 Limpiando con Ollama..."))
      const cleaned = await cleanWithOllama(markdown, title)
      // The helper returns its input untouched when Ollama is unavailable,
      // so only report success when the content actually changed.
      if (cleaned !== markdown) {
        markdown = cleaned
        console.log(chalk.green(" ✅ Contenido limpiado"))
      } else {
        console.log(chalk.yellow(" ⚠️ Ollama no disponible; se conserva el contenido sin limpiar"))
      }
    }

    // ── Phase 5: Save into knowledge ───────────────────────────
    console.log(chalk.blue(" 💾 Guardando en conocimiento..."))

    // A title made only of characters the slug drops would yield an empty
    // id and the document would be written into the domain folder itself.
    const docId = slugify(title) || slugify(parsedUrl.hostname) || "documento"
    const saved = await saveToKnowledge(docId, domain, title, markdown, url)

    if (!saved) {
      console.log(chalk.red(" ❌ No se pudo guardar el documento."))
      console.log("")
      return
    }

    console.log(chalk.green(` ✅ Guardado: knowledge/${domain}/${docId}/`))
    console.log(chalk.gray(` ${saved.chapterCount} capítulo(s)`))
    console.log(chalk.gray(` ${saved.filePath}`))

    // ── Phase 6: Index embeddings ──────────────────────────────
    if (!noEmbed) {
      console.log(chalk.blue(" 🧠 Indexando embeddings..."))
      try {
        const { indexDocument } = await import("../embeddings/index-pipeline.js")

        const doc = {
          id: docId,
          title,
          chapters: saved.chapters.map((ch, i) => ({
            index: i,
            title: ch.title,
            content: ch.content,
          })),
        }

        const result = await indexDocument(doc, { strategy: "section" })

        if (result.success) {
          console.log(
            chalk.green(` ✅ Embeddings: ${result.indexedChunks} chunks · ${result.durationMs}ms`)
          )
        } else {
          console.log(
            chalk.yellow(` ⚠️ Embedding parcial: ${result.indexedChunks}/${result.totalChunks}`)
          )
        }
      } catch (embedErr) {
        // Embeddings are optional: the document is already saved.
        console.log(chalk.gray(` ⏭️ Embedding no disponible: ${embedErr.message}`))
      }
    }

    console.log("")
    console.log(chalk.cyan(`✅ Página scrapeada: "${title}"`))
    console.log(chalk.gray(` ${url}`))
    console.log(chalk.gray(` knowledge/${domain}/${docId}/`))
    console.log("")
  } catch (error) {
    console.log(chalk.red(`\n❌ Error: ${error.message}\n`))
  }
}
|
|
200
|
+
|
|
201
|
+
// ─── Helpers ───────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
/**
 * Cleans and restructures Markdown content through a local Ollama server.
 *
 * Only a bounded prefix of the document is sent to the model. When the
 * document is longer than that prefix, the untouched remainder is appended
 * after the cleaned text so no content is lost. On any failure (network
 * error, timeout, non-2xx status, empty reply) the original Markdown is
 * returned unchanged.
 *
 * @param {string} markdown - Markdown content to clean.
 * @param {string} title - Document title, included in the prompt.
 * @returns {Promise<string>} Cleaned Markdown, or the input on failure.
 */
async function cleanWithOllama(markdown, title) {
  // Upper bound of characters sent to the model in one prompt.
  const MAX_PROMPT_CHARS = 12000

  // Prefer cutting on a line break so the remainder does not start mid-line,
  // unless that would discard more than half of the prompt budget.
  let cut = markdown.length
  if (markdown.length > MAX_PROMPT_CHARS) {
    const lineBreak = markdown.lastIndexOf("\n", MAX_PROMPT_CHARS)
    cut = lineBreak > MAX_PROMPT_CHARS / 2 ? lineBreak : MAX_PROMPT_CHARS
  }
  const head = markdown.slice(0, cut)
  const tail = markdown.slice(cut).trimStart()

  try {
    const response = await fetch("http://localhost:11434/api/generate", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "llama3.2",
        prompt: `Limpia y estructura el siguiente contenido web en Markdown.
Elimina navegación, publicidad, barras laterales y elementos no relevantes.
Mantén solo el contenido principal bien estructurado con encabezados.

Título: ${title}

Contenido:
${head}`,
        stream: false,
        options: { temperature: 0.1, num_predict: 4096 },
      }),
      signal: AbortSignal.timeout(60000),
    })

    if (!response.ok) return markdown

    const data = await response.json()
    const cleaned = typeof data.response === "string" ? data.response : ""

    // A blank reply must not wipe the document.
    if (cleaned.trim() === "") return markdown

    // Re-attach the part of the document the model never saw.
    return tail ? `${cleaned.trimEnd()}\n\n${tail}` : cleaned
  } catch {
    return markdown // fallback: return the content uncleaned
  }
}
|
|
239
|
+
|
|
240
|
+
/**
 * Stores scraped content under `<knowledgeDir>/<domain>/<docId>/`.
 *
 * Writes `meta.json`, `full.md` and one file per detected chapter inside
 * `chapters/`. `domain` and `docId` end up in a filesystem path, so they
 * are validated first: an empty id, an id containing path separators, or a
 * domain that is absolute or contains `..` segments is rejected.
 *
 * @param {string} docId - Document id (single path segment).
 * @param {string} domain - Knowledge domain (relative path, no `..`).
 * @param {string} title - Document title.
 * @param {string} markdown - Full Markdown content.
 * @param {string} sourceUrl - URL the content was scraped from.
 * @returns {Promise<Object|null>} `{ filePath, chapterCount, chapters }`, or
 *   null when the arguments are unsafe or no knowledge directory is available.
 */
async function saveToKnowledge(docId, domain, title, markdown, sourceUrl) {
  const { isAbsolute, join } = await import("path")

  // docId must be exactly one directory name.
  const validDocId =
    typeof docId === "string" &&
    docId.trim() !== "" &&
    docId !== "." &&
    docId !== ".." &&
    !/[\\/]/.test(docId)

  // domain may be nested ("a/b") but must stay inside the knowledge dir.
  const validDomain =
    typeof domain === "string" &&
    domain.trim() !== "" &&
    !isAbsolute(domain) &&
    !domain.split(/[\\/]+/).includes("..")

  if (!validDocId || !validDomain) return null

  const { mkdirSync, writeFileSync } = await import("fs")
  const { getProjectKnowledgeDir } = await import("./knowledge-helpers.js")

  const knowledgeDir = getProjectKnowledgeDir()
  if (!knowledgeDir) return null

  const docDir = join(knowledgeDir, domain, docId)
  mkdirSync(docDir, { recursive: true })

  // Save metadata
  const meta = {
    id: docId,
    title,
    source: sourceUrl,
    scrapedAt: new Date().toISOString(),
    domain,
  }
  writeFileSync(join(docDir, "meta.json"), JSON.stringify(meta, null, 2), "utf-8")

  // Save the full content
  writeFileSync(join(docDir, "full.md"), markdown, "utf-8")

  // Split into chapters on "## " headings and write one file per chapter
  const chapters = detectChapters(markdown, title)
  const chDir = join(docDir, "chapters")
  mkdirSync(chDir, { recursive: true })

  for (const ch of chapters) {
    const safeTitle = slugify(ch.title)
    // The zero-padded index keeps files ordered and unique even when
    // two chapter titles produce the same slug.
    const fileName = `${String(ch.index).padStart(2, "0")}-${safeTitle}.md`
    writeFileSync(join(chDir, fileName), ch.content, "utf-8")
  }

  return {
    filePath: join(domain, docId),
    chapterCount: chapters.length,
    chapters,
  }
}
|
|
291
|
+
|
|
292
|
+
/**
 * Splits Markdown into chapters on level-2 ("## ") headings.
 *
 * Text before the first heading becomes a chapter named after
 * `defaultTitle`. Chapters whose content is empty are skipped, `## ` lines
 * inside fenced code blocks are treated as code, and both LF and CRLF line
 * endings are accepted. When nothing qualifies, a single chapter holding
 * the whole (trimmed) document is returned.
 *
 * @param {string} markdown - Markdown text to split.
 * @param {string} defaultTitle - Title for content that precedes any heading.
 * @returns {Array<{ index: number, title: string, content: string }>}
 *   Chapters with contiguous, zero-based indexes.
 */
function detectChapters(markdown, defaultTitle) {
  const lines = markdown.split(/\r?\n/)
  const chapters = []
  let currentTitle = defaultTitle || "Introducción"
  let currentContent = []
  // Marker ("```", "~~~~", ...) of the code fence currently open, if any.
  let openFence = null

  // Emits the buffered lines as a chapter unless they are blank.
  const flush = () => {
    const content = currentContent.join("\n").trim()
    currentContent = []
    if (content === "") return
    chapters.push({ index: chapters.length, title: currentTitle, content })
  }

  for (const line of lines) {
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/)
    if (fence) {
      if (openFence === null) {
        openFence = fence[1]
      } else if (fence[1][0] === openFence[0] && fence[1].length >= openFence.length) {
        // A fence closes only with the same character and at least the same length.
        openFence = null
      }
    }

    const headingMatch = openFence === null && !fence ? line.match(/^## (.+)/) : null
    if (headingMatch) {
      flush()
      currentTitle = headingMatch[1].trim()
    } else {
      currentContent.push(line)
    }
  }

  // Last chapter
  flush()

  // No usable chapter found: return the whole document as a single one
  if (chapters.length === 0) {
    chapters.push({
      index: 0,
      title: defaultTitle || "Contenido",
      content: markdown.trim(),
    })
  }

  return chapters
}
|
|
342
|
+
|
|
343
|
+
/**
 * Converts text into a slug usable as an id / directory name.
 *
 * Lower-cases the text, keeps only ASCII letters, digits, Spanish accented
 * letters, whitespace and hyphens, collapses whitespace and hyphen runs
 * into single hyphens, trims edge hyphens and limits the result to 80
 * characters. Null/undefined input yields an empty string.
 *
 * @param {string} text - Text to convert; non-strings are stringified.
 * @returns {string} The slug; may be empty when nothing survives filtering.
 */
function slugify(text) {
  return String(text ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9áéíóúüñ\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 80)
    // Truncation can cut right after a separator; never end on a hyphen.
    .replace(/-$/, "")
}
|
|
358
|
+
|
|
359
|
+
/**
 * Prints the usage text of the `opl webscrape` command to stdout.
 */
function showHelp() {
  // One entry per printed line, in order; "" is a blank separator line.
  const helpLines = [
    "",
    chalk.cyan("🌐 Comando: opl webscrape <url>"),
    "",
    chalk.white(" Extrae contenido de una página web y lo indexa en el conocimiento."),
    "",
    chalk.gray(" Opciones:"),
    chalk.gray(" --domain <d> Dominio de conocimiento (default: web)"),
    chalk.gray(" --no-embed No indexar embeddings automáticamente"),
    chalk.gray(" --clean Limpiar contenido con Ollama (requiere Ollama)"),
    chalk.gray(" --deep 🕸️ Deep scrape: navega categorías y subcategorías"),
    chalk.gray(" --max-depth <n> Profundidad máxima para deep scrape (default: 2)"),
    chalk.gray(" --concurrency <n> Fetchs simultáneos (default: 3)"),
    chalk.gray(" --verbose Log detallado del proceso"),
    "",
    chalk.gray(" Ejemplos:"),
    chalk.gray(" opl webscrape https://react.dev/learn"),
    chalk.gray(" opl webscrape https://freefrontend.com --deep --max-depth 2"),
    chalk.gray(
      " opl webscrape https://freefrontend.com/tailwind-code-examples --deep --concurrency 5"
    ),
    chalk.gray(" opl webscrape https://example.com --domain frontend"),
    chalk.gray(" opl webscrape https://example.com --clean"),
    "",
  ]

  for (const helpLine of helpLines) {
    console.log(helpLine)
  }
}
|
|
@@ -603,6 +603,15 @@ async function start(description) {
|
|
|
603
603
|
|
|
604
604
|
writeJSON(SESSION_FILE, session)
|
|
605
605
|
appendLog("session_start", description)
|
|
606
|
+
|
|
607
|
+
// Sincronizar con MCP shared state (workflow delivery → modo execute)
|
|
608
|
+
try {
|
|
609
|
+
const { syncWorkflowPhase } = await import("../mcp-shared-state.js")
|
|
610
|
+
syncWorkflowPhase("DELIVERY")
|
|
611
|
+
} catch {
|
|
612
|
+
/* silencioso */
|
|
613
|
+
}
|
|
614
|
+
|
|
606
615
|
console.log(chalk.green(`\n✅ Sesión iniciada: ${session.session.id}\n`))
|
|
607
616
|
console.log(` ${chalk.bold("Tarea:")} ${description}`)
|
|
608
617
|
console.log(` ${chalk.bold("Rama:")} ${branch}`)
|
|
@@ -824,6 +833,14 @@ async function close() {
|
|
|
824
833
|
writeJSON(SESSION_FILE, session)
|
|
825
834
|
appendLog("session_close", `Sesión cerrada: ${session.session.id}`)
|
|
826
835
|
|
|
836
|
+
// Sincronizar con MCP shared state (workflow close → modo plan de nuevo)
|
|
837
|
+
try {
|
|
838
|
+
const { syncWorkflowPhase } = await import("../mcp-shared-state.js")
|
|
839
|
+
syncWorkflowPhase("CLOSE_SESSION")
|
|
840
|
+
} catch {
|
|
841
|
+
/* silencioso */
|
|
842
|
+
}
|
|
843
|
+
|
|
827
844
|
try {
|
|
828
845
|
const { updateProjectContext } = await import("./project-context.js")
|
|
829
846
|
await updateProjectContext(resolveBase())
|
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
} from "../../../persistence/sqlite/queries.js"
|
|
17
17
|
import { executeTransition, canTransition } from "../../../core/workflow/transitions.js"
|
|
18
18
|
import { generateSessionDocs, generateIndexDoc } from "../../../docgen/session-docs.js"
|
|
19
|
-
import { setPhase } from "../../../mcp-shared-state.js"
|
|
19
|
+
import { setPhase, syncWorkflowPhase } from "../../../mcp-shared-state.js"
|
|
20
20
|
import chalk from "chalk"
|
|
21
21
|
|
|
22
22
|
/**
|
|
@@ -148,6 +148,7 @@ export async function closeWorkflow(options = {}) {
|
|
|
148
148
|
setMetadata(db, "active_session", "")
|
|
149
149
|
setMetadata(db, "active_ticket", "")
|
|
150
150
|
setPhase("idle", process.cwd())
|
|
151
|
+
syncWorkflowPhase("CLOSE_SESSION")
|
|
151
152
|
|
|
152
153
|
// ─── 3. Generar documentación viva ────────────────────────────────────
|
|
153
154
|
const sessionFiles = generateSessionDocs({
|
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
createTicket,
|
|
14
14
|
createSession,
|
|
15
15
|
} from "../../../persistence/sqlite/queries.js"
|
|
16
|
+
import { syncWorkflowPhase } from "../../../mcp-shared-state.js"
|
|
16
17
|
import { executeTransition, canTransition } from "../../../core/workflow/transitions.js"
|
|
17
18
|
import { getPhase } from "../../../core/workflow/phases.js"
|
|
18
19
|
import chalk from "chalk"
|
|
@@ -133,6 +134,9 @@ export async function delivery(options = {}) {
|
|
|
133
134
|
setMetadata(db, "active_ticket", ticketId)
|
|
134
135
|
setMetadata(db, "workflow_phase", "DELIVERY")
|
|
135
136
|
|
|
137
|
+
// Sincronizar con MCP shared state: DELIVERY → modo execute
|
|
138
|
+
syncWorkflowPhase("DELIVERY")
|
|
139
|
+
|
|
136
140
|
conn.close()
|
|
137
141
|
|
|
138
142
|
const phaseInfo = getPhase("DELIVERY")
|
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
canTransition,
|
|
13
13
|
getInitialContext,
|
|
14
14
|
} from "../../../core/workflow/transitions.js"
|
|
15
|
+
import { syncWorkflowPhase } from "../../../mcp-shared-state.js"
|
|
15
16
|
import { runWizard, getWizardResponses } from "../../../wizard/orchestrator.js"
|
|
16
17
|
import chalk from "chalk"
|
|
17
18
|
|
|
@@ -75,6 +76,9 @@ export async function discovery(options = {}) {
|
|
|
75
76
|
setMetadata(db, "workflow_project_id", projectId)
|
|
76
77
|
}
|
|
77
78
|
|
|
79
|
+
// Sincronizar con MCP shared state: DISCOVERY → modo plan
|
|
80
|
+
syncWorkflowPhase("DISCOVERY")
|
|
81
|
+
|
|
78
82
|
conn.close()
|
|
79
83
|
|
|
80
84
|
console.log(chalk.green(`\n✅ Discovery completado para: ${projectId}`))
|