npm - openprompt-lang - Versions diffs - 1.3.0 → 1.4.0 - Mend

openprompt-lang 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/bin/cli.js +2 -0
package/docs/00-ARCHITECTURE/OPL-BOOST-MULTI-AGENT.md +406 -0
package/docs/02-STANDARDS/AGENTS.template.md +89 -0
package/docs/02-STANDARDS/ticket-driven-development.md +99 -0
package/docs/04-TICKETS/BOOST-001-profile-registry.md +66 -0
package/docs/04-TICKETS/BOOST-002-context-compression.md +58 -0
package/docs/04-TICKETS/BOOST-003-template-hydration.md +69 -0
package/docs/04-TICKETS/BOOST-004-fewshot-engine.md +58 -0
package/docs/04-TICKETS/BOOST-005-agent-pool.md +69 -0
package/docs/04-TICKETS/BOOST-006-specialized-agents.md +53 -0
package/docs/04-TICKETS/BOOST-007-validation-loop.md +56 -0
package/docs/04-TICKETS/BOOST-008-orchestrator.md +71 -0
package/docs/04-TICKETS/BOOST-009-cache-system.md +56 -0
package/docs/04-TICKETS/BOOST-010-cli-mcp.md +67 -0
package/docs/04-TICKETS/BOOST-011-self-learning.md +50 -0
package/docs/04-TICKETS/BOOST-012-prompt-preamble.md +109 -0
package/docs/04-TICKETS/BOOST-013-hydrator-duplicate-code.md +132 -0
package/docs/04-TICKETS/BOOST-014-multiagent-missing-parts.md +87 -0
package/docs/04-TICKETS/BOOST-015-skeleton-type-missing.md +76 -0
package/docs/04-TICKETS/BOOST-016-output-path-duplicate.md +68 -0
package/docs/04-TICKETS/INDEX.md +89 -0
package/docs/04-TICKETS/_archive/BOOST-005-micro-tasking.md +67 -0
package/docs/04-TICKETS/_archive/BOOST-006-validation-loop.md +66 -0
package/docs/04-TICKETS/_archive/BOOST-007-progressive-pipeline.md +69 -0
package/docs/04-TICKETS/_archive/BOOST-008-cli-mcp-integration.md +74 -0
package/docs/AI_CONTEXT.md +16 -0
package/package.json +3 -2
package/src/boost/agent-pool.js +442 -0
package/src/boost/agents/index.js +79 -0
package/src/boost/cache.js +241 -0
package/src/boost/context-compressor.js +354 -0
package/src/boost/fewshot-retriever.js +332 -0
package/src/boost/hardware-detector.js +486 -0
package/src/boost/hydrator.js +398 -0
package/src/boost/index.js +60 -0
package/src/boost/orchestrator.js +615 -0
package/src/boost/preamble.js +217 -0
package/src/boost/profile-registry.js +264 -0
package/src/boost/self-learn.js +247 -0
package/src/boost/skeletons/component.skeleton.js +24 -0
package/src/boost/skeletons/hook.skeleton.js +27 -0
package/src/boost/skeletons/index.js +67 -0
package/src/boost/skeletons/page.skeleton.js +22 -0
package/src/boost/skeletons/service.skeleton.js +20 -0
package/src/boost/skeletons/store.skeleton.js +18 -0
package/src/boost/skeletons/type.skeleton.js +11 -0
package/src/boost/task-dispatcher.js +142 -0
package/src/boost/validation-loop.js +495 -0
package/src/cli/commands-boost.js +394 -0
package/src/mcp-refactor/handlers/boost.js +295 -0
package/src/mcp-refactor/router.js +19 -0
package/src/mcp-refactor/tools.js +113 -0

package/src/boost/hardware-detector.js ADDED Viewed

@@ -0,0 +1,486 @@
+// @use(kind, contract, limit)
+// @kind(util)
+// @contract(in: none -> out: detectHardware, getRuntimeStatus, getSafeParallelism, getRAMWarning, estimateModelRAM, generateBoostConfig, saveBoostConfig, getSetupMessage)
+// @limit(lines: 350)
+/**
+ * Hardware Detector — Módulo OPL Boost
+ *
+ * Detecta las capacidades del PC del usuario para recomendar
+ * la configuración óptima de Boost: perfil, paralelismo, modelo.
+ *
+ * Se ejecuta en:
+ *   - Primera instalación (postinstall / opl boost setup)
+ *   - Bajo demanda (opl boost setup --re-detect)
+ *
+ * La configuración se guarda en prompt-lang.json > boost.hardware
+ */
+import os from "os"
+import { execSync } from "child_process"
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs"
+import { join } from "path"
+import { PROFILES } from "./profile-registry.js"
+// ──────────────────────────────────────────────
+// Detección de hardware
+// ──────────────────────────────────────────────
+function bytesToGB(bytes) {
+  return Math.round(bytes / (1024 * 1024 * 1024))
+}
+function detectRAM() {
+  return {
+    totalBytes: os.totalmem(),
+    totalGB: bytesToGB(os.totalmem()),
+    freeBytes: os.freemem(),
+    freeGB: bytesToGB(os.freemem()),
+  }
+}
+function detectCPU() {
+  const cpus = os.cpus()
+  return {
+    cores: cpus.length,
+    model: cpus[0]?.model?.trim() || "Desconocido",
+    arch: process.arch,
+    platform: process.platform,
+  }
+}
+function detectGPU() {
+  try {
+    if (process.platform === "win32") {
+      const output = execSync(
+        'wmic path win32_VideoController get name,adapterram 2>nul',
+        { encoding: "utf-8", timeout: 5000 }
+      )
+      const lines = output.split("\n").filter((l) => l.trim() && !l.includes("Name"))
+      if (lines.length > 0) {
+        const parts = lines[0].trim().split(/\s{2,}/)
+        return {
+          name: parts[0] || "GPU detectada",
+          vramBytes: parts[1] ? parseInt(parts[1]) : null,
+          vramGB: parts[1] ? bytesToGB(parseInt(parts[1])) : null,
+        }
+      }
+    } else {
+      // Linux / macOS — nvidia-smi
+      const output = execSync(
+        'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo ""',
+        { encoding: "utf-8", timeout: 5000 }
+      )
+      const line = output.trim()
+      if (line) {
+        const [name, memStr] = line.split(", ")
+        const vramMB = memStr ? parseInt(memStr.replace(" MiB", "").trim()) : null
+        return {
+          name: name?.trim() || "NVIDIA GPU detectada",
+          vramBytes: vramMB ? vramMB * 1024 * 1024 : null,
+          vramGB: vramMB ? Math.round(vramMB / 1024) : null,
+        }
+      }
+      // Intel ARC / AMD — intentar con glxinfo
+      const glxOutput = execSync(
+        'glxinfo -B 2>/dev/null | grep "Device:" || echo ""',
+        { encoding: "utf-8", timeout: 5000 }
+      )
+      if (glxOutput.trim()) {
+        return { name: glxOutput.trim().replace("Device:", "").trim(), vramBytes: null, vramGB: null }
+      }
+    }
+  } catch {
+    // No GPU detectada o no hay drivers
+  }
+  return null
+}
+function detectOllamaStatus() {
+  try {
+    const output = execSync(
+      'curl -s http://localhost:11434/api/tags 2>/dev/null || echo ""',
+      { encoding: "utf-8", timeout: 3000 }
+    )
+    if (output) {
+      const data = JSON.parse(output)
+      const models = (data.models || []).map((m) => ({
+        name: m.name,
+        sizeGB: Math.round(m.size / (1024 * 1024 * 1024) * 10) / 10,
+      }))
+      return { installed: true, models }
+    }
+  } catch {
+    // Ollama no está corriendo
+  }
+  return { installed: false, models: [] }
+}
+// ──────────────────────────────────────────────
+// Recomendación de configuración
+// ──────────────────────────────────────────────
+function recommendProfile(ram, cpu, gpu) {
+  const ramGB = ram.totalGB
+  const cores = cpu.cores
+  const hasGPU = gpu !== null
+  const vramGB = gpu?.vramGB || 0
+  // Reglas de recomendación basadas en hardware real
+  if (ramGB >= 32 && cores >= 8 && hasGPU && vramGB >= 8) {
+    return { profile: "large", reason: "PC de alta capacidad: RAM ≥32GB, GPU ≥8GB VRAM, ≥8 núcleos" }
+  }
+  if (ramGB >= 16 && cores >= 6) {
+    return { profile: "medium", reason: "PC de capacidad media: RAM ≥16GB, ≥6 núcleos" }
+  }
+  if (ramGB >= 8 && cores >= 4) {
+    return { profile: "medium", reason: "PC estándar: RAM ≥8GB, ≥4 núcleos. Se usará modo cola para no saturar." }
+  }
+  return { profile: "small", reason: "PC con recursos limitados: se usará el modelo más pequeño y modo cola" }
+}
+function recommendParallelism(ram, gpu, profile) {
+  const ramGB = ram.totalGB
+  const hasGPU = gpu !== null
+  const vramGB = gpu?.vramGB || 0
+  if (profile === "large" && hasGPU && vramGB >= 12) {
+    return { mode: "parallel", maxConcurrency: 3, reason: "GPU con suficiente VRAM para 3 instancias" }
+  }
+  if (profile === "large" && hasGPU) {
+    return { mode: "parallel", maxConcurrency: 2, reason: "GPU disponible, 2 instancias en paralelo" }
+  }
+  if (ramGB >= 32 && hasGPU) {
+    return { mode: "parallel", maxConcurrency: 2, reason: "Suficiente RAM y GPU para 2 instancias" }
+  }
+  if (ramGB >= 16) {
+    return { mode: "queue", maxConcurrency: 1, reason: "RAM suficiente para cola, no para paralelo real" }
+  }
+  return { mode: "queue", maxConcurrency: 1, reason: "Modo cola: un agente a la vez para no saturar el sistema" }
+}
+function recommendModel(gpu, ram, profile) {
+  const hasGPU = gpu !== null
+  const vramGB = gpu?.vramGB || 0
+  if (profile === "large" && hasGPU && vramGB >= 16) {
+    return "qwen2.5-coder:7b" // o 14b si está disponible
+  }
+  if (profile === "medium" && hasGPU && vramGB >= 6) {
+    return "qwen2.5-coder:7b"
+  }
+  if (profile === "medium" && ram.totalGB >= 16) {
+    return "qwen2.5-coder:7b" // CPU con suficiente RAM
+  }
+  return "llama3.2:latest" // Modelo pequeño para CPU o RAM limitada
+}
+// ──────────────────────────────────────────────
+// API pública
+// ──────────────────────────────────────────────
+export function detectHardware() {
+  const ram = detectRAM()
+  const cpu = detectCPU()
+  const gpu = detectGPU()
+  const ollama = detectOllamaStatus()
+  const profileRec = recommendProfile(ram, cpu, gpu)
+  const parallelRec = recommendParallelism(ram, gpu, profileRec.profile)
+  const modelRec = recommendModel(gpu, ram, profileRec.profile)
+  return {
+    timestamp: new Date().toISOString(),
+    os: `${process.platform} ${process.arch}`,
+    ram,
+    cpu,
+    gpu,
+    ollama,
+    recommendation: {
+      profile: profileRec.profile,
+      profileReason: profileRec.reason,
+      agentMode: parallelRec.mode,
+      maxConcurrency: parallelRec.maxConcurrency,
+      concurrencyReason: parallelRec.reason,
+      recommendedModel: modelRec,
+    },
+  }
+}
+export function generateBoostConfig(hardware) {
+  const { recommendation, ollama } = hardware
+  return {
+    enabled: true,
+    defaultProfile: "auto",
+    hardware: {
+      totalRamGB: hardware.ram.totalGB,
+      cpuCores: hardware.cpu.cores,
+      gpuName: hardware.gpu?.name || null,
+      gpuVramGB: hardware.gpu?.vramGB || null,
+      hasOllama: ollama.installed,
+      ollamaModels: ollama.models.map((m) => m.name),
+      detectedAt: hardware.timestamp,
+    },
+    profiles: {
+      small: PROFILES.small,
+      medium: PROFILES.medium,
+      large: PROFILES.large,
+    },
+    agentPool: {
+      mode: recommendation.agentMode,
+      maxConcurrency: recommendation.maxConcurrency,
+      modelName: recommendation.recommendedModel,
+      timeout: 30000,
+    },
+  }
+}
+export function saveBoostConfig(boostConfig) {
+  const configPath = join(process.cwd(), "prompt-lang.json")
+  let config
+  if (existsSync(configPath)) {
+    config = JSON.parse(readFileSync(configPath, "utf-8"))
+  } else {
+    config = {}
+  }
+  config.boost = boostConfig
+  writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8")
+  return configPath
+}
+export function getSetupMessage(hardware) {
+  const { ram, cpu, gpu, ollama, recommendation } = hardware
+  const lines = []
+  lines.push("")
+  lines.push("╔══════════════════════════════════════════════════════════╗")
+  lines.push("║           🚀 OPL Boost — Setup de Hardware            ║")
+  lines.push("╚══════════════════════════════════════════════════════════╝")
+  lines.push("")
+  lines.push("📊  Hardware detectado:")
+  lines.push(`     RAM:  ${ram.totalGB} GB (${ram.freeGB} GB libres)`)
+  lines.push(`     CPU:  ${cpu.cores} núcleos — ${cpu.model}`)
+  lines.push(`     GPU:  ${gpu ? `${gpu.name} (${gpu.vramGB ? gpu.vramGB + " GB VRAM" : "desconocida"})` : "No detectada"}`)
+  if (ollama.installed) {
+    lines.push(`     🟢  Ollama: INSTALADO (${ollama.models.length} modelos disponibles)`)
+    for (const m of ollama.models) {
+      lines.push(`           • ${m.name} (${m.sizeGB} GB)`)
+    }
+  } else {
+    lines.push(`     🔴  Ollama: NO DETECTADO`)
+  }
+  lines.push("")
+  lines.push("💡  ¿Por qué necesitamos Ollima?")
+  lines.push("     OPL Boost usa modelos locales de IA para generar código.")
+  lines.push("     En lugar de gastar tokens de un modelo grande (GPT-4, Claude),")
+  lines.push("     Boost reparte el trabajo entre modelos pequeños (7B) que corren")
+  lines.push("     en tu PC. Es más lento pero MUCHO más barato, y te permite")
+  lines.push("     trabajar sin conexión ni límites de API.")
+  lines.push("")
+  lines.push("📋  Configuración recomendada para tu PC:")
+  lines.push(`     Perfil:       ${recommendation.profile}`)
+  lines.push(`     Modo agentes: ${recommendation.agentMode}`)
+  lines.push(`     Concurrencia: ${recommendation.maxConcurrency} agente(s) simultáneo(s)`)
+  lines.push(`     Modelo:       ${recommendation.recommendedModel}`)
+  lines.push(`     Razón:        ${recommendation.profileReason}`)
+  lines.push("")
+  if (!ollama.installed) {
+    lines.push("⚠️  IMPORTANTE: Ollama NO está instalado.")
+    lines.push("     Boost necesita Ollama para funcionar. Instálalo con:")
+    lines.push("")
+    lines.push("     curl -fsSL https://ollama.com/install.sh | sh")
+    lines.push("")
+    lines.push("     Luego descarga un modelo:")
+    lines.push(`     ollama pull ${recommendation.recommendedModel}`)
+    lines.push("")
+    lines.push("     Después, ejecuta:  opl boost setup")
+    lines.push("")
+  }
+  lines.push("✅  Configuración guardada en prompt-lang.json")
+  lines.push("    Puedes re-ejecutar:  opl boost setup --re-detect")
+  lines.push("")
+  return lines.join("\n")
+}
+// ──────────────────────────────────────────────
+// Monitoreo en tiempo real
+// ──────────────────────────────────────────────
+/**
+ * Estado actual del sistema en tiempo real.
+ * A diferencia de detectHardware() (que es estático), esto se llama
+ * en cada solicitud de agente para decidir si ejecutar ahora o esperar.
+ */
+// Estimated RAM footprint of Ollama models (typical values)
+const MODEL_RAM_ESTIMATE = {
+  "llama3.2:latest": 2.5,   // GB
+  "qwen2.5-coder:7b": 5.5,  // GB
+  "llama3:latest": 6.0,     // GB
+  "deepseek-coder:7b": 5.0, // GB
+  "default": 4.0,           // GB, generic fallback estimate
+}
+export function getRuntimeStatus() {
+  const freeBytes = os.freemem()
+  const totalBytes = os.totalmem()
+  const freeGB = bytesToGB(freeBytes)
+  const totalGB = bytesToGB(totalBytes)
+  const usedGB = totalGB - freeGB
+  const usagePercent = Math.round((1 - freeBytes / totalBytes) * 100)
+  // Carga del sistema
+  const loadAvg = os.loadavg ? os.loadavg() : [0, 0, 0]
+  const cpuCount = os.cpus().length
+  return {
+    timestamp: new Date().toISOString(),
+    ram: {
+      totalGB,
+      freeGB,
+      usedGB,
+      usagePercent,
+      isCritical: freeGB < 2,
+      isLow: freeGB < 4,
+      isHealthy: freeGB >= 6,
+    },
+    cpu: {
+      cores: cpuCount,
+      loadAvg1m: loadAvg[0]?.toFixed(1) || "0.0",
+      loadAvg5m: loadAvg[1]?.toFixed(1) || "0.0",
+      isLoaded: cpuCount > 0 && loadAvg[0] > cpuCount * 0.8,
+    },
+    safeToRunAgent: freeGB >= 4,
+    warning: null,
+  }
+}
+/**
+ * Determina cuántos agentes pueden ejecutarse AHORA sin saturar el PC.
+ * Considera RAM libre actual + carga actual de CPU.
+ *
+ * @param {object} hardwareConfig - Config de hardware guardada (opcional)
+ * @returns {{ safe: boolean, maxAgents: number, mode: string, reason: string }}
+ */
+export function getSafeParallelism(hardwareConfig) {
+  const runtime = getRuntimeStatus()
+  // Reglas basadas en RAM libre ACTUAL
+  if (runtime.ram.isCritical) {
+    return {
+      safe: false,
+      maxAgents: 0,
+      mode: "blocked",
+      reason: `RAM crítica: ${runtime.ram.freeGB}GB libres. Libera memoria antes de ejecutar agentes.`,
+    }
+  }
+  if (runtime.ram.isLow) {
+    return {
+      safe: true,
+      maxAgents: 1,
+      mode: "queue",
+      reason: `RAM baja: ${runtime.ram.freeGB}GB libres. Solo 1 agente en cola.`,
+    }
+  }
+  if (runtime.cpu.isLoaded) {
+    return {
+      safe: true,
+      maxAgents: 1,
+      mode: "queue",
+      reason: `CPU cargada (load: ${runtime.cpu.loadAvg1m}). Reduciendo a 1 agente.`,
+    }
+  }
+  // RAM saludable — usar configuración guardada
+  const configuredMax = hardwareConfig?.agentPool?.maxConcurrency || 2
+  if (runtime.ram.freeGB >= 16 && configuredMax >= 2) {
+    return {
+      safe: true,
+      maxAgents: Math.min(configuredMax, 3),
+      mode: "parallel",
+      reason: `RAM saludable: ${runtime.ram.freeGB}GB libres. Hasta ${Math.min(configuredMax, 3)} agentes en paralelo.`,
+    }
+  }
+  if (runtime.ram.freeGB >= 8) {
+    return {
+      safe: true,
+      maxAgents: Math.min(configuredMax, 2),
+      mode: "parallel",
+      reason: `RAM suficiente: ${runtime.ram.freeGB}GB libres. Hasta ${Math.min(configuredMax, 2)} agentes.`,
+    }
+  }
+  return {
+    safe: true,
+    maxAgents: 1,
+    mode: "queue",
+    reason: `RAM moderada: ${runtime.ram.freeGB}GB libres. 1 agente por vez.`,
+  }
+}
+/**
+ * Genera una advertencia si el sistema está muy cargado.
+ * @returns {string|null} Mensaje de advertencia o null si todo bien
+ */
+export function getRAMWarning() {
+  const runtime = getRuntimeStatus()
+  if (runtime.ram.isCritical) {
+    return (
+      `⚠️  RAM CRÍTICA: Solo ${runtime.ram.freeGB}GB libres de ${runtime.ram.totalGB}GB (${runtime.ram.usagePercent}% usado).\n` +
+      `    Los agentes Boost NO se ejecutarán hasta que liberes memoria.\n` +
+      `    → Cierra navegadores, Docker, o programas pesados.\n` +
+      `    → Luego ejecuta: opl boost check`
+    )
+  }
+  if (runtime.ram.isLow && runtime.cpu.isLoaded) {
+    return (
+      `⚠️  RAM baja (${runtime.ram.freeGB}GB libres) y CPU cargada (load: ${runtime.cpu.loadAvg1m}).\n` +
+      `    Los agentes se ejecutarán en MODO COLA (1 por vez) para no saturar.\n` +
+      `    Será más lento, pero tu PC se mantendrá usable.`
+    )
+  }
+  if (runtime.ram.isLow) {
+    return (
+      `ℹ️  RAM baja: ${runtime.ram.freeGB}GB libres de ${runtime.ram.totalGB}GB.\n` +
+      `    Modo cola activado. Si notas lentitud, cierra otras aplicaciones.`
+    )
+  }
+  if (runtime.cpu.isLoaded) {
+    return (
+      `ℹ️  CPU cargada (load: ${runtime.cpu.loadAvg1m}/${runtime.cpu.cores} núcleos).\n` +
+      `    Los agentes se ralentizarán automáticamente.`
+    )
+  }
+  return null
+}
+/**
+ * Calcula cuánta RAM消耗rá un modelo específico en Ollama.
+ * @param {string} modelName - Nombre del modelo
+ * @returns {number} RAM estimada en GB
+ */
+export function estimateModelRAM(modelName) {
+  const name = modelName?.toLowerCase() || ""
+  for (const [key, gb] of Object.entries(MODEL_RAM_ESTIMATE)) {
+    if (name.includes(key.replace(":latest", ""))) return gb
+  }
+  return MODEL_RAM_ESTIMATE.default
+}