@cccarv82/freya 3.5.1 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/web.js CHANGED
@@ -98,6 +98,283 @@ function newestFile(dir, prefix) {
98
98
  return files[0]?.p || null;
99
99
  }
100
100
 
101
// ---------------------------------------------------------------------------
// Daily-logs ↔ SQLite sync: keeps the daily_logs table in sync with .md files
// ---------------------------------------------------------------------------
/**
 * Mirror every `logs/daily/YYYY-MM-DD.md` file into the `daily_logs` table.
 * Upserts are wrapped in a single transaction; failures are logged and
 * swallowed so sync never blocks startup.
 * @param {string} workspaceDir - workspace root directory
 * @returns {{synced: number, toEmbed: Array<{date: string, content: string}>}}
 */
function syncDailyLogs(workspaceDir) {
  try {
    const logsDir = path.join(workspaceDir, 'logs', 'daily');
    if (!exists(logsDir)) return { synced: 0, toEmbed: [] };

    const dailyName = /^\d{4}-\d{2}-\d{2}\.md$/;
    const logFiles = fs.readdirSync(logsDir).filter((name) => dailyName.test(name));
    if (logFiles.length === 0) return { synced: 0, toEmbed: [] };

    const upsert = dl.db.prepare(`
      INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
      ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
    `);

    // Collect every synced log so the caller can queue embedding generation.
    const toEmbed = [];
    const writeAll = dl.db.transaction((names) => {
      for (const name of names) {
        const date = name.replace('.md', '');
        const content = fs.readFileSync(path.join(logsDir, name), 'utf8');
        upsert.run(date, content);
        toEmbed.push({ date, content });
      }
    });
    writeAll(logFiles);

    return { synced: toEmbed.length, toEmbed };
  } catch (e) {
    console.error('[sync] Daily-logs sync failed:', e.message);
    return { synced: 0, toEmbed: [] };
  }
}
134
+
135
// ---------------------------------------------------------------------------
// Background embedding generation — runs async, never blocks
// ---------------------------------------------------------------------------
/**
 * Generate embeddings for a batch of items, one at a time, logging (never
 * throwing) on per-item failure. Daily logs are always regenerated because
 * they change frequently; tasks/blockers are skipped when already embedded.
 * @param {string} workspaceDir - workspace root directory
 * @param {Array<{type: 'daily_log'|'task'|'blocker', id: string, text: string}>} items
 */
async function generateEmbeddingsBackground(workspaceDir, items) {
  if (!items || !items.length) return;
  try {
    const manager = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
    let totalChunks = 0;
    for (const item of items) {
      try {
        // Only non-daily-log items may be skipped when embeddings exist.
        const alreadyDone = item.type !== 'daily_log' && manager.hasEmbeddings(item.type, item.id);
        if (alreadyDone) continue;
        totalChunks += await manager.generateEmbeddings(item.type, item.id, item.text);
      } catch (err) {
        console.error(`[embeddings] Failed for ${item.type}/${item.id}:`, err.message);
      }
    }
    if (totalChunks > 0) console.log(`[embeddings] Generated ${totalChunks} embedding chunks`);
  } catch (err) {
    console.error('[embeddings] Background generation failed:', err.message);
  }
}
160
+
161
// ---------------------------------------------------------------------------
// Build structured data context (tasks, blockers, projects) — always compact
// ---------------------------------------------------------------------------
/**
 * Render pending tasks, open blockers, and active projects as compact
 * bracketed sections for the LLM prompt. Each query is best-effort:
 * a failing/missing table simply omits its section.
 * @returns {string} newline-joined sections (may be empty)
 */
function buildStructuredContext() {
  const sections = [];

  // Pending tasks (most recent 50)
  try {
    const tasks = dl.db.prepare("SELECT id, description, category, status, project_slug, created_at, due_date FROM tasks WHERE status = 'PENDING' ORDER BY created_at DESC LIMIT 50").all();
    if (tasks.length === 0) {
      sections.push('\n[TASKS: nenhuma task pendente registrada no sistema]');
    } else {
      sections.push(`\n[TASKS PENDENTES (${tasks.length})]`);
      for (const t of tasks) {
        const due = t.due_date ? ', prazo: ' + t.due_date : '';
        sections.push(`• [${t.category}] ${t.description} (projeto: ${t.project_slug || 'N/A'}${due})`);
      }
    }
  } catch { /* ignore */ }

  // Open or mitigating blockers (most recent 30)
  try {
    const blockers = dl.db.prepare("SELECT id, title, severity, status, project_slug, owner, next_action, created_at FROM blockers WHERE status IN ('OPEN','MITIGATING') ORDER BY created_at DESC LIMIT 30").all();
    if (blockers.length === 0) {
      sections.push('\n[BLOCKERS: nenhum blocker aberto registrado no sistema]');
    } else {
      sections.push(`\n[BLOCKERS ABERTOS (${blockers.length})]`);
      for (const b of blockers) {
        sections.push(`• [${b.severity}] ${b.title} (projeto: ${b.project_slug || 'N/A'}, owner: ${b.owner || '?'})`);
      }
    }
  } catch { /* ignore */ }

  // Active projects (section omitted entirely when none exist)
  try {
    const projects = dl.db.prepare("SELECT slug, client, name FROM projects WHERE is_active = 1 ORDER BY slug").all();
    if (projects.length > 0) {
      sections.push(`\n[PROJETOS ATIVOS (${projects.length})]`);
      for (const p of projects) {
        sections.push(`• ${p.slug} — ${p.name || p.client || 'sem nome'}`);
      }
    }
  } catch { /* ignore */ }

  return sections.join('\n');
}
206
+
207
// ---------------------------------------------------------------------------
// Smart context builder: uses RAG when available, falls back to raw logs
// ---------------------------------------------------------------------------
/**
 * Build the data context injected into the Orchestrator prompt:
 *   1. semantic-search (RAG) chunks when embeddings exist,
 *   2. recent raw daily logs when RAG is unavailable or thin,
 *   3. always the compact structured data (tasks/blockers/projects).
 * @param {string} workspaceDir - workspace root directory
 * @param {string} query - user query driving the semantic search
 * @returns {Promise<string>} newline-joined context sections
 */
async function buildSmartContext(workspaceDir, query) {
  const parts = [];
  const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
  const TOKEN_BUDGET = 12000; // chars budget for log/RAG context
  let usedBudget = 0;

  // 1. Try semantic search first (only meaningful if embeddings exist)
  let ragUsed = false;
  try {
    const embCount = dm.getEmbeddingCount();
    if (embCount > 0) {
      const ragResults = await dm.semanticSearch(query, 15);
      const relevant = ragResults.filter(r => r.score > 0.25); // drop weak matches
      if (relevant.length > 0) {
        ragUsed = true;
        parts.push('\n[CONTEXTO RELEVANTE — Busca Semântica]');
        for (const r of relevant) {
          const chunk = `\n--- ${r.reference_type} (${r.reference_id}) [relevância: ${Math.round(r.score * 100)}%] ---\n${r.text_chunk}`;
          if (usedBudget + chunk.length > TOKEN_BUDGET) break;
          parts.push(chunk);
          usedBudget += chunk.length;
        }
      }
    }
  } catch (ragErr) {
    console.error('[context] RAG search failed:', ragErr.message);
  }

  // 2. Fallback: if RAG not available or returned few results, include recent daily logs
  if (!ragUsed || usedBudget < TOKEN_BUDGET / 3) {
    try {
      const logsDir = path.join(workspaceDir, 'logs', 'daily');
      if (exists(logsDir)) {
        const maxDays = ragUsed ? 3 : 5; // fewer if RAG already provided some context
        const files = fs.readdirSync(logsDir)
          .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
          .sort()
          .slice(-maxDays);
        if (files.length) {
          parts.push('\n[DAILY LOGS — ÚLTIMOS ' + files.length + ' DIAS]');
          for (let i = 0; i < files.length; i++) {
            const remaining = TOKEN_BUDGET - usedBudget;
            if (remaining <= 0) break; // budget exhausted — stop before reading more files
            const file = files[i];
            const date = file.replace('.md', '');
            const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
            // FIX: split the *remaining* budget over the *remaining* logs.
            // The previous code divided by the total file count on every
            // iteration, so each successive log was allotted a progressively
            // smaller share and the budget was systematically under-used.
            const maxPerLog = Math.floor(remaining / (files.length - i));
            const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
            parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
            usedBudget += trimmed.length;
          }
        }
      }
    } catch (e) {
      console.error('[context] Failed to read daily logs:', e.message);
    }
  }

  // 3. Always include structured data (compact, always useful)
  parts.push(buildStructuredContext());

  return parts.join('\n');
}
271
+
272
// ---------------------------------------------------------------------------
// Background auto-ingest from chat: extracts tasks/blockers from conversation
// ---------------------------------------------------------------------------
// Heuristic gates: INGEST_SIGNALS must match (actionable verbs/nouns, pt-BR +
// en), while QUERY_ONLY detects messages that are pure questions/lookups.
const INGEST_SIGNALS = /\b(criar|crie|registr|task|tarefa|blocker|impediment|problem|urgente|preciso|agendar|schedule|delegat|prioriz|adicionar?|anotar?|lembr|reminder|todo|pendente|pendência)\b/i;
const QUERY_ONLY = /^(o que|como|quando|qual|quais|quem|onde|por que|porque|quantos?|existe|tem |show|list|status|resumo|report|relatório|buscar?|search|find)/i;

/**
 * Fire-and-forget ingestion: ask the LLM planner to extract explicit tasks
 * and blockers from a chat message, then persist them to SQLite with 24h
 * dedup. All failures are logged and swallowed — this must never surface
 * to the chat response path.
 * @param {string} workspaceDir - workspace root directory
 * @param {string} userQuery - raw user chat message
 */
async function backgroundIngestFromChat(workspaceDir, userQuery) {
  // Skip pure queries — only ingest actionable messages
  if (!userQuery || userQuery.length < 25) return;
  if (QUERY_ONLY.test(userQuery.trim()) && !INGEST_SIGNALS.test(userQuery)) return;
  if (!INGEST_SIGNALS.test(userQuery)) return;

  try {
    const cmd = process.env.COPILOT_CMD || 'copilot';

    // Build a minimal planner prompt
    const schema = {
      actions: [
        { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
        { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
      ]
    };
    const prompt = `Você é o planner do sistema F.R.E.Y.A.\n\nAnalise o texto abaixo e extraia APENAS tarefas e blockers explícitos.\nSe NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}\nRetorne APENAS JSON válido no formato: ${JSON.stringify(schema)}\nNÃO use code fences. NÃO inclua texto extra.\n\nTEXTO:\n${userQuery}\n`;

    const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
    const baseArgs = ['-s', '--no-color', '--stream', 'off', '-p', prompt];

    const r = await run(cmd, baseArgs, workspaceDir, agentEnv);
    const out = (r.stdout + r.stderr).trim();
    if (r.code !== 0 || !out) return;

    // Try to parse JSON plan (second attempt escapes raw control chars)
    const jsonText = extractFirstJsonObject(out) || out;
    let plan;
    try {
      plan = JSON.parse(jsonText);
    } catch {
      try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch { return; }
    }
    // FIX: JSON.parse can legitimately yield null/primitives ("null", "42");
    // previously plan.actions then threw a TypeError that was logged as a
    // scary ingestion failure. Treat it as "no plan" instead.
    if (!plan || typeof plan !== 'object') return;

    const actions = Array.isArray(plan.actions) ? plan.actions : [];
    const taskActions = actions.filter(a => a && a.type === 'create_task' && a.description);
    const blockerActions = actions.filter(a => a && a.type === 'create_blocker' && a.title);

    if (!taskActions.length && !blockerActions.length) return;

    // Apply actions directly to SQLite
    const slugMap = readProjectSlugMap(workspaceDir);
    const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
    // FIX: validate severity like category is validated — the LLM output is
    // untrusted and must not inject arbitrary severity strings into the DB.
    const validSeverities = new Set(['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']);
    const insertTask = dl.db.prepare(`INSERT INTO tasks (id, project_slug, description, category, status, metadata) VALUES (?, ?, ?, ?, ?, ?)`);
    const insertBlocker = dl.db.prepare(`INSERT INTO blockers (id, project_slug, title, severity, status, owner, next_action, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);

    let tasksCreated = 0, blockersCreated = 0;
    const embeddingQueue = [];

    const ingestTx = dl.db.transaction(() => {
      // Dedup check: skip anything matching a task/blocker created in the last 24h
      const recentTasks = dl.db.prepare("SELECT description FROM tasks WHERE created_at >= datetime('now', '-1 day')").all();
      const existingKeys = new Set(recentTasks.map(t => sha1(normalizeTextForKey(t.description))));
      const recentBlockers = dl.db.prepare("SELECT title FROM blockers WHERE created_at >= datetime('now', '-1 day')").all();
      const existingBKeys = new Set(recentBlockers.map(b => sha1(normalizeTextForKey(b.title))));

      for (const a of taskActions) {
        const desc = normalizeWhitespace(a.description);
        if (!desc) continue;
        const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
        const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
        if (existingKeys.has(key)) continue;

        const id = `t-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
        const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
        const metadata = JSON.stringify({ priority: a.priority || 'medium' });
        insertTask.run(id, projectSlug || null, desc, category, 'PENDING', metadata);
        existingKeys.add(key); // prevent duplicates within the same batch
        tasksCreated++;
        embeddingQueue.push({ type: 'task', id, text: desc });
      }

      for (const a of blockerActions) {
        const title = normalizeWhitespace(a.title);
        if (!title) continue;
        const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title, slugMap);
        const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
        if (existingBKeys.has(key)) continue;

        const id = `b-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
        const rawSeverity = String(a.severity || 'MEDIUM').toUpperCase();
        const severity = validSeverities.has(rawSeverity) ? rawSeverity : 'MEDIUM';
        const metadata = JSON.stringify({ description: a.notes || title });
        insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
        existingBKeys.add(key); // prevent duplicates within the same batch
        blockersCreated++;
        embeddingQueue.push({ type: 'blocker', id, text: title + ' ' + (a.notes || '') });
      }
    });
    ingestTx();

    if (tasksCreated || blockersCreated) {
      console.log(`[chat-ingest] Auto-ingested ${tasksCreated} tasks, ${blockersCreated} blockers from chat`);
      // Generate embeddings in background (fire-and-forget)
      generateEmbeddingsBackground(workspaceDir, embeddingQueue).catch(() => {});
    }
  } catch (err) {
    console.error('[chat-ingest] Background ingestion failed:', err.message);
  }
}
377
+
101
378
  function settingsPath(workspaceDir) {
102
379
  return path.join(workspaceDir, 'data', 'settings', 'settings.json');
103
380
  }
@@ -2755,6 +3032,21 @@ async function cmdWeb({ port, dir, open, dev }) {
2755
3032
  await autoUpdate(wsDir);
2756
3033
  } catch { /* non-fatal */ }
2757
3034
 
3035
+ // Sync daily log .md files → SQLite daily_logs table on startup
3036
+ try {
3037
+ const { synced, toEmbed } = syncDailyLogs(wsDir);
3038
+ if (synced > 0) console.log(`[FREYA] Synced ${synced} daily logs to SQLite`);
3039
+ // Generate embeddings in background (non-blocking, last 30 days max)
3040
+ if (toEmbed.length > 0) {
3041
+ const recentLogs = toEmbed.slice(-30).map(l => ({ type: 'daily_log', id: l.date, text: l.content }));
3042
+ generateEmbeddingsBackground(wsDir, recentLogs).catch(err => {
3043
+ console.error('[FREYA] Embedding generation failed (non-fatal):', err.message);
3044
+ });
3045
+ }
3046
+ } catch (e) {
3047
+ console.error('[FREYA] Warning: daily-logs sync failed:', e.message || String(e));
3048
+ }
3049
+
2758
3050
  const host = '127.0.0.1';
2759
3051
 
2760
3052
  const server = http.createServer(async (req, res) => {
@@ -3736,6 +4028,17 @@ async function cmdWeb({ port, dir, open, dev }) {
3736
4028
  }
3737
4029
  }
3738
4030
 
4031
+ // Sync this daily log file to SQLite so chat/RAG can find it
4032
+ try {
4033
+ const logContent = fs.readFileSync(file, 'utf8');
4034
+ const upsert = dl.db.prepare(`INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?) ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown`);
4035
+ upsert.run(d, logContent);
4036
+ // Regenerate embeddings for this log in background
4037
+ generateEmbeddingsBackground(workspaceDir, [{ type: 'daily_log', id: d, text: logContent }]).catch(() => {});
4038
+ } catch (syncErr) {
4039
+ console.error('[inbox] Failed to sync daily log to SQLite:', syncErr.message);
4040
+ }
4041
+
3739
4042
  return safeJson(res, 200, { ok: true, file: path.relative(workspaceDir, file).replace(/\\/g, '/'), appended: true });
3740
4043
  }
3741
4044
 
@@ -4097,6 +4400,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4097
4400
  insertTask.run(id, projectSlug || null, description, category, 'PENDING', metadata);
4098
4401
 
4099
4402
  applied.tasks++;
4403
+ if (!applied._embedQueue) applied._embedQueue = [];
4404
+ applied._embedQueue.push({ type: 'task', id, text: description });
4100
4405
  existingTaskKeys24h.add(key); // prevent duplicates within same batch
4101
4406
  continue;
4102
4407
  }
@@ -4117,6 +4422,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4117
4422
  insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
4118
4423
 
4119
4424
  applied.blockers++;
4425
+ if (!applied._embedQueue) applied._embedQueue = [];
4426
+ applied._embedQueue.push({ type: 'blocker', id, text: title + ' ' + (notes || '') });
4120
4427
  existingBlockerKeys24h.add(key); // prevent duplicates within same batch
4121
4428
  continue;
4122
4429
  }
@@ -4137,6 +4444,12 @@ async function cmdWeb({ port, dir, open, dev }) {
4137
4444
 
4138
4445
  applyTx(actions);
4139
4446
 
4447
+ // Generate embeddings for newly created tasks/blockers (background, non-blocking)
4448
+ if (applied._embedQueue && applied._embedQueue.length > 0) {
4449
+ generateEmbeddingsBackground(workspaceDir, applied._embedQueue).catch(() => {});
4450
+ delete applied._embedQueue; // don't send internal queue in response
4451
+ }
4452
+
4140
4453
  // Auto-suggest reports when planner didn't include any
4141
4454
  if (!applied.reportsSuggested.length) {
4142
4455
  const sug = [];
@@ -4277,20 +4590,11 @@ async function cmdWeb({ port, dir, open, dev }) {
4277
4590
  return `\n\n---\nFILE: ${rel}\n---\n` + fs.readFileSync(p, 'utf8');
4278
4591
  }).join('');
4279
4592
 
4280
- // V2 RAG Context (graceful fallback if embedder/sharp not available)
4281
- const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
4282
- let ragContext = '';
4283
- try {
4284
- const ragResults = await dm.semanticSearch(query, 12);
4285
- if (ragResults.length > 0) {
4286
- ragContext = '\n\n[MEMÓRIA DE LONGO PRAZO RECUPERADA (RAG VIA SQLITE)]\n';
4287
- for (const r of ragResults) {
4288
- ragContext += `\n---\nFONTE: ${r.reference_type} -> ID: ${r.reference_id} (Score: ${r.score.toFixed(3)})\nCONTEÚDO:\n${r.text_chunk}\n`;
4289
- }
4290
- }
4291
- } catch (ragErr) {
4292
- console.error('[oracle] RAG search failed (embedder/sharp unavailable), continuing without context:', ragErr.message);
4293
- }
4593
+ // Ensure daily logs are synced to SQLite before querying
4594
+ try { syncDailyLogs(workspaceDir); } catch { /* non-fatal */ }
4595
+
4596
+ // Build smart context: RAG (if embeddings exist) + fallback to raw logs + structured data
4597
+ const dataContext = await buildSmartContext(workspaceDir, query);
4294
4598
 
4295
4599
  // Build image context for the prompt (Copilot reads files via --allow-all-tools)
4296
4600
  let imageContext = '';
@@ -4301,8 +4605,23 @@ async function cmdWeb({ port, dir, open, dev }) {
4301
4605
  }
4302
4606
  }
4303
4607
 
4304
- // System instructions (small, always fits in -p)
4305
- const oracleSysInstr = `Você é o Orchestrator do sistema F.R.E.Y.A.\n\nVocê NÃO é o Oracle. Você é o agente principal que COORDENA todos os sub-agentes.\nSiga o fluxo definido no master.mdc: analise o intent do usuário, execute o plano internamente (Oracle para buscar dados, SM Agent para sintetizar), e SEMPRE retorne uma resposta FINAL em linguagem natural, estruturada e consultiva para o usuário.\n\nNUNCA exponha JSONs brutos, hierarquia interna de agentes, ou peça ao usuário para "chamar outro agente".\nVocê DEVE sintetizar os dados recuperados em uma resposta clara, organizada e útil.\n\nIdioma: Português do Brasil.\n${ragContext}${imageContext}`;
4608
+ // System instructions includes REAL data context so the Orchestrator
4609
+ // can synthesize answers without needing to "call" sub-agents at runtime
4610
+ const oracleSysInstr = `Você é FREYA — Assistente Responsiva com Otimização Aprimorada.
4611
+
4612
+ PAPEL: Você é o agente principal do sistema. Responda SEMPRE em linguagem natural, estruturada e consultiva.
4613
+
4614
+ REGRAS ABSOLUTAS:
4615
+ - NUNCA exponha JSONs brutos, nomes de agentes internos (Oracle, SM Agent, Ingestor), ou hierarquia de roteamento.
4616
+ - NUNCA peça ao usuário para "chamar outro agente" ou "invocar o Orchestrator".
4617
+ - NUNCA diga "como agente X, não posso...". Você é FREYA, um sistema único e coeso.
4618
+ - SEMPRE sintetize os dados abaixo em respostas úteis, organizadas e em português brasileiro.
4619
+ - Use a estrutura: Contexto → Análise → Recomendações → Próximos passos.
4620
+ - Termine com: — FREYA\\nAssistente Responsiva com Otimização Aprimorada
4621
+
4622
+ DADOS REAIS DO WORKSPACE (use estes dados para responder):
4623
+ ${dataContext}
4624
+ ${imageContext}`;
4306
4625
 
4307
4626
  const cmd = process.env.COPILOT_CMD || 'copilot';
4308
4627
 
@@ -4318,15 +4637,15 @@ async function cmdWeb({ port, dir, open, dev }) {
4318
4637
  }
4319
4638
  }
4320
4639
 
4321
- // ENAMETOOLONG fix: when prompt is large, write user query to temp file
4640
+ // ENAMETOOLONG fix: when prompt is large, write full prompt to temp file
4322
4641
  const fullOraclePrompt = `${oracleSysInstr}\n\nREGRAS:${rulesText}\n\nCONSULTA DO USUÁRIO:\n${query}\n`;
4323
4642
  const SAFE_ARG_LEN = 24000;
4324
4643
  let oracleTmpFile = null;
4325
4644
  let r;
4326
4645
  if (fullOraclePrompt.length > SAFE_ARG_LEN) {
4327
- oracleTmpFile = path.join(os.tmpdir(), `freya-oracle-input-${Date.now()}.txt`);
4328
- fs.writeFileSync(oracleTmpFile, query, 'utf8');
4329
- const filePrompt = `${oracleSysInstr}\n\nREGRAS:${rulesText}\n\nCONSULTA DO USUÁRIO:\nA consulta do usuário é grande e foi salva no arquivo abaixo. LEIA o conteúdo completo do arquivo e responda com base nele.\nARQUIVO: ${oracleTmpFile}\n\nIMPORTANTE: NÃO descreva o arquivo. LEIA e RESPONDA à consulta.\n`;
4646
+ oracleTmpFile = path.join(os.tmpdir(), `freya-orchestrator-${Date.now()}.txt`);
4647
+ fs.writeFileSync(oracleTmpFile, fullOraclePrompt, 'utf8');
4648
+ const filePrompt = `Leia o arquivo abaixo que contém suas instruções completas, regras, dados do workspace e a consulta do usuário. Siga TODAS as instruções contidas nele.\nARQUIVO: ${oracleTmpFile}\n\nIMPORTANTE: Leia o arquivo INTEIRO e responda à consulta do usuário que está no final do arquivo.`;
4330
4649
  copilotArgs.push('--add-dir', os.tmpdir());
4331
4650
  copilotArgs.push('--allow-all-tools', '-p', filePrompt);
4332
4651
  r = await run(cmd, copilotArgs, workspaceDir, oracleEnv);
@@ -4339,7 +4658,13 @@ async function cmdWeb({ port, dir, open, dev }) {
4339
4658
  if (r.code !== 0) {
4340
4659
  return safeJson(res, 200, { ok: false, answer: 'Falha no processamento do agente FREYA:\n' + (out || 'Exit code != 0'), sessionId });
4341
4660
  }
4342
- return safeJson(res, 200, { ok: true, answer: out, sessionId });
4661
+ // Send response immediately
4662
+ safeJson(res, 200, { ok: true, answer: out, sessionId });
4663
+ // Fire-and-forget: auto-ingest tasks/blockers from user message
4664
+ backgroundIngestFromChat(workspaceDir, query).catch(err => {
4665
+ console.error('[chat-ingest] Background failed:', err.message);
4666
+ });
4667
+ return;
4343
4668
  } catch (e) {
4344
4669
  return safeJson(res, 200, {
4345
4670
  ok: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cccarv82/freya",
3
- "version": "3.5.1",
3
+ "version": "3.6.0",
4
4
  "description": "Personal AI Assistant with local-first persistence",
5
5
  "scripts": {
6
6
  "health": "node scripts/validate-data.js && node scripts/validate-structure.js",
@@ -325,6 +325,12 @@ class DataLayer {
325
325
  embedding BLOB NOT NULL, /* Stored as Buffer of Float32Array */
326
326
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
327
327
  );
328
+
329
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_emb_ref
330
+ ON document_embeddings(reference_type, reference_id, chunk_index);
331
+
332
+ CREATE INDEX IF NOT EXISTS idx_doc_emb_type
333
+ ON document_embeddings(reference_type);
328
334
  `);
329
335
 
330
336
  // --- Migrations for existing databases ---
@@ -229,6 +229,95 @@ class DataManager {
229
229
  return NaN;
230
230
  }
231
231
 
232
+ // --- Embedding Generation ---
233
+
234
+ /**
235
+ * Split text into chunks suitable for embedding (~400-600 chars each).
236
+ * Splits on markdown headings, then paragraphs, then sentences.
237
+ */
238
+ chunkText(text, maxChunkSize = 500) {
239
+ if (!text || text.length <= maxChunkSize) return [text].filter(Boolean);
240
+
241
+ const chunks = [];
242
+ // First split on markdown ## headings
243
+ const sections = text.split(/(?=^## )/m).filter(s => s.trim());
244
+
245
+ for (const section of sections) {
246
+ if (section.length <= maxChunkSize) {
247
+ chunks.push(section.trim());
248
+ continue;
249
+ }
250
+ // Split long sections on double newlines (paragraphs)
251
+ const paragraphs = section.split(/\n\n+/).filter(p => p.trim());
252
+ let buffer = '';
253
+ for (const para of paragraphs) {
254
+ if (buffer.length + para.length + 2 > maxChunkSize && buffer) {
255
+ chunks.push(buffer.trim());
256
+ buffer = '';
257
+ }
258
+ buffer += (buffer ? '\n\n' : '') + para;
259
+ }
260
+ if (buffer.trim()) chunks.push(buffer.trim());
261
+ }
262
+
263
+ return chunks.filter(c => c.length > 10); // skip tiny fragments
264
+ }
265
+
266
+ /**
267
+ * Generate embeddings for a piece of content and store in document_embeddings.
268
+ * Deletes existing embeddings for (referenceType, referenceId) first to avoid stale data.
269
+ * @param {string} referenceType - 'daily_log', 'task', or 'blocker'
270
+ * @param {string} referenceId - unique ID (date for logs, task/blocker id)
271
+ * @param {string} text - content to embed
272
+ */
273
+ async generateEmbeddings(referenceType, referenceId, text) {
274
+ if (!text || !text.trim()) return 0;
275
+
276
+ const chunks = this.chunkText(text);
277
+ if (!chunks.length) return 0;
278
+
279
+ // Delete existing embeddings for this reference
280
+ dl.db.prepare('DELETE FROM document_embeddings WHERE reference_type = ? AND reference_id = ?')
281
+ .run(referenceType, referenceId);
282
+
283
+ const insert = dl.db.prepare(`
284
+ INSERT INTO document_embeddings (reference_type, reference_id, chunk_index, text_chunk, embedding)
285
+ VALUES (?, ?, ?, ?, ?)
286
+ `);
287
+
288
+ let count = 0;
289
+ for (let i = 0; i < chunks.length; i++) {
290
+ try {
291
+ const vector = await defaultEmbedder.embedText(chunks[i]);
292
+ const buffer = defaultEmbedder.vectorToBuffer(vector);
293
+ insert.run(referenceType, referenceId, i, chunks[i], buffer);
294
+ count++;
295
+ } catch (err) {
296
+ console.error(`[embeddings] Failed to embed chunk ${i} of ${referenceType}/${referenceId}:`, err.message);
297
+ }
298
+ }
299
+ return count;
300
+ }
301
+
302
+ /**
303
+ * Check if embeddings exist and are up-to-date for a reference.
304
+ * @returns {boolean} true if embeddings exist
305
+ */
306
+ hasEmbeddings(referenceType, referenceId) {
307
+ const row = dl.db.prepare(
308
+ 'SELECT COUNT(*) as c FROM document_embeddings WHERE reference_type = ? AND reference_id = ?'
309
+ ).get(referenceType, referenceId);
310
+ return row && row.c > 0;
311
+ }
312
+
313
+ /**
314
+ * Get total embedding count (for checking if RAG is available).
315
+ */
316
+ getEmbeddingCount() {
317
+ const row = dl.db.prepare('SELECT COUNT(*) as c FROM document_embeddings').get();
318
+ return row ? row.c : 0;
319
+ }
320
+
232
321
  // --- RAG (Vector Search) ---
233
322
  async semanticSearch(query, topK = 10) {
234
323
  const queryVector = await defaultEmbedder.embedText(query);