@cccarv82/freya 3.5.2 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/web.js CHANGED
@@ -104,9 +104,9 @@ function newestFile(dir, prefix) {
104
104
  function syncDailyLogs(workspaceDir) {
105
105
  try {
106
106
  const logsDir = path.join(workspaceDir, 'logs', 'daily');
107
- if (!exists(logsDir)) return 0;
107
+ if (!exists(logsDir)) return { synced: 0, toEmbed: [] };
108
108
  const files = fs.readdirSync(logsDir).filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f));
109
- if (!files.length) return 0;
109
+ if (!files.length) return { synced: 0, toEmbed: [] };
110
110
 
111
111
  const upsert = dl.db.prepare(`
112
112
  INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
@@ -114,88 +114,87 @@ function syncDailyLogs(workspaceDir) {
114
114
  `);
115
115
 
116
116
  let synced = 0;
117
+ const toEmbed = []; // collect logs that need embedding
117
118
  const tx = dl.db.transaction((fileList) => {
118
119
  for (const file of fileList) {
119
120
  const date = file.replace('.md', '');
120
121
  const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
121
122
  upsert.run(date, content);
123
+ toEmbed.push({ date, content });
122
124
  synced++;
123
125
  }
124
126
  });
125
127
  tx(files);
126
- return synced;
128
+ return { synced, toEmbed };
127
129
  } catch (e) {
128
130
  console.error('[sync] Daily-logs sync failed:', e.message);
129
- return 0;
131
+ return { synced: 0, toEmbed: [] };
130
132
  }
131
133
  }
132
134
 
133
135
  // ---------------------------------------------------------------------------
134
- // Build real data context for Orchestrator (chat) — feeds SQLite + daily logs
135
- // as plain-text so the LLM has actual data to synthesize answers from
136
+ // Background embedding generation — runs async, never blocks
136
137
  // ---------------------------------------------------------------------------
137
- function buildDataContext(workspaceDir, maxDays) {
138
- maxDays = maxDays || 7;
139
- const parts = [];
140
-
141
- // 1. Recent daily logs (from filesystem — most up-to-date source)
138
+ async function generateEmbeddingsBackground(workspaceDir, items) {
139
+ // items = [{ type: 'daily_log'|'task'|'blocker', id: string, text: string }]
140
+ if (!items || !items.length) return;
142
141
  try {
143
- const logsDir = path.join(workspaceDir, 'logs', 'daily');
144
- if (exists(logsDir)) {
145
- const files = fs.readdirSync(logsDir)
146
- .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
147
- .sort()
148
- .slice(-maxDays);
149
- if (files.length) {
150
- parts.push('\n\n[DAILY LOGS ÚLTIMOS ' + files.length + ' DIAS]');
151
- for (const file of files) {
152
- const date = file.replace('.md', '');
153
- const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
154
- // Truncate very large logs to avoid token overflow
155
- const trimmed = content.length > 8000 ? content.slice(0, 8000) + '\n...(truncado)' : content;
156
- parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
157
- }
142
+ const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
143
+ let generated = 0;
144
+ for (const item of items) {
145
+ try {
146
+ // Skip if embeddings already exist AND item is not a daily_log
147
+ // (daily logs get updated frequently, so always regenerate)
148
+ if (item.type !== 'daily_log' && dm.hasEmbeddings(item.type, item.id)) continue;
149
+ const count = await dm.generateEmbeddings(item.type, item.id, item.text);
150
+ generated += count;
151
+ } catch (err) {
152
+ console.error(`[embeddings] Failed for ${item.type}/${item.id}:`, err.message);
158
153
  }
159
154
  }
160
- } catch (e) {
161
- console.error('[context] Failed to read daily logs:', e.message);
155
+ if (generated > 0) console.log(`[embeddings] Generated ${generated} embedding chunks`);
156
+ } catch (err) {
157
+ console.error('[embeddings] Background generation failed:', err.message);
162
158
  }
159
+ }
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // Build structured data context (tasks, blockers, projects) — always compact
163
+ // ---------------------------------------------------------------------------
164
+ function buildStructuredContext() {
165
+ const parts = [];
163
166
 
164
- // 2. Pending tasks from SQLite
167
+ // Pending tasks
165
168
  try {
166
169
  const tasks = dl.db.prepare("SELECT id, description, category, status, project_slug, created_at, due_date FROM tasks WHERE status = 'PENDING' ORDER BY created_at DESC LIMIT 50").all();
167
170
  if (tasks.length) {
168
- parts.push('\n\n[TASKS PENDENTES — SQLite (' + tasks.length + ' tasks)]');
171
+ parts.push('\n[TASKS PENDENTES (' + tasks.length + ')]');
169
172
  for (const t of tasks) {
170
- parts.push(`• [${t.category}] ${t.description} (projeto: ${t.project_slug || 'N/A'}, criado: ${t.created_at || '?'}${t.due_date ? ', prazo: ' + t.due_date : ''})`);
173
+ parts.push(`• [${t.category}] ${t.description} (projeto: ${t.project_slug || 'N/A'}${t.due_date ? ', prazo: ' + t.due_date : ''})`);
171
174
  }
172
175
  } else {
173
- parts.push('\n\n[TASKS PENDENTES — SQLite: nenhuma task registrada]');
176
+ parts.push('\n[TASKS: nenhuma task pendente registrada no sistema]');
174
177
  }
175
- } catch (e) {
176
- parts.push('\n\n[TASKS: erro ao consultar SQLite — ' + e.message + ']');
177
- }
178
+ } catch { /* ignore */ }
178
179
 
179
- // 3. Open blockers from SQLite
180
+ // Open blockers
180
181
  try {
181
182
  const blockers = dl.db.prepare("SELECT id, title, severity, status, project_slug, owner, next_action, created_at FROM blockers WHERE status IN ('OPEN','MITIGATING') ORDER BY created_at DESC LIMIT 30").all();
182
183
  if (blockers.length) {
183
- parts.push('\n\n[BLOCKERS ABERTOS — SQLite (' + blockers.length + ' blockers)]');
184
+ parts.push('\n[BLOCKERS ABERTOS (' + blockers.length + ')]');
184
185
  for (const b of blockers) {
185
- parts.push(`• [${b.severity}] ${b.title} (projeto: ${b.project_slug || 'N/A'}, status: ${b.status}, owner: ${b.owner || '?'})`);
186
+ parts.push(`• [${b.severity}] ${b.title} (projeto: ${b.project_slug || 'N/A'}, owner: ${b.owner || '?'})`);
186
187
  }
187
188
  } else {
188
- parts.push('\n\n[BLOCKERS ABERTOS — SQLite: nenhum blocker registrado]');
189
+ parts.push('\n[BLOCKERS: nenhum blocker aberto registrado no sistema]');
189
190
  }
190
- } catch (e) {
191
- parts.push('\n\n[BLOCKERS: erro ao consultar SQLite — ' + e.message + ']');
192
- }
191
+ } catch { /* ignore */ }
193
192
 
194
- // 4. Active projects
193
+ // Active projects
195
194
  try {
196
195
  const projects = dl.db.prepare("SELECT slug, client, name FROM projects WHERE is_active = 1 ORDER BY slug").all();
197
196
  if (projects.length) {
198
- parts.push('\n\n[PROJETOS ATIVOS — SQLite (' + projects.length + ')]');
197
+ parts.push('\n[PROJETOS ATIVOS (' + projects.length + ')]');
199
198
  for (const p of projects) {
200
199
  parts.push(`• ${p.slug} — ${p.name || p.client || 'sem nome'}`);
201
200
  }
@@ -205,6 +204,177 @@ function buildDataContext(workspaceDir, maxDays) {
205
204
  return parts.join('\n');
206
205
  }
207
206
 
207
+ // ---------------------------------------------------------------------------
208
+ // Smart context builder: uses RAG when available, falls back to raw logs
209
+ // ---------------------------------------------------------------------------
210
+ async function buildSmartContext(workspaceDir, query) {
211
+ const parts = [];
212
+ const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
213
+ const TOKEN_BUDGET = 12000; // chars budget for log/RAG context
214
+ let usedBudget = 0;
215
+
216
+ // 1. Try semantic search first (if embeddings exist)
217
+ let ragUsed = false;
218
+ try {
219
+ const embCount = dm.getEmbeddingCount();
220
+ if (embCount > 0) {
221
+ const ragResults = await dm.semanticSearch(query, 15);
222
+ const relevant = ragResults.filter(r => r.score > 0.25);
223
+ if (relevant.length > 0) {
224
+ ragUsed = true;
225
+ parts.push('\n[CONTEXTO RELEVANTE — Busca Semântica]');
226
+ for (const r of relevant) {
227
+ const chunk = `\n--- ${r.reference_type} (${r.reference_id}) [relevância: ${Math.round(r.score * 100)}%] ---\n${r.text_chunk}`;
228
+ if (usedBudget + chunk.length > TOKEN_BUDGET) break;
229
+ parts.push(chunk);
230
+ usedBudget += chunk.length;
231
+ }
232
+ }
233
+ }
234
+ } catch (ragErr) {
235
+ console.error('[context] RAG search failed:', ragErr.message);
236
+ }
237
+
238
+ // 2. Fallback: if RAG not available or returned few results, include recent daily logs
239
+ if (!ragUsed || usedBudget < TOKEN_BUDGET / 3) {
240
+ try {
241
+ const logsDir = path.join(workspaceDir, 'logs', 'daily');
242
+ if (exists(logsDir)) {
243
+ const maxDays = ragUsed ? 3 : 5; // fewer if RAG already provided some context
244
+ const files = fs.readdirSync(logsDir)
245
+ .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
246
+ .sort()
247
+ .slice(-maxDays);
248
+ if (files.length) {
249
+ parts.push('\n[DAILY LOGS — ÚLTIMOS ' + files.length + ' DIAS]');
250
+ for (const file of files) {
251
+ const date = file.replace('.md', '');
252
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
253
+ const maxPerLog = Math.floor((TOKEN_BUDGET - usedBudget) / files.length);
254
+ const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
255
+ parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
256
+ usedBudget += trimmed.length;
257
+ if (usedBudget >= TOKEN_BUDGET) break;
258
+ }
259
+ }
260
+ }
261
+ } catch (e) {
262
+ console.error('[context] Failed to read daily logs:', e.message);
263
+ }
264
+ }
265
+
266
+ // 3. Always include structured data (compact, always useful)
267
+ parts.push(buildStructuredContext());
268
+
269
+ return parts.join('\n');
270
+ }
271
+
272
+ // ---------------------------------------------------------------------------
273
+ // Background auto-ingest from chat: extracts tasks/blockers from conversation
274
+ // ---------------------------------------------------------------------------
275
+ const INGEST_SIGNALS = /\b(criar|crie|registr|task|tarefa|blocker|impediment|problem|urgente|preciso|agendar|schedule|delegat|prioriz|adicionar?|anotar?|lembr|reminder|todo|pendente|pendência)\b/i;
276
+ const QUERY_ONLY = /^(o que|como|quando|qual|quais|quem|onde|por que|porque|quantos?|existe|tem |show|list|status|resumo|report|relatório|buscar?|search|find)/i;
277
+
278
+ async function backgroundIngestFromChat(workspaceDir, userQuery) {
279
+ // Skip pure queries — only ingest actionable messages
280
+ if (!userQuery || userQuery.length < 25) return;
281
+ if (QUERY_ONLY.test(userQuery.trim()) && !INGEST_SIGNALS.test(userQuery)) return;
282
+ if (!INGEST_SIGNALS.test(userQuery)) return;
283
+
284
+ try {
285
+ const cmd = process.env.COPILOT_CMD || 'copilot';
286
+
287
+ // Build a minimal planner prompt
288
+ const schema = {
289
+ actions: [
290
+ { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
291
+ { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
292
+ ]
293
+ };
294
+ const prompt = `Você é o planner do sistema F.R.E.Y.A.\n\nAnalise o texto abaixo e extraia APENAS tarefas e blockers explícitos.\nSe NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}\nRetorne APENAS JSON válido no formato: ${JSON.stringify(schema)}\nNÃO use code fences. NÃO inclua texto extra.\n\nTEXTO:\n${userQuery}\n`;
295
+
296
+ const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
297
+ const baseArgs = ['-s', '--no-color', '--stream', 'off', '-p', prompt];
298
+
299
+ const r = await run(cmd, baseArgs, workspaceDir, agentEnv);
300
+ const out = (r.stdout + r.stderr).trim();
301
+ if (r.code !== 0 || !out) return;
302
+
303
+ // Try to parse JSON plan
304
+ const jsonText = extractFirstJsonObject(out) || out;
305
+ let plan;
306
+ try {
307
+ plan = JSON.parse(jsonText);
308
+ } catch {
309
+ try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch { return; }
310
+ }
311
+
312
+ const actions = Array.isArray(plan.actions) ? plan.actions : [];
313
+ const taskActions = actions.filter(a => a && a.type === 'create_task' && a.description);
314
+ const blockerActions = actions.filter(a => a && a.type === 'create_blocker' && a.title);
315
+
316
+ if (!taskActions.length && !blockerActions.length) return;
317
+
318
+ // Apply actions directly to SQLite
319
+ const slugMap = readProjectSlugMap(workspaceDir);
320
+ const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
321
+ const insertTask = dl.db.prepare(`INSERT INTO tasks (id, project_slug, description, category, status, metadata) VALUES (?, ?, ?, ?, ?, ?)`);
322
+ const insertBlocker = dl.db.prepare(`INSERT INTO blockers (id, project_slug, title, severity, status, owner, next_action, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
323
+
324
+ let tasksCreated = 0, blockersCreated = 0;
325
+ const embeddingQueue = [];
326
+
327
+ const ingestTx = dl.db.transaction(() => {
328
+ // Dedup check
329
+ const recentTasks = dl.db.prepare("SELECT description FROM tasks WHERE created_at >= datetime('now', '-1 day')").all();
330
+ const existingKeys = new Set(recentTasks.map(t => sha1(normalizeTextForKey(t.description))));
331
+ const recentBlockers = dl.db.prepare("SELECT title FROM blockers WHERE created_at >= datetime('now', '-1 day')").all();
332
+ const existingBKeys = new Set(recentBlockers.map(b => sha1(normalizeTextForKey(b.title))));
333
+
334
+ for (const a of taskActions) {
335
+ const desc = normalizeWhitespace(a.description);
336
+ if (!desc) continue;
337
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
338
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
339
+ if (existingKeys.has(key)) continue;
340
+
341
+ const id = `t-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
342
+ const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
343
+ const metadata = JSON.stringify({ priority: a.priority || 'medium' });
344
+ insertTask.run(id, projectSlug || null, desc, category, 'PENDING', metadata);
345
+ existingKeys.add(key);
346
+ tasksCreated++;
347
+ embeddingQueue.push({ type: 'task', id, text: desc });
348
+ }
349
+
350
+ for (const a of blockerActions) {
351
+ const title = normalizeWhitespace(a.title);
352
+ if (!title) continue;
353
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title, slugMap);
354
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
355
+ if (existingBKeys.has(key)) continue;
356
+
357
+ const id = `b-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
358
+ const severity = String(a.severity || 'MEDIUM').toUpperCase();
359
+ const metadata = JSON.stringify({ description: a.notes || title });
360
+ insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
361
+ existingBKeys.add(key);
362
+ blockersCreated++;
363
+ embeddingQueue.push({ type: 'blocker', id, text: title + ' ' + (a.notes || '') });
364
+ }
365
+ });
366
+ ingestTx();
367
+
368
+ if (tasksCreated || blockersCreated) {
369
+ console.log(`[chat-ingest] Auto-ingested ${tasksCreated} tasks, ${blockersCreated} blockers from chat`);
370
+ // Generate embeddings in background
371
+ generateEmbeddingsBackground(workspaceDir, embeddingQueue).catch(() => {});
372
+ }
373
+ } catch (err) {
374
+ console.error('[chat-ingest] Background ingestion failed:', err.message);
375
+ }
376
+ }
377
+
208
378
  function settingsPath(workspaceDir) {
209
379
  return path.join(workspaceDir, 'data', 'settings', 'settings.json');
210
380
  }
@@ -2864,8 +3034,15 @@ async function cmdWeb({ port, dir, open, dev }) {
2864
3034
 
2865
3035
  // Sync daily log .md files → SQLite daily_logs table on startup
2866
3036
  try {
2867
- const synced = syncDailyLogs(wsDir);
3037
+ const { synced, toEmbed } = syncDailyLogs(wsDir);
2868
3038
  if (synced > 0) console.log(`[FREYA] Synced ${synced} daily logs to SQLite`);
3039
+ // Generate embeddings in background (non-blocking, last 30 days max)
3040
+ if (toEmbed.length > 0) {
3041
+ const recentLogs = toEmbed.slice(-30).map(l => ({ type: 'daily_log', id: l.date, text: l.content }));
3042
+ generateEmbeddingsBackground(wsDir, recentLogs).catch(err => {
3043
+ console.error('[FREYA] Embedding generation failed (non-fatal):', err.message);
3044
+ });
3045
+ }
2869
3046
  } catch (e) {
2870
3047
  console.error('[FREYA] Warning: daily-logs sync failed:', e.message || String(e));
2871
3048
  }
@@ -3853,8 +4030,11 @@ async function cmdWeb({ port, dir, open, dev }) {
3853
4030
 
3854
4031
  // Sync this daily log file to SQLite so chat/RAG can find it
3855
4032
  try {
4033
+ const logContent = fs.readFileSync(file, 'utf8');
3856
4034
  const upsert = dl.db.prepare(`INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?) ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown`);
3857
- upsert.run(d, fs.readFileSync(file, 'utf8'));
4035
+ upsert.run(d, logContent);
4036
+ // Regenerate embeddings for this log in background
4037
+ generateEmbeddingsBackground(workspaceDir, [{ type: 'daily_log', id: d, text: logContent }]).catch(() => {});
3858
4038
  } catch (syncErr) {
3859
4039
  console.error('[inbox] Failed to sync daily log to SQLite:', syncErr.message);
3860
4040
  }
@@ -4220,6 +4400,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4220
4400
  insertTask.run(id, projectSlug || null, description, category, 'PENDING', metadata);
4221
4401
 
4222
4402
  applied.tasks++;
4403
+ if (!applied._embedQueue) applied._embedQueue = [];
4404
+ applied._embedQueue.push({ type: 'task', id, text: description });
4223
4405
  existingTaskKeys24h.add(key); // prevent duplicates within same batch
4224
4406
  continue;
4225
4407
  }
@@ -4240,6 +4422,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4240
4422
  insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
4241
4423
 
4242
4424
  applied.blockers++;
4425
+ if (!applied._embedQueue) applied._embedQueue = [];
4426
+ applied._embedQueue.push({ type: 'blocker', id, text: title + ' ' + (notes || '') });
4243
4427
  existingBlockerKeys24h.add(key); // prevent duplicates within same batch
4244
4428
  continue;
4245
4429
  }
@@ -4260,6 +4444,12 @@ async function cmdWeb({ port, dir, open, dev }) {
4260
4444
 
4261
4445
  applyTx(actions);
4262
4446
 
4447
+ // Generate embeddings for newly created tasks/blockers (background, non-blocking)
4448
+ if (applied._embedQueue && applied._embedQueue.length > 0) {
4449
+ generateEmbeddingsBackground(workspaceDir, applied._embedQueue).catch(() => {});
4450
+ delete applied._embedQueue; // don't send internal queue in response
4451
+ }
4452
+
4263
4453
  // Auto-suggest reports when planner didn't include any
4264
4454
  if (!applied.reportsSuggested.length) {
4265
4455
  const sug = [];
@@ -4403,23 +4593,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4403
4593
  // Ensure daily logs are synced to SQLite before querying
4404
4594
  try { syncDailyLogs(workspaceDir); } catch { /* non-fatal */ }
4405
4595
 
4406
- // Build real data context from SQLite + daily log files
4407
- const dataContext = buildDataContext(workspaceDir, 7);
4408
-
4409
- // V2 RAG Context (graceful fallback if embedder/sharp not available)
4410
- const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
4411
- let ragContext = '';
4412
- try {
4413
- const ragResults = await dm.semanticSearch(query, 12);
4414
- if (ragResults.length > 0) {
4415
- ragContext = '\n\n[MEMÓRIA DE LONGO PRAZO RECUPERADA (RAG VIA SQLITE)]\n';
4416
- for (const r of ragResults) {
4417
- ragContext += `\n---\nFONTE: ${r.reference_type} -> ID: ${r.reference_id} (Score: ${r.score.toFixed(3)})\nCONTEÚDO:\n${r.text_chunk}\n`;
4418
- }
4419
- }
4420
- } catch (ragErr) {
4421
- console.error('[oracle] RAG search failed (embedder/sharp unavailable), continuing without context:', ragErr.message);
4422
- }
4596
+ // Build smart context: RAG (if embeddings exist) + fallback to raw logs + structured data
4597
+ const dataContext = await buildSmartContext(workspaceDir, query);
4423
4598
 
4424
4599
  // Build image context for the prompt (Copilot reads files via --allow-all-tools)
4425
4600
  let imageContext = '';
@@ -4446,7 +4621,7 @@ REGRAS ABSOLUTAS:
4446
4621
 
4447
4622
  DADOS REAIS DO WORKSPACE (use estes dados para responder):
4448
4623
  ${dataContext}
4449
- ${ragContext}${imageContext}`;
4624
+ ${imageContext}`;
4450
4625
 
4451
4626
  const cmd = process.env.COPILOT_CMD || 'copilot';
4452
4627
 
@@ -4483,7 +4658,13 @@ ${ragContext}${imageContext}`;
4483
4658
  if (r.code !== 0) {
4484
4659
  return safeJson(res, 200, { ok: false, answer: 'Falha no processamento do agente FREYA:\n' + (out || 'Exit code != 0'), sessionId });
4485
4660
  }
4486
- return safeJson(res, 200, { ok: true, answer: out, sessionId });
4661
+ // Send response immediately
4662
+ safeJson(res, 200, { ok: true, answer: out, sessionId });
4663
+ // Fire-and-forget: auto-ingest tasks/blockers from user message
4664
+ backgroundIngestFromChat(workspaceDir, query).catch(err => {
4665
+ console.error('[chat-ingest] Background failed:', err.message);
4666
+ });
4667
+ return;
4487
4668
  } catch (e) {
4488
4669
  return safeJson(res, 200, {
4489
4670
  ok: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cccarv82/freya",
3
- "version": "3.5.2",
3
+ "version": "3.6.0",
4
4
  "description": "Personal AI Assistant with local-first persistence",
5
5
  "scripts": {
6
6
  "health": "node scripts/validate-data.js && node scripts/validate-structure.js",
@@ -325,6 +325,12 @@ class DataLayer {
325
325
  embedding BLOB NOT NULL, /* Stored as Buffer of Float32Array */
326
326
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
327
327
  );
328
+
329
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_emb_ref
330
+ ON document_embeddings(reference_type, reference_id, chunk_index);
331
+
332
+ CREATE INDEX IF NOT EXISTS idx_doc_emb_type
333
+ ON document_embeddings(reference_type);
328
334
  `);
329
335
 
330
336
  // --- Migrations for existing databases ---
@@ -229,6 +229,95 @@ class DataManager {
229
229
  return NaN;
230
230
  }
231
231
 
232
+ // --- Embedding Generation ---
233
+
234
+ /**
235
+ * Split text into chunks suitable for embedding (~400-600 chars each).
236
+ * Splits on markdown headings, then paragraphs, then sentences.
237
+ */
238
+ chunkText(text, maxChunkSize = 500) {
239
+ if (!text || text.length <= maxChunkSize) return [text].filter(Boolean);
240
+
241
+ const chunks = [];
242
+ // First split on markdown ## headings
243
+ const sections = text.split(/(?=^## )/m).filter(s => s.trim());
244
+
245
+ for (const section of sections) {
246
+ if (section.length <= maxChunkSize) {
247
+ chunks.push(section.trim());
248
+ continue;
249
+ }
250
+ // Split long sections on double newlines (paragraphs)
251
+ const paragraphs = section.split(/\n\n+/).filter(p => p.trim());
252
+ let buffer = '';
253
+ for (const para of paragraphs) {
254
+ if (buffer.length + para.length + 2 > maxChunkSize && buffer) {
255
+ chunks.push(buffer.trim());
256
+ buffer = '';
257
+ }
258
+ buffer += (buffer ? '\n\n' : '') + para;
259
+ }
260
+ if (buffer.trim()) chunks.push(buffer.trim());
261
+ }
262
+
263
+ return chunks.filter(c => c.length > 10); // skip tiny fragments
264
+ }
265
+
266
+ /**
267
+ * Generate embeddings for a piece of content and store in document_embeddings.
268
+ * Deletes existing embeddings for (referenceType, referenceId) first to avoid stale data.
269
+ * @param {string} referenceType - 'daily_log', 'task', or 'blocker'
270
+ * @param {string} referenceId - unique ID (date for logs, task/blocker id)
271
+ * @param {string} text - content to embed
272
+ */
273
+ async generateEmbeddings(referenceType, referenceId, text) {
274
+ if (!text || !text.trim()) return 0;
275
+
276
+ const chunks = this.chunkText(text);
277
+ if (!chunks.length) return 0;
278
+
279
+ // Delete existing embeddings for this reference
280
+ dl.db.prepare('DELETE FROM document_embeddings WHERE reference_type = ? AND reference_id = ?')
281
+ .run(referenceType, referenceId);
282
+
283
+ const insert = dl.db.prepare(`
284
+ INSERT INTO document_embeddings (reference_type, reference_id, chunk_index, text_chunk, embedding)
285
+ VALUES (?, ?, ?, ?, ?)
286
+ `);
287
+
288
+ let count = 0;
289
+ for (let i = 0; i < chunks.length; i++) {
290
+ try {
291
+ const vector = await defaultEmbedder.embedText(chunks[i]);
292
+ const buffer = defaultEmbedder.vectorToBuffer(vector);
293
+ insert.run(referenceType, referenceId, i, chunks[i], buffer);
294
+ count++;
295
+ } catch (err) {
296
+ console.error(`[embeddings] Failed to embed chunk ${i} of ${referenceType}/${referenceId}:`, err.message);
297
+ }
298
+ }
299
+ return count;
300
+ }
301
+
302
+ /**
303
+ * Check if embeddings exist and are up-to-date for a reference.
304
+ * @returns {boolean} true if embeddings exist
305
+ */
306
+ hasEmbeddings(referenceType, referenceId) {
307
+ const row = dl.db.prepare(
308
+ 'SELECT COUNT(*) as c FROM document_embeddings WHERE reference_type = ? AND reference_id = ?'
309
+ ).get(referenceType, referenceId);
310
+ return row && row.c > 0;
311
+ }
312
+
313
+ /**
314
+ * Get total embedding count (for checking if RAG is available).
315
+ */
316
+ getEmbeddingCount() {
317
+ const row = dl.db.prepare('SELECT COUNT(*) as c FROM document_embeddings').get();
318
+ return row ? row.c : 0;
319
+ }
320
+
232
321
  // --- RAG (Vector Search) ---
233
322
  async semanticSearch(query, topK = 10) {
234
323
  const queryVector = await defaultEmbedder.embedText(query);