@cccarv82/freya 3.5.2 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/web.js CHANGED
@@ -104,9 +104,9 @@ function newestFile(dir, prefix) {
104
104
  function syncDailyLogs(workspaceDir) {
105
105
  try {
106
106
  const logsDir = path.join(workspaceDir, 'logs', 'daily');
107
- if (!exists(logsDir)) return 0;
107
+ if (!exists(logsDir)) return { synced: 0, toEmbed: [] };
108
108
  const files = fs.readdirSync(logsDir).filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f));
109
- if (!files.length) return 0;
109
+ if (!files.length) return { synced: 0, toEmbed: [] };
110
110
 
111
111
  const upsert = dl.db.prepare(`
112
112
  INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
@@ -114,88 +114,87 @@ function syncDailyLogs(workspaceDir) {
114
114
  `);
115
115
 
116
116
  let synced = 0;
117
+ const toEmbed = []; // collect logs that need embedding
117
118
  const tx = dl.db.transaction((fileList) => {
118
119
  for (const file of fileList) {
119
120
  const date = file.replace('.md', '');
120
121
  const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
121
122
  upsert.run(date, content);
123
+ toEmbed.push({ date, content });
122
124
  synced++;
123
125
  }
124
126
  });
125
127
  tx(files);
126
- return synced;
128
+ return { synced, toEmbed };
127
129
  } catch (e) {
128
130
  console.error('[sync] Daily-logs sync failed:', e.message);
129
- return 0;
131
+ return { synced: 0, toEmbed: [] };
130
132
  }
131
133
  }
132
134
 
133
135
  // ---------------------------------------------------------------------------
134
- // Build real data context for Orchestrator (chat) — feeds SQLite + daily logs
135
- // as plain-text so the LLM has actual data to synthesize answers from
136
+ // Background embedding generation — runs async, never blocks
136
137
  // ---------------------------------------------------------------------------
137
- function buildDataContext(workspaceDir, maxDays) {
138
- maxDays = maxDays || 7;
139
- const parts = [];
140
-
141
- // 1. Recent daily logs (from filesystem — most up-to-date source)
138
+ async function generateEmbeddingsBackground(workspaceDir, items) {
139
+ // items = [{ type: 'daily_log'|'task'|'blocker', id: string, text: string }]
140
+ if (!items || !items.length) return;
142
141
  try {
143
- const logsDir = path.join(workspaceDir, 'logs', 'daily');
144
- if (exists(logsDir)) {
145
- const files = fs.readdirSync(logsDir)
146
- .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
147
- .sort()
148
- .slice(-maxDays);
149
- if (files.length) {
150
- parts.push('\n\n[DAILY LOGS ÚLTIMOS ' + files.length + ' DIAS]');
151
- for (const file of files) {
152
- const date = file.replace('.md', '');
153
- const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
154
- // Truncate very large logs to avoid token overflow
155
- const trimmed = content.length > 8000 ? content.slice(0, 8000) + '\n...(truncado)' : content;
156
- parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
157
- }
142
+ const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
143
+ let generated = 0;
144
+ for (const item of items) {
145
+ try {
146
+ // Skip if embeddings already exist AND item is not a daily_log
147
+ // (daily logs get updated frequently, so always regenerate)
148
+ if (item.type !== 'daily_log' && dm.hasEmbeddings(item.type, item.id)) continue;
149
+ const count = await dm.generateEmbeddings(item.type, item.id, item.text);
150
+ generated += count;
151
+ } catch (err) {
152
+ console.error(`[embeddings] Failed for ${item.type}/${item.id}:`, err.message);
158
153
  }
159
154
  }
160
- } catch (e) {
161
- console.error('[context] Failed to read daily logs:', e.message);
155
+ if (generated > 0) console.log(`[embeddings] Generated ${generated} embedding chunks`);
156
+ } catch (err) {
157
+ console.error('[embeddings] Background generation failed:', err.message);
162
158
  }
159
+ }
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // Build structured data context (tasks, blockers, projects) — always compact
163
+ // ---------------------------------------------------------------------------
164
+ function buildStructuredContext() {
165
+ const parts = [];
163
166
 
164
- // 2. Pending tasks from SQLite
167
+ // Pending tasks
165
168
  try {
166
169
  const tasks = dl.db.prepare("SELECT id, description, category, status, project_slug, created_at, due_date FROM tasks WHERE status = 'PENDING' ORDER BY created_at DESC LIMIT 50").all();
167
170
  if (tasks.length) {
168
- parts.push('\n\n[TASKS PENDENTES — SQLite (' + tasks.length + ' tasks)]');
171
+ parts.push('\n[TASKS PENDENTES (' + tasks.length + ')]');
169
172
  for (const t of tasks) {
170
- parts.push(`• [${t.category}] ${t.description} (projeto: ${t.project_slug || 'N/A'}, criado: ${t.created_at || '?'}${t.due_date ? ', prazo: ' + t.due_date : ''})`);
173
+ parts.push(`• [${t.category}] ${t.description} (projeto: ${t.project_slug || 'N/A'}${t.due_date ? ', prazo: ' + t.due_date : ''})`);
171
174
  }
172
175
  } else {
173
- parts.push('\n\n[TASKS PENDENTES — SQLite: nenhuma task registrada]');
176
+ parts.push('\n[TASKS: nenhuma task pendente registrada no sistema]');
174
177
  }
175
- } catch (e) {
176
- parts.push('\n\n[TASKS: erro ao consultar SQLite — ' + e.message + ']');
177
- }
178
+ } catch { /* ignore */ }
178
179
 
179
- // 3. Open blockers from SQLite
180
+ // Open blockers
180
181
  try {
181
182
  const blockers = dl.db.prepare("SELECT id, title, severity, status, project_slug, owner, next_action, created_at FROM blockers WHERE status IN ('OPEN','MITIGATING') ORDER BY created_at DESC LIMIT 30").all();
182
183
  if (blockers.length) {
183
- parts.push('\n\n[BLOCKERS ABERTOS — SQLite (' + blockers.length + ' blockers)]');
184
+ parts.push('\n[BLOCKERS ABERTOS (' + blockers.length + ')]');
184
185
  for (const b of blockers) {
185
- parts.push(`• [${b.severity}] ${b.title} (projeto: ${b.project_slug || 'N/A'}, status: ${b.status}, owner: ${b.owner || '?'})`);
186
+ parts.push(`• [${b.severity}] ${b.title} (projeto: ${b.project_slug || 'N/A'}, owner: ${b.owner || '?'})`);
186
187
  }
187
188
  } else {
188
- parts.push('\n\n[BLOCKERS ABERTOS — SQLite: nenhum blocker registrado]');
189
+ parts.push('\n[BLOCKERS: nenhum blocker aberto registrado no sistema]');
189
190
  }
190
- } catch (e) {
191
- parts.push('\n\n[BLOCKERS: erro ao consultar SQLite — ' + e.message + ']');
192
- }
191
+ } catch { /* ignore */ }
193
192
 
194
- // 4. Active projects
193
+ // Active projects
195
194
  try {
196
195
  const projects = dl.db.prepare("SELECT slug, client, name FROM projects WHERE is_active = 1 ORDER BY slug").all();
197
196
  if (projects.length) {
198
- parts.push('\n\n[PROJETOS ATIVOS — SQLite (' + projects.length + ')]');
197
+ parts.push('\n[PROJETOS ATIVOS (' + projects.length + ')]');
199
198
  for (const p of projects) {
200
199
  parts.push(`• ${p.slug} — ${p.name || p.client || 'sem nome'}`);
201
200
  }
@@ -205,6 +204,177 @@ function buildDataContext(workspaceDir, maxDays) {
205
204
  return parts.join('\n');
206
205
  }
207
206
 
207
+ // ---------------------------------------------------------------------------
208
+ // Smart context builder: uses RAG when available, falls back to raw logs
209
+ // ---------------------------------------------------------------------------
210
+ async function buildSmartContext(workspaceDir, query) {
211
+ const parts = [];
212
+ const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
213
+ const TOKEN_BUDGET = 12000; // chars budget for log/RAG context
214
+ let usedBudget = 0;
215
+
216
+ // 1. Try semantic search first (if embeddings exist)
217
+ let ragUsed = false;
218
+ try {
219
+ const embCount = dm.getEmbeddingCount();
220
+ if (embCount > 0) {
221
+ const ragResults = await dm.semanticSearch(query, 15);
222
+ const relevant = ragResults.filter(r => r.score > 0.25);
223
+ if (relevant.length > 0) {
224
+ ragUsed = true;
225
+ parts.push('\n[CONTEXTO RELEVANTE — Busca Semântica]');
226
+ for (const r of relevant) {
227
+ const chunk = `\n--- ${r.reference_type} (${r.reference_id}) [relevância: ${Math.round(r.score * 100)}%] ---\n${r.text_chunk}`;
228
+ if (usedBudget + chunk.length > TOKEN_BUDGET) break;
229
+ parts.push(chunk);
230
+ usedBudget += chunk.length;
231
+ }
232
+ }
233
+ }
234
+ } catch (ragErr) {
235
+ console.error('[context] RAG search failed:', ragErr.message);
236
+ }
237
+
238
+ // 2. Fallback: if RAG not available or returned few results, include recent daily logs
239
+ if (!ragUsed || usedBudget < TOKEN_BUDGET / 3) {
240
+ try {
241
+ const logsDir = path.join(workspaceDir, 'logs', 'daily');
242
+ if (exists(logsDir)) {
243
+ const maxDays = ragUsed ? 3 : 5; // fewer if RAG already provided some context
244
+ const files = fs.readdirSync(logsDir)
245
+ .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
246
+ .sort()
247
+ .slice(-maxDays);
248
+ if (files.length) {
249
+ parts.push('\n[DAILY LOGS — ÚLTIMOS ' + files.length + ' DIAS]');
250
+ for (const file of files) {
251
+ const date = file.replace('.md', '');
252
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
253
+ const maxPerLog = Math.floor((TOKEN_BUDGET - usedBudget) / files.length);
254
+ const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
255
+ parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
256
+ usedBudget += trimmed.length;
257
+ if (usedBudget >= TOKEN_BUDGET) break;
258
+ }
259
+ }
260
+ }
261
+ } catch (e) {
262
+ console.error('[context] Failed to read daily logs:', e.message);
263
+ }
264
+ }
265
+
266
+ // 3. Always include structured data (compact, always useful)
267
+ parts.push(buildStructuredContext());
268
+
269
+ return parts.join('\n');
270
+ }
271
+
272
+ // ---------------------------------------------------------------------------
273
+ // Background auto-ingest from chat: extracts tasks/blockers from conversation
274
+ // ---------------------------------------------------------------------------
275
+ const INGEST_SIGNALS = /\b(criar|crie|registr|task|tarefa|blocker|impediment|problem|urgente|preciso|agendar|schedule|delegat|prioriz|adicionar?|anotar?|lembr|reminder|todo|pendente|pendência)\b/i;
276
+ const QUERY_ONLY = /^(o que|como|quando|qual|quais|quem|onde|por que|porque|quantos?|existe|tem |show|list|status|resumo|report|relatório|buscar?|search|find)/i;
277
+
278
+ async function backgroundIngestFromChat(workspaceDir, userQuery) {
279
+ // Skip pure queries — only ingest actionable messages
280
+ if (!userQuery || userQuery.length < 25) return;
281
+ if (QUERY_ONLY.test(userQuery.trim()) && !INGEST_SIGNALS.test(userQuery)) return;
282
+ if (!INGEST_SIGNALS.test(userQuery)) return;
283
+
284
+ try {
285
+ const cmd = process.env.COPILOT_CMD || 'copilot';
286
+
287
+ // Build a minimal planner prompt
288
+ const schema = {
289
+ actions: [
290
+ { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
291
+ { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
292
+ ]
293
+ };
294
+ const prompt = `Você é o planner do sistema F.R.E.Y.A.\n\nAnalise o texto abaixo e extraia APENAS tarefas e blockers explícitos.\nSe NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}\nRetorne APENAS JSON válido no formato: ${JSON.stringify(schema)}\nNÃO use code fences. NÃO inclua texto extra.\n\nTEXTO:\n${userQuery}\n`;
295
+
296
+ const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
297
+ const baseArgs = ['-s', '--no-color', '--stream', 'off', '-p', prompt];
298
+
299
+ const r = await run(cmd, baseArgs, workspaceDir, agentEnv);
300
+ const out = (r.stdout + r.stderr).trim();
301
+ if (r.code !== 0 || !out) return;
302
+
303
+ // Try to parse JSON plan
304
+ const jsonText = extractFirstJsonObject(out) || out;
305
+ let plan;
306
+ try {
307
+ plan = JSON.parse(jsonText);
308
+ } catch {
309
+ try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch { return; }
310
+ }
311
+
312
+ const actions = Array.isArray(plan.actions) ? plan.actions : [];
313
+ const taskActions = actions.filter(a => a && a.type === 'create_task' && a.description);
314
+ const blockerActions = actions.filter(a => a && a.type === 'create_blocker' && a.title);
315
+
316
+ if (!taskActions.length && !blockerActions.length) return;
317
+
318
+ // Apply actions directly to SQLite
319
+ const slugMap = readProjectSlugMap(workspaceDir);
320
+ const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
321
+ const insertTask = dl.db.prepare(`INSERT INTO tasks (id, project_slug, description, category, status, metadata) VALUES (?, ?, ?, ?, ?, ?)`);
322
+ const insertBlocker = dl.db.prepare(`INSERT INTO blockers (id, project_slug, title, severity, status, owner, next_action, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
323
+
324
+ let tasksCreated = 0, blockersCreated = 0;
325
+ const embeddingQueue = [];
326
+
327
+ const ingestTx = dl.db.transaction(() => {
328
+ // Dedup check
329
+ const recentTasks = dl.db.prepare("SELECT description FROM tasks WHERE created_at >= datetime('now', '-1 day')").all();
330
+ const existingKeys = new Set(recentTasks.map(t => sha1(normalizeTextForKey(t.description))));
331
+ const recentBlockers = dl.db.prepare("SELECT title FROM blockers WHERE created_at >= datetime('now', '-1 day')").all();
332
+ const existingBKeys = new Set(recentBlockers.map(b => sha1(normalizeTextForKey(b.title))));
333
+
334
+ for (const a of taskActions) {
335
+ const desc = normalizeWhitespace(a.description);
336
+ if (!desc) continue;
337
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
338
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
339
+ if (existingKeys.has(key)) continue;
340
+
341
+ const id = `t-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
342
+ const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
343
+ const metadata = JSON.stringify({ priority: a.priority || 'medium' });
344
+ insertTask.run(id, projectSlug || null, desc, category, 'PENDING', metadata);
345
+ existingKeys.add(key);
346
+ tasksCreated++;
347
+ embeddingQueue.push({ type: 'task', id, text: desc });
348
+ }
349
+
350
+ for (const a of blockerActions) {
351
+ const title = normalizeWhitespace(a.title);
352
+ if (!title) continue;
353
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title, slugMap);
354
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
355
+ if (existingBKeys.has(key)) continue;
356
+
357
+ const id = `b-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
358
+ const severity = String(a.severity || 'MEDIUM').toUpperCase();
359
+ const metadata = JSON.stringify({ description: a.notes || title });
360
+ insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
361
+ existingBKeys.add(key);
362
+ blockersCreated++;
363
+ embeddingQueue.push({ type: 'blocker', id, text: title + ' ' + (a.notes || '') });
364
+ }
365
+ });
366
+ ingestTx();
367
+
368
+ if (tasksCreated || blockersCreated) {
369
+ console.log(`[chat-ingest] Auto-ingested ${tasksCreated} tasks, ${blockersCreated} blockers from chat`);
370
+ // Generate embeddings in background
371
+ generateEmbeddingsBackground(workspaceDir, embeddingQueue).catch(() => {});
372
+ }
373
+ } catch (err) {
374
+ console.error('[chat-ingest] Background ingestion failed:', err.message);
375
+ }
376
+ }
377
+
208
378
  function settingsPath(workspaceDir) {
209
379
  return path.join(workspaceDir, 'data', 'settings', 'settings.json');
210
380
  }
@@ -2864,8 +3034,15 @@ async function cmdWeb({ port, dir, open, dev }) {
2864
3034
 
2865
3035
  // Sync daily log .md files → SQLite daily_logs table on startup
2866
3036
  try {
2867
- const synced = syncDailyLogs(wsDir);
3037
+ const { synced, toEmbed } = syncDailyLogs(wsDir);
2868
3038
  if (synced > 0) console.log(`[FREYA] Synced ${synced} daily logs to SQLite`);
3039
+ // Generate embeddings in background (non-blocking, last 30 days max)
3040
+ if (toEmbed.length > 0) {
3041
+ const recentLogs = toEmbed.slice(-30).map(l => ({ type: 'daily_log', id: l.date, text: l.content }));
3042
+ generateEmbeddingsBackground(wsDir, recentLogs).catch(err => {
3043
+ console.error('[FREYA] Embedding generation failed (non-fatal):', err.message);
3044
+ });
3045
+ }
2869
3046
  } catch (e) {
2870
3047
  console.error('[FREYA] Warning: daily-logs sync failed:', e.message || String(e));
2871
3048
  }
@@ -3853,8 +4030,11 @@ async function cmdWeb({ port, dir, open, dev }) {
3853
4030
 
3854
4031
  // Sync this daily log file to SQLite so chat/RAG can find it
3855
4032
  try {
4033
+ const logContent = fs.readFileSync(file, 'utf8');
3856
4034
  const upsert = dl.db.prepare(`INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?) ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown`);
3857
- upsert.run(d, fs.readFileSync(file, 'utf8'));
4035
+ upsert.run(d, logContent);
4036
+ // Regenerate embeddings for this log in background
4037
+ generateEmbeddingsBackground(workspaceDir, [{ type: 'daily_log', id: d, text: logContent }]).catch(() => {});
3858
4038
  } catch (syncErr) {
3859
4039
  console.error('[inbox] Failed to sync daily log to SQLite:', syncErr.message);
3860
4040
  }
@@ -4220,6 +4400,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4220
4400
  insertTask.run(id, projectSlug || null, description, category, 'PENDING', metadata);
4221
4401
 
4222
4402
  applied.tasks++;
4403
+ if (!applied._embedQueue) applied._embedQueue = [];
4404
+ applied._embedQueue.push({ type: 'task', id, text: description });
4223
4405
  existingTaskKeys24h.add(key); // prevent duplicates within same batch
4224
4406
  continue;
4225
4407
  }
@@ -4240,6 +4422,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4240
4422
  insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', null, null, metadata);
4241
4423
 
4242
4424
  applied.blockers++;
4425
+ if (!applied._embedQueue) applied._embedQueue = [];
4426
+ applied._embedQueue.push({ type: 'blocker', id, text: title + ' ' + (notes || '') });
4243
4427
  existingBlockerKeys24h.add(key); // prevent duplicates within same batch
4244
4428
  continue;
4245
4429
  }
@@ -4260,6 +4444,12 @@ async function cmdWeb({ port, dir, open, dev }) {
4260
4444
 
4261
4445
  applyTx(actions);
4262
4446
 
4447
+ // Generate embeddings for newly created tasks/blockers (background, non-blocking)
4448
+ if (applied._embedQueue && applied._embedQueue.length > 0) {
4449
+ generateEmbeddingsBackground(workspaceDir, applied._embedQueue).catch(() => {});
4450
+ delete applied._embedQueue; // don't send internal queue in response
4451
+ }
4452
+
4263
4453
  // Auto-suggest reports when planner didn't include any
4264
4454
  if (!applied.reportsSuggested.length) {
4265
4455
  const sug = [];
@@ -4403,23 +4593,8 @@ async function cmdWeb({ port, dir, open, dev }) {
4403
4593
  // Ensure daily logs are synced to SQLite before querying
4404
4594
  try { syncDailyLogs(workspaceDir); } catch { /* non-fatal */ }
4405
4595
 
4406
- // Build real data context from SQLite + daily log files
4407
- const dataContext = buildDataContext(workspaceDir, 7);
4408
-
4409
- // V2 RAG Context (graceful fallback if embedder/sharp not available)
4410
- const dm = new DataManager(workspaceDir, path.join(workspaceDir, 'logs'));
4411
- let ragContext = '';
4412
- try {
4413
- const ragResults = await dm.semanticSearch(query, 12);
4414
- if (ragResults.length > 0) {
4415
- ragContext = '\n\n[MEMÓRIA DE LONGO PRAZO RECUPERADA (RAG VIA SQLITE)]\n';
4416
- for (const r of ragResults) {
4417
- ragContext += `\n---\nFONTE: ${r.reference_type} -> ID: ${r.reference_id} (Score: ${r.score.toFixed(3)})\nCONTEÚDO:\n${r.text_chunk}\n`;
4418
- }
4419
- }
4420
- } catch (ragErr) {
4421
- console.error('[oracle] RAG search failed (embedder/sharp unavailable), continuing without context:', ragErr.message);
4422
- }
4596
+ // Build smart context: RAG (if embeddings exist) + fallback to raw logs + structured data
4597
+ const dataContext = await buildSmartContext(workspaceDir, query);
4423
4598
 
4424
4599
  // Build image context for the prompt (Copilot reads files via --allow-all-tools)
4425
4600
  let imageContext = '';
@@ -4446,7 +4621,7 @@ REGRAS ABSOLUTAS:
4446
4621
 
4447
4622
  DADOS REAIS DO WORKSPACE (use estes dados para responder):
4448
4623
  ${dataContext}
4449
- ${ragContext}${imageContext}`;
4624
+ ${imageContext}`;
4450
4625
 
4451
4626
  const cmd = process.env.COPILOT_CMD || 'copilot';
4452
4627
 
@@ -4483,7 +4658,13 @@ ${ragContext}${imageContext}`;
4483
4658
  if (r.code !== 0) {
4484
4659
  return safeJson(res, 200, { ok: false, answer: 'Falha no processamento do agente FREYA:\n' + (out || 'Exit code != 0'), sessionId });
4485
4660
  }
4486
- return safeJson(res, 200, { ok: true, answer: out, sessionId });
4661
+ // Send response immediately
4662
+ safeJson(res, 200, { ok: true, answer: out, sessionId });
4663
+ // Fire-and-forget: auto-ingest tasks/blockers from user message
4664
+ backgroundIngestFromChat(workspaceDir, query).catch(err => {
4665
+ console.error('[chat-ingest] Background failed:', err.message);
4666
+ });
4667
+ return;
4487
4668
  } catch (e) {
4488
4669
  return safeJson(res, 200, {
4489
4670
  ok: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cccarv82/freya",
3
- "version": "3.5.2",
3
+ "version": "3.6.0",
4
4
  "description": "Personal AI Assistant with local-first persistence",
5
5
  "scripts": {
6
6
  "health": "node scripts/validate-data.js && node scripts/validate-structure.js",
@@ -325,6 +325,12 @@ class DataLayer {
325
325
  embedding BLOB NOT NULL, /* Stored as Buffer of Float32Array */
326
326
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
327
327
  );
328
+
329
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_emb_ref
330
+ ON document_embeddings(reference_type, reference_id, chunk_index);
331
+
332
+ CREATE INDEX IF NOT EXISTS idx_doc_emb_type
333
+ ON document_embeddings(reference_type);
328
334
  `);
329
335
 
330
336
  // --- Migrations for existing databases ---
@@ -229,6 +229,95 @@ class DataManager {
229
229
  return NaN;
230
230
  }
231
231
 
232
+ // --- Embedding Generation ---
233
+
234
+ /**
235
+ * Split text into chunks suitable for embedding (~400-600 chars each).
236
+ * Splits on markdown headings, then paragraphs, then sentences.
237
+ */
238
+ chunkText(text, maxChunkSize = 500) {
239
+ if (!text || text.length <= maxChunkSize) return [text].filter(Boolean);
240
+
241
+ const chunks = [];
242
+ // First split on markdown ## headings
243
+ const sections = text.split(/(?=^## )/m).filter(s => s.trim());
244
+
245
+ for (const section of sections) {
246
+ if (section.length <= maxChunkSize) {
247
+ chunks.push(section.trim());
248
+ continue;
249
+ }
250
+ // Split long sections on double newlines (paragraphs)
251
+ const paragraphs = section.split(/\n\n+/).filter(p => p.trim());
252
+ let buffer = '';
253
+ for (const para of paragraphs) {
254
+ if (buffer.length + para.length + 2 > maxChunkSize && buffer) {
255
+ chunks.push(buffer.trim());
256
+ buffer = '';
257
+ }
258
+ buffer += (buffer ? '\n\n' : '') + para;
259
+ }
260
+ if (buffer.trim()) chunks.push(buffer.trim());
261
+ }
262
+
263
+ return chunks.filter(c => c.length > 10); // skip tiny fragments
264
+ }
265
+
266
+ /**
267
+ * Generate embeddings for a piece of content and store in document_embeddings.
268
+ * Deletes existing embeddings for (referenceType, referenceId) first to avoid stale data.
269
+ * @param {string} referenceType - 'daily_log', 'task', or 'blocker'
270
+ * @param {string} referenceId - unique ID (date for logs, task/blocker id)
271
+ * @param {string} text - content to embed
272
+ */
273
+ async generateEmbeddings(referenceType, referenceId, text) {
274
+ if (!text || !text.trim()) return 0;
275
+
276
+ const chunks = this.chunkText(text);
277
+ if (!chunks.length) return 0;
278
+
279
+ // Delete existing embeddings for this reference
280
+ dl.db.prepare('DELETE FROM document_embeddings WHERE reference_type = ? AND reference_id = ?')
281
+ .run(referenceType, referenceId);
282
+
283
+ const insert = dl.db.prepare(`
284
+ INSERT INTO document_embeddings (reference_type, reference_id, chunk_index, text_chunk, embedding)
285
+ VALUES (?, ?, ?, ?, ?)
286
+ `);
287
+
288
+ let count = 0;
289
+ for (let i = 0; i < chunks.length; i++) {
290
+ try {
291
+ const vector = await defaultEmbedder.embedText(chunks[i]);
292
+ const buffer = defaultEmbedder.vectorToBuffer(vector);
293
+ insert.run(referenceType, referenceId, i, chunks[i], buffer);
294
+ count++;
295
+ } catch (err) {
296
+ console.error(`[embeddings] Failed to embed chunk ${i} of ${referenceType}/${referenceId}:`, err.message);
297
+ }
298
+ }
299
+ return count;
300
+ }
301
+
302
+ /**
303
+ * Check if embeddings exist and are up-to-date for a reference.
304
+ * @returns {boolean} true if embeddings exist
305
+ */
306
+ hasEmbeddings(referenceType, referenceId) {
307
+ const row = dl.db.prepare(
308
+ 'SELECT COUNT(*) as c FROM document_embeddings WHERE reference_type = ? AND reference_id = ?'
309
+ ).get(referenceType, referenceId);
310
+ return row && row.c > 0;
311
+ }
312
+
313
+ /**
314
+ * Get total embedding count (for checking if RAG is available).
315
+ */
316
+ getEmbeddingCount() {
317
+ const row = dl.db.prepare('SELECT COUNT(*) as c FROM document_embeddings').get();
318
+ return row ? row.c : 0;
319
+ }
320
+
232
321
  // --- RAG (Vector Search) ---
233
322
  async semanticSearch(query, topK = 10) {
234
323
  const queryVector = await defaultEmbedder.embedText(query);