@cccarv82/freya 3.6.0 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/init.js CHANGED
@@ -88,7 +88,8 @@ function ensurePackageJson(targetDir, force, summary) {
88
88
  'sm-weekly': 'node scripts/generate-sm-weekly-report.js',
89
89
  daily: 'node scripts/generate-daily-summary.js',
90
90
  status: 'node scripts/generate-executive-report.js',
91
- blockers: 'node scripts/generate-blockers-report.js'
91
+ blockers: 'node scripts/generate-blockers-report.js',
92
+ 'retroactive-ingest': 'node scripts/retroactive-ingest.js'
92
93
  };
93
94
 
94
95
  const depsToEnsure = {
package/cli/web.js CHANGED
@@ -235,31 +235,27 @@ async function buildSmartContext(workspaceDir, query) {
235
235
  console.error('[context] RAG search failed:', ragErr.message);
236
236
  }
237
237
 
238
- // 2. Fallback: if RAG not available or returned few results, include recent daily logs
238
+ // 2. Fallback: if RAG not available or returned few results, include recent daily logs from SQLite
239
239
  if (!ragUsed || usedBudget < TOKEN_BUDGET / 3) {
240
240
  try {
241
- const logsDir = path.join(workspaceDir, 'logs', 'daily');
242
- if (exists(logsDir)) {
243
- const maxDays = ragUsed ? 3 : 5; // fewer if RAG already provided some context
244
- const files = fs.readdirSync(logsDir)
245
- .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
246
- .sort()
247
- .slice(-maxDays);
248
- if (files.length) {
249
- parts.push('\n[DAILY LOGS ÚLTIMOS ' + files.length + ' DIAS]');
250
- for (const file of files) {
251
- const date = file.replace('.md', '');
252
- const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
253
- const maxPerLog = Math.floor((TOKEN_BUDGET - usedBudget) / files.length);
254
- const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
255
- parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
256
- usedBudget += trimmed.length;
257
- if (usedBudget >= TOKEN_BUDGET) break;
258
- }
241
+ const maxDays = ragUsed ? 3 : 5;
242
+ const recentLogs = dl.db.prepare(
243
+ `SELECT date, raw_markdown FROM daily_logs ORDER BY date DESC LIMIT ?`
244
+ ).all(maxDays);
245
+ if (recentLogs.length) {
246
+ parts.push('\n[DAILY LOGS — ÚLTIMOS ' + recentLogs.length + ' DIAS]');
247
+ // Reverse to show chronologically (oldest first)
248
+ for (const log of recentLogs.reverse()) {
249
+ const maxPerLog = Math.floor((TOKEN_BUDGET - usedBudget) / recentLogs.length);
250
+ const content = log.raw_markdown || '';
251
+ const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
252
+ parts.push(`\n--- LOG ${log.date} ---\n${trimmed}`);
253
+ usedBudget += trimmed.length;
254
+ if (usedBudget >= TOKEN_BUDGET) break;
259
255
  }
260
256
  }
261
257
  } catch (e) {
262
- console.error('[context] Failed to read daily logs:', e.message);
258
+ console.error('[context] Failed to read daily logs from SQLite:', e.message);
263
259
  }
264
260
  }
265
261
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cccarv82/freya",
3
- "version": "3.6.0",
3
+ "version": "3.6.1",
4
4
  "description": "Personal AI Assistant with local-first persistence",
5
5
  "scripts": {
6
6
  "health": "node scripts/validate-data.js && node scripts/validate-structure.js",
@@ -10,6 +10,7 @@
10
10
  "status": "node scripts/generate-executive-report.js",
11
11
  "blockers": "node scripts/generate-blockers-report.js",
12
12
  "export-obsidian": "node scripts/export-obsidian.js",
13
+ "retroactive-ingest": "node scripts/retroactive-ingest.js",
13
14
  "build-index": "node scripts/index/build-index.js",
14
15
  "update-index": "node scripts/index/update-index.js",
15
16
  "test": "node tests/unit/test-package-config.js && node tests/unit/test-cli-init.js && node tests/unit/test-cli-web-help.js && node tests/unit/test-web-static-assets.js && node tests/unit/test-fs-utils.js && node tests/unit/test-search-utils.js && node tests/unit/test-index-utils.js && node tests/unit/test-daily-generation.js && node tests/unit/test-report-generation.js && node tests/unit/test-executive-report-logs.js && node tests/unit/test-oracle-retrieval.js && node tests/unit/test-task-completion.js && node tests/unit/test-migrate-data.js && node tests/unit/test-blockers-report.js && node tests/unit/test-sm-weekly-report.js && node tests/integration/test-ingestor-task.js && node tests/unit/test-structure-validation.js"
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * retroactive-ingest.js
4
+ *
5
+ * Reads ALL existing daily logs, sends each through the Copilot CLI planner
6
+ * to extract tasks/blockers, and applies them to SQLite.
7
+ * Also generates embeddings for all daily logs.
8
+ *
9
+ * Usage:
10
+ * node scripts/retroactive-ingest.js [--dry-run] [--days N] [--embeddings-only]
11
+ *
12
+ * Options:
13
+ * --dry-run Show what would be extracted without writing to SQLite
14
+ * --days N Only process the last N days (default: all)
15
+ * --embeddings-only Skip planner, only generate embeddings for existing logs
16
+ * --skip-embeddings Skip embedding generation (only extract tasks/blockers)
17
+ */
18
+
19
+ 'use strict';
20
+
21
+ const fs = require('fs');
22
+ const path = require('path');
23
+ const crypto = require('crypto');
24
+ const { spawn } = require('child_process');
25
+ const os = require('os');
26
+
27
+ // Parse args
28
+ const args = process.argv.slice(2);
29
+ const DRY_RUN = args.includes('--dry-run');
30
+ const EMBEDDINGS_ONLY = args.includes('--embeddings-only');
31
+ const SKIP_EMBEDDINGS = args.includes('--skip-embeddings');
32
+ const daysIdx = args.indexOf('--days');
33
+ const MAX_DAYS = daysIdx >= 0 ? parseInt(args[daysIdx + 1], 10) : 0;
34
+
35
+ // Resolve workspace directory
36
+ const workspaceDir = process.env.FREYA_WORKSPACE_DIR
37
+ ? path.resolve(process.env.FREYA_WORKSPACE_DIR)
38
+ : path.join(__dirname, '..');
39
+
40
+ const { defaultInstance: dl, ready } = require('./lib/DataLayer');
41
+ const DataManager = require('./lib/DataManager');
42
+
43
+ function sha1(text) {
44
+ return crypto.createHash('sha1').update(text).digest('hex');
45
+ }
46
+
47
+ function normalizeWhitespace(t) {
48
+ return String(t || '').replace(/\s+/g, ' ').trim();
49
+ }
50
+
51
+ function normalizeTextForKey(t) {
52
+ return normalizeWhitespace(t).toLowerCase();
53
+ }
54
+
55
+ function run(cmd, args, cwd, extraEnv) {
56
+ return new Promise((resolve) => {
57
+ let child;
58
+ const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;
59
+ try {
60
+ if (process.platform === 'win32') {
61
+ const comspec = process.env.ComSpec || 'cmd.exe';
62
+ child = spawn(comspec, ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env });
63
+ } else {
64
+ child = spawn(cmd, args, { cwd, shell: false, env });
65
+ }
66
+ } catch (e) {
67
+ return resolve({ code: 1, stdout: '', stderr: e.message || String(e) });
68
+ }
69
+ let stdout = '';
70
+ let stderr = '';
71
+ child.stdout && child.stdout.on('data', (d) => { stdout += d.toString(); });
72
+ child.stderr && child.stderr.on('data', (d) => { stderr += d.toString(); });
73
+ child.on('error', (e) => { stderr += `\n${e.message}`; resolve({ code: 1, stdout, stderr }); });
74
+ child.on('close', (code) => resolve({ code: code ?? 0, stdout, stderr }));
75
+ });
76
+ }
77
+
78
+ function extractFirstJsonObject(text) {
79
+ if (!text) return null;
80
+ const start = text.indexOf('{');
81
+ if (start === -1) return null;
82
+ let depth = 0;
83
+ for (let i = start; i < text.length; i++) {
84
+ if (text[i] === '{') depth++;
85
+ else if (text[i] === '}') { depth--; if (depth === 0) return text.slice(start, i + 1); }
86
+ }
87
+ return null;
88
+ }
89
+
90
+ function escapeJsonControlChars(jsonText) {
91
+ return jsonText.replace(/[\x00-\x1F\x7F]/g, (ch) => {
92
+ if (ch === '\n' || ch === '\r' || ch === '\t') return ch;
93
+ return '\\u' + ch.charCodeAt(0).toString(16).padStart(4, '0');
94
+ });
95
+ }
96
+
97
+ function readProjectSlugMap(wsDir) {
98
+ const p = path.join(wsDir, 'data', 'settings', 'project-slug-map.json');
99
+ try {
100
+ return JSON.parse(fs.readFileSync(p, 'utf8'));
101
+ } catch { return {}; }
102
+ }
103
+
104
+ function inferProjectSlug(text, map) {
105
+ if (!text || !map || typeof map !== 'object') return '';
106
+ const lower = text.toLowerCase();
107
+ let bestSlug = '';
108
+ let bestLen = 0;
109
+ for (const [keyword, slug] of Object.entries(map)) {
110
+ if (lower.includes(keyword.toLowerCase()) && keyword.length > bestLen) {
111
+ bestSlug = slug;
112
+ bestLen = keyword.length;
113
+ }
114
+ }
115
+ return bestSlug;
116
+ }
117
+
118
+ async function main() {
119
+ await ready;
120
+
121
+ console.log('╔══════════════════════════════════════════════════════╗');
122
+ console.log('║ FREYA — Retroactive Ingestion ║');
123
+ console.log('╚══════════════════════════════════════════════════════╝');
124
+ console.log(`Workspace: ${workspaceDir}`);
125
+ console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : EMBEDDINGS_ONLY ? 'EMBEDDINGS ONLY' : 'FULL INGESTION'}`);
126
+ console.log('');
127
+
128
+ const logsDir = path.join(workspaceDir, 'logs', 'daily');
129
+ if (!fs.existsSync(logsDir)) {
130
+ console.log('❌ No daily logs directory found at:', logsDir);
131
+ process.exit(1);
132
+ }
133
+
134
+ let files = fs.readdirSync(logsDir)
135
+ .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
136
+ .sort();
137
+
138
+ if (MAX_DAYS > 0) {
139
+ files = files.slice(-MAX_DAYS);
140
+ }
141
+
142
+ console.log(`📁 Found ${files.length} daily log files to process`);
143
+ console.log('');
144
+
145
+ // Step 1: Sync all daily logs to SQLite
146
+ console.log('── Step 1: Syncing daily logs to SQLite ──');
147
+ const upsert = dl.db.prepare(`
148
+ INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
149
+ ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
150
+ `);
151
+ const syncTx = dl.db.transaction((fileList) => {
152
+ for (const file of fileList) {
153
+ const date = file.replace('.md', '');
154
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
155
+ upsert.run(date, content);
156
+ }
157
+ });
158
+ syncTx(files);
159
+ console.log(`✅ ${files.length} daily logs synced to SQLite`);
160
+ console.log('');
161
+
162
+ // Step 2: Generate embeddings for all daily logs
163
+ if (!SKIP_EMBEDDINGS) {
164
+ console.log('── Step 2: Generating embeddings ──');
165
+ const dm = new DataManager(workspaceDir, logsDir);
166
+ let totalChunks = 0;
167
+ for (let i = 0; i < files.length; i++) {
168
+ const date = files[i].replace('.md', '');
169
+ const content = fs.readFileSync(path.join(logsDir, files[i]), 'utf8');
170
+ try {
171
+ const count = await dm.generateEmbeddings('daily_log', date, content);
172
+ totalChunks += count;
173
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${count} chunks`);
174
+ } catch (err) {
175
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message}`);
176
+ }
177
+ }
178
+ console.log(`\n✅ Generated ${totalChunks} embedding chunks total`);
179
+ console.log('');
180
+ }
181
+
182
+ if (EMBEDDINGS_ONLY) {
183
+ console.log('── Embeddings-only mode. Skipping task/blocker extraction. ──');
184
+ dl.db.save();
185
+ console.log('\n✅ Done!');
186
+ return;
187
+ }
188
+
189
+ // Step 3: Extract tasks/blockers from each daily log via planner
190
+ console.log('── Step 3: Extracting tasks & blockers via planner ──');
191
+
192
+ const cmd = process.env.COPILOT_CMD || 'copilot';
193
+ const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
194
+ const slugMap = readProjectSlugMap(workspaceDir);
195
+ const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
196
+
197
+ const schema = {
198
+ actions: [
199
+ { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
200
+ { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
201
+ ]
202
+ };
203
+
204
+ const sysInstructions = `Você é o planner do sistema F.R.E.Y.A.
205
+
206
+ Analise o daily log abaixo e extraia TODAS as tarefas e blockers mencionados.
207
+ Procure por: ações mencionadas, pendências, problemas, impedimentos, decisões que geram trabalho.
208
+ Se NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}
209
+ Retorne APENAS JSON válido no formato: ${JSON.stringify(schema)}
210
+ NÃO use code fences. NÃO inclua texto extra.
211
+ IMPORTANTE: Extraia APENAS informações explícitas do log. NÃO invente dados.`;
212
+
213
+ let totalTasks = 0;
214
+ let totalBlockers = 0;
215
+ let totalSkipped = 0;
216
+ let totalErrors = 0;
217
+
218
+ const insertTask = dl.db.prepare(`INSERT OR IGNORE INTO tasks (id, project_slug, description, category, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
219
+ const insertBlocker = dl.db.prepare(`INSERT OR IGNORE INTO blockers (id, project_slug, title, severity, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
220
+
221
+ // Build existing keys for dedup
222
+ const existingTaskDescs = new Set(
223
+ dl.db.prepare("SELECT description FROM tasks").all().map(t => sha1(normalizeTextForKey(t.description)))
224
+ );
225
+ const existingBlockerTitles = new Set(
226
+ dl.db.prepare("SELECT title FROM blockers").all().map(b => sha1(normalizeTextForKey(b.title)))
227
+ );
228
+
229
+ for (let i = 0; i < files.length; i++) {
230
+ const file = files[i];
231
+ const date = file.replace('.md', '');
232
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
233
+
234
+ // Skip very small logs (< 50 chars) — likely empty or just a header
235
+ if (content.trim().length < 50) {
236
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — skipped (too small)`);
237
+ continue;
238
+ }
239
+
240
+ const fullPrompt = `${sysInstructions}\n\nDAILY LOG (${date}):\n${content}\n`;
241
+ const SAFE_ARG_LEN = 24000;
242
+
243
+ try {
244
+ let r;
245
+ if (fullPrompt.length > SAFE_ARG_LEN) {
246
+ const tmpFile = path.join(os.tmpdir(), `freya-retro-${Date.now()}.txt`);
247
+ fs.writeFileSync(tmpFile, fullPrompt, 'utf8');
248
+ const filePrompt = `Leia o arquivo abaixo e extraia tasks/blockers conforme as instruções contidas nele.\nARQUIVO: ${tmpFile}`;
249
+ r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '--add-dir', os.tmpdir(), '--allow-all-tools', '-p', filePrompt], workspaceDir, agentEnv);
250
+ try { fs.unlinkSync(tmpFile); } catch { }
251
+ } else {
252
+ r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '-p', fullPrompt], workspaceDir, agentEnv);
253
+ }
254
+
255
+ const out = (r.stdout + r.stderr).trim();
256
+ if (r.code !== 0 || !out) {
257
+ totalErrors++;
258
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ planner error `);
259
+ continue;
260
+ }
261
+
262
+ // Parse plan
263
+ const jsonText = extractFirstJsonObject(out) || out;
264
+ let plan;
265
+ try {
266
+ plan = JSON.parse(jsonText);
267
+ } catch {
268
+ try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch {
269
+ totalErrors++;
270
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ invalid JSON `);
271
+ continue;
272
+ }
273
+ }
274
+
275
+ const actions = Array.isArray(plan.actions) ? plan.actions : [];
276
+ let fileTasks = 0;
277
+ let fileBlockers = 0;
278
+ let fileSkipped = 0;
279
+
280
+ if (!DRY_RUN) {
281
+ const applyTx = dl.db.transaction(() => {
282
+ for (const a of actions) {
283
+ if (!a || typeof a !== 'object') continue;
284
+
285
+ if (a.type === 'create_task' && a.description) {
286
+ const desc = normalizeWhitespace(a.description);
287
+ if (!desc) continue;
288
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
289
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
290
+ if (existingTaskDescs.has(key)) { fileSkipped++; continue; }
291
+
292
+ const id = `t-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
293
+ const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
294
+ const metadata = JSON.stringify({ priority: a.priority || 'medium', source: 'retroactive', sourceDate: date });
295
+ // Use the log date as created_at for chronological accuracy
296
+ insertTask.run(id, projectSlug || null, desc, category, 'PENDING', `${date}T12:00:00.000Z`, metadata);
297
+ existingTaskDescs.add(key);
298
+ fileTasks++;
299
+ }
300
+
301
+ if (a.type === 'create_blocker' && a.title) {
302
+ const title = normalizeWhitespace(a.title);
303
+ if (!title) continue;
304
+ const notes = normalizeWhitespace(a.notes);
305
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title + ' ' + notes, slugMap);
306
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
307
+ if (existingBlockerTitles.has(key)) { fileSkipped++; continue; }
308
+
309
+ const id = `b-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
310
+ const severity = String(a.severity || 'MEDIUM').toUpperCase();
311
+ const metadata = JSON.stringify({ description: notes || title, source: 'retroactive', sourceDate: date });
312
+ insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', `${date}T12:00:00.000Z`, metadata);
313
+ existingBlockerTitles.add(key);
314
+ fileBlockers++;
315
+ }
316
+ }
317
+ });
318
+ applyTx();
319
+ } else {
320
+ // Dry run — just count
321
+ for (const a of actions) {
322
+ if (a && a.type === 'create_task' && a.description) fileTasks++;
323
+ if (a && a.type === 'create_blocker' && a.title) fileBlockers++;
324
+ }
325
+ }
326
+
327
+ totalTasks += fileTasks;
328
+ totalBlockers += fileBlockers;
329
+ totalSkipped += fileSkipped;
330
+
331
+ const status = fileTasks || fileBlockers
332
+ ? `${fileTasks}T ${fileBlockers}B${fileSkipped ? ` (${fileSkipped} dup)` : ''}`
333
+ : 'no actions';
334
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${status} `);
335
+
336
+ // Small delay to avoid rate limiting
337
+ if (i < files.length - 1) {
338
+ await new Promise(r => setTimeout(r, 500));
339
+ }
340
+
341
+ } catch (err) {
342
+ totalErrors++;
343
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message} `);
344
+ }
345
+ }
346
+
347
+ // Ensure data is persisted
348
+ dl.db.save();
349
+
350
+ console.log('\n');
351
+ console.log('══════════════════════════════════════════════════════');
352
+ console.log(` 📊 Results ${DRY_RUN ? '(DRY RUN)' : ''}`);
353
+ console.log(` Tasks created: ${totalTasks}`);
354
+ console.log(` Blockers created: ${totalBlockers}`);
355
+ console.log(` Duplicates skipped: ${totalSkipped}`);
356
+ console.log(` Errors: ${totalErrors}`);
357
+ console.log('══════════════════════════════════════════════════════');
358
+
359
+ // Step 4: Generate embeddings for newly created tasks/blockers
360
+ if (!DRY_RUN && !SKIP_EMBEDDINGS && (totalTasks > 0 || totalBlockers > 0)) {
361
+ console.log('\n── Step 4: Generating embeddings for new tasks/blockers ──');
362
+ const dm = new DataManager(workspaceDir, logsDir);
363
+ const newTasks = dl.db.prepare("SELECT id, description FROM tasks WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
364
+ const newBlockers = dl.db.prepare("SELECT id, title, json_extract(metadata, '$.description') as notes FROM blockers WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
365
+
366
+ let embCount = 0;
367
+ for (const t of newTasks) {
368
+ try {
369
+ embCount += await dm.generateEmbeddings('task', t.id, t.description);
370
+ } catch { }
371
+ }
372
+ for (const b of newBlockers) {
373
+ try {
374
+ embCount += await dm.generateEmbeddings('blocker', b.id, b.title + ' ' + (b.notes || ''));
375
+ } catch { }
376
+ }
377
+ console.log(`✅ Generated ${embCount} embedding chunks for new entities`);
378
+ }
379
+
380
+ console.log('\n✅ Retroactive ingestion complete!');
381
+ }
382
+
383
+ main().catch(err => {
384
+ console.error('\n❌ Fatal error:', err.message || err);
385
+ process.exit(1);
386
+ });
@@ -325,6 +325,12 @@ class DataLayer {
325
325
  embedding BLOB NOT NULL, /* Stored as Buffer of Float32Array */
326
326
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
327
327
  );
328
+
329
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_emb_ref
330
+ ON document_embeddings(reference_type, reference_id, chunk_index);
331
+
332
+ CREATE INDEX IF NOT EXISTS idx_doc_emb_type
333
+ ON document_embeddings(reference_type);
328
334
  `);
329
335
 
330
336
  // --- Migrations for existing databases ---
@@ -229,6 +229,95 @@ class DataManager {
229
229
  return NaN;
230
230
  }
231
231
 
232
+ // --- Embedding Generation ---
233
+
234
+ /**
235
+ * Split text into chunks suitable for embedding (~400-600 chars each).
236
+ * Splits on markdown headings, then paragraphs, then sentences.
237
+ */
238
+ chunkText(text, maxChunkSize = 500) {
239
+ if (!text || text.length <= maxChunkSize) return [text].filter(Boolean);
240
+
241
+ const chunks = [];
242
+ // First split on markdown ## headings
243
+ const sections = text.split(/(?=^## )/m).filter(s => s.trim());
244
+
245
+ for (const section of sections) {
246
+ if (section.length <= maxChunkSize) {
247
+ chunks.push(section.trim());
248
+ continue;
249
+ }
250
+ // Split long sections on double newlines (paragraphs)
251
+ const paragraphs = section.split(/\n\n+/).filter(p => p.trim());
252
+ let buffer = '';
253
+ for (const para of paragraphs) {
254
+ if (buffer.length + para.length + 2 > maxChunkSize && buffer) {
255
+ chunks.push(buffer.trim());
256
+ buffer = '';
257
+ }
258
+ buffer += (buffer ? '\n\n' : '') + para;
259
+ }
260
+ if (buffer.trim()) chunks.push(buffer.trim());
261
+ }
262
+
263
+ return chunks.filter(c => c.length > 10); // skip tiny fragments
264
+ }
265
+
266
+ /**
267
+ * Generate embeddings for a piece of content and store in document_embeddings.
268
+ * Deletes existing embeddings for (referenceType, referenceId) first to avoid stale data.
269
+ * @param {string} referenceType - 'daily_log', 'task', or 'blocker'
270
+ * @param {string} referenceId - unique ID (date for logs, task/blocker id)
271
+ * @param {string} text - content to embed
272
+ */
273
+ async generateEmbeddings(referenceType, referenceId, text) {
274
+ if (!text || !text.trim()) return 0;
275
+
276
+ const chunks = this.chunkText(text);
277
+ if (!chunks.length) return 0;
278
+
279
+ // Delete existing embeddings for this reference
280
+ dl.db.prepare('DELETE FROM document_embeddings WHERE reference_type = ? AND reference_id = ?')
281
+ .run(referenceType, referenceId);
282
+
283
+ const insert = dl.db.prepare(`
284
+ INSERT INTO document_embeddings (reference_type, reference_id, chunk_index, text_chunk, embedding)
285
+ VALUES (?, ?, ?, ?, ?)
286
+ `);
287
+
288
+ let count = 0;
289
+ for (let i = 0; i < chunks.length; i++) {
290
+ try {
291
+ const vector = await defaultEmbedder.embedText(chunks[i]);
292
+ const buffer = defaultEmbedder.vectorToBuffer(vector);
293
+ insert.run(referenceType, referenceId, i, chunks[i], buffer);
294
+ count++;
295
+ } catch (err) {
296
+ console.error(`[embeddings] Failed to embed chunk ${i} of ${referenceType}/${referenceId}:`, err.message);
297
+ }
298
+ }
299
+ return count;
300
+ }
301
+
302
+ /**
303
+ * Check if embeddings exist and are up-to-date for a reference.
304
+ * @returns {boolean} true if embeddings exist
305
+ */
306
+ hasEmbeddings(referenceType, referenceId) {
307
+ const row = dl.db.prepare(
308
+ 'SELECT COUNT(*) as c FROM document_embeddings WHERE reference_type = ? AND reference_id = ?'
309
+ ).get(referenceType, referenceId);
310
+ return row && row.c > 0;
311
+ }
312
+
313
+ /**
314
+ * Get total embedding count (for checking if RAG is available).
315
+ */
316
+ getEmbeddingCount() {
317
+ const row = dl.db.prepare('SELECT COUNT(*) as c FROM document_embeddings').get();
318
+ return row ? row.c : 0;
319
+ }
320
+
232
321
  // --- RAG (Vector Search) ---
233
322
  async semanticSearch(query, topK = 10) {
234
323
  const queryVector = await defaultEmbedder.embedText(query);
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * retroactive-ingest.js
4
+ *
5
+ * Reads ALL existing daily logs, sends each through the Copilot CLI planner
6
+ * to extract tasks/blockers, and applies them to SQLite.
7
+ * Also generates embeddings for all daily logs.
8
+ *
9
+ * Usage:
10
+ * node scripts/retroactive-ingest.js [--dry-run] [--days N] [--embeddings-only]
11
+ *
12
+ * Options:
13
+ * --dry-run Show what would be extracted without writing to SQLite
14
+ * --days N Only process the last N days (default: all)
15
+ * --embeddings-only Skip planner, only generate embeddings for existing logs
16
+ * --skip-embeddings Skip embedding generation (only extract tasks/blockers)
17
+ */
18
+
19
+ 'use strict';
20
+
21
+ const fs = require('fs');
22
+ const path = require('path');
23
+ const crypto = require('crypto');
24
+ const { spawn } = require('child_process');
25
+ const os = require('os');
26
+
27
+ // Parse args
28
+ const args = process.argv.slice(2);
29
+ const DRY_RUN = args.includes('--dry-run');
30
+ const EMBEDDINGS_ONLY = args.includes('--embeddings-only');
31
+ const SKIP_EMBEDDINGS = args.includes('--skip-embeddings');
32
+ const daysIdx = args.indexOf('--days');
33
+ const MAX_DAYS = daysIdx >= 0 ? parseInt(args[daysIdx + 1], 10) : 0;
34
+
35
+ // Resolve workspace directory
36
+ const workspaceDir = process.env.FREYA_WORKSPACE_DIR
37
+ ? path.resolve(process.env.FREYA_WORKSPACE_DIR)
38
+ : path.join(__dirname, '..');
39
+
40
+ const { defaultInstance: dl, ready } = require('./lib/DataLayer');
41
+ const DataManager = require('./lib/DataManager');
42
+
43
+ function sha1(text) {
44
+ return crypto.createHash('sha1').update(text).digest('hex');
45
+ }
46
+
47
+ function normalizeWhitespace(t) {
48
+ return String(t || '').replace(/\s+/g, ' ').trim();
49
+ }
50
+
51
+ function normalizeTextForKey(t) {
52
+ return normalizeWhitespace(t).toLowerCase();
53
+ }
54
+
55
+ function run(cmd, args, cwd, extraEnv) {
56
+ return new Promise((resolve) => {
57
+ let child;
58
+ const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;
59
+ try {
60
+ if (process.platform === 'win32') {
61
+ const comspec = process.env.ComSpec || 'cmd.exe';
62
+ child = spawn(comspec, ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env });
63
+ } else {
64
+ child = spawn(cmd, args, { cwd, shell: false, env });
65
+ }
66
+ } catch (e) {
67
+ return resolve({ code: 1, stdout: '', stderr: e.message || String(e) });
68
+ }
69
+ let stdout = '';
70
+ let stderr = '';
71
+ child.stdout && child.stdout.on('data', (d) => { stdout += d.toString(); });
72
+ child.stderr && child.stderr.on('data', (d) => { stderr += d.toString(); });
73
+ child.on('error', (e) => { stderr += `\n${e.message}`; resolve({ code: 1, stdout, stderr }); });
74
+ child.on('close', (code) => resolve({ code: code ?? 0, stdout, stderr }));
75
+ });
76
+ }
77
+
78
+ function extractFirstJsonObject(text) {
79
+ if (!text) return null;
80
+ const start = text.indexOf('{');
81
+ if (start === -1) return null;
82
+ let depth = 0;
83
+ for (let i = start; i < text.length; i++) {
84
+ if (text[i] === '{') depth++;
85
+ else if (text[i] === '}') { depth--; if (depth === 0) return text.slice(start, i + 1); }
86
+ }
87
+ return null;
88
+ }
89
+
90
+ function escapeJsonControlChars(jsonText) {
91
+ return jsonText.replace(/[\x00-\x1F\x7F]/g, (ch) => {
92
+ if (ch === '\n' || ch === '\r' || ch === '\t') return ch;
93
+ return '\\u' + ch.charCodeAt(0).toString(16).padStart(4, '0');
94
+ });
95
+ }
96
+
97
+ function readProjectSlugMap(wsDir) {
98
+ const p = path.join(wsDir, 'data', 'settings', 'project-slug-map.json');
99
+ try {
100
+ return JSON.parse(fs.readFileSync(p, 'utf8'));
101
+ } catch { return {}; }
102
+ }
103
+
104
+ function inferProjectSlug(text, map) {
105
+ if (!text || !map || typeof map !== 'object') return '';
106
+ const lower = text.toLowerCase();
107
+ let bestSlug = '';
108
+ let bestLen = 0;
109
+ for (const [keyword, slug] of Object.entries(map)) {
110
+ if (lower.includes(keyword.toLowerCase()) && keyword.length > bestLen) {
111
+ bestSlug = slug;
112
+ bestLen = keyword.length;
113
+ }
114
+ }
115
+ return bestSlug;
116
+ }
117
+
118
+ async function main() {
119
+ await ready;
120
+
121
+ console.log('╔══════════════════════════════════════════════════════╗');
122
+ console.log('║ FREYA — Retroactive Ingestion ║');
123
+ console.log('╚══════════════════════════════════════════════════════╝');
124
+ console.log(`Workspace: ${workspaceDir}`);
125
+ console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : EMBEDDINGS_ONLY ? 'EMBEDDINGS ONLY' : 'FULL INGESTION'}`);
126
+ console.log('');
127
+
128
+ const logsDir = path.join(workspaceDir, 'logs', 'daily');
129
+ if (!fs.existsSync(logsDir)) {
130
+ console.log('❌ No daily logs directory found at:', logsDir);
131
+ process.exit(1);
132
+ }
133
+
134
+ let files = fs.readdirSync(logsDir)
135
+ .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
136
+ .sort();
137
+
138
+ if (MAX_DAYS > 0) {
139
+ files = files.slice(-MAX_DAYS);
140
+ }
141
+
142
+ console.log(`📁 Found ${files.length} daily log files to process`);
143
+ console.log('');
144
+
145
+ // Step 1: Sync all daily logs to SQLite
146
+ console.log('── Step 1: Syncing daily logs to SQLite ──');
147
+ const upsert = dl.db.prepare(`
148
+ INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
149
+ ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
150
+ `);
151
+ const syncTx = dl.db.transaction((fileList) => {
152
+ for (const file of fileList) {
153
+ const date = file.replace('.md', '');
154
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
155
+ upsert.run(date, content);
156
+ }
157
+ });
158
+ syncTx(files);
159
+ console.log(`✅ ${files.length} daily logs synced to SQLite`);
160
+ console.log('');
161
+
162
+ // Step 2: Generate embeddings for all daily logs
163
+ if (!SKIP_EMBEDDINGS) {
164
+ console.log('── Step 2: Generating embeddings ──');
165
+ const dm = new DataManager(workspaceDir, logsDir);
166
+ let totalChunks = 0;
167
+ for (let i = 0; i < files.length; i++) {
168
+ const date = files[i].replace('.md', '');
169
+ const content = fs.readFileSync(path.join(logsDir, files[i]), 'utf8');
170
+ try {
171
+ const count = await dm.generateEmbeddings('daily_log', date, content);
172
+ totalChunks += count;
173
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${count} chunks`);
174
+ } catch (err) {
175
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message}`);
176
+ }
177
+ }
178
+ console.log(`\n✅ Generated ${totalChunks} embedding chunks total`);
179
+ console.log('');
180
+ }
181
+
182
+ if (EMBEDDINGS_ONLY) {
183
+ console.log('── Embeddings-only mode. Skipping task/blocker extraction. ──');
184
+ dl.db.save();
185
+ console.log('\n✅ Done!');
186
+ return;
187
+ }
188
+
189
+ // Step 3: Extract tasks/blockers from each daily log via planner
190
+ console.log('── Step 3: Extracting tasks & blockers via planner ──');
191
+
192
+ const cmd = process.env.COPILOT_CMD || 'copilot';
193
+ const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
194
+ const slugMap = readProjectSlugMap(workspaceDir);
195
+ const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
196
+
197
+ const schema = {
198
+ actions: [
199
+ { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
200
+ { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
201
+ ]
202
+ };
203
+
204
+ const sysInstructions = `Você é o planner do sistema F.R.E.Y.A.
205
+
206
+ Analise o daily log abaixo e extraia TODAS as tarefas e blockers mencionados.
207
+ Procure por: ações mencionadas, pendências, problemas, impedimentos, decisões que geram trabalho.
208
+ Se NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}
209
+ Retorne APENAS JSON válido no formato: ${JSON.stringify(schema)}
210
+ NÃO use code fences. NÃO inclua texto extra.
211
+ IMPORTANTE: Extraia APENAS informações explícitas do log. NÃO invente dados.`;
212
+
213
+ let totalTasks = 0;
214
+ let totalBlockers = 0;
215
+ let totalSkipped = 0;
216
+ let totalErrors = 0;
217
+
218
+ const insertTask = dl.db.prepare(`INSERT OR IGNORE INTO tasks (id, project_slug, description, category, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
219
+ const insertBlocker = dl.db.prepare(`INSERT OR IGNORE INTO blockers (id, project_slug, title, severity, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
220
+
221
+ // Build existing keys for dedup
222
+ const existingTaskDescs = new Set(
223
+ dl.db.prepare("SELECT description FROM tasks").all().map(t => sha1(normalizeTextForKey(t.description)))
224
+ );
225
+ const existingBlockerTitles = new Set(
226
+ dl.db.prepare("SELECT title FROM blockers").all().map(b => sha1(normalizeTextForKey(b.title)))
227
+ );
228
+
229
+ for (let i = 0; i < files.length; i++) {
230
+ const file = files[i];
231
+ const date = file.replace('.md', '');
232
+ const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
233
+
234
+ // Skip very small logs (< 50 chars) — likely empty or just a header
235
+ if (content.trim().length < 50) {
236
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — skipped (too small)`);
237
+ continue;
238
+ }
239
+
240
+ const fullPrompt = `${sysInstructions}\n\nDAILY LOG (${date}):\n${content}\n`;
241
+ const SAFE_ARG_LEN = 24000;
242
+
243
+ try {
244
+ let r;
245
+ if (fullPrompt.length > SAFE_ARG_LEN) {
246
+ const tmpFile = path.join(os.tmpdir(), `freya-retro-${Date.now()}.txt`);
247
+ fs.writeFileSync(tmpFile, fullPrompt, 'utf8');
248
+ const filePrompt = `Leia o arquivo abaixo e extraia tasks/blockers conforme as instruções contidas nele.\nARQUIVO: ${tmpFile}`;
249
+ r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '--add-dir', os.tmpdir(), '--allow-all-tools', '-p', filePrompt], workspaceDir, agentEnv);
250
+ try { fs.unlinkSync(tmpFile); } catch { }
251
+ } else {
252
+ r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '-p', fullPrompt], workspaceDir, agentEnv);
253
+ }
254
+
255
+ const out = (r.stdout + r.stderr).trim();
256
+ if (r.code !== 0 || !out) {
257
+ totalErrors++;
258
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ planner error `);
259
+ continue;
260
+ }
261
+
262
+ // Parse plan
263
+ const jsonText = extractFirstJsonObject(out) || out;
264
+ let plan;
265
+ try {
266
+ plan = JSON.parse(jsonText);
267
+ } catch {
268
+ try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch {
269
+ totalErrors++;
270
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ invalid JSON `);
271
+ continue;
272
+ }
273
+ }
274
+
275
+ const actions = Array.isArray(plan.actions) ? plan.actions : [];
276
+ let fileTasks = 0;
277
+ let fileBlockers = 0;
278
+ let fileSkipped = 0;
279
+
280
+ if (!DRY_RUN) {
281
+ const applyTx = dl.db.transaction(() => {
282
+ for (const a of actions) {
283
+ if (!a || typeof a !== 'object') continue;
284
+
285
+ if (a.type === 'create_task' && a.description) {
286
+ const desc = normalizeWhitespace(a.description);
287
+ if (!desc) continue;
288
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
289
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
290
+ if (existingTaskDescs.has(key)) { fileSkipped++; continue; }
291
+
292
+ const id = `t-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
293
+ const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
294
+ const metadata = JSON.stringify({ priority: a.priority || 'medium', source: 'retroactive', sourceDate: date });
295
+ // Use the log date as created_at for chronological accuracy
296
+ insertTask.run(id, projectSlug || null, desc, category, 'PENDING', `${date}T12:00:00.000Z`, metadata);
297
+ existingTaskDescs.add(key);
298
+ fileTasks++;
299
+ }
300
+
301
+ if (a.type === 'create_blocker' && a.title) {
302
+ const title = normalizeWhitespace(a.title);
303
+ if (!title) continue;
304
+ const notes = normalizeWhitespace(a.notes);
305
+ const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title + ' ' + notes, slugMap);
306
+ const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
307
+ if (existingBlockerTitles.has(key)) { fileSkipped++; continue; }
308
+
309
+ const id = `b-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
310
+ const severity = String(a.severity || 'MEDIUM').toUpperCase();
311
+ const metadata = JSON.stringify({ description: notes || title, source: 'retroactive', sourceDate: date });
312
+ insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', `${date}T12:00:00.000Z`, metadata);
313
+ existingBlockerTitles.add(key);
314
+ fileBlockers++;
315
+ }
316
+ }
317
+ });
318
+ applyTx();
319
+ } else {
320
+ // Dry run — just count
321
+ for (const a of actions) {
322
+ if (a && a.type === 'create_task' && a.description) fileTasks++;
323
+ if (a && a.type === 'create_blocker' && a.title) fileBlockers++;
324
+ }
325
+ }
326
+
327
+ totalTasks += fileTasks;
328
+ totalBlockers += fileBlockers;
329
+ totalSkipped += fileSkipped;
330
+
331
+ const status = fileTasks || fileBlockers
332
+ ? `${fileTasks}T ${fileBlockers}B${fileSkipped ? ` (${fileSkipped} dup)` : ''}`
333
+ : 'no actions';
334
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${status} `);
335
+
336
+ // Small delay to avoid rate limiting
337
+ if (i < files.length - 1) {
338
+ await new Promise(r => setTimeout(r, 500));
339
+ }
340
+
341
+ } catch (err) {
342
+ totalErrors++;
343
+ process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message} `);
344
+ }
345
+ }
346
+
347
+ // Ensure data is persisted
348
+ dl.db.save();
349
+
350
+ console.log('\n');
351
+ console.log('══════════════════════════════════════════════════════');
352
+ console.log(` 📊 Results ${DRY_RUN ? '(DRY RUN)' : ''}`);
353
+ console.log(` Tasks created: ${totalTasks}`);
354
+ console.log(` Blockers created: ${totalBlockers}`);
355
+ console.log(` Duplicates skipped: ${totalSkipped}`);
356
+ console.log(` Errors: ${totalErrors}`);
357
+ console.log('══════════════════════════════════════════════════════');
358
+
359
+ // Step 4: Generate embeddings for newly created tasks/blockers
360
+ if (!DRY_RUN && !SKIP_EMBEDDINGS && (totalTasks > 0 || totalBlockers > 0)) {
361
+ console.log('\n── Step 4: Generating embeddings for new tasks/blockers ──');
362
+ const dm = new DataManager(workspaceDir, logsDir);
363
+ const newTasks = dl.db.prepare("SELECT id, description FROM tasks WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
364
+ const newBlockers = dl.db.prepare("SELECT id, title, json_extract(metadata, '$.description') as notes FROM blockers WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
365
+
366
+ let embCount = 0;
367
+ for (const t of newTasks) {
368
+ try {
369
+ embCount += await dm.generateEmbeddings('task', t.id, t.description);
370
+ } catch { }
371
+ }
372
+ for (const b of newBlockers) {
373
+ try {
374
+ embCount += await dm.generateEmbeddings('blocker', b.id, b.title + ' ' + (b.notes || ''));
375
+ } catch { }
376
+ }
377
+ console.log(`✅ Generated ${embCount} embedding chunks for new entities`);
378
+ }
379
+
380
+ console.log('\n✅ Retroactive ingestion complete!');
381
+ }
382
+
383
// Entry point: run the retroactive ingestion pipeline and fail loudly on any
// unhandled rejection so cron/CI callers observe a non-zero exit code.
main().catch(err => {
  // The rejection value may not be an Error instance (or may even be
  // null/undefined); optional chaining avoids a secondary TypeError in the
  // handler while still printing the most useful message available.
  console.error('\n❌ Fatal error:', err?.message || err);
  process.exit(1);
});