@cccarv82/freya 3.5.2 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * retroactive-ingest.js
4
+ *
5
+ * Reads ALL existing daily logs, sends each through the Copilot CLI planner
6
+ * to extract tasks/blockers, and applies them to SQLite.
7
+ * Also generates embeddings for all daily logs.
8
+ *
9
+ * Usage:
10
+ * node scripts/retroactive-ingest.js [--dry-run] [--days N] [--embeddings-only]
11
+ *
12
+ * Options:
13
+ * --dry-run Show what would be extracted without writing to SQLite
14
+ * --days N Only process the last N days (default: all)
15
+ * --embeddings-only Skip planner, only generate embeddings for existing logs
16
+ * --skip-embeddings Skip embedding generation (only extract tasks/blockers)
17
+ */
18
+
19
+ 'use strict';
20
+
21
+ const fs = require('fs');
22
+ const path = require('path');
23
+ const crypto = require('crypto');
24
+ const { spawn } = require('child_process');
25
+ const os = require('os');
26
+
27
// Parse args
// Flags are simple presence checks; only --days takes a value.
const args = process.argv.slice(2);
const DRY_RUN = args.includes('--dry-run');
const EMBEDDINGS_ONLY = args.includes('--embeddings-only');
const SKIP_EMBEDDINGS = args.includes('--skip-embeddings');
const daysIdx = args.indexOf('--days');
// 0 means "no limit" (process every daily log). If --days is passed without
// a numeric value, parseInt yields NaN, and NaN > 0 is false — same as no limit.
const MAX_DAYS = daysIdx >= 0 ? parseInt(args[daysIdx + 1], 10) : 0;

// Resolve workspace directory
// FREYA_WORKSPACE_DIR overrides; otherwise the workspace is assumed to be the
// parent of this scripts/ directory.
const workspaceDir = process.env.FREYA_WORKSPACE_DIR
  ? path.resolve(process.env.FREYA_WORKSPACE_DIR)
  : path.join(__dirname, '..');

// Project-local data layer: `dl` exposes the SQLite handle (dl.db), `ready`
// is a promise that resolves once the DB is initialized (awaited in main).
const { defaultInstance: dl, ready } = require('./lib/DataLayer');
const DataManager = require('./lib/DataManager');
42
+
43
/**
 * Hex-encoded SHA-1 digest of `text`.
 * Used only to build deduplication keys — not for anything security-sensitive.
 *
 * @param {string} text - Input to hash.
 * @returns {string} 40-character lowercase hex digest.
 */
function sha1(text) {
  const hasher = crypto.createHash('sha1');
  hasher.update(text);
  return hasher.digest('hex');
}
46
+
47
/**
 * Collapse every run of whitespace in `t` to a single space and trim the ends.
 * Any falsy input (null, undefined, '', 0, false) yields '' — this mirrors the
 * original `String(t || '')` coercion and callers rely on it.
 *
 * @param {*} t - Value to normalize; coerced to string when truthy.
 * @returns {string} Single-spaced, trimmed text.
 */
function normalizeWhitespace(t) {
  if (!t) return '';
  return String(t).trim().replace(/\s+/g, ' ');
}
50
+
51
/**
 * Canonical lowercase form of `t`, used as the input for dedup hash keys:
 * whitespace-collapsed, trimmed, then lowercased.
 *
 * @param {*} t - Value to canonicalize.
 * @returns {string} Lowercased, single-spaced key text.
 */
function normalizeTextForKey(t) {
  const collapsed = normalizeWhitespace(t);
  return collapsed.toLowerCase();
}
54
+
55
/**
 * Spawn `cmd` with `args` and capture stdout/stderr until the process exits.
 *
 * Contract: this promise NEVER rejects. Spawn failures, emitter errors and
 * normal exits all resolve to `{ code, stdout, stderr }`, with code 1 and the
 * error message in stderr on failure. Callers (the planner loop in main)
 * depend on this so one bad invocation does not abort the whole ingestion.
 *
 * @param {string} cmd - Executable name or path.
 * @param {string[]} args - Arguments passed verbatim (shell: false).
 * @param {string} cwd - Working directory for the child.
 * @param {object} [extraEnv] - Extra env vars merged over process.env.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 */
function run(cmd, args, cwd, extraEnv) {
  return new Promise((resolve) => {
    let child;
    const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;
    try {
      if (process.platform === 'win32') {
        // On Windows, route through cmd.exe so PATH/PATHEXT resolution finds
        // .cmd/.bat shims (e.g. npm-installed CLIs). /d skips AutoRun, /s
        // preserves quoting, /c runs the command and exits.
        // NOTE(review): args are forwarded without cmd.exe-specific quoting;
        // arguments containing spaces or cmd metacharacters may be split —
        // presumably fine for the flag-style args used here, but verify.
        const comspec = process.env.ComSpec || 'cmd.exe';
        child = spawn(comspec, ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env });
      } else {
        child = spawn(cmd, args, { cwd, shell: false, env });
      }
    } catch (e) {
      // Synchronous spawn failure (e.g. invalid options) — resolve, not throw.
      return resolve({ code: 1, stdout: '', stderr: e.message || String(e) });
    }
    let stdout = '';
    let stderr = '';
    // Guard with && in case stdio pipes are absent.
    child.stdout && child.stdout.on('data', (d) => { stdout += d.toString(); });
    child.stderr && child.stderr.on('data', (d) => { stderr += d.toString(); });
    // 'error' (e.g. ENOENT) resolves immediately; a later 'close' calling
    // resolve again is a harmless no-op on an already-settled promise.
    child.on('error', (e) => { stderr += `\n${e.message}`; resolve({ code: 1, stdout, stderr }); });
    // code may be null when the child was killed by a signal; treat as 0.
    child.on('close', (code) => resolve({ code: code ?? 0, stdout, stderr }));
  });
}
77
+
78
/**
 * Extract the first balanced top-level JSON object from `text`.
 *
 * Scans from the first '{' and returns the substring up to its matching '}'.
 * Fix over the previous implementation: braces that appear INSIDE JSON string
 * literals (e.g. {"a":"}"}) no longer confuse the depth counter — string and
 * backslash-escape state is tracked so only structural braces are counted.
 *
 * @param {string} text - Raw planner output possibly surrounding a JSON blob.
 * @returns {string|null} The first balanced {...} slice, or null if none.
 */
function extractFirstJsonObject(text) {
  if (!text) return null;
  const start = text.indexOf('{');
  if (start === -1) return null;
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      // Inside a string literal: only an unescaped '"' ends it.
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === '{') depth++;
    else if (ch === '}') { depth--; if (depth === 0) return text.slice(start, i + 1); }
  }
  // Unbalanced — ran off the end without closing the first object.
  return null;
}
89
+
90
/**
 * Replace C0 control characters and DEL (0x00–0x1F, 0x7F) in `jsonText` with
 * their \uXXXX escapes, as a salvage pass before a second JSON.parse attempt.
 *
 * Literal \n, \r and \t are kept as-is: between JSON tokens they are valid
 * whitespace, and escaping them there would itself break the document.
 *
 * @param {string} jsonText - Candidate JSON text that failed to parse.
 * @returns {string} Text with stray control characters escaped.
 */
function escapeJsonControlChars(jsonText) {
  const KEEP = new Set(['\n', '\r', '\t']);
  const toEscape = (ch) => '\\u' + ch.charCodeAt(0).toString(16).padStart(4, '0');
  return jsonText.replace(/[\x00-\x1F\x7F]/g, (ch) => (KEEP.has(ch) ? ch : toEscape(ch)));
}
96
+
97
/**
 * Load the keyword → project-slug mapping from
 * `<wsDir>/data/settings/project-slug-map.json`.
 *
 * Best-effort: a missing, unreadable or malformed file yields an empty map
 * rather than an error, so slug inference silently degrades to no-op.
 *
 * @param {string} wsDir - Workspace root directory.
 * @returns {object} Parsed mapping, or {} on any failure.
 */
function readProjectSlugMap(wsDir) {
  const mapPath = path.join(wsDir, 'data', 'settings', 'project-slug-map.json');
  try {
    const raw = fs.readFileSync(mapPath, 'utf8');
    return JSON.parse(raw);
  } catch {
    return {};
  }
}
103
+
104
/**
 * Infer a project slug for `text` by case-insensitive keyword search.
 *
 * Every key of `map` is checked as a substring of `text`; the LONGEST
 * matching keyword wins (so "freya core" beats "freya"). Ties keep the
 * first match in the map's insertion order.
 *
 * @param {string} text - Text to scan (e.g. a task description).
 * @param {object} map - keyword → slug mapping.
 * @returns {string} Winning slug, or '' when nothing matches or inputs are invalid.
 */
function inferProjectSlug(text, map) {
  if (!text || !map || typeof map !== 'object') return '';
  const haystack = text.toLowerCase();
  let winner = '';
  let winnerLen = 0;
  for (const keyword of Object.keys(map)) {
    if (keyword.length <= winnerLen) continue;
    if (haystack.includes(keyword.toLowerCase())) {
      winner = map[keyword];
      winnerLen = keyword.length;
    }
  }
  return winner;
}
117
+
118
/**
 * Orchestrates the retroactive ingestion pipeline:
 *   1. Mirror every daily log markdown file into the daily_logs table.
 *   2. Generate embeddings per log (unless --skip-embeddings).
 *   3. Run each log through the Copilot CLI planner and insert extracted
 *      tasks/blockers (unless --embeddings-only / --dry-run).
 *   4. Generate embeddings for the newly created entities.
 *
 * Fix over the previous version: deduplication keys for rows already in the
 * database were hashed from description/title ALONE, while keys for freshly
 * extracted actions were hashed with a projectSlug prefix — so any slugged
 * task/blocker never matched an existing row and was re-inserted on every
 * rerun. Both sides now go through the same dedupKey(slug, text) helper.
 */
async function main() {
  await ready;

  console.log('╔══════════════════════════════════════════════════════╗');
  console.log('║ FREYA — Retroactive Ingestion ║');
  console.log('╚══════════════════════════════════════════════════════╝');
  console.log(`Workspace: ${workspaceDir}`);
  console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : EMBEDDINGS_ONLY ? 'EMBEDDINGS ONLY' : 'FULL INGESTION'}`);
  console.log('');

  const logsDir = path.join(workspaceDir, 'logs', 'daily');
  if (!fs.existsSync(logsDir)) {
    console.log('❌ No daily logs directory found at:', logsDir);
    process.exit(1);
  }

  // Only YYYY-MM-DD.md files count as daily logs; lexicographic sort is
  // chronological for this date format.
  let files = fs.readdirSync(logsDir)
    .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
    .sort();

  if (MAX_DAYS > 0) {
    files = files.slice(-MAX_DAYS);
  }

  console.log(`📁 Found ${files.length} daily log files to process`);
  console.log('');

  // Step 1: Sync all daily logs to SQLite (idempotent upsert keyed on date).
  console.log('── Step 1: Syncing daily logs to SQLite ──');
  const upsert = dl.db.prepare(`
    INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
    ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
  `);
  const syncTx = dl.db.transaction((fileList) => {
    for (const file of fileList) {
      const date = file.replace('.md', '');
      const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
      upsert.run(date, content);
    }
  });
  syncTx(files);
  console.log(`✅ ${files.length} daily logs synced to SQLite`);
  console.log('');

  // Step 2: Generate embeddings for all daily logs.
  if (!SKIP_EMBEDDINGS) {
    console.log('── Step 2: Generating embeddings ──');
    const dm = new DataManager(workspaceDir, logsDir);
    let totalChunks = 0;
    for (let i = 0; i < files.length; i++) {
      const date = files[i].replace('.md', '');
      const content = fs.readFileSync(path.join(logsDir, files[i]), 'utf8');
      try {
        const count = await dm.generateEmbeddings('daily_log', date, content);
        totalChunks += count;
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${count} chunks`);
      } catch (err) {
        // Per-log failures are reported inline but do not stop the batch.
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message}`);
      }
    }
    console.log(`\n✅ Generated ${totalChunks} embedding chunks total`);
    console.log('');
  }

  if (EMBEDDINGS_ONLY) {
    console.log('── Embeddings-only mode. Skipping task/blocker extraction. ──');
    dl.db.save();
    console.log('\n✅ Done!');
    return;
  }

  // Step 3: Extract tasks/blockers from each daily log via planner.
  console.log('── Step 3: Extracting tasks & blockers via planner ──');

  const cmd = process.env.COPILOT_CMD || 'copilot';
  const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
  const slugMap = readProjectSlugMap(workspaceDir);
  const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);

  // Shape shown to the planner so it emits parseable JSON actions.
  const schema = {
    actions: [
      { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
      { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
    ]
  };

  const sysInstructions = `Você é o planner do sistema F.R.E.Y.A.

Analise o daily log abaixo e extraia TODAS as tarefas e blockers mencionados.
Procure por: ações mencionadas, pendências, problemas, impedimentos, decisões que geram trabalho.
Se NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}
Retorne APENAS JSON válido no formato: ${JSON.stringify(schema)}
NÃO use code fences. NÃO inclua texto extra.
IMPORTANTE: Extraia APENAS informações explícitas do log. NÃO invente dados.`;

  let totalTasks = 0;
  let totalBlockers = 0;
  let totalSkipped = 0;
  let totalErrors = 0;

  const insertTask = dl.db.prepare(`INSERT OR IGNORE INTO tasks (id, project_slug, description, category, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
  const insertBlocker = dl.db.prepare(`INSERT OR IGNORE INTO blockers (id, project_slug, title, severity, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);

  // Single canonical dedup key: slug-prefixed, normalized, hashed. Used both
  // for rows already in the DB and for newly extracted actions so reruns
  // recognize previously inserted slugged entities (bug fix — see doc above).
  const dedupKey = (slug, text) => sha1(normalizeTextForKey((slug ? slug + ' ' : '') + text));

  const existingTaskDescs = new Set(
    dl.db.prepare("SELECT project_slug, description FROM tasks").all().map(t => dedupKey(t.project_slug, t.description))
  );
  const existingBlockerTitles = new Set(
    dl.db.prepare("SELECT project_slug, title FROM blockers").all().map(b => dedupKey(b.project_slug, b.title))
  );

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    const date = file.replace('.md', '');
    const content = fs.readFileSync(path.join(logsDir, file), 'utf8');

    // Skip very small logs (< 50 chars) — likely empty or just a header
    if (content.trim().length < 50) {
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — skipped (too small)`);
      continue;
    }

    const fullPrompt = `${sysInstructions}\n\nDAILY LOG (${date}):\n${content}\n`;
    // Conservative limit below typical OS argv size caps; longer prompts go
    // through a temp file the agent is allowed to read.
    const SAFE_ARG_LEN = 24000;

    try {
      let r;
      if (fullPrompt.length > SAFE_ARG_LEN) {
        const tmpFile = path.join(os.tmpdir(), `freya-retro-${Date.now()}.txt`);
        fs.writeFileSync(tmpFile, fullPrompt, 'utf8');
        const filePrompt = `Leia o arquivo abaixo e extraia tasks/blockers conforme as instruções contidas nele.\nARQUIVO: ${tmpFile}`;
        r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '--add-dir', os.tmpdir(), '--allow-all-tools', '-p', filePrompt], workspaceDir, agentEnv);
        // Best-effort cleanup; a leaked temp file is harmless.
        try { fs.unlinkSync(tmpFile); } catch { }
      } else {
        r = await run(cmd, ['-s', '--no-color', '--stream', 'off', '-p', fullPrompt], workspaceDir, agentEnv);
      }

      const out = (r.stdout + r.stderr).trim();
      if (r.code !== 0 || !out) {
        totalErrors++;
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ planner error `);
        continue;
      }

      // Parse plan: extract the first JSON object from chatty output, then
      // fall back to control-char escaping if the first parse fails.
      const jsonText = extractFirstJsonObject(out) || out;
      let plan;
      try {
        plan = JSON.parse(jsonText);
      } catch {
        try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch {
          totalErrors++;
          process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ invalid JSON `);
          continue;
        }
      }

      const actions = Array.isArray(plan.actions) ? plan.actions : [];
      let fileTasks = 0;
      let fileBlockers = 0;
      let fileSkipped = 0;

      if (!DRY_RUN) {
        const applyTx = dl.db.transaction(() => {
          for (const a of actions) {
            if (!a || typeof a !== 'object') continue;

            if (a.type === 'create_task' && a.description) {
              const desc = normalizeWhitespace(a.description);
              if (!desc) continue;
              const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
              const key = dedupKey(projectSlug, desc);
              if (existingTaskDescs.has(key)) { fileSkipped++; continue; }

              const id = `t-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
              const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
              const metadata = JSON.stringify({ priority: a.priority || 'medium', source: 'retroactive', sourceDate: date });
              // Use the log date as created_at for chronological accuracy
              insertTask.run(id, projectSlug || null, desc, category, 'PENDING', `${date}T12:00:00.000Z`, metadata);
              existingTaskDescs.add(key);
              fileTasks++;
            }

            if (a.type === 'create_blocker' && a.title) {
              const title = normalizeWhitespace(a.title);
              if (!title) continue;
              const notes = normalizeWhitespace(a.notes);
              const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title + ' ' + notes, slugMap);
              const key = dedupKey(projectSlug, title);
              if (existingBlockerTitles.has(key)) { fileSkipped++; continue; }

              const id = `b-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
              const severity = String(a.severity || 'MEDIUM').toUpperCase();
              const metadata = JSON.stringify({ description: notes || title, source: 'retroactive', sourceDate: date });
              insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', `${date}T12:00:00.000Z`, metadata);
              existingBlockerTitles.add(key);
              fileBlockers++;
            }
          }
        });
        applyTx();
      } else {
        // Dry run — just count
        for (const a of actions) {
          if (a && a.type === 'create_task' && a.description) fileTasks++;
          if (a && a.type === 'create_blocker' && a.title) fileBlockers++;
        }
      }

      totalTasks += fileTasks;
      totalBlockers += fileBlockers;
      totalSkipped += fileSkipped;

      const status = fileTasks || fileBlockers
        ? `${fileTasks}T ${fileBlockers}B${fileSkipped ? ` (${fileSkipped} dup)` : ''}`
        : 'no actions';
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${status} `);

      // Small delay to avoid rate limiting
      if (i < files.length - 1) {
        await new Promise(r => setTimeout(r, 500));
      }

    } catch (err) {
      totalErrors++;
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message} `);
    }
  }

  // Ensure data is persisted
  dl.db.save();

  console.log('\n');
  console.log('══════════════════════════════════════════════════════');
  console.log(` 📊 Results ${DRY_RUN ? '(DRY RUN)' : ''}`);
  console.log(` Tasks created: ${totalTasks}`);
  console.log(` Blockers created: ${totalBlockers}`);
  console.log(` Duplicates skipped: ${totalSkipped}`);
  console.log(` Errors: ${totalErrors}`);
  console.log('══════════════════════════════════════════════════════');

  // Step 4: Generate embeddings for newly created tasks/blockers.
  // NOTE(review): this selects ALL rows with source='retroactive', including
  // ones from earlier runs — presumably generateEmbeddings is idempotent per
  // entity id; verify against DataManager.
  if (!DRY_RUN && !SKIP_EMBEDDINGS && (totalTasks > 0 || totalBlockers > 0)) {
    console.log('\n── Step 4: Generating embeddings for new tasks/blockers ──');
    const dm = new DataManager(workspaceDir, logsDir);
    const newTasks = dl.db.prepare("SELECT id, description FROM tasks WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
    const newBlockers = dl.db.prepare("SELECT id, title, json_extract(metadata, '$.description') as notes FROM blockers WHERE json_extract(metadata, '$.source') = 'retroactive'").all();

    let embCount = 0;
    for (const t of newTasks) {
      try {
        embCount += await dm.generateEmbeddings('task', t.id, t.description);
      } catch { }
    }
    for (const b of newBlockers) {
      try {
        embCount += await dm.generateEmbeddings('blocker', b.id, b.title + ' ' + (b.notes || ''));
      } catch { }
    }
    console.log(`✅ Generated ${embCount} embedding chunks for new entities`);
  }

  console.log('\n✅ Retroactive ingestion complete!');
}
382
+
383
// Entry point. Any rejection escaping main() is reported and the process
// exits non-zero so shell callers and CI can detect the failure.
main().catch(err => {
  console.error('\n❌ Fatal error:', err.message || err);
  process.exit(1);
});