@cccarv82/freya 3.6.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/init.js +4 -2
- package/cli/web.js +18 -21
- package/package.json +2 -1
- package/scripts/lib/Embedder.js +54 -3
- package/scripts/retroactive-ingest.js +419 -0
- package/templates/base/scripts/lib/DataLayer.js +6 -0
- package/templates/base/scripts/lib/DataManager.js +89 -0
- package/templates/base/scripts/lib/Embedder.js +54 -3
- package/templates/base/scripts/retroactive-ingest.js +419 -0
package/cli/init.js
CHANGED
|
@@ -88,11 +88,13 @@ function ensurePackageJson(targetDir, force, summary) {
|
|
|
88
88
|
'sm-weekly': 'node scripts/generate-sm-weekly-report.js',
|
|
89
89
|
daily: 'node scripts/generate-daily-summary.js',
|
|
90
90
|
status: 'node scripts/generate-executive-report.js',
|
|
91
|
-
blockers: 'node scripts/generate-blockers-report.js'
|
|
91
|
+
blockers: 'node scripts/generate-blockers-report.js',
|
|
92
|
+
'retroactive-ingest': 'node scripts/retroactive-ingest.js'
|
|
92
93
|
};
|
|
93
94
|
|
|
94
95
|
const depsToEnsure = {
|
|
95
|
-
'sql.js': '^1.12.0'
|
|
96
|
+
'sql.js': '^1.12.0',
|
|
97
|
+
'@huggingface/transformers': '^3.8.1'
|
|
96
98
|
};
|
|
97
99
|
|
|
98
100
|
if (!existing) {
|
package/cli/web.js
CHANGED
|
@@ -235,31 +235,27 @@ async function buildSmartContext(workspaceDir, query) {
|
|
|
235
235
|
console.error('[context] RAG search failed:', ragErr.message);
|
|
236
236
|
}
|
|
237
237
|
|
|
238
|
-
// 2. Fallback: if RAG not available or returned few results, include recent daily logs
|
|
238
|
+
// 2. Fallback: if RAG not available or returned few results, include recent daily logs from SQLite
|
|
239
239
|
if (!ragUsed || usedBudget < TOKEN_BUDGET / 3) {
|
|
240
240
|
try {
|
|
241
|
-
const
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
parts.push(`\n--- LOG ${date} ---\n${trimmed}`);
|
|
256
|
-
usedBudget += trimmed.length;
|
|
257
|
-
if (usedBudget >= TOKEN_BUDGET) break;
|
|
258
|
-
}
|
|
241
|
+
const maxDays = ragUsed ? 3 : 5;
|
|
242
|
+
const recentLogs = dl.db.prepare(
|
|
243
|
+
`SELECT date, raw_markdown FROM daily_logs ORDER BY date DESC LIMIT ?`
|
|
244
|
+
).all(maxDays);
|
|
245
|
+
if (recentLogs.length) {
|
|
246
|
+
parts.push('\n[DAILY LOGS — ÚLTIMOS ' + recentLogs.length + ' DIAS]');
|
|
247
|
+
// Reverse to show chronologically (oldest first)
|
|
248
|
+
for (const log of recentLogs.reverse()) {
|
|
249
|
+
const maxPerLog = Math.floor((TOKEN_BUDGET - usedBudget) / recentLogs.length);
|
|
250
|
+
const content = log.raw_markdown || '';
|
|
251
|
+
const trimmed = content.length > maxPerLog ? content.slice(0, maxPerLog) + '\n...(truncado)' : content;
|
|
252
|
+
parts.push(`\n--- LOG ${log.date} ---\n${trimmed}`);
|
|
253
|
+
usedBudget += trimmed.length;
|
|
254
|
+
if (usedBudget >= TOKEN_BUDGET) break;
|
|
259
255
|
}
|
|
260
256
|
}
|
|
261
257
|
} catch (e) {
|
|
262
|
-
console.error('[context] Failed to read daily logs:', e.message);
|
|
258
|
+
console.error('[context] Failed to read daily logs from SQLite:', e.message);
|
|
263
259
|
}
|
|
264
260
|
}
|
|
265
261
|
|
|
@@ -914,7 +910,8 @@ function run(cmd, args, cwd, extraEnv, stdinData) {
|
|
|
914
910
|
|
|
915
911
|
try {
|
|
916
912
|
// On Windows, reliably execute CLI tools through cmd.exe.
|
|
917
|
-
|
|
913
|
+
// This ensures PATH resolution works for tools like copilot, gh, npx, npm.
|
|
914
|
+
if (process.platform === 'win32') {
|
|
918
915
|
const comspec = process.env.ComSpec || 'cmd.exe';
|
|
919
916
|
child = spawn(comspec, ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env });
|
|
920
917
|
} else {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cccarv82/freya",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.7.0",
|
|
4
4
|
"description": "Personal AI Assistant with local-first persistence",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"health": "node scripts/validate-data.js && node scripts/validate-structure.js",
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
"status": "node scripts/generate-executive-report.js",
|
|
11
11
|
"blockers": "node scripts/generate-blockers-report.js",
|
|
12
12
|
"export-obsidian": "node scripts/export-obsidian.js",
|
|
13
|
+
"retroactive-ingest": "node scripts/retroactive-ingest.js",
|
|
13
14
|
"build-index": "node scripts/index/build-index.js",
|
|
14
15
|
"update-index": "node scripts/index/update-index.js",
|
|
15
16
|
"test": "node tests/unit/test-package-config.js && node tests/unit/test-cli-init.js && node tests/unit/test-cli-web-help.js && node tests/unit/test-web-static-assets.js && node tests/unit/test-fs-utils.js && node tests/unit/test-search-utils.js && node tests/unit/test-index-utils.js && node tests/unit/test-daily-generation.js && node tests/unit/test-report-generation.js && node tests/unit/test-executive-report-logs.js && node tests/unit/test-oracle-retrieval.js && node tests/unit/test-task-completion.js && node tests/unit/test-migrate-data.js && node tests/unit/test-blockers-report.js && node tests/unit/test-sm-weekly-report.js && node tests/integration/test-ingestor-task.js && node tests/unit/test-structure-validation.js"
|
package/scripts/lib/Embedder.js
CHANGED
|
@@ -1,3 +1,56 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Resolve the @huggingface/transformers package from multiple locations:
|
|
5
|
+
* 1. Local workspace node_modules (preferred)
|
|
6
|
+
* 2. Global FREYA package node_modules (fallback)
|
|
7
|
+
* 3. Parent directory traversal (npm global installs)
|
|
8
|
+
*/
|
|
9
|
+
async function resolveTransformers() {
|
|
10
|
+
// Try 1: direct dynamic import (works if in local node_modules)
|
|
11
|
+
try {
|
|
12
|
+
return await import('@huggingface/transformers');
|
|
13
|
+
} catch { /* not found locally */ }
|
|
14
|
+
|
|
15
|
+
// Try 2: resolve from the global FREYA package directory
|
|
16
|
+
// When installed globally, the package lives at <prefix>/node_modules/@cccarv82/freya/
|
|
17
|
+
// and its dependencies are at <prefix>/node_modules/@huggingface/transformers/
|
|
18
|
+
const globalPaths = [
|
|
19
|
+
// npm global: sibling of @cccarv82/freya in the same node_modules
|
|
20
|
+
path.resolve(__dirname, '..', '..', '..', 'node_modules', '@huggingface', 'transformers'),
|
|
21
|
+
// npm global: nested inside the FREYA package
|
|
22
|
+
path.resolve(__dirname, '..', '..', 'node_modules', '@huggingface', 'transformers'),
|
|
23
|
+
// Hoisted in global prefix
|
|
24
|
+
path.resolve(__dirname, '..', '..', '..', '..', '@huggingface', 'transformers'),
|
|
25
|
+
];
|
|
26
|
+
|
|
27
|
+
for (const p of globalPaths) {
|
|
28
|
+
try {
|
|
29
|
+
const resolved = require.resolve(p);
|
|
30
|
+
if (resolved) return await import(resolved);
|
|
31
|
+
} catch { /* try next */ }
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Try 3: use require.resolve with paths option to search upward
|
|
35
|
+
try {
|
|
36
|
+
const modPath = require.resolve('@huggingface/transformers', {
|
|
37
|
+
paths: [
|
|
38
|
+
path.resolve(__dirname, '..', '..'), // workspace root
|
|
39
|
+
path.resolve(__dirname, '..', '..', '..'), // parent of workspace
|
|
40
|
+
path.resolve(__dirname, '..', '..', '..', '..'), // grandparent
|
|
41
|
+
process.execPath ? path.dirname(path.dirname(process.execPath)) : '', // node prefix
|
|
42
|
+
].filter(Boolean)
|
|
43
|
+
});
|
|
44
|
+
return await import(modPath);
|
|
45
|
+
} catch { /* exhausted all options */ }
|
|
46
|
+
|
|
47
|
+
throw new Error(
|
|
48
|
+
'Cannot find @huggingface/transformers. ' +
|
|
49
|
+
'Run "npm install" in your FREYA workspace to install dependencies, ' +
|
|
50
|
+
'or run "freya init" first to update your workspace package.json.'
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
|
|
1
54
|
class Embedder {
|
|
2
55
|
constructor() {
|
|
3
56
|
// V3: use the official HuggingFace model name (same weights, new namespace)
|
|
@@ -10,9 +63,7 @@ class Embedder {
|
|
|
10
63
|
if (this.extractorInfo) return;
|
|
11
64
|
if (!this.initPromise) {
|
|
12
65
|
this.initPromise = (async () => {
|
|
13
|
-
|
|
14
|
-
// sharp 0.34+ uses prebuilt platform binaries (no node-gyp needed)
|
|
15
|
-
const { pipeline } = await import('@huggingface/transformers');
|
|
66
|
+
const { pipeline } = await resolveTransformers();
|
|
16
67
|
this.extractorInfo = await pipeline('feature-extraction', this.modelName, { quantized: true });
|
|
17
68
|
})().catch((err) => {
|
|
18
69
|
this.initPromise = null;
|
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* retroactive-ingest.js
|
|
4
|
+
*
|
|
5
|
+
* Reads ALL existing daily logs, sends each through the Copilot CLI planner
|
|
6
|
+
* to extract tasks/blockers, and applies them to SQLite.
|
|
7
|
+
* Also generates embeddings for all daily logs.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node scripts/retroactive-ingest.js [--dry-run] [--days N] [--embeddings-only]
|
|
11
|
+
*
|
|
12
|
+
* Options:
|
|
13
|
+
* --dry-run Show what would be extracted without writing to SQLite
|
|
14
|
+
* --days N Only process the last N days (default: all)
|
|
15
|
+
* --embeddings-only Skip planner, only generate embeddings for existing logs
|
|
16
|
+
* --skip-embeddings Skip embedding generation (only extract tasks/blockers)
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
'use strict';
|
|
20
|
+
|
|
21
|
+
const fs = require('fs');
|
|
22
|
+
const path = require('path');
|
|
23
|
+
const crypto = require('crypto');
|
|
24
|
+
const { spawn } = require('child_process');
|
|
25
|
+
const os = require('os');
|
|
26
|
+
|
|
27
|
+
// Parse args
|
|
28
|
+
const args = process.argv.slice(2);
|
|
29
|
+
const DRY_RUN = args.includes('--dry-run');
|
|
30
|
+
const EMBEDDINGS_ONLY = args.includes('--embeddings-only');
|
|
31
|
+
const SKIP_EMBEDDINGS = args.includes('--skip-embeddings');
|
|
32
|
+
const daysIdx = args.indexOf('--days');
|
|
33
|
+
const MAX_DAYS = daysIdx >= 0 ? parseInt(args[daysIdx + 1], 10) : 0;
|
|
34
|
+
|
|
35
|
+
// Resolve workspace directory
|
|
36
|
+
const workspaceDir = process.env.FREYA_WORKSPACE_DIR
|
|
37
|
+
? path.resolve(process.env.FREYA_WORKSPACE_DIR)
|
|
38
|
+
: path.join(__dirname, '..');
|
|
39
|
+
|
|
40
|
+
const { defaultInstance: dl, ready } = require('./lib/DataLayer');
|
|
41
|
+
const DataManager = require('./lib/DataManager');
|
|
42
|
+
|
|
43
|
+
function sha1(text) {
|
|
44
|
+
return crypto.createHash('sha1').update(text).digest('hex');
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function normalizeWhitespace(t) {
|
|
48
|
+
return String(t || '').replace(/\s+/g, ' ').trim();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function normalizeTextForKey(t) {
|
|
52
|
+
return normalizeWhitespace(t).toLowerCase();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function run(cmd, args, cwd, extraEnv) {
|
|
56
|
+
return new Promise((resolve) => {
|
|
57
|
+
let child;
|
|
58
|
+
const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;
|
|
59
|
+
try {
|
|
60
|
+
if (process.platform === 'win32') {
|
|
61
|
+
const comspec = process.env.ComSpec || 'cmd.exe';
|
|
62
|
+
child = spawn(comspec, ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env });
|
|
63
|
+
} else {
|
|
64
|
+
child = spawn(cmd, args, { cwd, shell: false, env });
|
|
65
|
+
}
|
|
66
|
+
} catch (e) {
|
|
67
|
+
return resolve({ code: 1, stdout: '', stderr: e.message || String(e) });
|
|
68
|
+
}
|
|
69
|
+
let stdout = '';
|
|
70
|
+
let stderr = '';
|
|
71
|
+
child.stdout && child.stdout.on('data', (d) => { stdout += d.toString(); });
|
|
72
|
+
child.stderr && child.stderr.on('data', (d) => { stderr += d.toString(); });
|
|
73
|
+
child.on('error', (e) => { stderr += `\n${e.message}`; resolve({ code: 1, stdout, stderr }); });
|
|
74
|
+
child.on('close', (code) => resolve({ code: code ?? 0, stdout, stderr }));
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
function extractFirstJsonObject(text) {
|
|
79
|
+
if (!text) return null;
|
|
80
|
+
const start = text.indexOf('{');
|
|
81
|
+
if (start === -1) return null;
|
|
82
|
+
let depth = 0;
|
|
83
|
+
for (let i = start; i < text.length; i++) {
|
|
84
|
+
if (text[i] === '{') depth++;
|
|
85
|
+
else if (text[i] === '}') { depth--; if (depth === 0) return text.slice(start, i + 1); }
|
|
86
|
+
}
|
|
87
|
+
return null;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function escapeJsonControlChars(jsonText) {
|
|
91
|
+
return jsonText.replace(/[\x00-\x1F\x7F]/g, (ch) => {
|
|
92
|
+
if (ch === '\n' || ch === '\r' || ch === '\t') return ch;
|
|
93
|
+
return '\\u' + ch.charCodeAt(0).toString(16).padStart(4, '0');
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function readProjectSlugMap(wsDir) {
|
|
98
|
+
const p = path.join(wsDir, 'data', 'settings', 'project-slug-map.json');
|
|
99
|
+
try {
|
|
100
|
+
return JSON.parse(fs.readFileSync(p, 'utf8'));
|
|
101
|
+
} catch { return {}; }
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function inferProjectSlug(text, map) {
|
|
105
|
+
if (!text || !map || typeof map !== 'object') return '';
|
|
106
|
+
const lower = text.toLowerCase();
|
|
107
|
+
let bestSlug = '';
|
|
108
|
+
let bestLen = 0;
|
|
109
|
+
for (const [keyword, slug] of Object.entries(map)) {
|
|
110
|
+
if (lower.includes(keyword.toLowerCase()) && keyword.length > bestLen) {
|
|
111
|
+
bestSlug = slug;
|
|
112
|
+
bestLen = keyword.length;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
return bestSlug;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
async function main() {
|
|
119
|
+
await ready;
|
|
120
|
+
|
|
121
|
+
console.log('╔══════════════════════════════════════════════════════╗');
|
|
122
|
+
console.log('║ FREYA — Retroactive Ingestion ║');
|
|
123
|
+
console.log('╚══════════════════════════════════════════════════════╝');
|
|
124
|
+
console.log(`Workspace: ${workspaceDir}`);
|
|
125
|
+
console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : EMBEDDINGS_ONLY ? 'EMBEDDINGS ONLY' : 'FULL INGESTION'}`);
|
|
126
|
+
console.log('');
|
|
127
|
+
|
|
128
|
+
const logsDir = path.join(workspaceDir, 'logs', 'daily');
|
|
129
|
+
if (!fs.existsSync(logsDir)) {
|
|
130
|
+
console.log('❌ No daily logs directory found at:', logsDir);
|
|
131
|
+
process.exit(1);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
let files = fs.readdirSync(logsDir)
|
|
135
|
+
.filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
|
|
136
|
+
.sort();
|
|
137
|
+
|
|
138
|
+
if (MAX_DAYS > 0) {
|
|
139
|
+
files = files.slice(-MAX_DAYS);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
console.log(`📁 Found ${files.length} daily log files to process`);
|
|
143
|
+
console.log('');
|
|
144
|
+
|
|
145
|
+
// Step 1: Sync all daily logs to SQLite
|
|
146
|
+
console.log('── Step 1: Syncing daily logs to SQLite ──');
|
|
147
|
+
const upsert = dl.db.prepare(`
|
|
148
|
+
INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
|
|
149
|
+
ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
|
|
150
|
+
`);
|
|
151
|
+
const syncTx = dl.db.transaction((fileList) => {
|
|
152
|
+
for (const file of fileList) {
|
|
153
|
+
const date = file.replace('.md', '');
|
|
154
|
+
const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
|
|
155
|
+
upsert.run(date, content);
|
|
156
|
+
}
|
|
157
|
+
});
|
|
158
|
+
syncTx(files);
|
|
159
|
+
console.log(`✅ ${files.length} daily logs synced to SQLite`);
|
|
160
|
+
console.log('');
|
|
161
|
+
|
|
162
|
+
// Step 2: Generate embeddings for all daily logs
|
|
163
|
+
if (!SKIP_EMBEDDINGS) {
|
|
164
|
+
console.log('── Step 2: Generating embeddings ──');
|
|
165
|
+
const dm = new DataManager(workspaceDir, logsDir);
|
|
166
|
+
let totalChunks = 0;
|
|
167
|
+
for (let i = 0; i < files.length; i++) {
|
|
168
|
+
const date = files[i].replace('.md', '');
|
|
169
|
+
const content = fs.readFileSync(path.join(logsDir, files[i]), 'utf8');
|
|
170
|
+
try {
|
|
171
|
+
const count = await dm.generateEmbeddings('daily_log', date, content);
|
|
172
|
+
totalChunks += count;
|
|
173
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${count} chunks`);
|
|
174
|
+
} catch (err) {
|
|
175
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message}`);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
console.log(`\n✅ Generated ${totalChunks} embedding chunks total`);
|
|
179
|
+
console.log('');
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
if (EMBEDDINGS_ONLY) {
|
|
183
|
+
console.log('── Embeddings-only mode. Skipping task/blocker extraction. ──');
|
|
184
|
+
dl.db.save();
|
|
185
|
+
console.log('\n✅ Done!');
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Step 3: Extract tasks/blockers from each daily log via planner
|
|
190
|
+
console.log('── Step 3: Extracting tasks & blockers via planner ──');
|
|
191
|
+
|
|
192
|
+
// Detect copilot command: try user override, then 'copilot', then 'gh copilot'
|
|
193
|
+
let cmd = process.env.COPILOT_CMD || '';
|
|
194
|
+
if (!cmd) {
|
|
195
|
+
// Quick test: try 'copilot --version'
|
|
196
|
+
const testCopilot = await run('copilot', ['--version'], workspaceDir);
|
|
197
|
+
if (testCopilot.code === 0) {
|
|
198
|
+
cmd = 'copilot';
|
|
199
|
+
} else {
|
|
200
|
+
// Try 'gh copilot' via 'gh'
|
|
201
|
+
const testGh = await run('gh', ['copilot', '--version'], workspaceDir);
|
|
202
|
+
if (testGh.code === 0) {
|
|
203
|
+
cmd = 'gh';
|
|
204
|
+
console.log(' ℹ Using "gh copilot" as planner command');
|
|
205
|
+
} else {
|
|
206
|
+
cmd = 'copilot'; // default, will fail with clear error
|
|
207
|
+
console.log(' ⚠ Could not detect copilot CLI. Set COPILOT_CMD env var if needed.');
|
|
208
|
+
console.log(` copilot test: code=${testCopilot.code} stderr=${(testCopilot.stderr || '').slice(0, 200)}`);
|
|
209
|
+
console.log(` gh copilot test: code=${testGh.code} stderr=${(testGh.stderr || '').slice(0, 200)}`);
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
const useGhCopilot = cmd === 'gh';
|
|
214
|
+
const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
|
|
215
|
+
const slugMap = readProjectSlugMap(workspaceDir);
|
|
216
|
+
const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);
|
|
217
|
+
|
|
218
|
+
const schema = {
|
|
219
|
+
actions: [
|
|
220
|
+
{ type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
|
|
221
|
+
{ type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
|
|
222
|
+
]
|
|
223
|
+
};
|
|
224
|
+
|
|
225
|
+
const sysInstructions = `Você é o planner do sistema F.R.E.Y.A.
|
|
226
|
+
|
|
227
|
+
Analise o daily log abaixo e extraia TODAS as tarefas e blockers mencionados.
|
|
228
|
+
Procure por: ações mencionadas, pendências, problemas, impedimentos, decisões que geram trabalho.
|
|
229
|
+
Se NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}
|
|
230
|
+
Retorne APENAS JSON válido no formato: ${JSON.stringify(schema)}
|
|
231
|
+
NÃO use code fences. NÃO inclua texto extra.
|
|
232
|
+
IMPORTANTE: Extraia APENAS informações explícitas do log. NÃO invente dados.`;
|
|
233
|
+
|
|
234
|
+
let totalTasks = 0;
|
|
235
|
+
let totalBlockers = 0;
|
|
236
|
+
let totalSkipped = 0;
|
|
237
|
+
let totalErrors = 0;
|
|
238
|
+
|
|
239
|
+
const insertTask = dl.db.prepare(`INSERT OR IGNORE INTO tasks (id, project_slug, description, category, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
|
|
240
|
+
const insertBlocker = dl.db.prepare(`INSERT OR IGNORE INTO blockers (id, project_slug, title, severity, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
|
|
241
|
+
|
|
242
|
+
// Build existing keys for dedup
|
|
243
|
+
const existingTaskDescs = new Set(
|
|
244
|
+
dl.db.prepare("SELECT description FROM tasks").all().map(t => sha1(normalizeTextForKey(t.description)))
|
|
245
|
+
);
|
|
246
|
+
const existingBlockerTitles = new Set(
|
|
247
|
+
dl.db.prepare("SELECT title FROM blockers").all().map(b => sha1(normalizeTextForKey(b.title)))
|
|
248
|
+
);
|
|
249
|
+
|
|
250
|
+
for (let i = 0; i < files.length; i++) {
|
|
251
|
+
const file = files[i];
|
|
252
|
+
const date = file.replace('.md', '');
|
|
253
|
+
const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
|
|
254
|
+
|
|
255
|
+
// Skip very small logs (< 50 chars) — likely empty or just a header
|
|
256
|
+
if (content.trim().length < 50) {
|
|
257
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — skipped (too small)`);
|
|
258
|
+
continue;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const fullPrompt = `${sysInstructions}\n\nDAILY LOG (${date}):\n${content}\n`;
|
|
262
|
+
const SAFE_ARG_LEN = 24000;
|
|
263
|
+
|
|
264
|
+
try {
|
|
265
|
+
let r;
|
|
266
|
+
const baseArgs = useGhCopilot
|
|
267
|
+
? ['copilot', '-s', '--no-color', '--stream', 'off']
|
|
268
|
+
: ['-s', '--no-color', '--stream', 'off'];
|
|
269
|
+
|
|
270
|
+
if (fullPrompt.length > SAFE_ARG_LEN) {
|
|
271
|
+
const tmpFile = path.join(os.tmpdir(), `freya-retro-${Date.now()}.txt`);
|
|
272
|
+
fs.writeFileSync(tmpFile, fullPrompt, 'utf8');
|
|
273
|
+
const filePrompt = `Leia o arquivo abaixo e extraia tasks/blockers conforme as instruções contidas nele.\nARQUIVO: ${tmpFile}`;
|
|
274
|
+
r = await run(cmd, [...baseArgs, '--add-dir', os.tmpdir(), '--allow-all-tools', '-p', filePrompt], workspaceDir, agentEnv);
|
|
275
|
+
try { fs.unlinkSync(tmpFile); } catch { }
|
|
276
|
+
} else {
|
|
277
|
+
r = await run(cmd, [...baseArgs, '-p', fullPrompt], workspaceDir, agentEnv);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
const out = (r.stdout + r.stderr).trim();
|
|
281
|
+
if (r.code !== 0 || !out) {
|
|
282
|
+
totalErrors++;
|
|
283
|
+
// On first error, show verbose diagnostic
|
|
284
|
+
if (totalErrors === 1) {
|
|
285
|
+
console.log(`\n ⚠ Planner diagnostic for ${date}:`);
|
|
286
|
+
console.log(` Command: ${cmd} ${(useGhCopilot ? baseArgs : ['-s', '--no-color', '--stream', 'off', '-p', '...']).join(' ')}`);
|
|
287
|
+
console.log(` Exit code: ${r.code}`);
|
|
288
|
+
console.log(` stdout: ${(r.stdout || '').slice(0, 300)}`);
|
|
289
|
+
console.log(` stderr: ${(r.stderr || '').slice(0, 300)}`);
|
|
290
|
+
}
|
|
291
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ planner error (code=${r.code}) `);
|
|
292
|
+
continue;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
// Parse plan
|
|
296
|
+
const jsonText = extractFirstJsonObject(out) || out;
|
|
297
|
+
let plan;
|
|
298
|
+
try {
|
|
299
|
+
plan = JSON.parse(jsonText);
|
|
300
|
+
} catch {
|
|
301
|
+
try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch {
|
|
302
|
+
totalErrors++;
|
|
303
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ invalid JSON `);
|
|
304
|
+
continue;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
const actions = Array.isArray(plan.actions) ? plan.actions : [];
|
|
309
|
+
let fileTasks = 0;
|
|
310
|
+
let fileBlockers = 0;
|
|
311
|
+
let fileSkipped = 0;
|
|
312
|
+
|
|
313
|
+
if (!DRY_RUN) {
|
|
314
|
+
const applyTx = dl.db.transaction(() => {
|
|
315
|
+
for (const a of actions) {
|
|
316
|
+
if (!a || typeof a !== 'object') continue;
|
|
317
|
+
|
|
318
|
+
if (a.type === 'create_task' && a.description) {
|
|
319
|
+
const desc = normalizeWhitespace(a.description);
|
|
320
|
+
if (!desc) continue;
|
|
321
|
+
const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
|
|
322
|
+
const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
|
|
323
|
+
if (existingTaskDescs.has(key)) { fileSkipped++; continue; }
|
|
324
|
+
|
|
325
|
+
const id = `t-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
|
|
326
|
+
const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
|
|
327
|
+
const metadata = JSON.stringify({ priority: a.priority || 'medium', source: 'retroactive', sourceDate: date });
|
|
328
|
+
// Use the log date as created_at for chronological accuracy
|
|
329
|
+
insertTask.run(id, projectSlug || null, desc, category, 'PENDING', `${date}T12:00:00.000Z`, metadata);
|
|
330
|
+
existingTaskDescs.add(key);
|
|
331
|
+
fileTasks++;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
if (a.type === 'create_blocker' && a.title) {
|
|
335
|
+
const title = normalizeWhitespace(a.title);
|
|
336
|
+
if (!title) continue;
|
|
337
|
+
const notes = normalizeWhitespace(a.notes);
|
|
338
|
+
const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title + ' ' + notes, slugMap);
|
|
339
|
+
const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
|
|
340
|
+
if (existingBlockerTitles.has(key)) { fileSkipped++; continue; }
|
|
341
|
+
|
|
342
|
+
const id = `b-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
|
|
343
|
+
const severity = String(a.severity || 'MEDIUM').toUpperCase();
|
|
344
|
+
const metadata = JSON.stringify({ description: notes || title, source: 'retroactive', sourceDate: date });
|
|
345
|
+
insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', `${date}T12:00:00.000Z`, metadata);
|
|
346
|
+
existingBlockerTitles.add(key);
|
|
347
|
+
fileBlockers++;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
});
|
|
351
|
+
applyTx();
|
|
352
|
+
} else {
|
|
353
|
+
// Dry run — just count
|
|
354
|
+
for (const a of actions) {
|
|
355
|
+
if (a && a.type === 'create_task' && a.description) fileTasks++;
|
|
356
|
+
if (a && a.type === 'create_blocker' && a.title) fileBlockers++;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
totalTasks += fileTasks;
|
|
361
|
+
totalBlockers += fileBlockers;
|
|
362
|
+
totalSkipped += fileSkipped;
|
|
363
|
+
|
|
364
|
+
const status = fileTasks || fileBlockers
|
|
365
|
+
? `${fileTasks}T ${fileBlockers}B${fileSkipped ? ` (${fileSkipped} dup)` : ''}`
|
|
366
|
+
: 'no actions';
|
|
367
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${status} `);
|
|
368
|
+
|
|
369
|
+
// Small delay to avoid rate limiting
|
|
370
|
+
if (i < files.length - 1) {
|
|
371
|
+
await new Promise(r => setTimeout(r, 500));
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
} catch (err) {
|
|
375
|
+
totalErrors++;
|
|
376
|
+
process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message} `);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// Ensure data is persisted
|
|
381
|
+
dl.db.save();
|
|
382
|
+
|
|
383
|
+
console.log('\n');
|
|
384
|
+
console.log('══════════════════════════════════════════════════════');
|
|
385
|
+
console.log(` 📊 Results ${DRY_RUN ? '(DRY RUN)' : ''}`);
|
|
386
|
+
console.log(` Tasks created: ${totalTasks}`);
|
|
387
|
+
console.log(` Blockers created: ${totalBlockers}`);
|
|
388
|
+
console.log(` Duplicates skipped: ${totalSkipped}`);
|
|
389
|
+
console.log(` Errors: ${totalErrors}`);
|
|
390
|
+
console.log('══════════════════════════════════════════════════════');
|
|
391
|
+
|
|
392
|
+
// Step 4: Generate embeddings for newly created tasks/blockers
|
|
393
|
+
if (!DRY_RUN && !SKIP_EMBEDDINGS && (totalTasks > 0 || totalBlockers > 0)) {
|
|
394
|
+
console.log('\n── Step 4: Generating embeddings for new tasks/blockers ──');
|
|
395
|
+
const dm = new DataManager(workspaceDir, logsDir);
|
|
396
|
+
const newTasks = dl.db.prepare("SELECT id, description FROM tasks WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
|
|
397
|
+
const newBlockers = dl.db.prepare("SELECT id, title, json_extract(metadata, '$.description') as notes FROM blockers WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
|
|
398
|
+
|
|
399
|
+
let embCount = 0;
|
|
400
|
+
for (const t of newTasks) {
|
|
401
|
+
try {
|
|
402
|
+
embCount += await dm.generateEmbeddings('task', t.id, t.description);
|
|
403
|
+
} catch { }
|
|
404
|
+
}
|
|
405
|
+
for (const b of newBlockers) {
|
|
406
|
+
try {
|
|
407
|
+
embCount += await dm.generateEmbeddings('blocker', b.id, b.title + ' ' + (b.notes || ''));
|
|
408
|
+
} catch { }
|
|
409
|
+
}
|
|
410
|
+
console.log(`✅ Generated ${embCount} embedding chunks for new entities`);
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
console.log('\n✅ Retroactive ingestion complete!');
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
main().catch(err => {
|
|
417
|
+
console.error('\n❌ Fatal error:', err.message || err);
|
|
418
|
+
process.exit(1);
|
|
419
|
+
});
|
|
@@ -325,6 +325,12 @@ class DataLayer {
|
|
|
325
325
|
embedding BLOB NOT NULL, /* Stored as Buffer of Float32Array */
|
|
326
326
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
327
327
|
);
|
|
328
|
+
|
|
329
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_emb_ref
|
|
330
|
+
ON document_embeddings(reference_type, reference_id, chunk_index);
|
|
331
|
+
|
|
332
|
+
CREATE INDEX IF NOT EXISTS idx_doc_emb_type
|
|
333
|
+
ON document_embeddings(reference_type);
|
|
328
334
|
`);
|
|
329
335
|
|
|
330
336
|
// --- Migrations for existing databases ---
|
|
@@ -229,6 +229,95 @@ class DataManager {
|
|
|
229
229
|
return NaN;
|
|
230
230
|
}
|
|
231
231
|
|
|
232
|
+
// --- Embedding Generation ---
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Split text into chunks suitable for embedding (~400-600 chars each).
|
|
236
|
+
* Splits on markdown headings, then paragraphs, then sentences.
|
|
237
|
+
*/
|
|
238
|
+
chunkText(text, maxChunkSize = 500) {
|
|
239
|
+
if (!text || text.length <= maxChunkSize) return [text].filter(Boolean);
|
|
240
|
+
|
|
241
|
+
const chunks = [];
|
|
242
|
+
// First split on markdown ## headings
|
|
243
|
+
const sections = text.split(/(?=^## )/m).filter(s => s.trim());
|
|
244
|
+
|
|
245
|
+
for (const section of sections) {
|
|
246
|
+
if (section.length <= maxChunkSize) {
|
|
247
|
+
chunks.push(section.trim());
|
|
248
|
+
continue;
|
|
249
|
+
}
|
|
250
|
+
// Split long sections on double newlines (paragraphs)
|
|
251
|
+
const paragraphs = section.split(/\n\n+/).filter(p => p.trim());
|
|
252
|
+
let buffer = '';
|
|
253
|
+
for (const para of paragraphs) {
|
|
254
|
+
if (buffer.length + para.length + 2 > maxChunkSize && buffer) {
|
|
255
|
+
chunks.push(buffer.trim());
|
|
256
|
+
buffer = '';
|
|
257
|
+
}
|
|
258
|
+
buffer += (buffer ? '\n\n' : '') + para;
|
|
259
|
+
}
|
|
260
|
+
if (buffer.trim()) chunks.push(buffer.trim());
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
return chunks.filter(c => c.length > 10); // skip tiny fragments
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
/**
|
|
267
|
+
* Generate embeddings for a piece of content and store in document_embeddings.
|
|
268
|
+
* Deletes existing embeddings for (referenceType, referenceId) first to avoid stale data.
|
|
269
|
+
* @param {string} referenceType - 'daily_log', 'task', or 'blocker'
|
|
270
|
+
* @param {string} referenceId - unique ID (date for logs, task/blocker id)
|
|
271
|
+
* @param {string} text - content to embed
|
|
272
|
+
*/
|
|
273
|
+
async generateEmbeddings(referenceType, referenceId, text) {
|
|
274
|
+
if (!text || !text.trim()) return 0;
|
|
275
|
+
|
|
276
|
+
const chunks = this.chunkText(text);
|
|
277
|
+
if (!chunks.length) return 0;
|
|
278
|
+
|
|
279
|
+
// Delete existing embeddings for this reference
|
|
280
|
+
dl.db.prepare('DELETE FROM document_embeddings WHERE reference_type = ? AND reference_id = ?')
|
|
281
|
+
.run(referenceType, referenceId);
|
|
282
|
+
|
|
283
|
+
const insert = dl.db.prepare(`
|
|
284
|
+
INSERT INTO document_embeddings (reference_type, reference_id, chunk_index, text_chunk, embedding)
|
|
285
|
+
VALUES (?, ?, ?, ?, ?)
|
|
286
|
+
`);
|
|
287
|
+
|
|
288
|
+
let count = 0;
|
|
289
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
290
|
+
try {
|
|
291
|
+
const vector = await defaultEmbedder.embedText(chunks[i]);
|
|
292
|
+
const buffer = defaultEmbedder.vectorToBuffer(vector);
|
|
293
|
+
insert.run(referenceType, referenceId, i, chunks[i], buffer);
|
|
294
|
+
count++;
|
|
295
|
+
} catch (err) {
|
|
296
|
+
console.error(`[embeddings] Failed to embed chunk ${i} of ${referenceType}/${referenceId}:`, err.message);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
return count;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
/**
|
|
303
|
+
* Check if embeddings exist and are up-to-date for a reference.
|
|
304
|
+
* @returns {boolean} true if embeddings exist
|
|
305
|
+
*/
|
|
306
|
+
hasEmbeddings(referenceType, referenceId) {
|
|
307
|
+
const row = dl.db.prepare(
|
|
308
|
+
'SELECT COUNT(*) as c FROM document_embeddings WHERE reference_type = ? AND reference_id = ?'
|
|
309
|
+
).get(referenceType, referenceId);
|
|
310
|
+
return row && row.c > 0;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* Get total embedding count (for checking if RAG is available).
|
|
315
|
+
*/
|
|
316
|
+
getEmbeddingCount() {
|
|
317
|
+
const row = dl.db.prepare('SELECT COUNT(*) as c FROM document_embeddings').get();
|
|
318
|
+
return row ? row.c : 0;
|
|
319
|
+
}
|
|
320
|
+
|
|
232
321
|
// --- RAG (Vector Search) ---
|
|
233
322
|
async semanticSearch(query, topK = 10) {
|
|
234
323
|
const queryVector = await defaultEmbedder.embedText(query);
|
|
@@ -1,3 +1,56 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
|
|
3
|
+
/**
 * Resolve the @huggingface/transformers package from multiple locations:
 * 1. Local workspace node_modules (preferred)
 * 2. Global FREYA package node_modules (fallback)
 * 3. Parent directory traversal (npm global installs)
 * @returns {Promise<object>} the imported @huggingface/transformers module namespace
 * @throws {Error} when the package cannot be located in any known location
 */
async function resolveTransformers() {
  // Dynamic import() only accepts bare specifiers or URLs. Absolute filesystem
  // paths — in particular Windows drive paths like "C:\..." — must be converted
  // to file:// URLs, otherwise Node rejects with ERR_UNSUPPORTED_ESM_URL_SCHEME.
  const { pathToFileURL } = require('url');
  const importAbsolute = (absPath) => import(pathToFileURL(absPath).href);

  // Try 1: direct dynamic import (works if in local node_modules)
  try {
    return await import('@huggingface/transformers');
  } catch { /* not found locally */ }

  // Try 2: resolve from the global FREYA package directory
  // When installed globally, the package lives at <prefix>/node_modules/@cccarv82/freya/
  // and its dependencies are at <prefix>/node_modules/@huggingface/transformers/
  const globalPaths = [
    // npm global: sibling of @cccarv82/freya in the same node_modules
    path.resolve(__dirname, '..', '..', '..', 'node_modules', '@huggingface', 'transformers'),
    // npm global: nested inside the FREYA package
    path.resolve(__dirname, '..', '..', 'node_modules', '@huggingface', 'transformers'),
    // Hoisted in global prefix
    path.resolve(__dirname, '..', '..', '..', '..', '@huggingface', 'transformers'),
  ];

  for (const p of globalPaths) {
    try {
      // require.resolve maps the directory to its entry file (via package.json).
      const resolved = require.resolve(p);
      if (resolved) return await importAbsolute(resolved);
    } catch { /* try next */ }
  }

  // Try 3: use require.resolve with paths option to search upward
  try {
    const modPath = require.resolve('@huggingface/transformers', {
      paths: [
        path.resolve(__dirname, '..', '..'), // workspace root
        path.resolve(__dirname, '..', '..', '..'), // parent of workspace
        path.resolve(__dirname, '..', '..', '..', '..'), // grandparent
        process.execPath ? path.dirname(path.dirname(process.execPath)) : '', // node prefix
      ].filter(Boolean)
    });
    return await importAbsolute(modPath);
  } catch { /* exhausted all options */ }

  throw new Error(
    'Cannot find @huggingface/transformers. ' +
    'Run "npm install" in your FREYA workspace to install dependencies, ' +
    'or run "freya init" first to update your workspace package.json.'
  );
}
|
|
53
|
+
|
|
1
54
|
class Embedder {
|
|
2
55
|
constructor() {
|
|
3
56
|
// V3: use the official HuggingFace model name (same weights, new namespace)
|
|
@@ -10,9 +63,7 @@ class Embedder {
|
|
|
10
63
|
if (this.extractorInfo) return;
|
|
11
64
|
if (!this.initPromise) {
|
|
12
65
|
this.initPromise = (async () => {
|
|
13
|
-
|
|
14
|
-
// sharp 0.34+ uses prebuilt platform binaries (no node-gyp needed)
|
|
15
|
-
const { pipeline } = await import('@huggingface/transformers');
|
|
66
|
+
const { pipeline } = await resolveTransformers();
|
|
16
67
|
this.extractorInfo = await pipeline('feature-extraction', this.modelName, { quantized: true });
|
|
17
68
|
})().catch((err) => {
|
|
18
69
|
this.initPromise = null;
|
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * retroactive-ingest.js
 *
 * Reads ALL existing daily logs, sends each through the Copilot CLI planner
 * to extract tasks/blockers, and applies them to SQLite.
 * Also generates embeddings for all daily logs.
 *
 * Usage:
 *   node scripts/retroactive-ingest.js [--dry-run] [--days N] [--embeddings-only]
 *
 * Options:
 *   --dry-run          Show what would be extracted without writing to SQLite
 *   --days N           Only process the last N days (default: all)
 *   --embeddings-only  Skip planner, only generate embeddings for existing logs
 *   --skip-embeddings  Skip embedding generation (only extract tasks/blockers)
 */

'use strict';

const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const { spawn } = require('child_process');
const os = require('os');

// Parse args — simple flag scan; flags may appear in any order.
const args = process.argv.slice(2);
const DRY_RUN = args.includes('--dry-run');
const EMBEDDINGS_ONLY = args.includes('--embeddings-only');
const SKIP_EMBEDDINGS = args.includes('--skip-embeddings');
const daysIdx = args.indexOf('--days');
// If --days is given without a numeric value, parseInt yields NaN;
// NaN > 0 is false downstream, so the script falls back to all days.
const MAX_DAYS = daysIdx >= 0 ? parseInt(args[daysIdx + 1], 10) : 0;

// Resolve workspace directory: explicit env override wins; otherwise
// assume this script lives at <workspace>/scripts/.
const workspaceDir = process.env.FREYA_WORKSPACE_DIR
  ? path.resolve(process.env.FREYA_WORKSPACE_DIR)
  : path.join(__dirname, '..');

// Project-local data layer (SQLite handle `dl` plus its `ready` promise)
// and the embeddings manager used for chunking + vector generation.
const { defaultInstance: dl, ready } = require('./lib/DataLayer');
const DataManager = require('./lib/DataManager');
|
|
42
|
+
|
|
43
|
+
/** Hex-encoded SHA-1 digest of `text`; used to build stable dedup keys. */
function sha1(text) {
  const hash = crypto.createHash('sha1');
  hash.update(text);
  return hash.digest('hex');
}
|
|
46
|
+
|
|
47
|
+
/** Collapse every whitespace run to a single space and trim the ends. */
function normalizeWhitespace(t) {
  const collapsed = String(t || '').replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
|
50
|
+
|
|
51
|
+
/** Lowercased, whitespace-normalized form of `t`, suitable as a dedup key. */
function normalizeTextForKey(t) {
  return String(t || '').replace(/\s+/g, ' ').trim().toLowerCase();
}
|
|
54
|
+
|
|
55
|
+
/**
 * Spawn an external command and capture its output; never rejects.
 * On Windows the command is routed through the shell interpreter (ComSpec)
 * so that .cmd/.bat shims are resolvable.
 * @param {string} cmd - executable or command name
 * @param {string[]} args - argument vector
 * @param {string} cwd - working directory for the child process
 * @param {object} [extraEnv] - extra env vars merged over process.env
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 */
function run(cmd, args, cwd, extraEnv) {
  return new Promise((resolve) => {
    const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;

    let child;
    try {
      child = process.platform === 'win32'
        ? spawn(process.env.ComSpec || 'cmd.exe', ['/d', '/s', '/c', cmd, ...args], { cwd, shell: false, env })
        : spawn(cmd, args, { cwd, shell: false, env });
    } catch (e) {
      resolve({ code: 1, stdout: '', stderr: e.message || String(e) });
      return;
    }

    let stdout = '';
    let stderr = '';
    if (child.stdout) child.stdout.on('data', (d) => { stdout += d.toString(); });
    if (child.stderr) child.stderr.on('data', (d) => { stderr += d.toString(); });
    // Spawn failures (e.g. ENOENT) surface as 'error'; report them as code 1.
    child.on('error', (e) => { stderr += `\n${e.message}`; resolve({ code: 1, stdout, stderr }); });
    child.on('close', (code) => resolve({ code: code ?? 0, stdout, stderr }));
  });
}
|
|
77
|
+
|
|
78
|
+
/**
 * Extract the first balanced top-level JSON object from a blob of text.
 * Brace matching is string-aware: '{' and '}' inside JSON string literals
 * (including escaped quotes) do not affect the depth count, so objects whose
 * string values contain braces are extracted correctly — the previous naive
 * brace counter returned a truncated, unparseable fragment for such input.
 * @param {string} text - raw planner output, possibly with surrounding prose
 * @returns {string|null} the JSON object substring, or null if none found
 */
function extractFirstJsonObject(text) {
  if (!text) return null;
  const start = text.indexOf('{');
  if (start === -1) return null;
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false; // the escaped character is consumed verbatim
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === '{') depth++;
    else if (ch === '}') { depth--; if (depth === 0) return text.slice(start, i + 1); }
  }
  return null; // unbalanced braces: no complete object found
}
|
|
89
|
+
|
|
90
|
+
/**
 * Replace stray control characters with \uXXXX escapes so JSON.parse can
 * cope with planner output that embeds raw control bytes.
 * Newline, carriage return, and tab are deliberately left untouched.
 */
function escapeJsonControlChars(jsonText) {
  const keepAsIs = new Set(['\n', '\r', '\t']);
  return jsonText.replace(/[\x00-\x1F\x7F]/g, (ch) => {
    if (keepAsIs.has(ch)) return ch;
    const hex = ch.charCodeAt(0).toString(16).padStart(4, '0');
    return '\\u' + hex;
  });
}
|
|
96
|
+
|
|
97
|
+
/**
 * Load the keyword → project-slug mapping from workspace settings.
 * Returns an empty object when the file is missing or unparseable.
 */
function readProjectSlugMap(wsDir) {
  const mapFile = path.join(wsDir, 'data', 'settings', 'project-slug-map.json');
  try {
    const raw = fs.readFileSync(mapFile, 'utf8');
    return JSON.parse(raw);
  } catch {
    return {};
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Infer a project slug by scanning `text` for known keywords.
 * Matching is case-insensitive; when several keywords match, the longest
 * keyword wins (most specific), ties resolved by first map entry seen.
 * @param {string} text - free text to scan
 * @param {Object<string,string>} map - keyword → slug mapping
 * @returns {string} the matched slug, or '' when nothing matches
 */
function inferProjectSlug(text, map) {
  if (!text || !map || typeof map !== 'object') return '';
  const haystack = text.toLowerCase();
  let best = { slug: '', len: 0 };
  for (const [keyword, slug] of Object.entries(map)) {
    const isMoreSpecific = keyword.length > best.len;
    if (isMoreSpecific && haystack.includes(keyword.toLowerCase())) {
      best = { slug, len: keyword.length };
    }
  }
  return best.slug;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Orchestrates the full retroactive ingestion pipeline:
 *   1. Mirror every logs/daily/YYYY-MM-DD.md file into the daily_logs table.
 *   2. (unless --skip-embeddings) Generate vector embeddings per daily log.
 *   3. (unless --embeddings-only) Run each log through the Copilot CLI
 *      planner to extract tasks/blockers and insert them into SQLite.
 *   4. Generate embeddings for tasks/blockers sourced from this ingestion.
 * Exits the process with code 1 when the logs directory is missing.
 */
async function main() {
  // Wait for the SQLite data layer to finish opening.
  await ready;

  console.log('╔══════════════════════════════════════════════════════╗');
  console.log('║ FREYA — Retroactive Ingestion ║');
  console.log('╚══════════════════════════════════════════════════════╝');
  console.log(`Workspace: ${workspaceDir}`);
  console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : EMBEDDINGS_ONLY ? 'EMBEDDINGS ONLY' : 'FULL INGESTION'}`);
  console.log('');

  const logsDir = path.join(workspaceDir, 'logs', 'daily');
  if (!fs.existsSync(logsDir)) {
    console.log('❌ No daily logs directory found at:', logsDir);
    process.exit(1);
  }

  // Only files named exactly YYYY-MM-DD.md count as daily logs; for this date
  // format, lexicographic sort is also chronological.
  let files = fs.readdirSync(logsDir)
    .filter(f => /^\d{4}-\d{2}-\d{2}\.md$/.test(f))
    .sort();

  if (MAX_DAYS > 0) {
    // Keep only the most recent N logs (list is chronologically sorted).
    files = files.slice(-MAX_DAYS);
  }

  console.log(`📁 Found ${files.length} daily log files to process`);
  console.log('');

  // Step 1: Sync all daily logs to SQLite (idempotent upsert keyed on date).
  console.log('── Step 1: Syncing daily logs to SQLite ──');
  const upsert = dl.db.prepare(`
    INSERT INTO daily_logs (date, raw_markdown) VALUES (?, ?)
    ON CONFLICT(date) DO UPDATE SET raw_markdown = excluded.raw_markdown
  `);
  const syncTx = dl.db.transaction((fileList) => {
    for (const file of fileList) {
      const date = file.replace('.md', '');
      const content = fs.readFileSync(path.join(logsDir, file), 'utf8');
      upsert.run(date, content);
    }
  });
  syncTx(files);
  console.log(`✅ ${files.length} daily logs synced to SQLite`);
  console.log('');

  // Step 2: Generate embeddings for all daily logs (sequential; the embedder
  // call is awaited per file, and per-file failures are reported inline).
  if (!SKIP_EMBEDDINGS) {
    console.log('── Step 2: Generating embeddings ──');
    const dm = new DataManager(workspaceDir, logsDir);
    let totalChunks = 0;
    for (let i = 0; i < files.length; i++) {
      const date = files[i].replace('.md', '');
      const content = fs.readFileSync(path.join(logsDir, files[i]), 'utf8');
      try {
        const count = await dm.generateEmbeddings('daily_log', date, content);
        totalChunks += count;
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${count} chunks`);
      } catch (err) {
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message}`);
      }
    }
    console.log(`\n✅ Generated ${totalChunks} embedding chunks total`);
    console.log('');
  }

  if (EMBEDDINGS_ONLY) {
    console.log('── Embeddings-only mode. Skipping task/blocker extraction. ──');
    // Persist before early return (sql.js keeps the DB in memory until saved).
    dl.db.save();
    console.log('\n✅ Done!');
    return;
  }

  // Step 3: Extract tasks/blockers from each daily log via planner
  console.log('── Step 3: Extracting tasks & blockers via planner ──');

  // Detect copilot command: try user override, then 'copilot', then 'gh copilot'
  let cmd = process.env.COPILOT_CMD || '';
  if (!cmd) {
    // Quick test: try 'copilot --version'
    const testCopilot = await run('copilot', ['--version'], workspaceDir);
    if (testCopilot.code === 0) {
      cmd = 'copilot';
    } else {
      // Try 'gh copilot' via 'gh'
      const testGh = await run('gh', ['copilot', '--version'], workspaceDir);
      if (testGh.code === 0) {
        cmd = 'gh';
        console.log(' ℹ Using "gh copilot" as planner command');
      } else {
        cmd = 'copilot'; // default, will fail with clear error
        console.log(' ⚠ Could not detect copilot CLI. Set COPILOT_CMD env var if needed.');
        console.log(` copilot test: code=${testCopilot.code} stderr=${(testCopilot.stderr || '').slice(0, 200)}`);
        console.log(` gh copilot test: code=${testGh.code} stderr=${(testGh.stderr || '').slice(0, 200)}`);
      }
    }
  }
  const useGhCopilot = cmd === 'gh';
  const agentEnv = { FREYA_WORKSPACE_DIR: workspaceDir };
  const slugMap = readProjectSlugMap(workspaceDir);
  const validTaskCats = new Set(['DO_NOW', 'SCHEDULE', 'DELEGATE', 'IGNORE']);

  // Response schema embedded verbatim into the planner prompt below.
  const schema = {
    actions: [
      { type: 'create_task', description: '<string>', priority: 'HIGH|MEDIUM|LOW', category: 'DO_NOW|SCHEDULE|DELEGATE|IGNORE', projectSlug: '<string optional>' },
      { type: 'create_blocker', title: '<string>', severity: 'CRITICAL|HIGH|MEDIUM|LOW', notes: '<string>', projectSlug: '<string optional>' }
    ]
  };

  // Planner system prompt (Portuguese, user-facing — must stay as-is):
  // instructs the model to extract all tasks/blockers and reply with raw JSON.
  const sysInstructions = `Você é o planner do sistema F.R.E.Y.A.

Analise o daily log abaixo e extraia TODAS as tarefas e blockers mencionados.
Procure por: ações mencionadas, pendências, problemas, impedimentos, decisões que geram trabalho.
Se NÃO houver tarefas ou blockers claros, retorne: {"actions":[]}
Retorne APENAS JSON válido no formato: ${JSON.stringify(schema)}
NÃO use code fences. NÃO inclua texto extra.
IMPORTANTE: Extraia APENAS informações explícitas do log. NÃO invente dados.`;

  let totalTasks = 0;
  let totalBlockers = 0;
  let totalSkipped = 0;
  let totalErrors = 0;

  const insertTask = dl.db.prepare(`INSERT OR IGNORE INTO tasks (id, project_slug, description, category, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);
  const insertBlocker = dl.db.prepare(`INSERT OR IGNORE INTO blockers (id, project_slug, title, severity, status, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)`);

  // Build existing keys for dedup.
  // NOTE(review): these keys hash only the description/title, while the keys
  // computed in the loop below prefix the projectSlug — entries with a slug
  // may therefore never dedup against pre-existing rows across runs; verify.
  const existingTaskDescs = new Set(
    dl.db.prepare("SELECT description FROM tasks").all().map(t => sha1(normalizeTextForKey(t.description)))
  );
  const existingBlockerTitles = new Set(
    dl.db.prepare("SELECT title FROM blockers").all().map(b => sha1(normalizeTextForKey(b.title)))
  );

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    const date = file.replace('.md', '');
    const content = fs.readFileSync(path.join(logsDir, file), 'utf8');

    // Skip very small logs (< 50 chars) — likely empty or just a header
    if (content.trim().length < 50) {
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — skipped (too small)`);
      continue;
    }

    const fullPrompt = `${sysInstructions}\n\nDAILY LOG (${date}):\n${content}\n`;
    // Conservative cap on command-line argument length; longer prompts are
    // handed to the CLI via a temp file instead.
    const SAFE_ARG_LEN = 24000;

    try {
      let r;
      const baseArgs = useGhCopilot
        ? ['copilot', '-s', '--no-color', '--stream', 'off']
        : ['-s', '--no-color', '--stream', 'off'];

      if (fullPrompt.length > SAFE_ARG_LEN) {
        // Prompt too long for argv: write it to a temp file and tell the CLI
        // (granted read access to the tmp dir) to read it from there.
        const tmpFile = path.join(os.tmpdir(), `freya-retro-${Date.now()}.txt`);
        fs.writeFileSync(tmpFile, fullPrompt, 'utf8');
        const filePrompt = `Leia o arquivo abaixo e extraia tasks/blockers conforme as instruções contidas nele.\nARQUIVO: ${tmpFile}`;
        r = await run(cmd, [...baseArgs, '--add-dir', os.tmpdir(), '--allow-all-tools', '-p', filePrompt], workspaceDir, agentEnv);
        // Best-effort temp-file cleanup; failures are deliberately ignored.
        try { fs.unlinkSync(tmpFile); } catch { }
      } else {
        r = await run(cmd, [...baseArgs, '-p', fullPrompt], workspaceDir, agentEnv);
      }

      const out = (r.stdout + r.stderr).trim();
      if (r.code !== 0 || !out) {
        totalErrors++;
        // On first error, show verbose diagnostic
        if (totalErrors === 1) {
          console.log(`\n ⚠ Planner diagnostic for ${date}:`);
          console.log(` Command: ${cmd} ${(useGhCopilot ? baseArgs : ['-s', '--no-color', '--stream', 'off', '-p', '...']).join(' ')}`);
          console.log(` Exit code: ${r.code}`);
          console.log(` stdout: ${(r.stdout || '').slice(0, 300)}`);
          console.log(` stderr: ${(r.stderr || '').slice(0, 300)}`);
        }
        process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ planner error (code=${r.code}) `);
        continue;
      }

      // Parse plan: pull the first JSON object out of the CLI chatter; retry
      // once with control characters escaped before giving up on this file.
      const jsonText = extractFirstJsonObject(out) || out;
      let plan;
      try {
        plan = JSON.parse(jsonText);
      } catch {
        try { plan = JSON.parse(escapeJsonControlChars(jsonText)); } catch {
          totalErrors++;
          process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ invalid JSON `);
          continue;
        }
      }

      const actions = Array.isArray(plan.actions) ? plan.actions : [];
      let fileTasks = 0;
      let fileBlockers = 0;
      let fileSkipped = 0;

      if (!DRY_RUN) {
        // Apply all of this file's actions atomically.
        const applyTx = dl.db.transaction(() => {
          for (const a of actions) {
            if (!a || typeof a !== 'object') continue;

            if (a.type === 'create_task' && a.description) {
              const desc = normalizeWhitespace(a.description);
              if (!desc) continue;
              // Prefer the planner-provided slug; fall back to keyword inference.
              const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(desc, slugMap);
              const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + desc));
              if (existingTaskDescs.has(key)) { fileSkipped++; continue; }

              const id = `t-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
              // Unknown categories from the planner are coerced to DO_NOW.
              const category = validTaskCats.has(String(a.category || '').trim()) ? String(a.category).trim() : 'DO_NOW';
              const metadata = JSON.stringify({ priority: a.priority || 'medium', source: 'retroactive', sourceDate: date });
              // Use the log date as created_at for chronological accuracy
              insertTask.run(id, projectSlug || null, desc, category, 'PENDING', `${date}T12:00:00.000Z`, metadata);
              existingTaskDescs.add(key);
              fileTasks++;
            }

            if (a.type === 'create_blocker' && a.title) {
              const title = normalizeWhitespace(a.title);
              if (!title) continue;
              const notes = normalizeWhitespace(a.notes);
              const projectSlug = String(a.projectSlug || '').trim() || inferProjectSlug(title + ' ' + notes, slugMap);
              const key = sha1(normalizeTextForKey((projectSlug ? projectSlug + ' ' : '') + title));
              if (existingBlockerTitles.has(key)) { fileSkipped++; continue; }

              const id = `b-retro-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
              // NOTE(review): severity is uppercased but not validated against
              // a known set, unlike task categories — confirm intended.
              const severity = String(a.severity || 'MEDIUM').toUpperCase();
              const metadata = JSON.stringify({ description: notes || title, source: 'retroactive', sourceDate: date });
              insertBlocker.run(id, projectSlug || null, title, severity, 'OPEN', `${date}T12:00:00.000Z`, metadata);
              existingBlockerTitles.add(key);
              fileBlockers++;
            }
          }
        });
        applyTx();
      } else {
        // Dry run — just count
        for (const a of actions) {
          if (a && a.type === 'create_task' && a.description) fileTasks++;
          if (a && a.type === 'create_blocker' && a.title) fileBlockers++;
        }
      }

      totalTasks += fileTasks;
      totalBlockers += fileBlockers;
      totalSkipped += fileSkipped;

      const status = fileTasks || fileBlockers
        ? `${fileTasks}T ${fileBlockers}B${fileSkipped ? ` (${fileSkipped} dup)` : ''}`
        : 'no actions';
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ${status} `);

      // Small delay to avoid rate limiting
      if (i < files.length - 1) {
        await new Promise(r => setTimeout(r, 500));
      }

    } catch (err) {
      totalErrors++;
      process.stdout.write(`\r [${i + 1}/${files.length}] ${date} — ❌ ${err.message} `);
    }
  }

  // Ensure data is persisted
  dl.db.save();

  console.log('\n');
  console.log('══════════════════════════════════════════════════════');
  console.log(` 📊 Results ${DRY_RUN ? '(DRY RUN)' : ''}`);
  console.log(` Tasks created: ${totalTasks}`);
  console.log(` Blockers created: ${totalBlockers}`);
  console.log(` Duplicates skipped: ${totalSkipped}`);
  console.log(` Errors: ${totalErrors}`);
  console.log('══════════════════════════════════════════════════════');

  // Step 4: Generate embeddings for newly created tasks/blockers
  if (!DRY_RUN && !SKIP_EMBEDDINGS && (totalTasks > 0 || totalBlockers > 0)) {
    console.log('\n── Step 4: Generating embeddings for new tasks/blockers ──');
    const dm = new DataManager(workspaceDir, logsDir);
    // NOTE(review): this selects ALL rows tagged source='retroactive',
    // including ones created by earlier runs, not just this run — confirm.
    const newTasks = dl.db.prepare("SELECT id, description FROM tasks WHERE json_extract(metadata, '$.source') = 'retroactive'").all();
    const newBlockers = dl.db.prepare("SELECT id, title, json_extract(metadata, '$.description') as notes FROM blockers WHERE json_extract(metadata, '$.source') = 'retroactive'").all();

    let embCount = 0;
    for (const t of newTasks) {
      try {
        embCount += await dm.generateEmbeddings('task', t.id, t.description);
      } catch { }
    }
    for (const b of newBlockers) {
      try {
        embCount += await dm.generateEmbeddings('blocker', b.id, b.title + ' ' + (b.notes || ''));
      } catch { }
    }
    console.log(`✅ Generated ${embCount} embedding chunks for new entities`);
  }

  console.log('\n✅ Retroactive ingestion complete!');
}
|
|
415
|
+
|
|
416
|
+
// Entry point: map any unhandled failure to a readable message and exit code 1.
main().catch((err) => {
  const detail = err.message || err;
  console.error('\n❌ Fatal error:', detail);
  process.exit(1);
});
|