braintrust-lite 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -102
- package/bin/braintrust +12 -0
- package/package.json +20 -20
- package/skills/consult/SKILL.md +2 -2
- package/src/config.js +60 -0
- package/src/doctor.js +120 -0
- package/src/format.js +26 -49
- package/src/judge.js +87 -0
- package/src/main.js +332 -0
- package/src/memory/db.js +183 -0
- package/src/memory/index.js +31 -0
- package/src/normalize.js +172 -0
- package/src/normalize.test.js +125 -0
- package/src/prompts/architecture.md +21 -0
- package/src/prompts/code.md +21 -0
- package/src/prompts/general.md +22 -0
- package/src/prompts/index.js +49 -0
- package/src/prompts/writing.md +21 -0
- package/src/providers/claude.js +45 -0
- package/src/providers/codex.js +69 -0
- package/src/providers/gemini.js +81 -0
- package/src/providers/index.js +22 -0
- package/src/reflector.js +244 -0
- package/src/save.js +93 -0
- package/src/server.js +245 -38
- package/LICENSE +0 -21
- package/bin/consult +0 -79
- package/scripts/setup.js +0 -66
- package/src/consult.js +0 -81
- package/src/providers.js +0 -91
package/src/main.js
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { spawn } = require('child_process');
|
|
4
|
+
const { readFileSync, existsSync, readdirSync, statSync } = require('fs');
|
|
5
|
+
const { join, resolve, extname } = require('path');
|
|
6
|
+
|
|
7
|
+
const { OUTPUT_DIR, DEFAULT_TIMEOUT_S, DEFAULT_JUDGE_MODEL, MAX_CONTEXT_CHARS, CONTEXT_FILE_MAX } = require('./config.js');
|
|
8
|
+
const { getActiveProviders } = require('./providers/index.js');
|
|
9
|
+
const { normalize } = require('./normalize.js');
|
|
10
|
+
const { runJudge } = require('./judge.js');
|
|
11
|
+
const { saveArtifacts } = require('./save.js');
|
|
12
|
+
const { persistRun } = require('./memory/index.js');
|
|
13
|
+
|
|
14
|
+
// ─── Arg Parsing ──────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Parse CLI argument tokens into named flags and positional words.
 *
 * Recognised shapes:
 *   --skip <name>    repeatable; each value is appended to `flags.skip`
 *   --no-<key>       sets `flags[key]` to `false`
 *   --<key> [value]  sets `flags[key]` to the next token, or to `true` when the
 *                    next token is missing/empty or is itself a `--` option
 * Every other token is collected, in order, as a positional argument.
 *
 * @param {string[]} argv - Argument tokens to parse.
 * @returns {{ flags: object, positional: string[] }} `flags` always contains
 *   `skip` (array), `timeout` and `judge-model` (pre-filled with the defaults).
 */
function parseArgs(argv) {
  const flags = { skip: [], timeout: DEFAULT_TIMEOUT_S, 'judge-model': DEFAULT_JUDGE_MODEL };
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (arg === '--skip') {
      // Only consume the next token when it is a real value. A trailing
      // `--skip` used to push `undefined`, and `--skip --json` used to swallow
      // the following option as if it were a model name.
      const value = argv[i + 1];
      if (value && !value.startsWith('--')) {
        flags.skip.push(value);
        i++;
      } else {
        process.stderr.write('[warn] --skip requires a model name; ignoring\n');
      }
      continue;
    }

    if (arg.startsWith('--no-')) {
      flags[arg.slice(5)] = false;
      continue;
    }

    if (arg.startsWith('--')) {
      const key = arg.slice(2);
      const next = argv[i + 1];
      if (!next || next.startsWith('--')) {
        flags[key] = true;
      } else {
        flags[key] = next;
        i++; // the value token has been consumed
      }
      continue;
    }

    positional.push(arg);
  }

  return { flags, positional };
}
|
|
35
|
+
|
|
36
|
+
// ─── Context Loading ──────────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
/**
 * Read one context file as UTF-8 and keep only its first CONTEXT_FILE_MAX
 * characters.
 *
 * @param {string} filePath - Path to the file; resolved with `path.resolve`.
 * @returns {string|null} The (possibly truncated) text, or `null` after
 *   writing a warning to stderr when the file cannot be read.
 */
function loadContextFile(filePath) {
  let text;
  try {
    const absolutePath = resolve(filePath);
    text = readFileSync(absolutePath, 'utf8');
  } catch {
    process.stderr.write(`[warn] Cannot read context file: ${filePath}\n`);
    return null;
  }
  return text.slice(0, CONTEXT_FILE_MAX);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Collect the text of every matching file under a directory (recursively)
 * until roughly MAX_CONTEXT_CHARS characters have been gathered.
 *
 * Each file becomes one chunk of the form "### <relative path>\n<content>".
 * The content is cut to the remaining budget; the header is not, so `total`
 * (the sum of chunk lengths) may exceed the budget by the header sizes.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {string} [globPattern] - Pattern used to derive extensions (default '*.md').
 * @returns {{ parts: string[], total: number }|null} `null` when the directory
 *   cannot be read or nothing could be loaded; a warning goes to stderr.
 */
function loadContextDir(dirPath, globPattern) {
  const root = resolve(dirPath);
  const pattern = globPattern || '*.md';
  const allowedExts = parseGlobToExtensions(pattern);

  let names;
  try {
    names = readdirSync(root, { recursive: true });
  } catch (e) {
    process.stderr.write(`[warn] Cannot read context-dir: ${e.message}\n`);
    return null;
  }

  // Entries that cannot be stat'ed are treated as "not a file".
  const isRegularFile = (candidate) => {
    try {
      return statSync(candidate).isFile();
    } catch {
      return false;
    }
  };
  const hasAllowedExt = (candidate) =>
    !allowedExts || allowedExts.includes(extname(candidate).toLowerCase());

  const candidates = [];
  for (const name of names) {
    const full = join(root, name);
    if (isRegularFile(full) && hasAllowedExt(full)) candidates.push(full);
  }

  const parts = [];
  let total = 0;
  for (const full of candidates) {
    if (total >= MAX_CONTEXT_CHARS) break;
    try {
      const body = readFileSync(full, 'utf8');
      const relative = full.slice(root.length + 1);
      const remaining = MAX_CONTEXT_CHARS - total;
      const chunk = `### ${relative}\n${body.slice(0, remaining)}`;
      parts.push(chunk);
      total += chunk.length;
    } catch {
      // Unreadable files are skipped on purpose.
    }
  }

  if (parts.length === 0) {
    process.stderr.write(`[warn] No files matched pattern "${pattern}" in ${dirPath}\n`);
    return null;
  }

  process.stderr.write(`[braintrust] Loaded ${parts.length} file(s) from ${dirPath} (${total} chars, pattern: ${pattern})\n`);
  return { parts, total };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Reduce a simple glob to the list of file extensions it selects.
 *
 * Only the last path segment is inspected. Two shapes are understood:
 *   "*.ext"          -> ['.ext']
 *   "*.{a,b, c}"     -> ['.a', '.b', '.c']
 * Anything else yields `null`, which callers treat as "all files".
 *
 * Extensions are returned lower-cased because loadContextDir compares them
 * against `extname(f).toLowerCase()`; without this, a pattern such as "*.MD"
 * could never match any file.
 *
 * @param {string} glob - Glob pattern, e.g. "*.md" or "docs/*.{js,ts}".
 * @returns {string[]|null} Lower-case extensions including the leading dot,
 *   or `null` when the pattern does not restrict by extension.
 */
function parseGlobToExtensions(glob) {
  const base = glob.split('/').pop();

  const single = base.match(/\*\.(\w+)$/);
  if (single) return [`.${single[1].toLowerCase()}`];

  const braced = base.match(/\*\.\{([^}]+)\}/);
  if (braced) return braced[1].split(',').map(e => `.${e.trim().toLowerCase()}`);

  return null; // null = all files
}
|
|
94
|
+
|
|
95
|
+
// ─── --list mode ──────────────────────────────────────────────────────────────
|
|
96
|
+
|
|
97
|
+
/**
 * Implements `--list`: print up to 20 run directories found in OUTPUT_DIR
 * (names sorted descending, `.state` excluded), then exit with status 0.
 *
 * For each run, `summary.md` is preferred over `report.md`; the first line
 * that starts with the question marker is printed after the run name. Runs
 * with neither file are skipped silently.
 */
function handleListMode() {
  const exitWithNoRuns = () => {
    console.log('No runs yet.');
    process.exit(0);
  };

  if (!existsSync(OUTPUT_DIR)) exitWithNoRuns();

  const runNames = [];
  for (const entry of readdirSync(OUTPUT_DIR, { withFileTypes: true })) {
    if (entry.isDirectory() && entry.name !== '.state') runNames.push(entry.name);
  }
  runNames.sort();
  runNames.reverse();
  const runs = runNames.slice(0, 20);

  if (runs.length === 0) exitWithNoRuns();

  for (const run of runs) {
    const summaryPath = join(OUTPUT_DIR, run, 'summary.md');
    const reportPath = join(OUTPUT_DIR, run, 'report.md');
    const target = existsSync(summaryPath) ? summaryPath : reportPath;
    if (!existsSync(target)) continue;

    const lines = readFileSync(target, 'utf8').split('\n');
    const first = lines.find(line => line.startsWith('**问题'));
    console.log(`${run} ${first || ''}`);
  }

  process.exit(0);
}
|
|
115
|
+
|
|
116
|
+
// ─── Process Runner ───────────────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
/**
 * Build a `runProcess(cmd, args, opts)` function bound to a timeout and a
 * default working directory.
 *
 * The returned function spawns `cmd` with stdin ignored and stdout/stderr
 * captured, and resolves (never rejects) with
 * `{ stdout, stderr, code, error_type }`:
 *   - normal exit:   code = exit code, error_type = null (0) or 'nonzero'
 *   - timeout:       code = 'timeout', error_type = 'timeout'
 *   - missing cmd:   code = -1,        error_type = 'enoent'
 *   - other failure: code = -1,        error_type = 'spawn_error'
 *
 * @param {number} timeoutMs - Milliseconds before the child is aborted.
 * @param {string} workDir - Default cwd, used when `opts.cwd` is not given.
 * @returns {(cmd: string, args: string[], opts?: { cwd?: string }) => Promise<object>}
 */
function makeRunner(timeoutMs, workDir) {
  return function runProcess(cmd, args, opts = {}) {
    const ac = new AbortController();
    const cwd = opts.cwd || workDir;
    const proc = spawn(cmd, args, { signal: ac.signal, stdio: ['ignore', 'pipe', 'pipe'], cwd });

    // Decode through the stream's own decoder. Appending raw Buffer chunks to
    // a string decodes each chunk separately and corrupts any multi-byte
    // UTF-8 character that straddles a chunk boundary.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    let stdout = '';
    let stderr = '';
    proc.stdout.on('data', chunk => { stdout += chunk; });
    proc.stderr.on('data', chunk => { stderr += chunk; });

    const timer = setTimeout(() => ac.abort(), timeoutMs);

    return new Promise(res => {
      let resolved = false;
      // 'error' and 'close' can both fire for the same child; only the first
      // one decides the outcome.
      const done = (code, error_type = null) => {
        if (resolved) return;
        resolved = true;
        clearTimeout(timer);
        res({ stdout, stderr, code, error_type });
      };

      proc.on('close', code => done(code, code !== 0 ? 'nonzero' : null));
      proc.on('error', err => {
        if (err.name === 'AbortError') done('timeout', 'timeout');
        else if (err.code === 'ENOENT') done(-1, 'enoent');
        else done(-1, 'spawn_error');
      });
    });
  };
}
|
|
144
|
+
|
|
145
|
+
// ─── Usage ────────────────────────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
/**
 * Write the command-line help text to stderr, one write per line, then
 * terminate the process with exit status 1.
 */
function printUsageAndExit() {
  const helpLines = [
    'Usage: braintrust [options] "your question"\n',
    ' cat file | braintrust "explain this"\n',
    '\nOptions:\n',
    ' --skip <model> Skip a model (claude|codex|gemini), repeatable\n',
    ' --no-judge Show raw results only\n',
    ' --judge-model Judge model: claude|codex|gemini (default: claude)\n',
    ' --timeout <sec> Per-model timeout in seconds (default: 120)\n',
    ' --dir <path> Working directory for CLI tools\n',
    ' --context-file <f> Append file content as context (max 8000 chars)\n',
    ' --context-dir <d> Append all matching files from a directory as context\n',
    ' --glob <pattern> File pattern for --context-dir (default: *.md)\n',
    ' --no-save Do not save results to disk\n',
    ' --json Print full JSON result to stdout\n',
    ' --list List recent runs\n',
  ];

  for (const line of helpLines) {
    process.stderr.write(line);
  }
  process.exit(1);
}
|
|
164
|
+
|
|
165
|
+
// ─── Main ─────────────────────────────────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
/**
 * CLI entry point.
 *
 * Flow: parse flags -> (--list / --strict short-circuits) -> assemble the user
 * prompt from positional words, piped stdin, --context-file and --context-dir
 * -> run every active provider in parallel -> normalize and print each result
 * -> optionally run the judge over the successful answers -> optionally print
 * JSON -> unless --no-save, save artifacts, print the manifest, persist the
 * run and (when a judge report exists and --no-reflect was not given) spawn
 * the background reflector.
 *
 * @param {string[]} argv - Argument tokens, forwarded to parseArgs.
 * @returns {Promise<void>}
 */
async function main(argv) {
  const { flags, positional } = parseArgs(argv);

  if (flags.list) { handleListMode(); return; }

  if (flags.strict) {
    console.log('[braintrust] --strict mode (two-stage Judge + swap-compare) is planned for v2.');
    process.exit(0);
  }

  // Build user prompt
  let userPrompt = positional.join(' ');

  // Read stdin if piped
  if (!process.stdin.isTTY) {
    const stdinData = readFileSync(0, 'utf8').trim();
    if (stdinData) userPrompt = userPrompt ? `${userPrompt}\n\n<context>\n${stdinData}\n</context>` : stdinData;
  }

  // Inject context file
  if (flags['context-file']) {
    const ctx = loadContextFile(flags['context-file']);
    if (ctx) userPrompt = `${userPrompt}\n\n<context-file>\n${ctx}\n</context-file>`;
  }

  // Inject context directory
  if (flags['context-dir']) {
    const result = loadContextDir(flags['context-dir'], flags.glob);
    if (result) {
      const block = result.parts.join('\n\n');
      userPrompt = `${userPrompt}\n\n<context-dir path="${flags['context-dir']}" files="${result.parts.length}" chars="${result.total}">\n${block}\n</context-dir>`;
    }
  }

  if (!userPrompt) { printUsageAndExit(); return; }

  const workDir = flags.dir ? resolve(flags.dir) : process.cwd();
  const timeoutMs = (parseInt(flags.timeout, 10) || DEFAULT_TIMEOUT_S) * 1000;
  const judgeModel = flags['judge-model'] || DEFAULT_JUDGE_MODEL;
  // parseArgs stores `--no-<key>` as `flags[key] = false`.
  const noJudge = flags.judge === false;
  const noSave = flags.save === false;
  // `--no-reflect` therefore arrives as `flags.reflect === false`; the key
  // 'no-reflect' is never set, so testing it could not disable the reflector.
  const noReflect = flags.reflect === false;

  const runProcess = makeRunner(timeoutMs, workDir);
  const activeProviders = getActiveProviders(flags.skip);

  // Load generator system prompt (Phase 0: always 'general' variant)
  const { buildGeneratorSystem } = require('./prompts/index.js');
  const systemPrompt = buildGeneratorSystem('general');
  const fullPrompt = `${systemPrompt}\n\n${userPrompt}`;

  const { formatManifest } = require('./format.js');
  process.stderr.write(`[braintrust] Running ${activeProviders.map(p => p.name).join(', ')} in parallel...\n`);

  // Record each provider's elapsed time at the moment its own process
  // settles. Measuring after Promise.allSettled would give every provider the
  // duration of the slowest one.
  const launchedAt = Date.now();
  const settled = await Promise.allSettled(
    activeProviders.map(async p => {
      const raw = await runProcess(p.cmd, p.getArgs(fullPrompt));
      return { raw, ms: Date.now() - launchedAt };
    })
  );

  const raws = {};
  const results = [];
  for (let i = 0; i < activeProviders.length; i++) {
    const p = activeProviders[i];
    const outcome = settled[i];
    const succeeded = outcome.status === 'fulfilled';
    const raw = succeeded ? outcome.value.raw : { stdout: '', stderr: '', code: -1 };
    const ms = succeeded ? outcome.value.ms : Date.now() - launchedAt;
    raws[p.name] = raw;
    const adapted = p.adapt(raw);
    const r = normalize(p.name, raw, adapted, ms);
    results.push(r);
    const status = r.error ? `⚠ ${r.error_type || r.error}` : `✓ ${(ms / 1000).toFixed(1)}s parse_score=${r.parse_score.toFixed(2)}`;
    process.stderr.write(`[${p.name}: ${status}]\n`);
  }

  // Degraded mode warning
  const successCount = results.filter(r => !r.error).length;
  if (successCount < activeProviders.length) {
    process.stderr.write(`\n[braintrust] ⚠ DEGRADED: ${successCount}/${activeProviders.length} models succeeded\n`);
  }

  // Print raw results
  console.log('\n' + '═'.repeat(60));
  for (const r of results) {
    console.log(`\n## ${r.provider.toUpperCase()}${r.error ? ` (${r.error})` : ''}\n`);
    console.log(r.content || '[no output]');
  }

  // Run judge
  let judgeOutput = null;
  const validResults = results.filter(r => !r.error && r.content && r.content !== '[no output]');
  if (!noJudge && validResults.length >= 2) {
    judgeOutput = await runJudge(userPrompt, validResults, { judgeModel, runProcess });
    console.log('\n' + '═'.repeat(60));
    console.log('\n# 🧠 BRAINTRUST — 智囊团融合报告\n');
    console.log(judgeOutput);
  } else if (!noJudge && validResults.length < 2) {
    console.log('\n[braintrust] Not enough successful responses for Judge (need ≥ 2).');
  }

  // JSON output mode
  if (flags.json) {
    process.stdout.write('\n' + JSON.stringify({ prompt: userPrompt, results, judge: judgeOutput }, null, 2) + '\n');
  }

  // Save artifacts
  if (!noSave) {
    const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 23);
    const runDir = join(OUTPUT_DIR, ts);
    saveArtifacts(runDir, userPrompt, raws, results, judgeOutput);
    process.stderr.write(`\n[saved → ${runDir}]\n`);

    // Print run manifest
    console.log('\n' + formatManifest({ results, ts, judgeModel: noJudge ? null : judgeModel, runDir }));

    // Persist to memory DB
    const parseScoreAvg = results.length
      ? results.reduce((sum, r) => sum + (r.parse_score || 0), 0) / results.length
      : null;
    persistRun({
      ts,
      question: userPrompt,
      variant: 'general',
      judgeModel: noJudge ? null : judgeModel,
      providers: activeProviders.map(p => p.name),
      judgeReport: judgeOutput,
      parseScoreAvg,
    });

    // Spawn async reflector (detached, main process exits immediately).
    // NOTE(review): the original comment says the reflector uses gpt-5.4-mini,
    // deliberately different from the default judge (claude) — confirm in
    // reflector.js.
    if (!noReflect && judgeOutput) {
      spawnReflector(ts);
    }
  }
}
|
|
304
|
+
|
|
305
|
+
/**
 * Spawn the reflector as a detached background process.
 * Main process does not wait for it — exits immediately.
 *
 * The child's stdout/stderr are appended to REFLECTOR_LOG when that file can
 * be opened, and discarded otherwise. Failures to launch are reported on
 * stderr and never thrown to the caller.
 *
 * @param {string} ts - Run timestamp, passed to reflector.js as `--run <ts>`.
 */
function spawnReflector(ts) {
  const { REFLECTOR_LOG } = require('./config.js');
  const fs = require('fs');

  let logFd = 'ignore';
  try {
    logFd = fs.openSync(REFLECTOR_LOG, 'a');
  } catch {
    // Log file unavailable: keep 'ignore' so the child's output is dropped.
  }

  try {
    const child = spawn(process.execPath, [
      join(__dirname, 'reflector.js'), '--run', ts,
    ], {
      detached: true,
      stdio: ['ignore', logFd, logFd],
    });
    // Launch failures are delivered asynchronously as an 'error' event. With
    // no listener, that event would surface as an uncaught exception.
    child.on('error', err => {
      process.stderr.write(`[reflector: spawn failed — ${err.message}]\n`);
    });
    child.unref();
    process.stderr.write(`[reflector: spawned for ${ts}]\n`);
  } catch (err) {
    process.stderr.write(`[reflector: spawn failed — ${err.message}]\n`);
  } finally {
    // Close the parent's descriptor for the log file whether or not the spawn
    // succeeded.
    if (typeof logFd === 'number') {
      try { fs.closeSync(logFd); } catch { /* ignore */ }
    }
  }
}
|
|
331
|
+
|
|
332
|
+
module.exports = { main };
|
package/src/memory/db.js
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { mkdirSync, existsSync } = require('fs');
|
|
4
|
+
const { dirname } = require('path');
|
|
5
|
+
const { DB_PATH } = require('../config.js');
|
|
6
|
+
|
|
7
|
+
// Module-level cache for the connection created by getDb(); null until then.
let _db = null;

// DDL that getDb() runs with db.exec() each time a connection is opened.
// Every statement uses IF NOT EXISTS, so re-running it against an existing
// database file does not recreate tables or indexes.
// Tables: runs (keyed by ts), embeddings, lessons, skills, judge_stats,
// iterations, feedback and eval_results; several reference runs(ts).
const SCHEMA = `
CREATE TABLE IF NOT EXISTS runs (
ts TEXT PRIMARY KEY,
question TEXT NOT NULL,
domain TEXT,
variant TEXT,
judge_model TEXT,
providers TEXT,
judge_report TEXT,
judge_summary TEXT,
parse_score_avg REAL,
judge_score REAL,
state TEXT DEFAULT 'pending_reflect',
created_at INTEGER NOT NULL,
reflected_at INTEGER
);

CREATE TABLE IF NOT EXISTS embeddings (
ts TEXT PRIMARY KEY REFERENCES runs(ts),
model TEXT NOT NULL,
vector BLOB NOT NULL,
dim INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS lessons (
id INTEGER PRIMARY KEY AUTOINCREMENT,
from_run TEXT REFERENCES runs(ts),
domain TEXT,
lesson TEXT NOT NULL,
active INTEGER DEFAULT 1,
upvotes INTEGER DEFAULT 0,
downvotes INTEGER DEFAULT 0,
created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS skills (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
description TEXT NOT NULL,
template TEXT NOT NULL,
domain TEXT,
from_run TEXT REFERENCES runs(ts),
embedding BLOB,
use_count INTEGER DEFAULT 0,
success_rate REAL,
active INTEGER DEFAULT 1,
created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS judge_stats (
judge_model TEXT NOT NULL,
domain TEXT NOT NULL,
successes INTEGER DEFAULT 0,
failures INTEGER DEFAULT 0,
total INTEGER DEFAULT 0,
PRIMARY KEY (judge_model, domain)
);

CREATE TABLE IF NOT EXISTS iterations (
run_ts TEXT REFERENCES runs(ts),
round INTEGER NOT NULL,
provider TEXT NOT NULL,
content TEXT NOT NULL,
disagreement_score REAL,
PRIMARY KEY (run_ts, round, provider)
);

CREATE TABLE IF NOT EXISTS feedback (
run_ts TEXT REFERENCES runs(ts),
vote INTEGER NOT NULL,
note TEXT,
created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS eval_results (
run_id TEXT NOT NULL,
question_id TEXT NOT NULL,
policy_version INTEGER NOT NULL,
score_covers REAL,
score_hallu REAL,
score_action REAL,
score_avg REAL,
created_at INTEGER NOT NULL,
PRIMARY KEY (run_id, question_id)
);

CREATE INDEX IF NOT EXISTS idx_runs_domain ON runs(domain) WHERE state='reflected';
CREATE INDEX IF NOT EXISTS idx_lessons_domain_active ON lessons(domain, active);
CREATE INDEX IF NOT EXISTS idx_runs_created ON runs(created_at);
`;
|
|
99
|
+
|
|
100
|
+
/**
 * Get or initialize the SQLite database connection.
 * Creates the database file and schema on first call.
 *
 * The connection is cached only after the pragmas and the schema have been
 * applied successfully. If initialisation fails part-way, the handle is
 * closed and nothing is cached, so a later call retries from scratch instead
 * of returning a half-initialised database.
 *
 * @returns {import('better-sqlite3').Database|null} The shared connection, or
 *   `null` (with a hint on stderr) when loading fails with MODULE_NOT_FOUND.
 * @throws Any other error raised while opening or initialising the database.
 */
function getDb() {
  if (_db) return _db;

  // Ensure state directory exists
  const dir = dirname(DB_PATH);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  let db = null;
  try {
    const Database = require('better-sqlite3');
    db = new Database(DB_PATH);
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');
    db.exec(SCHEMA);
  } catch (err) {
    // Release the handle if it was opened before the failure.
    if (db) {
      try { db.close(); } catch { /* handle is already unusable */ }
    }
    if (err.code === 'MODULE_NOT_FOUND') {
      process.stderr.write('[memory] better-sqlite3 not installed — run: npm install\n');
      return null;
    }
    throw err;
  }

  _db = db;
  return _db;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Insert a completed run into the database.
 *
 * Uses INSERT OR IGNORE, so a second insert with the same `ts` is a no-op.
 * Does nothing when getDb() returns no connection. `judge_summary` is the
 * first 800 characters of the judge report; `created_at` is `Date.now()`.
 *
 * @param {object} run
 * @param {string} run.ts - Timestamp string (primary key)
 * @param {string} run.question
 * @param {string} [run.domain]
 * @param {string} [run.variant] - Stored as 'general' when omitted.
 * @param {string} [run.judgeModel]
 * @param {string[]} [run.providers] - Stored as a JSON array string.
 * @param {string|null} [run.judgeReport]
 * @param {number} [run.parseScoreAvg] - Stored as-is, including 0.
 */
function insertRun(run) {
  const db = getDb();
  if (!db) return;

  const judgeReport = run.judgeReport || null;
  const judgeSummary = judgeReport ? judgeReport.slice(0, 800) : null;

  db.prepare(`
INSERT OR IGNORE INTO runs
(ts, question, domain, variant, judge_model, providers,
judge_report, judge_summary, parse_score_avg, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`).run(
    run.ts,
    run.question,
    run.domain || null,
    run.variant || 'general',
    run.judgeModel || null,
    JSON.stringify(run.providers || []),
    judgeReport,
    judgeSummary,
    // `??` rather than `||`: an average of exactly 0 is a real score and must
    // not be stored as NULL.
    run.parseScoreAvg ?? null,
    Date.now()
  );
}
|
|
165
|
+
|
|
166
|
+
/**
 * Set the `state` column of one run (e.g. pending_reflect → reflected) and,
 * when supplied, its judge score and reflection time.
 *
 * Does nothing when getDb() returns no connection.
 *
 * @param {string} ts - Primary key of the run to update.
 * @param {string} state - New value for `state`.
 * @param {{ judgeScore?: number, reflectedAt?: number }} [extra] - Optional
 *   columns; a property is written only when it is not `undefined`.
 */
function updateRunState(ts, state, extra = {}) {
  const db = getDb();
  if (!db) return;

  // Ordered [column, value] pairs; `state` is always first.
  const assignments = [['state', state]];
  if (extra.judgeScore !== undefined) assignments.push(['judge_score', extra.judgeScore]);
  if (extra.reflectedAt !== undefined) assignments.push(['reflected_at', extra.reflectedAt]);

  const setClause = assignments.map(([column]) => `${column} = ?`).join(', ');
  const params = assignments.map(([, value]) => value);

  db.prepare(`UPDATE runs SET ${setClause} WHERE ts = ?`).run(...params, ts);
}
|
|
182
|
+
|
|
183
|
+
module.exports = { getDb, insertRun, updateRunState };
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Memory layer (retrieve, embed, persist) — Phase 1 design, not yet fully implemented.
|
|
4
|
+
// Currently exports stubs for Phase 0 compatibility.
|
|
5
|
+
// Full implementation in Phase 1.
|
|
6
|
+
|
|
7
|
+
const { getDb, insertRun, updateRunState } = require('./db.js');
|
|
8
|
+
|
|
9
|
+
/**
 * Best-effort write of a finished run to the database (Phase 0 write path,
 * no embedding yet).
 *
 * Any error thrown by insertRun is reported on stderr and swallowed, so the
 * caller is never interrupted by a persistence failure.
 *
 * @param {object} opts - Run fields, passed unchanged to insertRun.
 */
function persistRun(opts) {
  try {
    insertRun(opts);
  } catch (failure) {
    const reason = failure.message;
    process.stderr.write(`[memory] Failed to persist run: ${reason}\n`);
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Look up similar past runs for few-shot injection.
 * Phase 0 placeholder: always resolves to a new empty array.
 *
 * @returns {Promise<Array>}
 */
async function retrieve() {
  const noMatches = [];
  return noMatches;
}
|
|
30
|
+
|
|
31
|
+
module.exports = { getDb, persistRun, retrieve, updateRunState };
|