braintrust-lite 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -102
- package/bin/braintrust +12 -0
- package/package.json +21 -13
- package/skills/consult/SKILL.md +213 -0
- package/src/config.js +60 -0
- package/src/doctor.js +120 -0
- package/src/format.js +25 -37
- package/src/judge.js +87 -0
- package/src/main.js +332 -0
- package/src/memory/db.js +183 -0
- package/src/memory/index.js +31 -0
- package/src/normalize.js +172 -0
- package/src/normalize.test.js +125 -0
- package/src/prompts/architecture.md +21 -0
- package/src/prompts/code.md +21 -0
- package/src/prompts/general.md +22 -0
- package/src/prompts/index.js +49 -0
- package/src/prompts/writing.md +21 -0
- package/src/providers/claude.js +45 -0
- package/src/providers/codex.js +69 -0
- package/src/providers/gemini.js +81 -0
- package/src/providers/index.js +22 -0
- package/src/reflector.js +244 -0
- package/src/save.js +93 -0
- package/src/server.js +225 -52
- package/LICENSE +0 -21
- package/bin/consult +0 -79
- package/src/consult.js +0 -112
- package/src/providers.js +0 -88
package/bin/consult
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
// CLI entry point for braintrust-lite.
|
|
5
|
-
// Usage: consult [options] "prompt"
|
|
6
|
-
// cat file | consult "review this"
|
|
7
|
-
|
|
8
|
-
import { readFileSync } from 'fs';
|
|
9
|
-
import { resolve } from 'path';
|
|
10
|
-
import { consult } from '../src/consult.js';
|
|
11
|
-
import { formatAsMarkdown, formatAsJson } from '../src/format.js';
|
|
12
|
-
|
|
13
|
-
// ─── Arg parsing ──────────────────────────────────────────────────────────────
|
|
14
|
-
|
|
15
|
-
/**
 * Parse raw CLI arguments into option flags and positional words.
 *
 * Recognised options: `--skip <model>` (repeatable), `--only <model>`,
 * `--timeout <sec>`, `--dir <path>`, `--json`, and `--help` / `-h`.
 * Anything else is collected, in order, as a positional word.
 *
 * @param {string[]} args - Raw arguments, without the node/script entries.
 * @returns {{ flags: object, positional: string[] }} `flags.skip` is always
 *   an array. `flags.help` is set to true when `--help` / `-h` is seen, and
 *   parsing stops at that point.
 * @throws {Error} When a value-taking option is the last argument, or when
 *   `--timeout` is not a finite number >= 0.
 */
function parseArgs(args) {
  const flags = { skip: [] };
  const positional = [];

  // Value-taking options consume the next argument. A missing value is a
  // usage error instead of a silently stored `undefined`.
  const valueFor = (option, index) => {
    const value = args[index + 1];
    if (value === undefined) throw new Error(`Missing value for ${option}`);
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    switch (a) {
      case '--skip':
        flags.skip.push(valueFor(a, i));
        i += 1;
        break;
      case '--only':
        flags.only = valueFor(a, i);
        i += 1;
        break;
      case '--timeout': {
        const raw = valueFor(a, i);
        const seconds = Number(raw);
        // Number('') is 0 and Number('abc') is NaN; reject both so a typo
        // cannot silently change or disable the timeout.
        if (raw.trim() === '' || !Number.isFinite(seconds) || seconds < 0) {
          throw new Error(`Invalid value for --timeout: "${raw}" (expected seconds >= 0)`);
        }
        flags.timeout = seconds;
        i += 1;
        break;
      }
      case '--dir':
        flags.dir = valueFor(a, i);
        i += 1;
        break;
      case '--json':
        flags.json = true;
        break;
      case '--help':
      case '-h':
        flags.help = true;
        return { flags, positional };
      default:
        positional.push(a);
    }
  }

  return { flags, positional };
}

const argv = process.argv.slice(2);

let parsed;
try {
  parsed = parseArgs(argv);
} catch (err) {
  // Usage error: explain what was wrong, show the help text, exit non-zero.
  console.error(`consult: ${err.message}\n`);
  printHelp();
  process.exit(1);
}
const { flags, positional } = parsed;

if (flags.help) {
  printHelp();
  process.exit(0);
}
|
|
29
|
-
|
|
30
|
-
/**
 * Print the usage summary and option list to stderr as a single
 * console.error call.
 */
function printHelp() {
  const helpLines = [
    'Usage: consult [options] "your question"',
    'cat file | consult "explain this"',
    '',
    'Options:',
    '--only <model> Only run one model: codex | gemini | claude',
    '--skip <model> Skip a model (repeatable)',
    '--timeout <sec> Per-model timeout in seconds (default: 90)',
    '--dir <path> Working directory for CLI subprocesses',
    '--json Output full JSON instead of markdown',
    '--help Show this help',
  ];
  console.error(helpLines.join('\n'));
}
|
|
42
|
-
|
|
43
|
-
// ─── Prompt ───────────────────────────────────────────────────────────────────
|
|
44
|
-
|
|
45
|
-
// The prompt starts as the positional words joined by single spaces.
let prompt = positional.join(' ');

// When stdin is not a TTY (piped or redirected), read it in full. Non-empty
// stdin becomes the whole prompt, or is appended as a <context> block when a
// prompt was also given on the command line.
if (!process.stdin.isTTY) {
  const stdin = readFileSync(0, 'utf8').trim();
  if (stdin) prompt = prompt ? `${prompt}\n\n<context>\n${stdin}\n</context>` : stdin;
}

// Nothing to ask: show usage and exit non-zero.
if (!prompt) {
  printHelp();
  process.exit(1);
}

// ─── Run ─────────────────────────────────────────────────────────────────────

// --timeout is given in seconds; consult() takes milliseconds. Only a finite,
// non-negative value is honoured. Anything else (absent, NaN) falls back to
// 90 s rather than being passed through as-is.
const timeoutMs = Number.isFinite(flags.timeout) && flags.timeout >= 0
  ? flags.timeout * 1000
  : 90_000;

let results;
let mapping;
try {
  ({ results, mapping } = await consult({
    prompt,
    only: flags.only,
    skip: flags.skip,
    timeoutMs,
    cwd: flags.dir ? resolve(flags.dir) : undefined,
    blind: !flags.only, // blind mode only useful when multiple providers run
  }));
} catch (err) {
  // consult() rejects when no provider is selected. Report that as a
  // one-line error instead of an uncaught top-level rejection.
  console.error(`consult: ${err?.message ?? err}`);
  process.exit(1);
}

// Progress summary to stderr: one line per provider with its error or timing.
for (const r of results) {
  const status = r.error ? `⚠ ${r.error}` : `✓ ${(r.duration_ms / 1000).toFixed(1)}s`;
  process.stderr.write(`[${r.provider}: ${status}]\n`);
}

// Output to stdout: full JSON with --json, markdown otherwise.
if (flags.json) {
  console.log(formatAsJson(prompt, results, mapping));
} else {
  console.log('\n' + formatAsMarkdown(results, mapping));
}
|
package/src/consult.js
DELETED
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
runProcess,
|
|
3
|
-
adaptCodex,
|
|
4
|
-
adaptGemini,
|
|
5
|
-
adaptClaude,
|
|
6
|
-
CODEX_ARGS_PREFIX,
|
|
7
|
-
GEMINI_ARGS_PREFIX,
|
|
8
|
-
CLAUDE_ARGS_PREFIX,
|
|
9
|
-
} from './providers.js';
|
|
10
|
-
|
|
11
|
-
// Preamble (in Chinese) placed in front of every user prompt. Roughly: "You
// are an independent-thinking senior expert; give a high-quality, actionable
// answer based on your own judgement. Think independently and do not assume
// other experts will fill gaps; separate conclusions, evidence, assumptions
// and risks; be concise but complete."
const SYSTEM_PROMPT = `你是一个独立思考的高级专家。请基于自己的判断给出高质量、可执行的回答。
要求:独立思考,不假设其他专家会补充;区分结论、依据、假设、风险;简洁但完整。`;

/** Prefix a user prompt with the system preamble, separated by a blank line. */
const withSystemPrompt = prompt => `${SYSTEM_PROMPT}\n\n${prompt}`;

/**
 * Provider table keyed by provider name. Each entry has the executable to run
 * (`cmd`), a function that builds its argv from the user prompt
 * (`buildArgs`), and the adapter that turns its raw output into content
 * (`adapt`).
 */
const PROVIDERS = {
  codex: {
    cmd: 'codex',
    buildArgs(prompt) {
      return [...CODEX_ARGS_PREFIX, withSystemPrompt(prompt)];
    },
    adapt: adaptCodex,
  },
  gemini: {
    cmd: 'gemini',
    // Gemini takes the prompt via -p; its shared flags go after it.
    buildArgs(prompt) {
      return ['-p', withSystemPrompt(prompt), ...GEMINI_ARGS_PREFIX];
    },
    adapt: adaptGemini,
  },
  claude: {
    cmd: 'claude',
    buildArgs(prompt) {
      return [...CLAUDE_ARGS_PREFIX, withSystemPrompt(prompt)];
    },
    adapt: adaptClaude,
  },
};
|
|
31
|
-
|
|
32
|
-
/**
 * Fisher-Yates shuffle. Reorders `arr` in place and returns the same array.
 */
function shuffle(arr) {
  let remaining = arr.length;
  while (remaining > 1) {
    // Pick among the still-unplaced elements, then move the pick to the end
    // of that unplaced region.
    const pick = Math.floor(Math.random() * remaining);
    remaining -= 1;
    const held = arr[remaining];
    arr[remaining] = arr[pick];
    arr[pick] = held;
  }
  return arr;
}
|
|
42
|
-
|
|
43
|
-
/**
 * Replace provider names with anonymous labels (Model A, Model B, …).
 *
 * The results are copied and shuffled first, so a label's position says
 * nothing about which provider produced it. The input array and its objects
 * are not modified. Labels are generated, not taken from a fixed list, so any
 * number of results gets distinct labels: A…Z, then AA, AB, ….
 *
 * @param {Array<object>} results - Result objects, each with a `provider` name.
 * @returns {{ results: Array<object>, mapping: object }} Shuffled copies with
 *   `provider` replaced by a label, plus a label → original provider map.
 */
function anonymize(results) {
  // Spreadsheet-style column letters: 0 → A, 25 → Z, 26 → AA, 27 → AB, …
  const labelFor = index => {
    let letters = '';
    for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
      letters = `${String.fromCharCode(65 + (n % 26))}${letters}`;
    }
    return `Model ${letters}`;
  };

  const shuffled = shuffle([...results]);
  const mapping = {};
  const anonymized = [];
  for (const [i, r] of shuffled.entries()) {
    const label = labelFor(i);
    mapping[label] = r.provider;
    anonymized.push({ ...r, provider: label });
  }
  return { results: anonymized, mapping };
}
|
|
58
|
-
|
|
59
|
-
/**
 * Run a single provider and return a normalized result object.
 *
 * Never throws and never rejects. Every failure is reported through the
 * `error` field: an unknown provider name, a throwing argv builder or
 * adapter, a timeout, or a non-zero exit.
 *
 * @param {string} name - Key into PROVIDERS.
 * @param {string} prompt - The user prompt handed to the provider's argv builder.
 * @param {object} [opts]
 * @param {string} [opts.cwd] - Working directory for the subprocess.
 * @param {number} [opts.timeoutMs] - Timeout passed to runProcess.
 * @returns {Promise<{ provider: string, content: string, duration_ms: number, error: string|null }>}
 *   `error` is null on success, 'timeout', `exit <code>`, or an exception
 *   message. `content` is '' whenever `error` is set.
 */
async function runOne(name, prompt, { cwd, timeoutMs } = {}) {
  const start = Date.now();
  try {
    const p = PROVIDERS[name];
    if (!p) throw new Error(`unknown provider: ${name}`);

    const raw = await runProcess(p.cmd, p.buildArgs(prompt), { cwd, timeoutMs });
    // Measure only the subprocess, not the adapter's parsing.
    const duration_ms = Date.now() - start;

    const error = raw.code === 'timeout' ? 'timeout'
      : raw.code !== 0 ? `exit ${raw.code}`
      : null;

    // Output of a failed run is not parsed.
    const content = error ? '' : p.adapt(raw).content;
    return { provider: name, content, duration_ms, error };
  } catch (err) {
    return {
      provider: name,
      content: '',
      duration_ms: Date.now() - start,
      error: err?.message ?? String(err),
    };
  }
}
|
|
76
|
-
|
|
77
|
-
/**
 * Ask the selected providers (Codex, Gemini, Claude) the same question in
 * parallel and collect one result per provider.
 *
 * @param {object} opts
 * @param {string} opts.prompt - The question to ask.
 * @param {string} [opts.only] - 'codex' | 'gemini' | 'claude'; run just this one.
 * @param {string[]} [opts.skip] - Provider names to leave out.
 * @param {number} [opts.timeoutMs] - Per-provider timeout in ms (default 90 000). 0 = no timeout.
 * @param {string} [opts.cwd] - Working directory for subprocesses.
 * @param {boolean} [opts.blind] - Anonymise provider names (default true).
 * @returns {Promise<{ results: Array, mapping: object|null }>} `mapping` is
 *   null when `blind` is false.
 * @throws {Error} When `only` / `skip` leave no provider to run.
 */
export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd, blind = true } = {}) {
  const targets = [];
  for (const name of Object.keys(PROVIDERS)) {
    if (only && name !== only) continue;
    if (skip.includes(name)) continue;
    targets.push(name);
  }

  if (targets.length === 0) {
    throw new Error('No providers selected — check --only / --skip flags.');
  }

  const pending = targets.map(name => runOne(name, prompt, { cwd, timeoutMs }));
  const settled = await Promise.allSettled(pending);

  // A rejected run still yields a result row, carrying the rejection message.
  const results = settled.map((outcome, idx) => {
    if (outcome.status === 'fulfilled') return outcome.value;
    return {
      provider: targets[idx],
      content: '',
      duration_ms: 0,
      error: outcome.reason?.message ?? 'unknown',
    };
  });

  return blind ? anonymize(results) : { results, mapping: null };
}
|
package/src/providers.js
DELETED
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'child_process';
|
|
2
|
-
|
|
3
|
-
// ─── Provider argv constants ──────────────────────────────────────────────────
|
|
4
|
-
|
|
5
|
-
// Fixed argv fragments for each provider CLI. The prompt itself is added by
// the caller.
export const CODEX_ARGS_PREFIX = [
  'exec',
  '--json',
  '--skip-git-repo-check',
  '--ephemeral',
];

export const GEMINI_ARGS_PREFIX = [
  '-o',
  'json',
];

export const CLAUDE_ARGS_PREFIX = [
  '--output-format',
  'json',
  '-p',
];
|
|
8
|
-
|
|
9
|
-
// ─── Process runner ───────────────────────────────────────────────────────────
|
|
10
|
-
|
|
11
|
-
/**
 * Spawn a subprocess with an AbortController-based timeout and collect its
 * output as UTF-8 text.
 *
 * Never rejects and never throws. The returned promise always resolves.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments for the executable.
 * @param {object} [opts]
 * @param {string} [opts.cwd] - Working directory for the subprocess.
 * @param {number} [opts.timeoutMs] - Abort after this many ms; falsy = no timeout.
 * @returns {Promise<{ stdout: string, stderr: string, code: number|string|null }>}
 *   `code` is the exit code reported by the 'close' event, 'timeout' when the
 *   run was aborted by the timer, or -1 when the process could not be started
 *   or errored.
 */
export function runProcess(cmd, args, { cwd, timeoutMs } = {}) {
  const ac = new AbortController();

  let proc;
  try {
    proc = spawn(cmd, args, {
      signal: ac.signal,
      stdio: ['ignore', 'pipe', 'pipe'],
      ...(cwd ? { cwd } : {}),
    });
  } catch (err) {
    // spawn() validates its arguments synchronously (an empty command, for
    // example). Report that like any other start failure instead of throwing.
    return Promise.resolve({ stdout: '', stderr: String(err?.message ?? err), code: -1 });
  }

  // Decode at the stream level so a multi-byte UTF-8 character split across
  // two chunks is reassembled instead of turning into replacement characters.
  proc.stdout.setEncoding('utf8');
  proc.stderr.setEncoding('utf8');

  let stdout = '';
  let stderr = '';
  proc.stdout.on('data', chunk => { stdout += chunk; });
  proc.stderr.on('data', chunk => { stderr += chunk; });

  const timer = timeoutMs ? setTimeout(() => ac.abort(), timeoutMs) : null;

  return new Promise(resolve => {
    // May run twice ('error' followed by 'close'); only the first call
    // settles the promise, the second is a no-op.
    const done = code => {
      if (timer) clearTimeout(timer);
      resolve({ stdout, stderr, code });
    };
    proc.on('close', done);
    // An abort surfaces as an 'error' event named AbortError.
    proc.on('error', err => done(err.name === 'AbortError' ? 'timeout' : -1));
  });
}
|
|
39
|
-
|
|
40
|
-
// ─── Adapters ─────────────────────────────────────────────────────────────────
|
|
41
|
-
|
|
42
|
-
/**
 * Last-resort adapter: use the trimmed final 2000 characters of raw stdout,
 * or the placeholder '[no output]' when that is empty.
 */
export function fallback(rawStdout) {
  const tail = rawStdout.slice(-2000).trim();
  const content = tail === '' ? '[no output]' : tail;
  return { content, parse_mode: 'fallback' };
}
|
|
46
|
-
|
|
47
|
-
/**
 * Parse Codex's JSONL stream and return the text of the last completed
 * agent_message item, or of the last completed item of any type when there is
 * no agent_message. Lines that are not valid JSON are skipped. When no text
 * can be extracted, the raw stdout tail is returned via fallback().
 */
export function adaptCodex(raw) {
  try {
    let lastAgentMessage;
    let lastCompleted;

    for (const line of raw.stdout.trim().split('\n')) {
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        continue; // non-JSON noise line
      }
      if (event.type !== 'item.completed') continue;
      lastCompleted = event;
      if (event.item?.type === 'agent_message') lastAgentMessage = event;
    }

    const chosen = lastAgentMessage ?? lastCompleted;
    if (chosen?.item?.text) {
      return { content: chosen.item.text, parse_mode: 'jsonl' };
    }
  } catch { /* fall through */ }
  return fallback(raw.stdout);
}
|
|
59
|
-
|
|
60
|
-
/**
 * Extract the response text from Gemini's JSON output, skipping startup noise
 * printed before the JSON document.
 *
 * Parsing is first attempted from the first '{'. If that fails (the noise
 * itself contained a brace, for example), each later '{' that begins a line
 * is tried in turn. Trailing text after the JSON document is not handled and
 * still makes the parse fail.
 *
 * @param {string} stdout - Raw stdout of the Gemini CLI.
 * @returns {string|null} The top-level `response` string, else the first
 *   `response` string found one level down, else null. Also null when stdout
 *   contains no '{'.
 * @throws {SyntaxError} When no candidate position parses as JSON. The error
 *   thrown is the one from the first attempt.
 */
export function parseGeminiResponse(stdout) {
  const firstBrace = stdout.indexOf('{');
  if (firstBrace === -1) return null;

  let parsed;
  let firstError;
  for (let at = firstBrace; at !== -1;) {
    try {
      parsed = JSON.parse(stdout.slice(at));
      break;
    } catch (err) {
      firstError ??= err;
    }
    // Next candidate: a '{' at the start of a later line.
    const nextLine = stdout.indexOf('\n{', at);
    at = nextLine === -1 ? -1 : nextLine + 1;
  }
  // A successful parse of text starting with '{' is always an object, so
  // `undefined` means every attempt failed.
  if (parsed === undefined) throw firstError;

  if (typeof parsed.response === 'string') return parsed.response;
  for (const v of Object.values(parsed)) {
    if (v && typeof v === 'object' && typeof v.response === 'string') return v.response;
  }
  return null;
}
|
|
71
|
-
|
|
72
|
-
/**
 * Turn Gemini's raw output into a content object. Uses the parsed response
 * when one is found. Otherwise (parse error, no response, empty response) it
 * falls back to the raw stdout tail.
 */
export function adaptGemini(raw) {
  let response = null;
  try {
    response = parseGeminiResponse(raw.stdout);
  } catch {
    response = null; // unparseable output is handled by the fallback below
  }
  return response
    ? { content: response, parse_mode: 'json' }
    : fallback(raw.stdout);
}
|
|
80
|
-
|
|
81
|
-
/**
 * Turn the Claude CLI's JSON output into a content object. Returns the
 * `result` field when it is a string. Otherwise (invalid JSON, non-object
 * JSON, missing or non-string `result`) it falls back to the raw stdout tail.
 */
export function adaptClaude(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw.stdout);
  } catch {
    return fallback(raw.stdout);
  }

  const text = parsed?.result;
  if (typeof text !== 'string') return fallback(raw.stdout);
  return { content: text, parse_mode: 'json' };
}
|