braintrust-lite 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -102
- package/bin/braintrust +12 -0
- package/package.json +20 -20
- package/skills/consult/SKILL.md +2 -2
- package/src/config.js +60 -0
- package/src/doctor.js +120 -0
- package/src/format.js +26 -49
- package/src/judge.js +87 -0
- package/src/main.js +332 -0
- package/src/memory/db.js +183 -0
- package/src/memory/index.js +31 -0
- package/src/normalize.js +172 -0
- package/src/normalize.test.js +125 -0
- package/src/prompts/architecture.md +21 -0
- package/src/prompts/code.md +21 -0
- package/src/prompts/general.md +22 -0
- package/src/prompts/index.js +49 -0
- package/src/prompts/writing.md +21 -0
- package/src/providers/claude.js +45 -0
- package/src/providers/codex.js +69 -0
- package/src/providers/gemini.js +81 -0
- package/src/providers/index.js +22 -0
- package/src/reflector.js +244 -0
- package/src/save.js +93 -0
- package/src/server.js +245 -38
- package/LICENSE +0 -21
- package/bin/consult +0 -79
- package/scripts/setup.js +0 -66
- package/src/consult.js +0 -81
- package/src/providers.js +0 -91
package/src/reflector.js
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Reflector — async background process (Phase 2)
|
|
5
|
+
*
|
|
6
|
+
* Invoked as a detached child: node reflector.js --run <ts>
|
|
7
|
+
*
|
|
8
|
+
* Verifier ≠ Executor principle:
|
|
9
|
+
* Uses REFLECTOR_CMD (codex gpt-5.4-mini) which differs from the default
|
|
10
|
+
* judge (claude). Avoids self-evaluation bias.
|
|
11
|
+
*
|
|
12
|
+
* One LLM call, three outputs:
|
|
13
|
+
* 1. lessons — 1-3 reusable rules (≤30 chars each)
|
|
14
|
+
* 2. skills — 0-2 reusable prompt templates (Voyager skill library)
|
|
15
|
+
* 3. judge_score — 1-5 quality rating for the judge report
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const { appendFileSync } = require('fs');
|
|
19
|
+
const { spawn } = require('child_process');
|
|
20
|
+
const { join } = require('path');
|
|
21
|
+
|
|
22
|
+
const {
|
|
23
|
+
REFLECTOR_CMD,
|
|
24
|
+
REFLECTOR_ARGS_PREFIX,
|
|
25
|
+
REFLECTOR_LOG,
|
|
26
|
+
DB_PATH,
|
|
27
|
+
ECONOMY,
|
|
28
|
+
} = require('./config.js');
|
|
29
|
+
|
|
30
|
+
// ─── Logging ──────────────────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
/**
 * Append one timestamped line to the reflector log file.
 *
 * Logging is best-effort: a failed write is deliberately ignored so that a
 * logging problem never interrupts the reflection itself.
 *
 * @param {string} msg - Text to record after the ISO-8601 timestamp.
 */
function log(msg) {
  const stamp = new Date().toISOString();
  const entry = `[${stamp}] ${msg}\n`;
  try {
    appendFileSync(REFLECTOR_LOG, entry);
  } catch {
    /* ignore */
  }
}
|
|
36
|
+
|
|
37
|
+
// ─── Process runner (no timeout — reflector runs offline) ─────────────────────
|
|
38
|
+
|
|
39
|
+
/**
 * Spawn a child process and collect everything it writes.
 *
 * The returned promise never rejects: a spawn failure is reported as
 * `{ stdout: '', stderr: <error message>, code: -1 }`.
 * No timeout is applied.
 *
 * @param {string} cmd - Executable to launch.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<{stdout: string, stderr: string, code: number|null}>}
 */
function run(cmd, args) {
  return new Promise((resolve) => {
    const proc = spawn(cmd, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';

    // Decode through the stream's own UTF-8 decoder. Appending raw Buffer
    // chunks to a string decodes each chunk separately, which corrupts any
    // multi-byte character that is split across two chunks.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    proc.stdout.on('data', (chunk) => { stdout += chunk; });
    proc.stderr.on('data', (chunk) => { stderr += chunk; });
    proc.on('close', (code) => resolve({ stdout, stderr, code }));
    proc.on('error', (err) => resolve({ stdout: '', stderr: err.message, code: -1 }));
  });
}
|
|
49
|
+
|
|
50
|
+
// ─── Extract text from codex JSONL ────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Pull the final message text out of JSONL output.
 *
 * Lines are scanned from last to first; the first line that parses as JSON
 * and carries a non-empty string at `item.text` wins. Lines that are not
 * valid JSON, or that have no such field, are skipped. When no line
 * qualifies, the whole trimmed output is returned unchanged.
 *
 * @param {string} stdout - Raw process output.
 * @returns {string} Extracted text, or the trimmed input as a fallback.
 */
function extractCodexText(stdout) {
  const trimmed = stdout.trim();
  const lines = trimmed.split('\n');

  for (let i = lines.length - 1; i >= 0; i -= 1) {
    let event;
    try {
      event = JSON.parse(lines[i]);
    } catch {
      continue; // not a JSON line
    }
    const text = event?.item?.text;
    // Only accept real strings: callers go on to use string methods on the result.
    if (typeof text === 'string' && text) return text;
  }

  return trimmed;
}
|
|
62
|
+
|
|
63
|
+
// ─── Build reflector prompt ───────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
/**
 * Compose the prompt sent to the reflector model.
 *
 * The prompt embeds the question and at most the first 3000 characters of the
 * judge report, then asks for lessons, skills and a judge score in a strict
 * JSON layout, followed by one example skill.
 *
 * @param {string} question - The question that was asked in the run.
 * @param {string} judgeReport - The judge's report for that run.
 * @returns {string} The complete prompt text.
 */
function buildReflectorPrompt(question, judgeReport) {
  const reportExcerpt = judgeReport.slice(0, 3000);

  const promptLines = [
    '你是一个 AI 系统的元认知分析器。以下是一次 AI 智囊团的讨论结果，请完成 3 个任务：',
    '',
    `问题: ${question}`,
    '',
    'Judge 融合报告:',
    `${reportExcerpt}`,
    '',
    '---',
    '',
    '**任务 1** — 提炼 1-3 条可复用的 lesson（每条 ≤30字，可操作的规则，不是泛泛而谈）',
    '**任务 2** — 抽取 0-2 条 skill（命名 + 描述 + 可注入的 prompt 片段，参考示例格式）',
    '**任务 3** — 给 judge 报告质量评分 1-5 分（具体性 × 可行性 × 完整度）',
    '',
    '严格按以下 JSON 格式输出，不要有任何额外文字：',
    '{',
    '"lessons": [',
    '{"domain": "general", "lesson": "简短可操作规则，≤30字"}',
    '],',
    '"skills": [',
    '{',
    '"name": "snake_case_name",',
    '"description": "一句话描述用途",',
    '"template": "可直接注入 prompt 的模板文本",',
    '"domain": "general"',
    '}',
    '],',
    '"judge_score": 4,',
    '"judge_weakness": "一句话说明 judge 报告最大的不足"',
    '}',
    '',
    '示例 skill：',
    '{',
    '"name": "constraint_first_analysis",',
    '"description": "架构决策前先列约束，避免方案偏离实际",',
    '"template": "请先列出至少3个硬约束（性能/成本/团队能力），再给出候选方案。",',
    '"domain": "architecture"',
    '}',
  ];

  return promptLines.join('\n');
}
|
|
104
|
+
|
|
105
|
+
// ─── Parse reflector JSON output ──────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
/**
 * Parse the reflector model's reply into a validated structure.
 *
 * The JSON object is taken as the span from the first `{` to the last `}`,
 * so surrounding chatter is tolerated.
 *
 * @param {string} text - Model reply.
 * @returns {{lessons: object[], skills: object[], judgeScore: number|null, judgeWeakness: *}}
 *   `lessons` / `skills` contain only plain-object entries (empty when the
 *   field is missing or not an array); `judgeScore` is a number in 1..5 or
 *   `null`; `judgeWeakness` is the raw `judge_weakness` value or `null`.
 * @throws {Error} When no `{ ... }` span exists.
 * @throws {SyntaxError} When the span is not valid JSON.
 */
function parseReflectorOutput(text) {
  const start = text.indexOf('{');
  const end = text.lastIndexOf('}');
  // `end < start` also covers replies such as "} {" that contain no usable span.
  if (start === -1 || end < start) {
    throw new Error('No JSON object found in reflector output');
  }

  const raw = JSON.parse(text.slice(start, end + 1));

  const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  // Drop null / primitive entries so consumers can read properties safely.
  const recordsOf = (v) => (Array.isArray(v) ? v.filter(isRecord) : []);

  const score = raw.judge_score;
  // The prompt asks for a 1-5 rating; anything else is treated as "no score".
  const hasValidScore = typeof score === 'number' && score >= 1 && score <= 5;

  return {
    lessons: recordsOf(raw.lessons),
    skills: recordsOf(raw.skills),
    judgeScore: hasValidScore ? score : null,
    judgeWeakness: raw.judge_weakness || null,
  };
}
|
|
122
|
+
|
|
123
|
+
// ─── Persist to DB ────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Store the reflector's findings for one run.
 *
 * Inserts the lessons and skills, then marks the run as `reflected`
 * (recording `judge_score` when one is available). All writes happen in a
 * single transaction so a failure part-way through leaves no partial data,
 * and the database handle is always closed.
 *
 * If the database cannot be opened the failure is logged and the function
 * returns without writing anything. Errors raised while writing propagate
 * to the caller.
 *
 * @param {string} ts - Run identifier (`runs.ts`).
 * @param {{lessons: object[], skills: object[], judgeScore: number|null}} reflections
 */
function persistReflections(ts, { lessons, skills, judgeScore }) {
  let db;
  try {
    const Database = require('better-sqlite3');
    db = new Database(DB_PATH);
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');
  } catch (err) {
    log(`DB open failed: ${err.message}`);
    // The handle may already be open if only a pragma failed.
    try { db?.close(); } catch { /* ignore */ }
    return;
  }

  try {
    const now = Date.now();

    const insertLesson = db.prepare(`
      INSERT INTO lessons (from_run, domain, lesson, created_at)
      VALUES (?, ?, ?, ?)
    `);
    const insertSkill = db.prepare(`
      INSERT OR IGNORE INTO skills (name, description, template, domain, from_run, created_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    // Model output is untrusted: only non-empty strings are bound as text.
    const isText = (v) => typeof v === 'string' && v.length > 0;
    const textOr = (v, fallback) => (isText(v) ? v : fallback);

    const writeAll = db.transaction(() => {
      for (const l of lessons) {
        if (!l || !isText(l.lesson) || l.lesson.length > 120) continue; // basic validation
        insertLesson.run(ts, textOr(l.domain, 'general'), l.lesson, now);
      }

      for (const s of skills) {
        if (!s || !isText(s.name) || !isText(s.template)) continue;
        insertSkill.run(
          s.name, textOr(s.description, ''), s.template, textOr(s.domain, 'general'), ts, now
        );
      }

      // Update run state, plus judge_score only when one was produced.
      const updates = ['state = ?', 'reflected_at = ?'];
      const vals = ['reflected', now];
      if (judgeScore !== null) {
        updates.push('judge_score = ?');
        vals.push(judgeScore);
      }
      vals.push(ts);
      db.prepare(`UPDATE runs SET ${updates.join(', ')} WHERE ts = ?`).run(...vals);
    });

    writeAll();
  } finally {
    db.close();
  }
}
|
|
170
|
+
|
|
171
|
+
// ─── Main reflect() ───────────────────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
/**
 * Run the reflection step for one stored run.
 *
 * Steps: load the run's question and judge report, ask the reflector model
 * for lessons / skills / a judge score, parse the reply, and persist it.
 * Every early exit is written to the reflector log. Nothing happens when
 * ECONOMY is set, when the run is missing, or when it has no judge report.
 *
 * @param {string} ts - Run identifier (`runs.ts`).
 * @returns {Promise<void>}
 */
async function reflect(ts) {
  if (ECONOMY) {
    log(`[${ts}] ECONOMY mode — reflector skipped`);
    return;
  }

  log(`[${ts}] starting reflection`);

  // Load run data from DB
  let db;
  try {
    const Database = require('better-sqlite3');
    db = new Database(DB_PATH);
  } catch (err) {
    log(`[${ts}] DB open failed: ${err.message}`);
    return;
  }

  let run;
  try {
    run = db.prepare('SELECT question, judge_report FROM runs WHERE ts = ?').get(ts);
  } finally {
    // Release the handle even when the query throws.
    db.close();
  }

  if (!run) {
    log(`[${ts}] run not found in DB`);
    return;
  }
  if (!run.judge_report) {
    log(`[${ts}] no judge_report — skipping (per design: only reflect judged runs)`);
    return;
  }

  const prompt = buildReflectorPrompt(run.question, run.judge_report);

  log(`[${ts}] calling ${REFLECTOR_CMD} with ${REFLECTOR_ARGS_PREFIX.join(' ')}`);
  const raw = await run_reflector(prompt);

  if (!raw.stdout) {
    log(`[${ts}] empty output from reflector (code=${raw.code}): ${raw.stderr.slice(0, 200)}`);
    return;
  }

  const text = extractCodexText(raw.stdout);

  let parsed;
  try {
    parsed = parseReflectorOutput(text);
  } catch (err) {
    log(`[${ts}] JSON parse failed: ${err.message}\nRaw: ${text.slice(0, 300)}`);
    return;
  }

  log(`[${ts}] lessons=${parsed.lessons.length} skills=${parsed.skills.length} score=${parsed.judgeScore}`);

  persistReflections(ts, parsed);
  log(`[${ts}] done`);
}
|
|
222
|
+
|
|
223
|
+
/**
 * Invoke the reflector command with the prompt as its last argument.
 *
 * Kept as a separately named wrapper because inside reflect() the identifier
 * `run` is a local variable (the DB row) and hides the run() helper.
 *
 * @param {string} prompt - Prompt text appended after REFLECTOR_ARGS_PREFIX.
 * @returns {Promise<{stdout: string, stderr: string, code: number|null}>}
 */
async function run_reflector(prompt) {
  const argv = [...REFLECTOR_ARGS_PREFIX, prompt];
  return run(REFLECTOR_CMD, argv);
}
|
|
227
|
+
|
|
228
|
+
// ─── CLI entry point ──────────────────────────────────────────────────────────
|
|
229
|
+
|
|
230
|
+
// Runs only when this file is executed directly: `node reflector.js --run <ts>`.
if (require.main === module) {
  const argv = process.argv.slice(2);
  const flagPos = argv.indexOf('--run');
  // The run id is the argument immediately following `--run`.
  const ts = flagPos === -1 ? undefined : argv[flagPos + 1];

  if (!ts) {
    process.stderr.write('Usage: node reflector.js --run <ts>\n');
    process.exit(1);
  }

  reflect(ts).catch((err) => {
    log(`[${ts}] uncaught error: ${err.message}`);
    process.exit(1);
  });
}
|
|
243
|
+
|
|
244
|
+
module.exports = { reflect };
|
package/src/save.js
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { writeFileSync, mkdirSync } = require('fs');
|
|
4
|
+
const { join } = require('path');
|
|
5
|
+
|
|
6
|
+
/**
 * Write every artifact of a run into its directory.
 *
 * Files produced, in this order:
 *   raw/<provider>.txt  - each provider's stdout (empty string when absent)
 *   normalized.json     - the normalized results, pretty-printed
 *   report.md           - all provider outputs plus the judge report
 *   summary.md          - question plus judge report (or key claims)
 *
 * @param {string} runDir - Absolute path to the run directory
 * @param {string} question - The user's question
 * @param {object} raws - Map of provider name → raw process result
 * @param {Array} results - Normalized provider results
 * @param {string|null} judgeOutput - Judge fusion report markdown
 */
function saveArtifacts(runDir, question, raws, results, judgeOutput) {
  const rawDir = join(runDir, 'raw');
  mkdirSync(rawDir, { recursive: true });

  // One text file per provider with its untouched stdout.
  Object.keys(raws).forEach((name) => {
    writeFileSync(join(rawDir, `${name}.txt`), raws[name].stdout || '');
  });

  const normalizedJson = JSON.stringify(results, null, 2);
  writeFileSync(join(runDir, 'normalized.json'), normalizedJson);

  const report = buildReport(question, results, judgeOutput);
  writeFileSync(join(runDir, 'report.md'), report);

  const summary = buildSummary(question, results, judgeOutput);
  writeFileSync(join(runDir, 'summary.md'), summary);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Render the full markdown report for a run.
 *
 * Layout: a header (question + current ISO timestamp), one section per
 * provider result (name, duration in seconds, optional warning, content),
 * and finally the judge report when one exists.
 *
 * @param {string} question - The user's question
 * @param {Array} results - Normalized provider results
 * @param {string|null} judgeOutput - Judge fusion report markdown
 * @returns {string} Markdown text
 */
function buildReport(question, results, judgeOutput) {
  const header = [
    `**问题**: ${question}`,
    `**时间**: ${new Date().toISOString()}`,
    '',
    '---',
    '',
  ];

  const providerSections = results.flatMap((r) => {
    const seconds = (r.duration_ms / 1000).toFixed(1);
    const warning = r.error ? ` ⚠ ${r.error}` : '';
    return [
      `## ${r.provider} (${seconds}s${warning})`,
      '',
      r.content || '[no content]',
      '',
      '---',
      '',
    ];
  });

  const judgeSection = judgeOutput
    ? ['# 🧠 BRAINTRUST — 智囊团融合报告', '', judgeOutput]
    : [];

  return [...header, ...providerSections, ...judgeSection].join('\n');
}
|
|
65
|
+
|
|
66
|
+
/**
 * Render the short markdown summary for a run.
 *
 * Contains the question and current ISO timestamp, followed by the judge
 * report when there is one. Without a judge report, the key claims of every
 * successful provider are listed instead. Results that failed, or that carry
 * no `key_claims` array, are skipped.
 *
 * @param {string} question - The user's question
 * @param {Array} results - Normalized provider results
 * @param {string|null} judgeOutput - Judge fusion report markdown
 * @returns {string} Markdown text
 */
function buildSummary(question, results, judgeOutput) {
  const lines = [
    `**问题**: ${question}`,
    `**时间**: ${new Date().toISOString()}`,
    '',
  ];

  if (judgeOutput) {
    lines.push(judgeOutput);
    return lines.join('\n');
  }

  // No judge: include key_claims from each successful model
  for (const r of results) {
    // Tolerate results without a key_claims array instead of throwing.
    const claims = Array.isArray(r.key_claims) ? r.key_claims : [];
    if (r.error || claims.length === 0) continue;
    lines.push(`## ${r.provider}`, claims.join('\n'), '');
  }

  return lines.join('\n');
}
|
|
92
|
+
|
|
93
|
+
module.exports = { saveArtifacts };
|
package/src/server.js
CHANGED
|
@@ -1,60 +1,267 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* MCP server for braintrust-lite.
|
|
6
|
+
*
|
|
7
|
+
* Exposes one tool: `consult`
|
|
8
|
+
* Runs Claude CLI, Codex CLI, and Gemini CLI in parallel, returns their
|
|
9
|
+
* responses as Model A / B / C (blind by default) for the calling agent to judge.
|
|
10
|
+
*
|
|
11
|
+
* Protocol: JSON-RPC 2.0 over stdio, line-delimited.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const readline = require('readline');
|
|
15
|
+
const { spawn } = require('child_process');
|
|
16
|
+
const { resolve } = require('path');
|
|
17
|
+
const { version: PKG_VERSION } = require('../package.json');
|
|
18
|
+
|
|
19
|
+
const { DEFAULT_TIMEOUT_S } = require('./config.js');
|
|
20
|
+
const { getActiveProviders } = require('./providers/index.js');
|
|
21
|
+
const { normalize } = require('./normalize.js');
|
|
22
|
+
const { buildGeneratorSystem } = require('./prompts/index.js');
|
|
23
|
+
|
|
24
|
+
// ─── Process Runner ────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
/**
 * Create a process runner bound to a timeout and a default working directory.
 *
 * The returned `runProcess(cmd, args, opts)` spawns the command and resolves
 * (never rejects) with `{ stdout, stderr, code, error_type }`:
 *   - normal exit:     `code` is the exit code, `error_type` is `null` for 0
 *                      and `'nonzero'` otherwise
 *   - timeout:         `code` is `'timeout'`, `error_type` is `'timeout'`
 *   - command missing: `code` is -1, `error_type` is `'enoent'`
 *   - other failure:   `code` is -1, `error_type` is `'spawn_error'`
 *
 * @param {number} timeoutMs - Milliseconds before the child is aborted.
 * @param {string} workDir - Working directory used when `opts.cwd` is not set.
 * @returns {(cmd: string, args: string[], opts?: {cwd?: string}) => Promise<object>}
 */
function makeRunner(timeoutMs, workDir) {
  return function runProcess(cmd, args, opts = {}) {
    const ac = new AbortController();
    const cwd = opts.cwd || workDir;
    const proc = spawn(cmd, args, { signal: ac.signal, stdio: ['ignore', 'pipe', 'pipe'], cwd });
    let stdout = '';
    let stderr = '';

    // Let the streams decode UTF-8 themselves; appending raw Buffer chunks
    // would garble a multi-byte character split across two chunks.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (d) => { stdout += d; });
    proc.stderr.on('data', (d) => { stderr += d; });

    const timer = setTimeout(() => ac.abort(), timeoutMs);

    return new Promise((res) => {
      let resolved = false;
      // Both 'error' and 'close' can fire for one child; only the first counts.
      const done = (code, error_type = null) => {
        if (resolved) return;
        resolved = true;
        clearTimeout(timer);
        res({ stdout, stderr, code, error_type });
      };

      proc.on('close', (code) => done(code, code !== 0 ? 'nonzero' : null));
      proc.on('error', (err) => {
        if (err.name === 'AbortError') done('timeout', 'timeout');
        else if (err.code === 'ENOENT') done(-1, 'enoent');
        else done(-1, 'spawn_error');
      });
    });
  };
}
|
|
52
|
+
|
|
53
|
+
// ─── MCP Response Helpers ──────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
/**
 * Write a JSON-RPC 2.0 success response to stdout as a single line.
 *
 * @param {*} id - Request id being answered.
 * @param {*} result - Result payload.
 */
function respond(id, result) {
  const envelope = { jsonrpc: '2.0', id, result };
  process.stdout.write(`${JSON.stringify(envelope)}\n`);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Write a JSON-RPC 2.0 error response to stdout as a single line.
 *
 * @param {*} id - Request id being answered.
 * @param {number} code - JSON-RPC error code.
 * @param {string} message - Human-readable error description.
 */
function respondError(id, code, message) {
  const envelope = { jsonrpc: '2.0', id, error: { code, message } };
  process.stdout.write(`${JSON.stringify(envelope)}\n`);
}
|
|
62
|
+
|
|
63
|
+
// ─── Tool Schema ───────────────────────────────────────────────────────────────
|
|
7
64
|
|
|
8
65
|
// Schema of the single MCP tool, returned verbatim by `tools/list`.
// NOTE(review): the timeout_sec text promises "0 = wait without limit" and a
// default of 90; confirm both against the handler and DEFAULT_TIMEOUT_S.
const CONSULT_TOOL = {
  name: 'consult',
  description: '并发调用 Claude CLI、Codex CLI、Gemini CLI，以 Model A/B/C 匿名形式返回三模型独立回答，供主 Claude 担任 Judge 进行盲评合并。',
  inputSchema: {
    type: 'object',
    required: ['prompt'],
    properties: {
      // The question to send to every selected model.
      prompt: {
        type: 'string',
        description: '问题或任务描述（建议自包含，含必要上下文）',
      },
      // Models to leave out of the call.
      skip: {
        type: 'array',
        items: { type: 'string', enum: ['claude', 'codex', 'gemini'] },
        description: '跳过指定模型（可多选）',
      },
      // Restrict the call to exactly one model.
      only: {
        type: 'string',
        enum: ['claude', 'codex', 'gemini'],
        description: '只调用一个模型',
      },
      // Per-model timeout in seconds.
      timeout_sec: {
        type: 'number',
        description: '每个模型超时秒数（0 = 不限时等待；默认 90）',
        default: 90,
      },
      // Whether provider names are replaced by A/B/C labels.
      blind: {
        type: 'boolean',
        description: '匿名化模型名称为 A/B/C，防止位置偏置（默认 true）',
        default: true,
      },
      // Working directory for the spawned CLIs.
      cwd: {
        type: 'string',
        description: '子进程工作目录（默认：当前进程 cwd）',
      },
    },
  },
};
|
|
24
105
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
106
|
+
// ─── Blind Label Assignment ────────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
/**
 * Give each result an anonymous letter label (A, B, C, ...).
 *
 * Results are ordered by a small 32-bit hash of their provider name, so the
 * order is the same on every call but is not alphabetical. The input array
 * is left untouched.
 *
 * @param {Array} results - Normalized provider results
 * @returns {Array<{label: string, result: object}>}
 */
function assignBlindLabels(results) {
  const nameHash = (name) => {
    let acc = 0;
    for (const ch of name) {
      acc = ((acc * 31) + ch.charCodeAt(0)) | 0; // keep within signed 32 bits
    }
    return acc;
  };

  const ordered = results.slice();
  ordered.sort((left, right) => nameHash(left.provider) - nameHash(right.provider));

  const FIRST_LABEL_CODE = 65; // 'A'
  return ordered.map((result, index) => ({
    label: String.fromCharCode(FIRST_LABEL_CODE + index),
    result,
  }));
}
|
|
120
|
+
|
|
121
|
+
// ─── Consult Handler ──────────────────────────────────────────────────────────
|
|
122
|
+
|
|
123
|
+
/**
 * Handle one `consult` tool call: run the selected provider CLIs in
 * parallel and return their answers as a single markdown text block.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.prompt - Question or task; must be a non-empty string.
 * @param {string[]} [args.skip] - Provider names to leave out.
 * @param {string} [args.only] - Run just this provider (overrides `skip`).
 * @param {number} [args.timeout_sec] - Per-provider timeout in seconds;
 *   0 is treated as a 10-minute cap. Defaults to DEFAULT_TIMEOUT_S.
 * @param {boolean} [args.blind=true] - Label answers "Model A/B/C" and append
 *   a reveal line with the real names.
 * @param {string} [args.cwd] - Working directory for the child processes.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 * @throws {Error} On an empty/non-string prompt, an invalid timeout, or when
 *   no provider remains after applying `skip` / `only`.
 */
async function handleConsult(args) {
  const {
    prompt,
    skip = [],
    only,
    timeout_sec = DEFAULT_TIMEOUT_S,
    blind = true,
    cwd,
  } = args;

  // A non-string prompt would otherwise fail with an opaque TypeError on .trim().
  if (typeof prompt !== 'string' || !prompt.trim()) {
    throw new Error('prompt is required and must not be empty');
  }

  // NaN or negative values would make the timer fire almost immediately and
  // report every provider as timed out, so reject them up front.
  const timeoutSec = Number(timeout_sec);
  if (!Number.isFinite(timeoutSec) || timeoutSec < 0) {
    throw new Error('timeout_sec must be a non-negative number');
  }

  // No-timeout sentinel: use 10 min cap so the process eventually ends
  const timeoutMs = timeoutSec === 0 ? 10 * 60 * 1000 : timeoutSec * 1000;
  const workDir = cwd ? resolve(cwd) : process.cwd();
  const runProcess = makeRunner(timeoutMs, workDir);

  // Resolve active providers
  const skipList = only
    ? ['claude', 'codex', 'gemini'].filter((n) => n !== only)
    : [...skip];
  const activeProviders = getActiveProviders(skipList);

  if (activeProviders.length === 0) {
    throw new Error('No providers selected — check skip/only parameters.');
  }

  // Build generator prompt
  const systemPrompt = buildGeneratorSystem('general');
  const fullPrompt = `${systemPrompt}\n\n${prompt}`;

  // Run all providers in parallel. Each provider is timed on its own so the
  // reported duration is that provider's, not the slowest one's.
  const batchStart = Date.now();
  const settled = await Promise.allSettled(
    activeProviders.map(async (p) => {
      const startedAt = Date.now();
      const raw = await runProcess(p.cmd, p.getArgs(fullPrompt));
      return { raw, ms: Date.now() - startedAt };
    })
  );

  // Normalize results
  const results = activeProviders.map((p, i) => {
    const outcome = settled[i];
    const { raw, ms } = outcome.status === 'fulfilled'
      ? outcome.value
      : {
        raw: { stdout: '', stderr: '', code: -1, error_type: 'spawn_error' },
        ms: Date.now() - batchStart,
      };
    return normalize(p.name, raw, p.adapt(raw), ms);
  });

  const successCount = results.filter((r) => !r.error).length;

  // Build labeled pairs
  const labeled = blind
    ? assignBlindLabels(results)
    : results.map((r) => ({ label: r.provider, result: r }));

  // Compose text output
  const parts = [];

  for (const { label, result: r } of labeled) {
    const header = blind ? `Model ${label}` : r.provider;
    const timing = r.error
      ? ` ⚠ ${r.error_type || r.error}`
      : ` (${(r.duration_ms / 1000).toFixed(1)}s, parse_score=${r.parse_score.toFixed(2)})`;
    parts.push(`## ${header}${timing}\n\n${r.content || '[no output]'}`);
  }

  if (successCount < activeProviders.length) {
    parts.push(
      `> ⚠ **DEGRADED**: Only ${successCount}/${activeProviders.length} models responded successfully.`
    );
  }

  if (blind) {
    const mapping = labeled
      .map(({ label, result: r }) => `Model ${label} = ${r.provider}`)
      .join(' · ');
    parts.push(`---\n**REVEAL** (read after judging): ${mapping}`);
  }

  const text = parts.join('\n\n---\n\n');

  return {
    content: [{ type: 'text', text }],
  };
}
|
|
206
|
+
|
|
207
|
+
// ─── Request Dispatcher ────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
/**
 * Route one parsed JSON-RPC message to its handler.
 *
 * Handles `initialize`, `notifications/initialized`, `tools/list` and
 * `tools/call`. Any other method gets a "Method not found" error, but only
 * when the message carries an id (notifications are ignored).
 *
 * @param {{id?: *, method?: string, params?: object}} req - Parsed message.
 * @returns {Promise<void>}
 */
async function dispatch(req) {
  const { id, method, params } = req;

  if (method === 'initialize') {
    respond(id, {
      protocolVersion: '2024-11-05',
      capabilities: { tools: {} },
      serverInfo: { name: 'braintrust-lite', version: PKG_VERSION },
    });
    return;
  }

  if (method === 'notifications/initialized') {
    return; // no-op, no response needed
  }

  if (method === 'tools/list') {
    respond(id, { tools: [CONSULT_TOOL] });
    return;
  }

  if (method === 'tools/call') {
    const toolName = params && params.name;
    if (toolName !== 'consult') {
      respondError(id, -32601, `Unknown tool: ${toolName}`);
      return;
    }
    try {
      respond(id, await handleConsult(params.arguments || {}));
    } catch (err) {
      respondError(id, -32603, err.message);
    }
    return;
  }

  // Only send error for requests (have an id), not notifications
  if (id == null) return;
  respondError(id, -32601, `Method not found: ${method}`);
}
|
|
250
|
+
|
|
251
|
+
// ─── Entry Point ──────────────────────────────────────────────────────────────
|
|
252
|
+
|
|
253
|
+
/**
 * Start the stdio server loop.
 *
 * Reads stdin line by line, treats each non-blank line as one JSON message
 * and dispatches it. Lines that are not valid JSON are dropped silently;
 * dispatch failures are reported on stderr. The process exits with code 0
 * when stdin closes.
 */
function main() {
  const rl = readline.createInterface({ input: process.stdin, crlfDelay: Infinity });

  const handleLine = (line) => {
    const payload = line.trim();
    if (payload === '') return;

    let req;
    try {
      req = JSON.parse(payload);
    } catch {
      return;
    }

    dispatch(req).catch((err) => {
      process.stderr.write(`[server error] ${err.message}\n`);
    });
  };

  rl.on('line', handleLine);
  rl.on('close', () => process.exit(0));
}
|
|
58
266
|
|
|
59
|
-
|
|
60
|
-
await server.connect(transport);
|
|
267
|
+
main();
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 HongjieRen
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|