braintrust-lite 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/consult.js +36 -5
- package/src/providers.js +10 -0
- package/src/server.js +7 -2
package/package.json
CHANGED
package/src/consult.js
CHANGED
|
@@ -2,8 +2,10 @@ import {
|
|
|
2
2
|
runProcess,
|
|
3
3
|
adaptCodex,
|
|
4
4
|
adaptGemini,
|
|
5
|
+
adaptClaude,
|
|
5
6
|
CODEX_ARGS_PREFIX,
|
|
6
7
|
GEMINI_ARGS_PREFIX,
|
|
8
|
+
CLAUDE_ARGS_PREFIX,
|
|
7
9
|
} from './providers.js';
|
|
8
10
|
|
|
9
11
|
const SYSTEM_PROMPT = `你是一个独立思考的高级专家。请基于自己的判断给出高质量、可执行的回答。
|
|
@@ -20,8 +22,34 @@ const PROVIDERS = {
|
|
|
20
22
|
buildArgs: prompt => ['-p', `${SYSTEM_PROMPT}\n\n${prompt}`, ...GEMINI_ARGS_PREFIX],
|
|
21
23
|
adapt: adaptGemini,
|
|
22
24
|
},
|
|
25
|
+
claude: {
|
|
26
|
+
cmd: 'claude',
|
|
27
|
+
buildArgs: prompt => [...CLAUDE_ARGS_PREFIX, `${SYSTEM_PROMPT}\n\n${prompt}`],
|
|
28
|
+
adapt: adaptClaude,
|
|
29
|
+
},
|
|
23
30
|
};
|
|
24
31
|
|
|
32
|
+
/**
 * Randomly permute `arr` in place (Fisher-Yates) and hand the same array back.
 *
 * Walks from the end of the array toward the front; at each step the element
 * at the current tail position is swapped with a uniformly chosen element at
 * or before it.
 *
 * @param {Array} arr - Array to reorder; it is mutated.
 * @returns {Array} The same array instance, now shuffled.
 */
function shuffle(arr) {
  let remaining = arr.length;
  while (remaining > 1) {
    const tail = remaining - 1;
    // Pick an index in [0, tail]; picking `tail` itself leaves it in place.
    const pick = Math.floor(Math.random() * remaining);
    const held = arr[tail];
    arr[tail] = arr[pick];
    arr[pick] = held;
    remaining = tail;
  }
  return arr;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Replace provider names with anonymous labels (Model A, Model B, Model C…).
 * Order is randomised so the judge cannot infer identity from position.
 *
 * Labels are generated rather than read from a fixed table, so every result
 * receives a string label no matter how many there are: A…Z, then AA, AB, …
 * (spreadsheet-column style).
 *
 * @param {Array<object>} results - Result objects; neither the array nor its
 *   elements are mutated (a shallow copy is shuffled and each element is
 *   shallow-copied with `provider` overwritten).
 * @returns {Array<object>} New array, in shuffled order, with anonymised
 *   `provider` fields.
 */
function anonymize(results) {
  // 0 → "Model A", 25 → "Model Z", 26 → "Model AA", 27 → "Model AB", …
  const labelFor = index => {
    let suffix = '';
    for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
      suffix = String.fromCharCode(65 + (n % 26)) + suffix;
    }
    return `Model ${suffix}`;
  };

  // Shuffle a copy so the caller's array keeps its original order.
  const shuffled = shuffle([...results]);
  return shuffled.map((r, i) => ({ ...r, provider: labelFor(i) }));
}
|
|
52
|
+
|
|
25
53
|
/**
|
|
26
54
|
* Run a single provider and return a normalized result object.
|
|
27
55
|
* Never throws — errors are captured in the `error` field.
|
|
@@ -41,17 +69,18 @@ async function runOne(name, prompt, { cwd, timeoutMs }) {
|
|
|
41
69
|
}
|
|
42
70
|
|
|
43
71
|
/**
|
|
44
|
-
* Consult Codex and/or
|
|
72
|
+
* Consult Codex, Gemini, and/or Claude in parallel.
|
|
45
73
|
*
|
|
46
74
|
* @param {object} opts
|
|
47
75
|
* @param {string} opts.prompt - The question to ask.
|
|
48
|
-
* @param {string} [opts.only] - 'codex' | 'gemini' — only run this one.
|
|
76
|
+
* @param {string} [opts.only] - 'codex' | 'gemini' | 'claude' — only run this one.
|
|
49
77
|
* @param {string[]} [opts.skip] - Providers to skip.
|
|
50
|
-
* @param {number} [opts.timeoutMs] - Per-provider timeout in ms (default 90 000).
|
|
78
|
+
* @param {number} [opts.timeoutMs] - Per-provider timeout in ms (default 90 000). 0 = no timeout.
|
|
51
79
|
* @param {string} [opts.cwd] - Working directory for subprocesses.
|
|
80
|
+
* @param {boolean} [opts.blind] - Anonymise provider names in output (default true).
|
|
52
81
|
* @returns {Promise<Array<{provider, content, duration_ms, error}>>}
|
|
53
82
|
*/
|
|
54
|
-
export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd } = {}) {
|
|
83
|
+
export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd, blind = true } = {}) {
|
|
55
84
|
const targets = Object.keys(PROVIDERS)
|
|
56
85
|
.filter(name => (only ? name === only : true))
|
|
57
86
|
.filter(name => !skip.includes(name));
|
|
@@ -64,9 +93,11 @@ export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd
|
|
|
64
93
|
targets.map(name => runOne(name, prompt, { cwd, timeoutMs }))
|
|
65
94
|
);
|
|
66
95
|
|
|
67
|
-
|
|
96
|
+
const results = targets.map((name, i) =>
|
|
68
97
|
settled[i].status === 'fulfilled'
|
|
69
98
|
? settled[i].value
|
|
70
99
|
: { provider: name, content: '', duration_ms: 0, error: settled[i].reason?.message ?? 'unknown' }
|
|
71
100
|
);
|
|
101
|
+
|
|
102
|
+
return blind ? anonymize(results) : results;
|
|
72
103
|
}
|
package/src/providers.js
CHANGED
|
@@ -4,6 +4,7 @@ import { spawn } from 'child_process';
|
|
|
4
4
|
|
|
5
5
|
export const CODEX_ARGS_PREFIX = ['exec', '--json', '--skip-git-repo-check', '--ephemeral'];
|
|
6
6
|
export const GEMINI_ARGS_PREFIX = ['-o', 'json'];
|
|
7
|
+
export const CLAUDE_ARGS_PREFIX = ['--output-format', 'json', '-p'];
|
|
7
8
|
|
|
8
9
|
// ─── Process runner ───────────────────────────────────────────────────────────
|
|
9
10
|
|
|
@@ -76,3 +77,12 @@ export function adaptGemini(raw) {
|
|
|
76
77
|
} catch { /* fall through */ }
|
|
77
78
|
return fallback(raw.stdout);
|
|
78
79
|
}
|
|
80
|
+
|
|
81
|
+
/**
 * Adapt raw Claude CLI output into a normalized result.
 *
 * Attempts to parse `raw.stdout` as JSON and, when the parsed value carries a
 * string `result` field, returns that text tagged with `parse_mode: 'json'`.
 * In every other case (invalid JSON, or no string `result`) the raw stdout is
 * passed to `fallback`, which is defined elsewhere in this module.
 *
 * @param {{stdout: string}} raw - Captured process output.
 * @returns {object} `{ content, parse_mode: 'json' }` on success, otherwise
 *   whatever `fallback(raw.stdout)` returns.
 */
export function adaptClaude(raw) {
  let payload = null;
  try {
    payload = JSON.parse(raw.stdout);
  } catch {
    // Not valid JSON — leave payload empty and use the fallback below.
  }

  // Optional chaining covers a JSON `null` payload, which has no properties.
  const text = payload?.result;
  return typeof text === 'string'
    ? { content: text, parse_mode: 'json' }
    : fallback(raw.stdout);
}
|
package/src/server.js
CHANGED
|
@@ -26,18 +26,22 @@ const CONSULT_TOOL = {
|
|
|
26
26
|
},
|
|
27
27
|
only: {
|
|
28
28
|
type: 'string',
|
|
29
|
-
enum: ['codex', 'gemini'],
|
|
29
|
+
enum: ['codex', 'gemini', 'claude'],
|
|
30
30
|
description: '只调用指定一个模型(省成本或调试)。',
|
|
31
31
|
},
|
|
32
32
|
skip: {
|
|
33
33
|
type: 'array',
|
|
34
|
-
items: { type: 'string', enum: ['codex', 'gemini'] },
|
|
34
|
+
items: { type: 'string', enum: ['codex', 'gemini', 'claude'] },
|
|
35
35
|
description: '跳过指定模型列表。',
|
|
36
36
|
},
|
|
37
37
|
timeout_sec: {
|
|
38
38
|
type: 'number',
|
|
39
39
|
description: '每个模型的超时秒数,默认 90。传 0 表示不限时,等待直到完成。',
|
|
40
40
|
},
|
|
41
|
+
blind: {
|
|
42
|
+
type: 'boolean',
|
|
43
|
+
description: '匿名化 provider 名称(默认 true)。结果以 Model A/B/C 呈现,顺序随机打乱,Judge 无法通过名字或位置判断来源。传 false 可查看真实模型名称。',
|
|
44
|
+
},
|
|
41
45
|
cwd: {
|
|
42
46
|
type: 'string',
|
|
43
47
|
description: '子进程工作目录,默认继承 MCP server 的 cwd。',
|
|
@@ -69,6 +73,7 @@ server.setRequestHandler(CallToolRequestSchema, async req => {
|
|
|
69
73
|
only: args.only,
|
|
70
74
|
skip: Array.isArray(args.skip) ? args.skip : [],
|
|
71
75
|
timeoutMs: args.timeout_sec != null ? Number(args.timeout_sec) * 1000 : 90_000,
|
|
76
|
+
blind: args.blind !== false,
|
|
72
77
|
cwd: args.cwd,
|
|
73
78
|
});
|
|
74
79
|
|