braintrust-lite 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/consult +6 -5
- package/package.json +1 -1
- package/src/consult.js +13 -4
- package/src/format.js +30 -8
- package/src/server.js +2 -2
package/bin/consult
CHANGED
|
@@ -32,7 +32,7 @@ function printHelp() {
|
|
|
32
32
|
cat file | consult "explain this"
|
|
33
33
|
|
|
34
34
|
Options:
|
|
35
|
-
--only <model> Only run one model: codex | gemini
|
|
35
|
+
--only <model> Only run one model: codex | gemini | claude
|
|
36
36
|
--skip <model> Skip a model (repeatable)
|
|
37
37
|
--timeout <sec> Per-model timeout in seconds (default: 90)
|
|
38
38
|
--dir <path> Working directory for CLI subprocesses
|
|
@@ -56,12 +56,13 @@ if (!prompt) {
|
|
|
56
56
|
|
|
57
57
|
// ─── Run ─────────────────────────────────────────────────────────────────────
|
|
58
58
|
|
|
59
|
-
const results = await consult({
|
|
59
|
+
const { results, mapping } = await consult({
|
|
60
60
|
prompt,
|
|
61
61
|
only: flags.only,
|
|
62
62
|
skip: flags.skip,
|
|
63
|
-
timeoutMs: flags.timeout ? flags.timeout * 1000 : 90_000,
|
|
63
|
+
timeoutMs: flags.timeout != null ? flags.timeout * 1000 : 90_000,
|
|
64
64
|
cwd: flags.dir ? resolve(flags.dir) : undefined,
|
|
65
|
+
blind: !flags.only, // blind mode only useful when multiple providers run
|
|
65
66
|
});
|
|
66
67
|
|
|
67
68
|
// Progress summary to stderr
|
|
@@ -72,7 +73,7 @@ for (const r of results) {
|
|
|
72
73
|
|
|
73
74
|
// Output to stdout
|
|
74
75
|
if (flags.json) {
|
|
75
|
-
console.log(formatAsJson(prompt, results));
|
|
76
|
+
console.log(formatAsJson(prompt, results, mapping));
|
|
76
77
|
} else {
|
|
77
|
-
console.log('\n' + formatAsMarkdown(results));
|
|
78
|
+
console.log('\n' + formatAsMarkdown(results, mapping));
|
|
78
79
|
}
|
package/package.json
CHANGED
package/src/consult.js
CHANGED
|
@@ -43,11 +43,17 @@ function shuffle(arr) {
|
|
|
43
43
|
/**
 * Replace provider names with anonymous labels (Model A, Model B, …).
 *
 * A copy of `results` is passed through `shuffle` before labelling, so a
 * label's letter is not tied to the provider's original position.
 * Labels are generated on demand (A…Z, then AA, AB, …), so every result gets
 * a unique label no matter how many providers ran.
 *
 * @param {Array<object>} results - Provider results; the input array and its
 *   entries are not modified.
 * @returns {{ results: Array<object>, mapping: Object<string, string> }}
 *   `results` is the relabelled copy; `mapping` maps each label back to the
 *   original provider name, e.g. { 'Model A': 'gemini', … }.
 */
function anonymize(results) {
  // Spreadsheet-style column letters: 0 → A, 25 → Z, 26 → AA, 27 → AB, …
  const labelFor = (index) => {
    let letters = '';
    for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
      letters = String.fromCharCode(65 + (n % 26)) + letters;
    }
    return `Model ${letters}`;
  };

  const shuffled = shuffle([...results]);
  const anonymized = shuffled.map((r, i) => ({ ...r, provider: labelFor(i) }));
  const mapping = Object.fromEntries(
    shuffled.map((r, i) => [labelFor(i), r.provider]),
  );
  return { results: anonymized, mapping };
}
|
|
52
58
|
|
|
53
59
|
/**
|
|
@@ -77,8 +83,8 @@ async function runOne(name, prompt, { cwd, timeoutMs }) {
|
|
|
77
83
|
* @param {string[]} [opts.skip] - Providers to skip.
|
|
78
84
|
* @param {number} [opts.timeoutMs] - Per-provider timeout in ms (default 90 000). 0 = no timeout.
|
|
79
85
|
* @param {string} [opts.cwd] - Working directory for subprocesses.
|
|
80
|
-
* @param {boolean} [opts.blind] - Anonymise provider names
|
|
81
|
-
* @returns {Promise<
|
|
86
|
+
* @param {boolean} [opts.blind] - Anonymise provider names (default true).
|
|
87
|
+
* @returns {Promise<{ results: Array, mapping: object|null }>}
|
|
82
88
|
*/
|
|
83
89
|
export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd, blind = true } = {}) {
|
|
84
90
|
const targets = Object.keys(PROVIDERS)
|
|
@@ -99,5 +105,8 @@ export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd
|
|
|
99
105
|
: { provider: name, content: '', duration_ms: 0, error: settled[i].reason?.message ?? 'unknown' }
|
|
100
106
|
);
|
|
101
107
|
|
|
102
|
-
|
|
108
|
+
if (blind) {
|
|
109
|
+
return anonymize(results);
|
|
110
|
+
}
|
|
111
|
+
return { results, mapping: null };
|
|
103
112
|
}
|
package/src/format.js
CHANGED
|
@@ -1,20 +1,42 @@
|
|
|
1
1
|
/**
 * Format an array of provider results as human-readable Markdown.
 * Each provider gets a `##` heading with its timing (or error), then its
 * content; sections are separated by horizontal rules.
 *
 * When a non-empty mapping is provided (blind mode), a REVEAL section is
 * appended at the end so the Judge can disclose model identity to the user
 * AFTER completing their evaluation.
 *
 * @param {Array<{provider: string, content: string, duration_ms: number, error?: string}>} results
 *   Provider results, rendered in the given order.
 * @param {Object<string, string>|null} [mapping=null]
 *   Anonymous label → real provider name. `null` or an empty object means
 *   there is nothing to reveal and only the result sections are returned.
 * @returns {string} The Markdown document.
 */
export function formatAsMarkdown(results, mapping = null) {
  const body = results
    .map((r) => {
      if (r.error) {
        // A heading is a single line: show only the first non-empty line of
        // the error there. The full error text is still printed in the body.
        const text = String(r.error);
        const headline =
          text.split(/\r?\n/).find((line) => line.trim() !== '')?.trim() ?? text.trim();
        return `## ${r.provider} (${headline})\n\n*调用失败: ${r.error}*`;
      }
      const seconds = (r.duration_ms / 1000).toFixed(1);
      return `## ${r.provider} (${seconds}s)\n\n${r.content}`;
    })
    .join('\n\n---\n\n');

  const rows = Object.entries(mapping ?? {}).map(
    ([label, provider]) => `| ${label} | **${provider}** |`,
  );
  // No mapping, or an empty one (blind mode with zero results): nothing to
  // reveal, so do not emit a REVEAL header above an empty table.
  if (rows.length === 0) return body;

  return [
    body,
    '',
    '---',
    '',
    '## 🔒 REVEAL — 仅在完成评估后阅读',
    '',
    '> **Judge 指令**:请先完成你的完整评估和综合输出,再阅读以下映射表,并在回复末尾告知用户每个模型对应的真实身份。',
    '',
    '| 匿名标签 | 真实模型 |',
    '|---------|---------|',
    ...rows,
  ].join('\n');
}
|
|
14
36
|
|
|
15
37
|
/**
 * Serialise the prompt, the provider results and the label mapping as a JSON
 * string for programmatic consumption. The output is pretty-printed with
 * two-space indentation.
 *
 * @param {*} prompt - Stored under the `prompt` key.
 * @param {Array} results - Stored under the `results` key.
 * @param {object|null} [mapping=null] - Stored under the `mapping` key;
 *   serialised as `null` when omitted.
 * @returns {string} The JSON document.
 */
export function formatAsJson(prompt, results, mapping = null) {
  return JSON.stringify({ prompt, results, mapping }, null, 2);
}
|
package/src/server.js
CHANGED
|
@@ -68,7 +68,7 @@ server.setRequestHandler(CallToolRequestSchema, async req => {
|
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
const args = req.params.arguments ?? {};
|
|
71
|
-
const results = await consult({
|
|
71
|
+
const { results, mapping } = await consult({
|
|
72
72
|
prompt: String(args.prompt ?? ''),
|
|
73
73
|
only: args.only,
|
|
74
74
|
skip: Array.isArray(args.skip) ? args.skip : [],
|
|
@@ -84,7 +84,7 @@ server.setRequestHandler(CallToolRequestSchema, async req => {
|
|
|
84
84
|
}
|
|
85
85
|
|
|
86
86
|
return {
|
|
87
|
-
content: [{ type: 'text', text: formatAsMarkdown(results) }],
|
|
87
|
+
content: [{ type: 'text', text: formatAsMarkdown(results, mapping) }],
|
|
88
88
|
};
|
|
89
89
|
});
|
|
90
90
|
|