npm - braintrust-lite - Versions diffs - 0.1.4 → 0.1.6 - Mend

braintrust-lite 0.1.4 → 0.1.6

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5) hide show

package/bin/consult CHANGED Viewed

@@ -32,7 +32,7 @@ function printHelp() {
        cat file | consult "explain this"
 Options:
-  --only <model>     Only run one model: codex | gemini
+  --only <model>     Only run one model: codex | gemini | claude
   --skip <model>     Skip a model (repeatable)
   --timeout <sec>    Per-model timeout in seconds (default: 90)
   --dir <path>       Working directory for CLI subprocesses
@@ -56,12 +56,13 @@ if (!prompt) {
 // ─── Run ─────────────────────────────────────────────────────────────────────
-const results = await consult({
+const { results, mapping } = await consult({
   prompt,
   only: flags.only,
   skip: flags.skip,
-  timeoutMs: flags.timeout ? flags.timeout * 1000 : 90_000,
+  timeoutMs: flags.timeout != null ? flags.timeout * 1000 : 90_000,
   cwd: flags.dir ? resolve(flags.dir) : undefined,
+  blind: !flags.only, // blind mode only useful when multiple providers run
 });
 // Progress summary to stderr
@@ -72,7 +73,7 @@ for (const r of results) {
 // Output to stdout
 if (flags.json) {
-  console.log(formatAsJson(prompt, results));
+  console.log(formatAsJson(prompt, results, mapping));
 } else {
-  console.log('\n' + formatAsMarkdown(results));
+  console.log('\n' + formatAsMarkdown(results, mapping));
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "braintrust-lite",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "Lightweight multi-model advisor for Claude Code — parallel Codex + Gemini consultation via MCP",
   "type": "module",
   "bin": {

package/src/consult.js CHANGED Viewed

@@ -43,11 +43,17 @@ function shuffle(arr) {
 /**
  * Replace provider names with anonymous labels (Model A, B, C…).
  * Order is randomised so the judge cannot infer identity from position.
+ * Returns { results: anonymized array, mapping: { 'Model A': 'gemini', … } }
  */
 function anonymize(results) {
   const labels = ['Model A', 'Model B', 'Model C', 'Model D', 'Model E'];
   const shuffled = shuffle([...results]);
-  return shuffled.map((r, i) => ({ ...r, provider: labels[i] }));
+  const mapping = {};
+  const anonymized = shuffled.map((r, i) => {
+    mapping[labels[i]] = r.provider;
+    return { ...r, provider: labels[i] };
+  });
+  return { results: anonymized, mapping };
 }
 /**
@@ -77,8 +83,8 @@ async function runOne(name, prompt, { cwd, timeoutMs }) {
  * @param {string[]} [opts.skip]       - Providers to skip.
  * @param {number}   [opts.timeoutMs]  - Per-provider timeout in ms (default 90 000). 0 = no timeout.
  * @param {string}   [opts.cwd]        - Working directory for subprocesses.
- * @param {boolean}  [opts.blind]      - Anonymise provider names in output (default true).
- * @returns {Promise<Array<{provider, content, duration_ms, error}>>}
+ * @param {boolean}  [opts.blind]      - Anonymise provider names (default true).
+ * @returns {Promise<{ results: Array, mapping: object|null }>}
  */
 export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd, blind = true } = {}) {
   const targets = Object.keys(PROVIDERS)
@@ -99,5 +105,8 @@ export async function consult({ prompt, only, skip = [], timeoutMs = 90_000, cwd
       : { provider: name, content: '', duration_ms: 0, error: settled[i].reason?.message ?? 'unknown' }
   );
-  return blind ? anonymize(results) : results;
+  if (blind) {
+    return anonymize(results);
+  }
+  return { results, mapping: null };
 }

package/src/format.js CHANGED Viewed

@@ -1,20 +1,42 @@
 /**
  * Format an array of provider results as human-readable Markdown.
  * Each provider gets a ## header with timing (or error), then its content.
+ *
+ * When a mapping is provided (blind mode), a REVEAL section is appended
+ * at the end so the Judge can disclose model identity to the user AFTER
+ * completing their evaluation.
  */
-export function formatAsMarkdown(results) {
-  return results.map(r => {
+export function formatAsMarkdown(results, mapping = null) {
+  const body = results.map(r => {
     const label = r.error
-      ? `## ${r.provider.toUpperCase()} (${r.error})`
-      : `## ${r.provider.toUpperCase()} (${(r.duration_ms / 1000).toFixed(1)}s)`;
-    const body = r.error ? `*调用失败: ${r.error}*` : r.content;
-    return `${label}\n\n${body}`;
+      ? `## ${r.provider} (${r.error})`
+      : `## ${r.provider} (${(r.duration_ms / 1000).toFixed(1)}s)`;
+    const content = r.error ? `*调用失败: ${r.error}*` : r.content;
+    return `${label}\n\n${content}`;
   }).join('\n\n---\n\n');
+  if (!mapping) return body;
+  const reveal = Object.entries(mapping)
+    .map(([label, provider]) => `| ${label} | **${provider}** |`)
+    .join('\n');
+  return `${body}
+---
+## 🔒 REVEAL — 仅在完成评估后阅读
+> **Judge 指令**：请先完成你的完整评估和综合输出，再阅读以下映射表，并在回复末尾告知用户每个模型对应的真实身份。
+| 匿名标签 | 真实模型 |
+|---------|---------|
+${reveal}`;
 }
 /**
  * Format results as a compact JSON string for programmatic consumption.
  */
-export function formatAsJson(prompt, results) {
-  return JSON.stringify({ prompt, results }, null, 2);
+export function formatAsJson(prompt, results, mapping = null) {
+  return JSON.stringify({ prompt, results, mapping }, null, 2);
 }

package/src/server.js CHANGED Viewed

@@ -68,7 +68,7 @@ server.setRequestHandler(CallToolRequestSchema, async req => {
   }
   const args = req.params.arguments ?? {};
-  const results = await consult({
+  const { results, mapping } = await consult({
     prompt: String(args.prompt ?? ''),
     only: args.only,
     skip: Array.isArray(args.skip) ? args.skip : [],
@@ -84,7 +84,7 @@ server.setRequestHandler(CallToolRequestSchema, async req => {
   }
   return {
-    content: [{ type: 'text', text: formatAsMarkdown(results) }],
+    content: [{ type: 'text', text: formatAsMarkdown(results, mapping) }],
   };
 });