npm - braintrust-lite - Versions diffs - 0.1.7 → 0.1.8 - Mend

braintrust-lite 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +73 -102
package/bin/braintrust +12 -0
package/package.json +20 -20
package/skills/consult/SKILL.md +2 -2
package/src/config.js +60 -0
package/src/doctor.js +120 -0
package/src/format.js +26 -49
package/src/judge.js +87 -0
package/src/main.js +332 -0
package/src/memory/db.js +183 -0
package/src/memory/index.js +31 -0
package/src/normalize.js +172 -0
package/src/normalize.test.js +125 -0
package/src/prompts/architecture.md +21 -0
package/src/prompts/code.md +21 -0
package/src/prompts/general.md +22 -0
package/src/prompts/index.js +49 -0
package/src/prompts/writing.md +21 -0
package/src/providers/claude.js +45 -0
package/src/providers/codex.js +69 -0
package/src/providers/gemini.js +81 -0
package/src/providers/index.js +22 -0
package/src/reflector.js +244 -0
package/src/save.js +93 -0
package/src/server.js +245 -38
package/LICENSE +0 -21
package/bin/consult +0 -79
package/scripts/setup.js +0 -66
package/src/consult.js +0 -81
package/src/providers.js +0 -91

package/src/normalize.js ADDED Viewed

@@ -0,0 +1,172 @@
+'use strict';
+// Known section tags in both Chinese and English variants
+const KNOWN_TAGS = [
+  '核心结论', '详细方案', '关键假设', '风险与不确定性',
+  'Key Claims', 'Details', 'Assumptions', 'Risks',
+];
+// Build a regex that matches any known tag, with optional markdown decoration
+// e.g. [核心结论], **[核心结论]**, **核心结论**, ## 核心结论, ### Key Claims
+const TAG_PATTERN = (() => {
+  const escaped = KNOWN_TAGS.map(t => t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
+  return new RegExp(
+    `(?:\\*{1,2})?\\[?(${escaped.join('|')})\\]?(?:\\*{1,2})?`,
+    'g'
+  );
+})();
+/**
+ * Find all tag positions in text, returning [{tag, start}] sorted by start.
+ */
+function findTagPositions(text) {
+  const positions = [];
+  const re = new RegExp(TAG_PATTERN.source, 'gi');
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    // Only record first occurrence of each tag
+    const tag = m[1];
+    if (!positions.find(p => p.tag === tag)) {
+      positions.push({ tag, start: m.index, end: m.index + m[0].length });
+    }
+  }
+  return positions.sort((a, b) => a.start - b.start);
+}
+/**
+ * Clean a single line: strip markdown noise without losing content.
+ */
+function cleanLine(line) {
+  return line
+    .replace(/^[-─—]{3,}\s*$/, '')         // pure separator lines → empty
+    .replace(/^#+\s+/, '')                  // markdown headings prefix
+    .replace(/^\*{1,2}(.*?)\*{1,2}$/, '$1') // **bold** wrappers
+    .replace(/^[-*•]\s+/, '')               // list bullets
+    .trim();
+}
+/**
+ * Return true if a line looks like structured content (list item, numbered,
+ * contains a colon, or starts with a bracket). Used to detect when trailing
+ * conversational prose begins after a blank gap in the last section.
+ */
+function isStructuredLine(line) {
+  return /^[-*•\d]/.test(line) ||  // list/numbered
+    line.includes(':') || line.includes('：') || // has colon
+    /^[[\(（【]/.test(line);         // starts with bracket
+}
+/**
+ * Extract lines from a named section of the text.
+ * Handles:
+ * - Chinese tags: [核心结论], **核心结论**, **[核心结论]**
+ * - English tags: [Key Claims], **Key Claims**
+ * - Markdown headings: ## 核心结论
+ * - Separator noise: --- lines removed
+ * - Bold list items: **item** stripped to plain text
+ * - Trailing conversational prose (codex): stops at blank + non-structured line
+ *
+ * @param {string} text
+ * @param {string} tag - One of KNOWN_TAGS
+ * @returns {string[]} Non-empty lines in that section
+ */
+function extractSection(text, tag) {
+  const positions = findTagPositions(text);
+  const entry = positions.find(p => p.tag === tag);
+  if (!entry) return [];
+  // Section runs from after the tag header to the start of the next known tag
+  const nextEntry = positions.find(p => p.start > entry.start);
+  const sectionText = nextEntry
+    ? text.slice(entry.end, nextEntry.start)
+    : text.slice(entry.end);
+  const isLastSection = !nextEntry;
+  const result = [];
+  let seenContent = false;
+  let afterBlankGap = false;
+  for (const raw of sectionText.split('\n')) {
+    const line = cleanLine(raw);
+    if (!line) {
+      if (seenContent) afterBlankGap = true;
+      continue;
+    }
+    // For the last section: stop when we encounter prose after a blank gap.
+    // This prevents codex trailing dialogue ("如果你需要更多帮助") from leaking in.
+    if (isLastSection && afterBlankGap && !isStructuredLine(line)) {
+      break;
+    }
+    result.push(line);
+    seenContent = true;
+    afterBlankGap = false;
+  }
+  return result;
+}
+/**
+ * Compute a parse quality score for a normalized result.
+ * Each known output section (key_claims, assumptions, risks) worth 0.25.
+ * Full content present worth 0.25. Fallback mode penalizes -0.2.
+ * Result clipped to [0, 1].
+ *
+ * @param {object} r - normalized result object
+ * @returns {number} score in [0, 1]
+ */
+function parseScore(r) {
+  let score = 0;
+  if (r.key_claims && r.key_claims.length > 0) score += 0.25;
+  if (r.assumptions && r.assumptions.length > 0) score += 0.25;
+  if (r.risks && r.risks.length > 0) score += 0.25;
+  if (r.content && r.content.length > 50) score += 0.25;
+  if (r.parse_mode === 'fallback') score -= 0.2;
+  return Math.max(0, Math.min(1, score));
+}
+/**
+ * Normalize raw provider output into a structured result.
+ */
+function normalize(provider, raw, adapted, durationMs) {
+  const { content, model, parse_mode } = adapted;
+  const r = {
+    provider,
+    model,
+    content,
+    key_claims: extractSection(content, '核心结论'),
+    detailed: extractSection(content, '详细方案'),
+    assumptions: extractSection(content, '关键假设'),
+    risks: extractSection(content, '风险与不确定性'),
+    duration_ms: durationMs,
+    parse_mode,
+    error_type: raw.error_type || null,
+    error: raw.error_type === 'enoent' ? 'not installed'
+      : raw.error_type === 'timeout' ? 'timeout'
+      : raw.error_type === 'nonzero' ? `exit ${raw.code}`
+      : raw.error_type ? raw.error_type
+      : null,
+    judge_score: null,
+    lessons: [],
+  };
+  r.parse_score = parseScore(r);
+  return r;
+}
+/**
+ * Build a token-efficient summary of a normalized result for the judge prompt.
+ */
+function summarize(r) {
+  const claims = r.key_claims.length ? r.key_claims.slice(0, 5).join('\n') : r.content.slice(0, 600);
+  const risks = r.risks.slice(0, 3).join('\n');
+  const assumptions = r.assumptions.slice(0, 3).join('\n');
+  return [
+    `【核心结论】\n${claims}`,
+    risks ? `【风险】\n${risks}` : '',
+    assumptions ? `【假设】\n${assumptions}` : '',
+  ].filter(Boolean).join('\n\n');
+}
+module.exports = { extractSection, normalize, summarize, parseScore, KNOWN_TAGS };

package/src/normalize.test.js ADDED Viewed

@@ -0,0 +1,125 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert/strict');
+const { extractSection } = require('./normalize.js');
+// ─── Fixture 1: Gemini bold headers ──────────────────────────────────────────
+// Gemini often wraps section tags in ** bold **
+test('Fixture 1: gemini **bold** tag format', () => {
+  const text = `
+**[核心结论]**
+结论一：使用 Redis Cluster
+结论二：读写分离
+**[详细方案]**
+1. 部署 3 主 3 从
+**[关键假设]**
+假设一：QPS < 100k
+**[风险与不确定性]**
+风险：网络分区问题
+`;
+  const claims = extractSection(text, '核心结论');
+  assert.ok(claims.length >= 2, `Expected ≥2 claims, got ${claims.length}: ${JSON.stringify(claims)}`);
+  assert.ok(claims.some(c => c.includes('Redis')), 'Should contain Redis claim');
+  assert.ok(claims.some(c => c.includes('读写分离')), 'Should contain 读写分离 claim');
+  const risks = extractSection(text, '风险与不确定性');
+  assert.ok(risks.length >= 1, `Expected ≥1 risk, got ${risks.length}`);
+  assert.ok(risks.some(r => r.includes('网络分区')), 'Should contain 网络分区 risk');
+});
+// ─── Fixture 2: Codex dialogue tail contamination ─────────────────────────────
+// Codex sometimes includes conversational text after the structured output
+test('Fixture 2: codex trailing dialogue noise', () => {
+  const text = `
+[核心结论]
+结论一：采用微服务架构
+结论二：使用 Kubernetes
+[详细方案]
+详细内容在这里
+[关键假设]
+假设：团队有 K8s 经验
+[风险与不确定性]
+风险：运维复杂度高
+如果你需要更多帮助，请告诉我！
+我可以进一步解释任何部分。
+`;
+  const claims = extractSection(text, '核心结论');
+  assert.ok(claims.length >= 2, `Expected ≥2 claims, got ${claims.length}`);
+  assert.ok(!claims.some(c => c.includes('告诉我')), 'Should not include trailing dialogue');
+  const risks = extractSection(text, '风险与不确定性');
+  assert.ok(risks.length >= 1, `Expected ≥1 risk, got ${risks.length}`);
+  assert.ok(!risks.some(r => r.includes('告诉我')), 'Trailing dialogue should not leak into risks');
+});
+// ─── Fixture 3: --- separator contamination ───────────────────────────────────
+// Some models include --- separator lines inside sections
+test('Fixture 3: --- separator noise removal', () => {
+  const text = `
+[核心结论]
+---
+结论一：选择 PostgreSQL
+---
+结论二：使用连接池
+---
+[关键假设]
+---
+假设：单机即可满足需求
+[风险与不确定性]
+---
+风险一：数据量超预期
+风险二：并发瓶颈
+`;
+  const claims = extractSection(text, '核心结论');
+  assert.ok(claims.length >= 2, `Expected ≥2 claims after stripping ---, got ${claims.length}: ${JSON.stringify(claims)}`);
+  assert.ok(!claims.some(c => c === '---'), 'Should not contain raw --- separators');
+  assert.ok(claims.some(c => c.includes('PostgreSQL')), 'Should contain PostgreSQL claim');
+  const risks = extractSection(text, '风险与不确定性');
+  assert.ok(risks.length >= 2, `Expected ≥2 risks, got ${risks.length}`);
+  assert.ok(!risks.some(r => r === '---'), 'Should not contain raw --- separators in risks');
+});
+// ─── Fixture 4: English tags ──────────────────────────────────────────────────
+test('Fixture 4: English tag variants', () => {
+  const text = `
+[Key Claims]
+Claim 1: Use distributed caching
+Claim 2: Implement rate limiting
+[Assumptions]
+Assumption: Traffic < 10k RPS
+[Risks]
+Risk: Cache invalidation complexity
+`;
+  const claims = extractSection(text, 'Key Claims');
+  assert.ok(claims.length >= 2, `Expected ≥2 English claims, got ${claims.length}`);
+  assert.ok(claims.some(c => c.includes('caching')), 'Should contain caching claim');
+});
+// ─── Fixture 5: Empty / missing section ───────────────────────────────────────
+test('Fixture 5: missing section returns empty array', () => {
+  const text = '[核心结论]\n结论一：这是结论\n';
+  const risks = extractSection(text, '风险与不确定性');
+  assert.deepEqual(risks, [], 'Missing section should return []');
+});

package/src/prompts/architecture.md ADDED Viewed

@@ -0,0 +1,21 @@
+你是一个系统架构师，专注于权衡分析和长期可维护性。请给出有深度的架构建议。
+要求：
+1. 先识别约束条件，再给方案
+2. 对比多个候选方案的优劣
+3. 明确指出方案的适用场景和前提
+4. 考虑长期演进和技术债务
+请按以下结构回答（用中文标签分隔）：
+[核心结论]
+（推荐方案，1-2条主要结论）
+[详细方案]
+（包含约束识别、候选方案对比、推荐方案的具体设计）
+[关键假设]
+（团队规模、性能要求、一致性要求、预算等约束）
+[风险与不确定性]
+（架构决策的主要风险点、需要进一步验证的假设）

package/src/prompts/code.md ADDED Viewed

@@ -0,0 +1,21 @@
+你是一个资深工程师，专注于代码质量和可维护性。请给出可以直接使用的解决方案。
+要求：
+1. 优先给出可运行的代码示例
+2. 解释关键设计决策的理由
+3. 指出潜在的边界情况和性能问题
+4. 如果有更好的替代方案，请对比说明
+请按以下结构回答（用中文标签分隔）：
+[核心结论]
+（主要解决方案，1-3条核心要点）
+[详细方案]
+（完整可运行的代码，带必要注释）
+[关键假设]
+（运行环境、依赖版本、前提条件）
+[风险与不确定性]
+（边界情况、性能瓶颈、已知限制）

package/src/prompts/general.md ADDED Viewed

@@ -0,0 +1,22 @@
+你是一个独立思考的高级专家。请基于自己的判断给出高质量、可执行、可审查的回答。
+要求：
+1. 独立思考，不要假设其他专家会补充你遗漏的部分
+2. 优先给出清晰、可执行的内容
+3. 明确区分结论、依据、假设、风险
+4. 输出简洁但完整，避免废话
+5. 如有不确定点，直接说明
+请按以下结构回答（用中文标签分隔）：
+[核心结论]
+（简洁的主要结论，2-5条）
+[详细方案]
+（可执行的具体内容）
+[关键假设]
+（你的回答依赖哪些假设）
+[风险与不确定性]
+（需要注意的风险或你不确定的点）

package/src/prompts/index.js ADDED Viewed

@@ -0,0 +1,49 @@
+'use strict';
+const { readFileSync } = require('fs');
+const { join } = require('path');
+const TEMPLATES_DIR = __dirname;
+// Supported variants and their template files
+const VARIANTS = {
+  general: 'general.md',
+  code: 'code.md',
+  architecture: 'architecture.md',
+  writing: 'writing.md',
+};
+// Cache loaded templates
+const _cache = new Map();
+function loadTemplate(name) {
+  if (_cache.has(name)) return _cache.get(name);
+  const file = VARIANTS[name] || VARIANTS.general;
+  const content = readFileSync(join(TEMPLATES_DIR, file), 'utf8').trim();
+  _cache.set(name, content);
+  return content;
+}
+/**
+ * Build the system prompt for a generator.
+ * @param {string} variant - One of general|code|architecture|writing
+ * @param {string[]} [lessons] - Injected lesson strings from memory
+ * @param {string[]} [skills] - Injected skill template strings from memory
+ * @returns {string}
+ */
+function buildGeneratorSystem(variant = 'general', lessons = [], skills = []) {
+  const base = loadTemplate(variant);
+  const parts = [base];
+  if (lessons.length > 0) {
+    parts.push(`\n<past-lessons>\n${lessons.slice(0, 5).join('\n')}\n</past-lessons>`);
+  }
+  if (skills.length > 0) {
+    parts.push(`\n<skills>\n${skills.join('\n\n')}\n</skills>`);
+  }
+  return parts.join('');
+}
+module.exports = { buildGeneratorSystem, VARIANTS };

package/src/prompts/writing.md ADDED Viewed

@@ -0,0 +1,21 @@
+你是一个专业写作顾问，擅长清晰表达和说服性写作。请给出高质量的写作建议或内容。
+要求：
+1. 内容准确、逻辑清晰、表达简洁
+2. 根据目标受众调整语气和风格
+3. 如果是修改建议，说明改动理由
+4. 避免空话和套话
+请按以下结构回答（用中文标签分隔）：
+[核心结论]
+（主要写作建议或关键改进点）
+[详细方案]
+（具体的写作内容或修改后的版本）
+[关键假设]
+（目标受众、使用场景、风格要求）
+[风险与不确定性]
+（可能需要根据实际情况调整的部分）

package/src/providers/claude.js ADDED Viewed

@@ -0,0 +1,45 @@
+'use strict';
+/**
+ * Get CLI args for invoking claude as a generator.
+ * @param {string} fullPrompt - System + user prompt combined
+ * @returns {string[]}
+ */
+function getArgs(fullPrompt) {
+  return ['-p', fullPrompt, '--output-format', 'json', '--no-session-persistence'];
+}
+/**
+ * Parse claude's JSON stdout into { content, model, parse_mode }.
+ * @param {{ stdout: string, stderr: string, code: number|string }} raw
+ * @returns {{ content: string, model: string, parse_mode: string }}
+ */
+function adapt(raw) {
+  try {
+    const j = JSON.parse(raw.stdout);
+    const content = j.result || j.content || '';
+    const model = Object.keys(j.modelUsage || {})[0] || 'claude';
+    return { content, model, parse_mode: 'json' };
+  } catch {
+    return fallback(raw.stdout);
+  }
+}
+/**
+ * Extract text from claude judge output.
+ * @param {{ stdout: string }} raw
+ * @returns {string}
+ */
+function extractJudgeText(raw) {
+  try {
+    return JSON.parse(raw.stdout).result || raw.stdout.trim();
+  } catch {
+    return raw.stdout.trim();
+  }
+}
+function fallback(stdout) {
+  return { content: stdout.slice(-2000).trim() || '[no output]', model: 'claude', parse_mode: 'fallback' };
+}
+module.exports = { getArgs, adapt, extractJudgeText };

package/src/providers/codex.js ADDED Viewed

@@ -0,0 +1,69 @@
+'use strict';
+/**
+ * Get CLI args for invoking codex as a generator.
+ * NOTE: --json MUST come before the prompt argument — codex's parser treats
+ * args after the prompt text as [COMMAND] positional, not as options.
+ * @param {string} fullPrompt - System + user prompt combined
+ * @returns {string[]}
+ */
+function getArgs(fullPrompt) {
+  return ['exec', '--json', '--skip-git-repo-check', '--ephemeral', fullPrompt];
+}
+/**
+ * Parse codex's JSONL stdout into { content, model, parse_mode }.
+ * Codex streams newline-delimited JSON events. We look for the last
+ * item.completed event with an agent_message type.
+ * @param {{ stdout: string, stderr: string, code: number|string }} raw
+ * @returns {{ content: string, model: string, parse_mode: string }}
+ */
+function adapt(raw) {
+  try {
+    const lines = raw.stdout.trim().split('\n');
+    const events = [];
+    for (const l of lines) {
+      try { events.push(JSON.parse(l)); } catch { /* skip non-JSON lines */ }
+    }
+    // Prefer agent_message events
+    const agentMsg = events
+      .filter(e => e.type === 'item.completed' && e.item?.type === 'agent_message')
+      .pop();
+    if (agentMsg?.item?.text) {
+      return { content: agentMsg.item.text, model: 'codex', parse_mode: 'jsonl' };
+    }
+    // Fallback: last completed event with any text
+    const lastWithText = events
+      .filter(e => e.type === 'item.completed' && e.item?.text)
+      .pop();
+    if (lastWithText?.item?.text) {
+      return { content: lastWithText.item.text, model: 'codex', parse_mode: 'jsonl' };
+    }
+  } catch { /* fall through */ }
+  return fallback(raw.stdout);
+}
+/**
+ * Extract text from codex judge output (same JSONL format).
+ * @param {{ stdout: string }} raw
+ * @returns {string}
+ */
+function extractJudgeText(raw) {
+  const lines = raw.stdout.trim().split('\n').reverse();
+  for (const l of lines) {
+    try {
+      const e = JSON.parse(l);
+      if (e.item?.text) return e.item.text;
+    } catch { /* skip */ }
+  }
+  return raw.stdout.trim();
+}
+function fallback(stdout) {
+  return { content: stdout.slice(-2000).trim() || '[no output]', model: 'codex', parse_mode: 'fallback' };
+}
+module.exports = { getArgs, adapt, extractJudgeText };

package/src/providers/gemini.js ADDED Viewed

@@ -0,0 +1,81 @@
+'use strict';
+/**
+ * Get CLI args for invoking gemini as a generator.
+ * --allowed-mcp-server-names skips the broken feishu-mcp server that
+ * causes ~15s startup delay and connection errors.
+ * @param {string} fullPrompt - System + user prompt combined
+ * @returns {string[]}
+ */
+function getArgs(fullPrompt) {
+  return ['-p', fullPrompt, '-o', 'json', '--allowed-mcp-server-names', 'sequential-thinking'];
+}
+/**
+ * Parse gemini's JSON stdout into { content, model, parse_mode }.
+ * Gemini prepends an MCP status line before the JSON output:
+ *   "MCP issues detected. Run /mcp list for status."
+ * We skip to the first '{' to handle this.
+ * @param {{ stdout: string, stderr: string, code: number|string }} raw
+ * @returns {{ content: string, model: string, parse_mode: string }}
+ */
+function adapt(raw) {
+  try {
+    const response = parseGeminiResponse(raw.stdout);
+    if (response) return { content: response, model: 'gemini', parse_mode: 'json' };
+  } catch { /* fall through */ }
+  return fallback(raw.stdout);
+}
+/**
+ * Extract the response text from gemini's JSON output.
+ * Handles potential JSON prefix noise using brace counter for robustness.
+ * @param {string} stdout
+ * @returns {string|null}
+ */
+function parseGeminiResponse(stdout) {
+  const jsonStart = stdout.indexOf('{');
+  if (jsonStart === -1) return null;
+  // Use brace counter to find the complete JSON object
+  let depth = 0;
+  let jsonEnd = -1;
+  for (let i = jsonStart; i < stdout.length; i++) {
+    if (stdout[i] === '{') depth++;
+    else if (stdout[i] === '}') {
+      depth--;
+      if (depth === 0) { jsonEnd = i + 1; break; }
+    }
+  }
+  const jsonStr = jsonEnd !== -1 ? stdout.slice(jsonStart, jsonEnd) : stdout.slice(jsonStart);
+  const j = JSON.parse(jsonStr);
+  if (j.response) return j.response;
+  // Handle nested response object
+  for (const v of Object.values(j)) {
+    if (v && typeof v === 'object' && typeof v.response === 'string') return v.response;
+  }
+  return null;
+}
+/**
+ * Extract text from gemini judge output.
+ * @param {{ stdout: string }} raw
+ * @returns {string}
+ */
+function extractJudgeText(raw) {
+  try {
+    const response = parseGeminiResponse(raw.stdout);
+    if (response) return response;
+  } catch { /* fall through */ }
+  return raw.stdout.trim();
+}
+function fallback(stdout) {
+  return { content: stdout.slice(-2000).trim() || '[no output]', model: 'gemini', parse_mode: 'fallback' };
+}
+module.exports = { getArgs, adapt, extractJudgeText, parseGeminiResponse };

package/src/providers/index.js ADDED Viewed

@@ -0,0 +1,22 @@
+'use strict';
+const claude = require('./claude.js');
+const codex = require('./codex.js');
+const gemini = require('./gemini.js');
+const PROVIDERS = {
+  claude: { name: 'claude', cmd: 'claude', ...claude },
+  codex:  { name: 'codex',  cmd: 'codex',  ...codex },
+  gemini: { name: 'gemini', cmd: 'gemini', ...gemini },
+};
+/**
+ * Get the list of providers to run, excluding skipped ones.
+ * @param {string[]} skip - Provider names to skip
+ * @returns {Array<{name, cmd, getArgs, adapt, extractJudgeText}>}
+ */
+function getActiveProviders(skip = []) {
+  return Object.values(PROVIDERS).filter(p => !skip.includes(p.name));
+}
+module.exports = { PROVIDERS, getActiveProviders };