@ijfw/memory-server 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/bin/ijfw-dashboard +20 -1
  2. package/package.json +4 -3
  3. package/src/audit-roster.js +89 -12
  4. package/src/brain/tiered-llm.js +57 -7
  5. package/src/cross-orchestrator-cli.js +344 -4
  6. package/src/cross-project-search.js +39 -1
  7. package/src/dashboard-server.js +7 -1
  8. package/src/dream/runner.mjs +560 -8
  9. package/src/handlers/brain-handler.js +101 -1
  10. package/src/importers/discover.js +1 -1
  11. package/src/memory/bench-metrics.js +289 -0
  12. package/src/memory/benchmark.js +1 -1
  13. package/src/memory/search.js +53 -1
  14. package/src/orchestrator/plan-checker.js +1 -1
  15. package/src/profile/audit.js +671 -0
  16. package/src/profile/capture.js +871 -0
  17. package/src/profile/derive-dialectic.js +242 -0
  18. package/src/profile/derive-heuristic.js +733 -0
  19. package/src/profile/derive.js +156 -0
  20. package/src/profile/egress.js +306 -0
  21. package/src/profile/eval/build-real-probes.mjs +197 -0
  22. package/src/profile/eval/corpus-from-reddit.mjs +166 -0
  23. package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
  24. package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
  25. package/src/profile/eval/gate-b-behavior.mjs +420 -0
  26. package/src/profile/eval/gate-b-decision-run.mjs +171 -0
  27. package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
  28. package/src/profile/eval/gate-b-run.mjs +417 -0
  29. package/src/profile/eval/gate-b-run.test.mjs +204 -0
  30. package/src/profile/eval/gate-c-capture.mjs +323 -0
  31. package/src/profile/eval/harness.mjs +551 -0
  32. package/src/profile/eval/instrument-validation.mjs +248 -0
  33. package/src/profile/eval/instrument-validation.test.mjs +125 -0
  34. package/src/profile/eval/multi-subject-harness.mjs +106 -0
  35. package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
  36. package/src/profile/eval/personas.test.mjs +83 -0
  37. package/src/profile/eval/plumbing.test.mjs +69 -0
  38. package/src/profile/eval/prereg.mjs +130 -0
  39. package/src/profile/eval/prereg.test.mjs +78 -0
  40. package/src/profile/eval/real-corpus.test.mjs +103 -0
  41. package/src/profile/eval/real-personas.mjs +109 -0
  42. package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
  43. package/src/profile/eval/run-real-corpus.mjs +358 -0
  44. package/src/profile/eval/slug-quality.mjs +464 -0
  45. package/src/profile/eval/stylometry-features.js +85 -0
  46. package/src/profile/eval/stylometry-reference.js +16 -0
  47. package/src/profile/eval/stylometry.js +224 -0
  48. package/src/profile/eval/stylometry.test.mjs +103 -0
  49. package/src/profile/eval/synthetic-personas.js +91 -0
  50. package/src/profile/eval/verifier-features.mjs +170 -0
  51. package/src/profile/eval/verifier-logreg.mjs +74 -0
  52. package/src/profile/eval/verifier-pair.mjs +122 -0
  53. package/src/profile/eval/verifier-reference.mjs +68 -0
  54. package/src/profile/eval/verifier-scorer.mjs +30 -0
  55. package/src/profile/eval/wrong-target-control.mjs +168 -0
  56. package/src/profile/eval/wrong-target-control.test.mjs +124 -0
  57. package/src/profile/exemplar-capture.js +232 -0
  58. package/src/profile/exemplar-retrieve.js +138 -0
  59. package/src/profile/exemplar-store.js +314 -0
  60. package/src/profile/lock.js +64 -0
  61. package/src/profile/merge.js +624 -0
  62. package/src/profile/path-policy.js +213 -0
  63. package/src/profile/precision-stamp.mjs +151 -0
  64. package/src/profile/render-brief.js +717 -0
  65. package/src/profile/schema.js +244 -0
  66. package/src/profile/sensitivity.js +249 -0
  67. package/src/profile/serve.js +345 -0
  68. package/src/profile/store.js +261 -0
  69. package/src/profile/telemetry.js +289 -0
  70. package/src/recovery/checkpoint.js +7 -1
  71. package/src/server.js +185 -14
  72. package/src/.registry-meta-key.pem +0 -3
package/bin/ijfw-dashboard CHANGED
@@ -79,6 +79,20 @@ const argv = process.argv.slice(2);
79
79
  const sub = argv[0] || 'status';
80
80
  const noOpen = argv.includes('--no-open');
81
81
 
82
+ // Optional `--port N` override (documented in the usage banner). When set we
83
+ // forward it to the daemon via IJFW_DASHBOARD_PORT so the server's port-walk
84
+ // starts there instead of the default 37891. Unset = current behaviour. This
85
+ // also makes the dashboard testable on a scratch port without touching 37891.
86
+ function parsePortFlag(args) {
87
+ const i = args.indexOf('--port');
88
+ if (i !== -1 && args[i + 1]) {
89
+ const n = parseInt(args[i + 1], 10);
90
+ if (Number.isInteger(n) && n > 0 && n < 65536) return String(n);
91
+ }
92
+ return null;
93
+ }
94
+ const portOverride = parsePortFlag(argv);
95
+
82
96
  if (sub === 'start') {
83
97
  ensureDir();
84
98
  const existingPid = readPid();
@@ -97,7 +111,12 @@ if (sub === 'start') {
97
111
  const child = spawn(process.execPath, [SERVER_JS, '--daemon'], {
98
112
  detached: true,
99
113
  stdio: ['ignore', 'ignore', 'ignore'],
100
- env: { ...process.env, IJFW_PID_FILE: PID_FILE, IJFW_PORT_FILE: PORT_FILE },
114
+ env: {
115
+ ...process.env,
116
+ IJFW_PID_FILE: PID_FILE,
117
+ IJFW_PORT_FILE: PORT_FILE,
118
+ ...(portOverride ? { IJFW_DASHBOARD_PORT: portOverride } : {}),
119
+ },
101
120
  });
102
121
  child.unref();
103
122
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ijfw/memory-server",
3
- "version": "1.5.5",
3
+ "version": "1.6.0",
4
4
  "description": "Cross-platform persistent memory server for IJFW. 14 MCP tools (memory + admin/update + brain). Works with 15 platforms: 14 via MCP (Claude Code, Codex, Gemini CLI, Cursor, Windsurf, Copilot, Hermes, Wayland, OpenCode, QwenCode, Cline, KimiCode, OpenClaw, Antigravity) plus Aider via the rules-only tier.",
5
5
  "author": "Sean Donahoe",
6
6
  "contributors": [
@@ -28,10 +28,11 @@
28
28
  "scripts": {
29
29
  "start": "node src/server.js",
30
30
  "dev": "node --watch src/server.js",
31
- "test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
31
+ "test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js test/profile-*.test.mjs src/profile/eval/*.test.mjs",
32
32
  "test:smoke": "node test.js",
33
33
  "test:full": "node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
34
- "test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js"
34
+ "test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js",
35
+ "functional-smoke": "node scripts/run-functional-smoke.mjs"
35
36
  },
36
37
  "engines": {
37
38
  "node": ">=18.0.0"
package/src/audit-roster.js CHANGED
@@ -86,8 +86,24 @@ export const ROSTER = [
86
86
  family: 'google',
87
87
  model: '',
88
88
  name: 'Gemini CLI',
89
- invoke: 'gemini',
90
- note: 'Strong on security + architectural patterns. Auto-detects piped stdin for headless mode.',
89
+ // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `gemini` BREAKS on the
90
+ // current CLI (verified gemini-cli 0.43.0) for two reasons:
91
+ // 1. Trusted-directory gate (added like codex's --skip-git-repo-check):
92
+ // a non-interactive invoke outside a "trusted" folder exits immediately
93
+ // with "Gemini CLI is not running in a trusted directory" and emits ZERO
94
+ // output. --skip-trust bypasses it (env GEMINI_CLI_TRUST_WORKSPACE=true
95
+ // is the alternative). Without it the CLI path NEVER succeeds; runs were
96
+ // only ever rescued by the API fallback (every historical receipt shows
97
+ // gemini source='api', never 'cli').
98
+ // 2. Recursive IJFW-MCP autostart: when the ijfw gemini extension is
99
+ // loaded, `gemini` autostarts the IJFW MCP server (the same self-
100
+ // referential hang codex dodges via mcp_servers.ijfw-memory.enabled=false).
101
+ // `-e none` loads zero extensions, removing the autostart latency/noise.
102
+ // Prompt is delivered on stdin: piped non-TTY stdin triggers headless mode,
103
+ // so no -p value is needed (and `-p` with no value is a yargs error under
104
+ // whitespace-split argv). Verified returning findings end-to-end 2026-06-08.
105
+ invoke: 'gemini --skip-trust -e none',
106
+ note: 'Strong on security + architectural patterns. Reads the prompt from piped stdin in headless mode. --skip-trust bypasses the trusted-directory gate (gemini-cli >=0.43); -e none disables extensions so the IJFW MCP server is not recursively autostarted.',
91
107
  detect: (env) => Boolean(env.GEMINI_CLI || env.GOOGLE_CLOUD_PROJECT_GEMINI) || /gemini-cli/i.test(env._ || ''),
92
108
  // model is resolved at call-time via model-refresh.js (24h-cached probe).
93
109
  get apiFallback() {
@@ -99,8 +115,22 @@ export const ROSTER = [
99
115
  family: 'oss',
100
116
  model: '',
101
117
  name: 'Qwen Code',
102
- invoke: 'qwen -p',
103
- note: 'Apache-2.0 weights (Qwen3-Coder-480B-A35B), agentic-tuned (~67% SWE-Bench Verified). Fork of gemini-cli; supports qwen-oauth (free Coding Plan tier), plus openai/anthropic/gemini auth-types via `qwen auth`. Diversity value for Trident: third independent training lineage outside openai/google.',
118
+ // v1.6.0 xaudit-fix — STALE-ARGV repair. `qwen -p` (no value) is wrong on
119
+ // current Qwen Code (verified 0.15.6): `-p/--prompt` is now DEPRECATED and a
120
+ // bare `-p` with no argument is a yargs error under whitespace-split argv.
121
+ // The prompt is delivered on stdin (this fork ignores the positional `query`
122
+ // and prints "No input provided via stdin" if nothing is piped). Bare
123
+ // `qwen <stdin>` ALSO fails silently: the implicit startup auto-discovery
124
+ // runs an approval/confirmation flow that auto-cancels under non-TTY stdin
125
+ // and emits only "Operation cancelled." with ZERO findings (the gemini-cli-
126
+ // fork analog of gemini's trusted-directory gate). `--bare` skips that
127
+ // auto-discovery and `--yolo` auto-approves all actions, so the headless run
128
+ // reaches the model — or fails FAST + CLEAN on auth ("No auth type is
129
+ // selected ... before running in non-interactive mode" in ~1s when neither a
130
+ // qwen-oauth login nor DASHSCOPE_API_KEY is configured). Verified 2026-06-08:
131
+ // `--bare --yolo` errors cleanly on auth in 1s instead of the silent cancel.
132
+ invoke: 'qwen --bare --yolo',
133
+ note: 'Apache-2.0 weights (Qwen3-Coder-480B-A35B), agentic-tuned (~67% SWE-Bench Verified). Fork of gemini-cli; supports qwen-oauth (free Coding Plan tier), plus openai/anthropic/gemini auth-types via `qwen auth`. Reads the prompt from piped stdin; --bare skips the startup auto-discovery that auto-cancels under non-TTY stdin, --yolo auto-approves. Diversity value for Trident: third independent training lineage outside openai/google.',
104
134
  detect: (env) => Boolean(env.QWEN_SESSION) || /(?:^|\W)qwen(?:\W|$)/i.test(env._ || ''),
105
135
  apiFallback: { provider: 'openai-compat', model: 'qwen3-coder-plus', authEnv: 'DASHSCOPE_API_KEY', endpoint: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions' },
106
136
  },
@@ -119,8 +149,18 @@ export const ROSTER = [
119
149
  family: 'oss',
120
150
  model: '',
121
151
  name: 'Kimi (Moonshot)',
122
- invoke: 'kimi',
123
- note: 'Moonshot AI Kimi K2 series (Chinese open-source lineage, separate from DeepSeek). Long-context strength makes it useful for whole-file or whole-module audits where context window matters. OpenAI-compatible API via platform.moonshot.ai. Detection is left at false because no canonical session env var ships with Kimi today -- prefer double-coverage over false self-exclusion.',
152
+ // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `kimi` (verified 1.38.0) is a
153
+ // Click-style `kimi [OPTIONS] COMMAND` agent that PROMPTS INTERACTIVELY by
154
+ // default (`--prompt ... Default: prompt interactively`), so a piped prompt
155
+ // on stdin would stall waiting on a TTY. `--print` is the non-interactive
156
+ // "print mode" (it implicitly adds `--yolo`) and, with `--input-format text`
157
+ // (the default), the input is read from piped stdin. `--quiet` is the shipped
158
+ // alias for `--print --output-format text --final-message-only`, which prints
159
+ // ONLY the final assistant message to stdout (the "resume session" hint goes
160
+ // to stderr, keeping stdout clean for fence parsing). Verified 2026-06-08:
161
+ // `kimi --print --quiet` returned a clean JSON fence with both seeded bugs.
162
+ invoke: 'kimi --print --quiet',
163
+ note: 'Moonshot AI Kimi K2 series (Chinese open-source lineage, separate from DeepSeek). Long-context strength makes it useful for whole-file or whole-module audits where context window matters. `kimi --print --quiet` is the headless mode (reads the prompt from piped stdin, prints only the final assistant message). OpenAI-compatible API via platform.moonshot.ai. Detection is left at false because no canonical session env var ships with Kimi today -- prefer double-coverage over false self-exclusion.',
124
164
  detect: () => false,
125
165
  apiFallback: { provider: 'openai-compat', model: 'kimi-k2.6', authEnv: 'MOONSHOT_API_KEY', endpoint: 'https://api.moonshot.ai/v1/chat/completions' },
126
166
  },
@@ -129,8 +169,16 @@ export const ROSTER = [
129
169
  family: 'oss',
130
170
  model: '',
131
171
  name: 'opencode',
132
- invoke: 'opencode',
133
- note: 'OSS / local-friendly; good when privacy matters.',
172
+ // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `opencode` launches the
173
+ // INTERACTIVE TUI (verified opencode 1.15.11): it renders a full-screen
174
+ // ANSI app and never consumes the piped prompt, so every cross-audit run
175
+ // hung until the per-auditor SIGKILL timeout (status='timeout'). opencode
176
+ // has NO apiFallback, so there was no rescue path — opencode was 100%
177
+ // broken for every user. The non-interactive subcommand is `opencode run`,
178
+ // which reads the prompt from stdin and prints the model reply to stdout
179
+ // (verified returning a clean response 2026-06-08).
180
+ invoke: 'opencode run',
181
+ note: 'OSS / local-friendly; good when privacy matters. `opencode run` is the headless subcommand (bare `opencode` opens the interactive TUI and would hang).',
134
182
  detect: (env) => Boolean(env.OPENCODE_SESSION || env.OPENCODE_HOME),
135
183
  apiFallback: null,
136
184
  },
@@ -149,8 +197,20 @@ export const ROSTER = [
149
197
  family: 'openai',
150
198
  model: '',
151
199
  name: 'Copilot CLI',
152
- invoke: 'gh copilot suggest',
153
- note: 'Convenient if gh CLI is already authenticated.',
200
+ // v1.6.0 xaudit-fix — STALE-ARGV repair. `gh copilot suggest` was the OLD
201
+ // `gh` CLI extension, which only echoes a single shell-command suggestion
202
+ // (not a code audit) and is a different binary entirely. The standalone
203
+ // GitHub Copilot CLI (`copilot`, verified 1.0.59) is the real headless
204
+ // auditor: `-p/--prompt` runs a non-interactive scripting turn, and a bare
205
+ // `-p` (no inline value) consumes the prompt from piped stdin. Non-
206
+ // interactive mode REQUIRES `--allow-all-tools` (per the CLI's own help:
207
+ // "required for non-interactive mode"), otherwise it blocks on a tool-
208
+ // permission confirmation. Verified 2026-06-08: `copilot -p --allow-all-tools`
209
+ // reaches the service in seconds and fails FAST + CLEAN when the account's
210
+ // Copilot is org-policy-restricted (an environmental auth/entitlement error,
211
+ // not a hang). detect() left on GH_COPILOT_TOKEN/COPILOT_CLI_SESSION.
212
+ invoke: 'copilot -p --allow-all-tools',
213
+ note: 'Standalone GitHub Copilot CLI (`copilot`, not the old `gh copilot` extension). `-p` reads the prompt from piped stdin; `--allow-all-tools` is required for non-interactive mode. Convenient if Copilot CLI is already authenticated and not org-policy-restricted.',
154
214
  detect: (env) => Boolean(env.GH_COPILOT_TOKEN || env.COPILOT_CLI_SESSION),
155
215
  apiFallback: null,
156
216
  },
@@ -159,8 +219,14 @@ export const ROSTER = [
159
219
  family: 'anthropic',
160
220
  model: '',
161
221
  name: 'Claude Code',
222
+ // `claude -p` (== --print) is the headless mode: reads the prompt from piped
223
+ // stdin and prints the reply to stdout. Verified 2026-06-08 returning fast +
224
+ // clean (4s) — in this repo claude is the CALLER (CLAUDECODE=1), so detect()
225
+ // below correctly self-excludes it from its own Trident; the invoke is only
226
+ // exercised when a NON-claude caller picks claude as an auditor. When claude
227
+ // IS a valid (non-self) auditor, `claude -p` is the correct current form.
162
228
  invoke: 'claude -p',
163
- note: 'Anthropic; useful when you want a second Claude pass in a fresh session.',
229
+ note: 'Anthropic; useful when you want a second Claude pass in a fresh session. `claude -p` reads the prompt from piped stdin (headless --print mode). Self-excluded via detect() when claude is the caller.',
164
230
  detect: (env) => Boolean(env.CLAUDECODE || env.CLAUDE_CODE_ENTRYPOINT || env.CLAUDE_PLUGIN_ROOT),
165
231
  // model is resolved at call-time via model-refresh.js (24h-cached probe).
166
232
  get apiFallback() {
@@ -206,7 +272,18 @@ export function isInstalled(id) {
206
272
  const bin = entry.invoke.split(/\s+/)[0];
207
273
  // POSIX `command -v` is the portable existence check; bash builtin form
208
274
  // works reliably across macOS + Linux. spawnSync exit code = 0 → present.
209
- const r = spawnSync('bash', ['-lc', `command -v ${JSON.stringify(bin)} >/dev/null 2>&1`], { timeout: 2000 });
275
+ //
276
+ // v1.6.0 diag-fix — `command -v` returns success for a regular file on PATH
277
+ // even when it is NOT executable, so a dangling/non-+x file shadowing a real
278
+ // auditor name made doctor report `cli_installed: true` for something that
279
+ // CANNOT actually be invoked (the "present-claimed but invocation-broken"
280
+ // false report). We now additionally require the resolved target to pass
281
+ // `[ -x ]` (or be a shell builtin/keyword/function with no filesystem path,
282
+ // which `command -v` reports without a leading slash — those are genuinely
283
+ // runnable). A real installed CLI is an executable file and still passes.
284
+ const probe = `p=$(command -v ${JSON.stringify(bin)} 2>/dev/null) || exit 1; ` +
285
+ `case "$p" in /*) [ -x "$p" ] ;; *) : ;; esac`;
286
+ const r = spawnSync('bash', ['-lc', probe], { timeout: 2000 });
210
287
  const installed = r.status === 0;
211
288
  _installedCache.set(id, { value: installed, ts: Date.now() });
212
289
  return installed;
package/src/brain/tiered-llm.js CHANGED
@@ -26,7 +26,7 @@ export function resolveTierModel(tier, env = process.env) {
26
26
  throw new Error(`tiered-llm: unknown tier '${tier}'`);
27
27
  }
28
28
 
29
- function defaultCallers() {
29
+ export function defaultCallers() {
30
30
  return {
31
31
  async local({ url, model, prompt, maxTokens }) {
32
32
  // Ollama-compatible /api/generate -- streamless single-response mode.
@@ -39,8 +39,48 @@ function defaultCallers() {
39
39
  const data = await res.json();
40
40
  return { text: data.response || '', usage: { input: data.prompt_eval_count, output: data.eval_count }, model, via: 'local' };
41
41
  },
42
- async anthropic({ model, prompt, maxTokens, apiKey }) {
42
+ async openaiLocal({ url, model, prompt, maxTokens, temperature }) {
43
+ // OpenAI-compatible /chat/completions -- used by the bench to grade on a
44
+ // LOCAL vLLM-served synth model. `url` already includes the API base
45
+ // (e.g. http://localhost:8000/v1). enable_thinking:false is REQUIRED:
46
+ // Qwen3.6 is a hybrid-reasoning model that otherwise emits a thinking
47
+ // trace instead of the answer; vLLM passes this through to the chat
48
+ // template. NO silent fallback to a cloud model -- callTiered routes here
49
+ // WITHOUT a try/catch so a local-synth failure surfaces honestly.
50
+ const body = {
51
+ model,
52
+ messages: [{ role: 'user', content: prompt }],
53
+ max_tokens: maxTokens,
54
+ chat_template_kwargs: { enable_thinking: false },
55
+ };
56
+ if (typeof temperature === 'number') body.temperature = temperature;
57
+ const res = await fetch(url.replace(/\/$/, '') + '/chat/completions', {
58
+ method: 'POST',
59
+ headers: { 'Content-Type': 'application/json' },
60
+ body: JSON.stringify(body),
61
+ });
62
+ if (!res.ok) throw new Error(`openai-local LLM HTTP ${res.status}`);
63
+ const data = await res.json();
64
+ const choice = data.choices && data.choices[0];
65
+ if (!choice || !choice.message) throw new Error('openai-local LLM: missing choice in response');
66
+ const usage = data.usage || {};
67
+ return {
68
+ text: choice.message.content || '',
69
+ usage: { input: usage.prompt_tokens, output: usage.completion_tokens },
70
+ model,
71
+ via: 'openai-local',
72
+ };
73
+ },
74
+ async anthropic({ model, prompt, maxTokens, apiKey, temperature }) {
43
75
  if (!apiKey) throw new Error('tiered-llm: ANTHROPIC_API_KEY (or IJFW_BRAIN_API_KEY) required for Anthropic fallback');
76
+ const payload = {
77
+ model,
78
+ max_tokens: maxTokens,
79
+ messages: [{ role: 'user', content: prompt }],
80
+ };
81
+ // Optional, backward-compatible: omitted -> API default. Used by the
82
+ // benchmark harness to pin temperature:0 for deterministic answers.
83
+ if (typeof temperature === 'number') payload.temperature = temperature;
44
84
  const res = await fetch('https://api.anthropic.com/v1/messages', {
45
85
  method: 'POST',
46
86
  headers: {
@@ -48,11 +88,7 @@ function defaultCallers() {
48
88
  'x-api-key': apiKey,
49
89
  'anthropic-version': '2023-06-01',
50
90
  },
51
- body: JSON.stringify({
52
- model,
53
- max_tokens: maxTokens,
54
- messages: [{ role: 'user', content: prompt }],
55
- }),
91
+ body: JSON.stringify(payload),
56
92
  });
57
93
  if (!res.ok) throw new Error(`Anthropic HTTP ${res.status}`);
58
94
  const data = await res.json();
@@ -67,6 +103,19 @@ export async function callTiered(tier, prompt, opts = {}) {
67
103
  const model = resolveTierModel(tier, env);
68
104
  const maxTokens = opts.maxTokens || DEFAULT_MAX_TOKENS[tier] || 512;
69
105
  const callers = opts._callers || defaultCallers();
106
+ // Opt-in OpenAI-compatible local synth (bench): point at a vLLM server.
107
+ // FAILS LOUD by design -- no try/catch, no Anthropic fallback. If this
108
+ // errors, the bench must error too rather than silently grade on a cloud
109
+ // model from a different family (which would corrupt the experiment).
110
+ if (env.IJFW_BENCH_SYNTH_URL) {
111
+ return callers.openaiLocal({
112
+ url: env.IJFW_BENCH_SYNTH_URL,
113
+ model,
114
+ prompt,
115
+ maxTokens,
116
+ temperature: opts.temperature,
117
+ });
118
+ }
70
119
  if (env.IJFW_BRAIN_LOCAL_URL) {
71
120
  try {
72
121
  return await callers.local({ url: env.IJFW_BRAIN_LOCAL_URL, model, prompt, maxTokens });
@@ -79,5 +128,6 @@ export async function callTiered(tier, prompt, opts = {}) {
79
128
  prompt,
80
129
  maxTokens,
81
130
  apiKey: env.IJFW_BRAIN_API_KEY || env.ANTHROPIC_API_KEY,
131
+ temperature: opts.temperature,
82
132
  });
83
133
  }