@ijfw/memory-server 1.5.6 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-dashboard +20 -1
- package/package.json +4 -3
- package/src/audit-roster.js +89 -12
- package/src/brain/tiered-llm.js +57 -7
- package/src/cross-orchestrator-cli.js +390 -4
- package/src/cross-project-search.js +39 -1
- package/src/dashboard-server.js +23 -1
- package/src/dream/runner.mjs +560 -8
- package/src/handlers/brain-handler.js +101 -1
- package/src/importers/discover.js +1 -1
- package/src/memory/bench-metrics.js +289 -0
- package/src/memory/benchmark.js +1 -1
- package/src/memory/search.js +53 -1
- package/src/model-refresh.js +4 -2
- package/src/orchestrator/plan-checker.js +1 -1
- package/src/profile/audit.js +671 -0
- package/src/profile/capture.js +871 -0
- package/src/profile/derive-dialectic.js +242 -0
- package/src/profile/derive-heuristic.js +733 -0
- package/src/profile/derive.js +156 -0
- package/src/profile/egress.js +306 -0
- package/src/profile/eval/build-real-probes.mjs +197 -0
- package/src/profile/eval/corpus-from-reddit.mjs +166 -0
- package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
- package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
- package/src/profile/eval/gate-b-behavior.mjs +420 -0
- package/src/profile/eval/gate-b-decision-run.mjs +171 -0
- package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
- package/src/profile/eval/gate-b-run.mjs +417 -0
- package/src/profile/eval/gate-b-run.test.mjs +204 -0
- package/src/profile/eval/gate-c-capture.mjs +323 -0
- package/src/profile/eval/harness.mjs +551 -0
- package/src/profile/eval/instrument-validation.mjs +248 -0
- package/src/profile/eval/instrument-validation.test.mjs +125 -0
- package/src/profile/eval/multi-subject-harness.mjs +106 -0
- package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
- package/src/profile/eval/personas.test.mjs +83 -0
- package/src/profile/eval/plumbing.test.mjs +69 -0
- package/src/profile/eval/prereg.mjs +130 -0
- package/src/profile/eval/prereg.test.mjs +78 -0
- package/src/profile/eval/real-corpus.test.mjs +103 -0
- package/src/profile/eval/real-personas.mjs +109 -0
- package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
- package/src/profile/eval/run-real-corpus.mjs +358 -0
- package/src/profile/eval/slug-quality.mjs +464 -0
- package/src/profile/eval/stylometry-features.js +85 -0
- package/src/profile/eval/stylometry-reference.js +16 -0
- package/src/profile/eval/stylometry.js +224 -0
- package/src/profile/eval/stylometry.test.mjs +103 -0
- package/src/profile/eval/synthetic-personas.js +91 -0
- package/src/profile/eval/verifier-features.mjs +170 -0
- package/src/profile/eval/verifier-logreg.mjs +74 -0
- package/src/profile/eval/verifier-pair.mjs +122 -0
- package/src/profile/eval/verifier-reference.mjs +68 -0
- package/src/profile/eval/verifier-scorer.mjs +30 -0
- package/src/profile/eval/wrong-target-control.mjs +168 -0
- package/src/profile/eval/wrong-target-control.test.mjs +124 -0
- package/src/profile/exemplar-capture.js +232 -0
- package/src/profile/exemplar-retrieve.js +138 -0
- package/src/profile/exemplar-store.js +314 -0
- package/src/profile/lock.js +64 -0
- package/src/profile/merge.js +624 -0
- package/src/profile/path-policy.js +213 -0
- package/src/profile/precision-stamp.mjs +151 -0
- package/src/profile/render-brief.js +717 -0
- package/src/profile/schema.js +244 -0
- package/src/profile/sensitivity.js +249 -0
- package/src/profile/serve.js +345 -0
- package/src/profile/store.js +261 -0
- package/src/profile/telemetry.js +289 -0
- package/src/recovery/checkpoint.js +7 -1
- package/src/server.js +194 -16
- package/src/.registry-meta-key.pem +0 -3
package/bin/ijfw-dashboard
CHANGED
|
@@ -79,6 +79,20 @@ const argv = process.argv.slice(2);
|
|
|
79
79
|
const sub = argv[0] || 'status';
|
|
80
80
|
const noOpen = argv.includes('--no-open');
|
|
81
81
|
|
|
82
|
+
// Optional `--port N` override (documented in the usage banner). When set we
|
|
83
|
+
// forward it to the daemon via IJFW_DASHBOARD_PORT so the server's port-walk
|
|
84
|
+
// starts there instead of the default 37891. Unset = current behaviour. This
|
|
85
|
+
// also makes the dashboard testable on a scratch port without touching 37891.
|
|
86
|
+
function parsePortFlag(args) {
|
|
87
|
+
const i = args.indexOf('--port');
|
|
88
|
+
if (i !== -1 && args[i + 1]) {
|
|
89
|
+
const n = parseInt(args[i + 1], 10);
|
|
90
|
+
if (Number.isInteger(n) && n > 0 && n < 65536) return String(n);
|
|
91
|
+
}
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
const portOverride = parsePortFlag(argv);
|
|
95
|
+
|
|
82
96
|
if (sub === 'start') {
|
|
83
97
|
ensureDir();
|
|
84
98
|
const existingPid = readPid();
|
|
@@ -97,7 +111,12 @@ if (sub === 'start') {
|
|
|
97
111
|
const child = spawn(process.execPath, [SERVER_JS, '--daemon'], {
|
|
98
112
|
detached: true,
|
|
99
113
|
stdio: ['ignore', 'ignore', 'ignore'],
|
|
100
|
-
env: {
|
|
114
|
+
env: {
|
|
115
|
+
...process.env,
|
|
116
|
+
IJFW_PID_FILE: PID_FILE,
|
|
117
|
+
IJFW_PORT_FILE: PORT_FILE,
|
|
118
|
+
...(portOverride ? { IJFW_DASHBOARD_PORT: portOverride } : {}),
|
|
119
|
+
},
|
|
101
120
|
});
|
|
102
121
|
child.unref();
|
|
103
122
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ijfw/memory-server",
|
|
3
|
-
"version": "1.5.6",
|
|
3
|
+
"version": "1.6.1",
|
|
4
4
|
"description": "Cross-platform persistent memory server for IJFW. 14 MCP tools (memory + admin/update + brain). Works with 15 platforms: 14 via MCP (Claude Code, Codex, Gemini CLI, Cursor, Windsurf, Copilot, Hermes, Wayland, OpenCode, QwenCode, Cline, KimiCode, OpenClaw, Antigravity) plus Aider via the rules-only tier.",
|
|
5
5
|
"author": "Sean Donahoe",
|
|
6
6
|
"contributors": [
|
|
@@ -28,10 +28,11 @@
|
|
|
28
28
|
"scripts": {
|
|
29
29
|
"start": "node src/server.js",
|
|
30
30
|
"dev": "node --watch src/server.js",
|
|
31
|
-
"test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
|
|
31
|
+
"test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js test/profile-*.test.mjs src/profile/eval/*.test.mjs",
|
|
32
32
|
"test:smoke": "node test.js",
|
|
33
33
|
"test:full": "node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
|
|
34
|
-
"test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js"
|
|
34
|
+
"test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js",
|
|
35
|
+
"functional-smoke": "node scripts/run-functional-smoke.mjs"
|
|
35
36
|
},
|
|
36
37
|
"engines": {
|
|
37
38
|
"node": ">=18.0.0"
|
package/src/audit-roster.js
CHANGED
|
@@ -86,8 +86,24 @@ export const ROSTER = [
|
|
|
86
86
|
family: 'google',
|
|
87
87
|
model: '',
|
|
88
88
|
name: 'Gemini CLI',
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
// v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `gemini` BREAKS on the
|
|
90
|
+
// current CLI (verified gemini-cli 0.43.0) for two reasons:
|
|
91
|
+
// 1. Trusted-directory gate (added like codex's --skip-git-repo-check):
|
|
92
|
+
// a non-interactive invoke outside a "trusted" folder exits immediately
|
|
93
|
+
// with "Gemini CLI is not running in a trusted directory" and emits ZERO
|
|
94
|
+
// output. --skip-trust bypasses it (env GEMINI_CLI_TRUST_WORKSPACE=true
|
|
95
|
+
// is the alternative). Without it the CLI path NEVER succeeds; runs were
|
|
96
|
+
// only ever rescued by the API fallback (every historical receipt shows
|
|
97
|
+
// gemini source='api', never 'cli').
|
|
98
|
+
// 2. Recursive IJFW-MCP autostart: when the ijfw gemini extension is
|
|
99
|
+
// loaded, `gemini` autostarts the IJFW MCP server (the same self-
|
|
100
|
+
// referential hang codex dodges via mcp_servers.ijfw-memory.enabled=false).
|
|
101
|
+
// `-e none` loads zero extensions, removing the autostart latency/noise.
|
|
102
|
+
// Prompt is delivered on stdin: piped non-TTY stdin triggers headless mode,
|
|
103
|
+
// so no -p value is needed (and `-p` with no value is a yargs error under
|
|
104
|
+
// whitespace-split argv). Verified returning findings end-to-end 2026-06-08.
|
|
105
|
+
invoke: 'gemini --skip-trust -e none',
|
|
106
|
+
note: 'Strong on security + architectural patterns. Reads the prompt from piped stdin in headless mode. --skip-trust bypasses the trusted-directory gate (gemini-cli >=0.43); -e none disables extensions so the IJFW MCP server is not recursively autostarted.',
|
|
91
107
|
detect: (env) => Boolean(env.GEMINI_CLI || env.GOOGLE_CLOUD_PROJECT_GEMINI) || /gemini-cli/i.test(env._ || ''),
|
|
92
108
|
// model is resolved at call-time via model-refresh.js (24h-cached probe).
|
|
93
109
|
get apiFallback() {
|
|
@@ -99,8 +115,22 @@ export const ROSTER = [
|
|
|
99
115
|
family: 'oss',
|
|
100
116
|
model: '',
|
|
101
117
|
name: 'Qwen Code',
|
|
102
|
-
|
|
103
|
-
|
|
118
|
+
// v1.6.0 xaudit-fix — STALE-ARGV repair. `qwen -p` (no value) is wrong on
|
|
119
|
+
// current Qwen Code (verified 0.15.6): `-p/--prompt` is now DEPRECATED and a
|
|
120
|
+
// bare `-p` with no argument is a yargs error under whitespace-split argv.
|
|
121
|
+
// The prompt is delivered on stdin (this fork ignores the positional `query`
|
|
122
|
+
// and prints "No input provided via stdin" if nothing is piped). Bare
|
|
123
|
+
// `qwen <stdin>` ALSO fails silently: the implicit startup auto-discovery
|
|
124
|
+
// runs an approval/confirmation flow that auto-cancels under non-TTY stdin
|
|
125
|
+
// and emits only "Operation cancelled." with ZERO findings (the gemini-cli-
|
|
126
|
+
// fork analog of gemini's trusted-directory gate). `--bare` skips that
|
|
127
|
+
// auto-discovery and `--yolo` auto-approves all actions, so the headless run
|
|
128
|
+
// reaches the model — or fails FAST + CLEAN on auth ("No auth type is
|
|
129
|
+
// selected ... before running in non-interactive mode" in ~1s when neither a
|
|
130
|
+
// qwen-oauth login nor DASHSCOPE_API_KEY is configured). Verified 2026-06-08:
|
|
131
|
+
// `--bare --yolo` errors cleanly on auth in 1s instead of the silent cancel.
|
|
132
|
+
invoke: 'qwen --bare --yolo',
|
|
133
|
+
note: 'Apache-2.0 weights (Qwen3-Coder-480B-A35B), agentic-tuned (~67% SWE-Bench Verified). Fork of gemini-cli; supports qwen-oauth (free Coding Plan tier), plus openai/anthropic/gemini auth-types via `qwen auth`. Reads the prompt from piped stdin; --bare skips the startup auto-discovery that auto-cancels under non-TTY stdin, --yolo auto-approves. Diversity value for Trident: third independent training lineage outside openai/google.',
|
|
104
134
|
detect: (env) => Boolean(env.QWEN_SESSION) || /(?:^|\W)qwen(?:\W|$)/i.test(env._ || ''),
|
|
105
135
|
apiFallback: { provider: 'openai-compat', model: 'qwen3-coder-plus', authEnv: 'DASHSCOPE_API_KEY', endpoint: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions' },
|
|
106
136
|
},
|
|
@@ -119,8 +149,18 @@ export const ROSTER = [
|
|
|
119
149
|
family: 'oss',
|
|
120
150
|
model: '',
|
|
121
151
|
name: 'Kimi (Moonshot)',
|
|
122
|
-
|
|
123
|
-
|
|
152
|
+
// v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `kimi` (verified 1.38.0) is a
|
|
153
|
+
// Click-style `kimi [OPTIONS] COMMAND` agent that PROMPTS INTERACTIVELY by
|
|
154
|
+
// default (`--prompt ... Default: prompt interactively`), so a piped prompt
|
|
155
|
+
// on stdin would stall waiting on a TTY. `--print` is the non-interactive
|
|
156
|
+
// "print mode" (it implicitly adds `--yolo`) and, with `--input-format text`
|
|
157
|
+
// (the default), the input is read from piped stdin. `--quiet` is the shipped
|
|
158
|
+
// alias for `--print --output-format text --final-message-only`, which prints
|
|
159
|
+
// ONLY the final assistant message to stdout (the "resume session" hint goes
|
|
160
|
+
// to stderr, keeping stdout clean for fence parsing). Verified 2026-06-08:
|
|
161
|
+
// `kimi --print --quiet` returned a clean JSON fence with both seeded bugs.
|
|
162
|
+
invoke: 'kimi --print --quiet',
|
|
163
|
+
note: 'Moonshot AI Kimi K2 series (Chinese open-source lineage, separate from DeepSeek). Long-context strength makes it useful for whole-file or whole-module audits where context window matters. `kimi --print --quiet` is the headless mode (reads the prompt from piped stdin, prints only the final assistant message). OpenAI-compatible API via platform.moonshot.ai. Detection is left at false because no canonical session env var ships with Kimi today -- prefer double-coverage over false self-exclusion.',
|
|
124
164
|
detect: () => false,
|
|
125
165
|
apiFallback: { provider: 'openai-compat', model: 'kimi-k2.6', authEnv: 'MOONSHOT_API_KEY', endpoint: 'https://api.moonshot.ai/v1/chat/completions' },
|
|
126
166
|
},
|
|
@@ -129,8 +169,16 @@ export const ROSTER = [
|
|
|
129
169
|
family: 'oss',
|
|
130
170
|
model: '',
|
|
131
171
|
name: 'opencode',
|
|
132
|
-
|
|
133
|
-
|
|
172
|
+
// v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `opencode` launches the
|
|
173
|
+
// INTERACTIVE TUI (verified opencode 1.15.11): it renders a full-screen
|
|
174
|
+
// ANSI app and never consumes the piped prompt, so every cross-audit run
|
|
175
|
+
// hung until the per-auditor SIGKILL timeout (status='timeout'). opencode
|
|
176
|
+
// has NO apiFallback, so there was no rescue path — opencode was 100%
|
|
177
|
+
// broken for every user. The non-interactive subcommand is `opencode run`,
|
|
178
|
+
// which reads the prompt from stdin and prints the model reply to stdout
|
|
179
|
+
// (verified returning a clean response 2026-06-08).
|
|
180
|
+
invoke: 'opencode run',
|
|
181
|
+
note: 'OSS / local-friendly; good when privacy matters. `opencode run` is the headless subcommand (bare `opencode` opens the interactive TUI and would hang).',
|
|
134
182
|
detect: (env) => Boolean(env.OPENCODE_SESSION || env.OPENCODE_HOME),
|
|
135
183
|
apiFallback: null,
|
|
136
184
|
},
|
|
@@ -149,8 +197,20 @@ export const ROSTER = [
|
|
|
149
197
|
family: 'openai',
|
|
150
198
|
model: '',
|
|
151
199
|
name: 'Copilot CLI',
|
|
152
|
-
|
|
153
|
-
|
|
200
|
+
// v1.6.0 xaudit-fix — STALE-ARGV repair. `gh copilot suggest` was the OLD
|
|
201
|
+
// `gh` CLI extension, which only echoes a single shell-command suggestion
|
|
202
|
+
// (not a code audit) and is a different binary entirely. The standalone
|
|
203
|
+
// GitHub Copilot CLI (`copilot`, verified 1.0.59) is the real headless
|
|
204
|
+
// auditor: `-p/--prompt` runs a non-interactive scripting turn, and a bare
|
|
205
|
+
// `-p` (no inline value) consumes the prompt from piped stdin. Non-
|
|
206
|
+
// interactive mode REQUIRES `--allow-all-tools` (per the CLI's own help:
|
|
207
|
+
// "required for non-interactive mode"), otherwise it blocks on a tool-
|
|
208
|
+
// permission confirmation. Verified 2026-06-08: `copilot -p --allow-all-tools`
|
|
209
|
+
// reaches the service in seconds and fails FAST + CLEAN when the account's
|
|
210
|
+
// Copilot is org-policy-restricted (an environmental auth/entitlement error,
|
|
211
|
+
// not a hang). detect() left on GH_COPILOT_TOKEN/COPILOT_CLI_SESSION.
|
|
212
|
+
invoke: 'copilot -p --allow-all-tools',
|
|
213
|
+
note: 'Standalone GitHub Copilot CLI (`copilot`, not the old `gh copilot` extension). `-p` reads the prompt from piped stdin; `--allow-all-tools` is required for non-interactive mode. Convenient if Copilot CLI is already authenticated and not org-policy-restricted.',
|
|
154
214
|
detect: (env) => Boolean(env.GH_COPILOT_TOKEN || env.COPILOT_CLI_SESSION),
|
|
155
215
|
apiFallback: null,
|
|
156
216
|
},
|
|
@@ -159,8 +219,14 @@ export const ROSTER = [
|
|
|
159
219
|
family: 'anthropic',
|
|
160
220
|
model: '',
|
|
161
221
|
name: 'Claude Code',
|
|
222
|
+
// `claude -p` (== --print) is the headless mode: reads the prompt from piped
|
|
223
|
+
// stdin and prints the reply to stdout. Verified 2026-06-08 returning fast +
|
|
224
|
+
// clean (4s) — in this repo claude is the CALLER (CLAUDECODE=1), so detect()
|
|
225
|
+
// below correctly self-excludes it from its own Trident; the invoke is only
|
|
226
|
+
// exercised when a NON-claude caller picks claude as an auditor. When claude
|
|
227
|
+
// IS a valid (non-self) auditor, `claude -p` is the correct current form.
|
|
162
228
|
invoke: 'claude -p',
|
|
163
|
-
note: 'Anthropic; useful when you want a second Claude pass in a fresh session.',
|
|
229
|
+
note: 'Anthropic; useful when you want a second Claude pass in a fresh session. `claude -p` reads the prompt from piped stdin (headless --print mode). Self-excluded via detect() when claude is the caller.',
|
|
164
230
|
detect: (env) => Boolean(env.CLAUDECODE || env.CLAUDE_CODE_ENTRYPOINT || env.CLAUDE_PLUGIN_ROOT),
|
|
165
231
|
// model is resolved at call-time via model-refresh.js (24h-cached probe).
|
|
166
232
|
get apiFallback() {
|
|
@@ -206,7 +272,18 @@ export function isInstalled(id) {
|
|
|
206
272
|
const bin = entry.invoke.split(/\s+/)[0];
|
|
207
273
|
// POSIX `command -v` is the portable existence check; bash builtin form
|
|
208
274
|
// works reliably across macOS + Linux. spawnSync exit code = 0 → present.
|
|
209
|
-
|
|
275
|
+
//
|
|
276
|
+
// v1.6.0 diag-fix — `command -v` returns success for a regular file on PATH
|
|
277
|
+
// even when it is NOT executable, so a dangling/non-+x file shadowing a real
|
|
278
|
+
// auditor name made doctor report `cli_installed: true` for something that
|
|
279
|
+
// CANNOT actually be invoked (the "present-claimed but invocation-broken"
|
|
280
|
+
// false report). We now additionally require the resolved target to pass
|
|
281
|
+
// `[ -x ]` (or be a shell builtin/keyword/function with no filesystem path,
|
|
282
|
+
// which `command -v` reports without a leading slash — those are genuinely
|
|
283
|
+
// runnable). A real installed CLI is an executable file and still passes.
|
|
284
|
+
const probe = `p=$(command -v ${JSON.stringify(bin)} 2>/dev/null) || exit 1; ` +
|
|
285
|
+
`case "$p" in /*) [ -x "$p" ] ;; *) : ;; esac`;
|
|
286
|
+
const r = spawnSync('bash', ['-lc', probe], { timeout: 2000 });
|
|
210
287
|
const installed = r.status === 0;
|
|
211
288
|
_installedCache.set(id, { value: installed, ts: Date.now() });
|
|
212
289
|
return installed;
|
package/src/brain/tiered-llm.js
CHANGED
|
@@ -26,7 +26,7 @@ export function resolveTierModel(tier, env = process.env) {
|
|
|
26
26
|
throw new Error(`tiered-llm: unknown tier '${tier}'`);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
function defaultCallers() {
|
|
29
|
+
export function defaultCallers() {
|
|
30
30
|
return {
|
|
31
31
|
async local({ url, model, prompt, maxTokens }) {
|
|
32
32
|
// Ollama-compatible /api/generate -- streamless single-response mode.
|
|
@@ -39,8 +39,48 @@ function defaultCallers() {
|
|
|
39
39
|
const data = await res.json();
|
|
40
40
|
return { text: data.response || '', usage: { input: data.prompt_eval_count, output: data.eval_count }, model, via: 'local' };
|
|
41
41
|
},
|
|
42
|
-
async anthropic({ model, prompt, maxTokens, apiKey }) {
|
|
42
|
+
async openaiLocal({ url, model, prompt, maxTokens, temperature }) {
|
|
43
|
+
// OpenAI-compatible /chat/completions -- used by the bench to grade on a
|
|
44
|
+
// LOCAL vLLM-served synth model. `url` already includes the API base
|
|
45
|
+
// (e.g. http://localhost:8000/v1). enable_thinking:false is REQUIRED:
|
|
46
|
+
// Qwen3.6 is a hybrid-reasoning model that otherwise emits a thinking
|
|
47
|
+
// trace instead of the answer; vLLM passes this through to the chat
|
|
48
|
+
// template. NO silent fallback to a cloud model -- callTiered routes here
|
|
49
|
+
// WITHOUT a try/catch so a local-synth failure surfaces honestly.
|
|
50
|
+
const body = {
|
|
51
|
+
model,
|
|
52
|
+
messages: [{ role: 'user', content: prompt }],
|
|
53
|
+
max_tokens: maxTokens,
|
|
54
|
+
chat_template_kwargs: { enable_thinking: false },
|
|
55
|
+
};
|
|
56
|
+
if (typeof temperature === 'number') body.temperature = temperature;
|
|
57
|
+
const res = await fetch(url.replace(/\/$/, '') + '/chat/completions', {
|
|
58
|
+
method: 'POST',
|
|
59
|
+
headers: { 'Content-Type': 'application/json' },
|
|
60
|
+
body: JSON.stringify(body),
|
|
61
|
+
});
|
|
62
|
+
if (!res.ok) throw new Error(`openai-local LLM HTTP ${res.status}`);
|
|
63
|
+
const data = await res.json();
|
|
64
|
+
const choice = data.choices && data.choices[0];
|
|
65
|
+
if (!choice || !choice.message) throw new Error('openai-local LLM: missing choice in response');
|
|
66
|
+
const usage = data.usage || {};
|
|
67
|
+
return {
|
|
68
|
+
text: choice.message.content || '',
|
|
69
|
+
usage: { input: usage.prompt_tokens, output: usage.completion_tokens },
|
|
70
|
+
model,
|
|
71
|
+
via: 'openai-local',
|
|
72
|
+
};
|
|
73
|
+
},
|
|
74
|
+
async anthropic({ model, prompt, maxTokens, apiKey, temperature }) {
|
|
43
75
|
if (!apiKey) throw new Error('tiered-llm: ANTHROPIC_API_KEY (or IJFW_BRAIN_API_KEY) required for Anthropic fallback');
|
|
76
|
+
const payload = {
|
|
77
|
+
model,
|
|
78
|
+
max_tokens: maxTokens,
|
|
79
|
+
messages: [{ role: 'user', content: prompt }],
|
|
80
|
+
};
|
|
81
|
+
// Optional, backward-compatible: omitted -> API default. Used by the
|
|
82
|
+
// benchmark harness to pin temperature:0 for deterministic answers.
|
|
83
|
+
if (typeof temperature === 'number') payload.temperature = temperature;
|
|
44
84
|
const res = await fetch('https://api.anthropic.com/v1/messages', {
|
|
45
85
|
method: 'POST',
|
|
46
86
|
headers: {
|
|
@@ -48,11 +88,7 @@ function defaultCallers() {
|
|
|
48
88
|
'x-api-key': apiKey,
|
|
49
89
|
'anthropic-version': '2023-06-01',
|
|
50
90
|
},
|
|
51
|
-
body: JSON.stringify({
|
|
52
|
-
model,
|
|
53
|
-
max_tokens: maxTokens,
|
|
54
|
-
messages: [{ role: 'user', content: prompt }],
|
|
55
|
-
}),
|
|
91
|
+
body: JSON.stringify(payload),
|
|
56
92
|
});
|
|
57
93
|
if (!res.ok) throw new Error(`Anthropic HTTP ${res.status}`);
|
|
58
94
|
const data = await res.json();
|
|
@@ -67,6 +103,19 @@ export async function callTiered(tier, prompt, opts = {}) {
|
|
|
67
103
|
const model = resolveTierModel(tier, env);
|
|
68
104
|
const maxTokens = opts.maxTokens || DEFAULT_MAX_TOKENS[tier] || 512;
|
|
69
105
|
const callers = opts._callers || defaultCallers();
|
|
106
|
+
// Opt-in OpenAI-compatible local synth (bench): point at a vLLM server.
|
|
107
|
+
// FAILS LOUD by design -- no try/catch, no Anthropic fallback. If this
|
|
108
|
+
// errors, the bench must error too rather than silently grade on a cloud
|
|
109
|
+
// model from a different family (which would corrupt the experiment).
|
|
110
|
+
if (env.IJFW_BENCH_SYNTH_URL) {
|
|
111
|
+
return callers.openaiLocal({
|
|
112
|
+
url: env.IJFW_BENCH_SYNTH_URL,
|
|
113
|
+
model,
|
|
114
|
+
prompt,
|
|
115
|
+
maxTokens,
|
|
116
|
+
temperature: opts.temperature,
|
|
117
|
+
});
|
|
118
|
+
}
|
|
70
119
|
if (env.IJFW_BRAIN_LOCAL_URL) {
|
|
71
120
|
try {
|
|
72
121
|
return await callers.local({ url: env.IJFW_BRAIN_LOCAL_URL, model, prompt, maxTokens });
|
|
@@ -79,5 +128,6 @@ export async function callTiered(tier, prompt, opts = {}) {
|
|
|
79
128
|
prompt,
|
|
80
129
|
maxTokens,
|
|
81
130
|
apiKey: env.IJFW_BRAIN_API_KEY || env.ANTHROPIC_API_KEY,
|
|
131
|
+
temperature: opts.temperature,
|
|
82
132
|
});
|
|
83
133
|
}
|