@agjs/tsforge 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ // Eval sweep: run a seed spec N times across temperature + feature flag variants, score, tabulate.
2
+ // Run: TSFORGE_SEED=money TSFORGE_TEMPS=0,0.5 TSFORGE_REPEATS=3 bun run packages/core/scripts/sweep.ts
3
+ // A/B feature variants:
4
+ // TSFORGE_FEATURE_VARIANTS=ttsr,hashline (sweep across feature toggles)
5
+ // Each variant is dim=on|off (e.g. ttsr=on×hashline=off) creating a cartesian product.
6
+ import { mkdir, readdir, rm, stat } from "node:fs/promises";
7
+ import { join } from "node:path";
8
+ import { parseSpec } from "../src/spec";
9
+ import { buildGate, prettierWriteCommand } from "../src/detect-gate";
10
+ import { runSpec, qualityRepair } from "../src/loop";
11
+ import { modelAgent } from "../src/agent";
12
+ import { OpenAICompatibleProvider } from "../src/inference";
13
+ import { resolveActiveModel, resolveApiKey } from "../src/models-config";
14
+ import { summarize, type IRunRecord } from "../src/eval";
15
+ import { renderEvent } from "../src/render";
16
+ import type { ILoopEvent } from "../src/loop";
17
+
18
/**
 * Read a numeric setting from the environment.
 *
 * A missing or blank variable falls back to `fallback`. Anything that does not
 * parse to a finite number throws, so a typo fails the sweep up front instead
 * of silently becoming NaN (which would make the repeat loop run zero times).
 */
function numberFromEnv(name: string, fallback: string): number {
  const fromEnv = (process.env[name] ?? "").trim();
  const raw = fromEnv.length > 0 ? fromEnv : fallback;
  const value = Number(raw);

  if (!Number.isFinite(value)) {
    throw new Error(`${name} must be a number, got "${raw}"`);
  }

  return value;
}

/**
 * Parse a comma-separated temperature list.
 *
 * Blank entries (stray or trailing commas) are ignored rather than coerced to
 * 0, and a non-numeric entry throws.
 */
function parseTemps(list: string): number[] {
  return list
    .split(",")
    .map((t) => t.trim())
    .filter((t) => t.length > 0)
    .map((t) => {
      const value = Number(t);

      if (!Number.isFinite(value)) {
        throw new Error(`TSFORGE_TEMPS entries must be numbers, got "${t}"`);
      }

      return value;
    });
}

const DEFAULT_TEMPS = "0,0.5";

const seed = process.env.TSFORGE_SEED ?? "todo";
const requestedTemps = parseTemps(process.env.TSFORGE_TEMPS ?? DEFAULT_TEMPS);
// A list with no usable entry (e.g. an empty variable) is treated as unset.
const temps =
  requestedTemps.length > 0 ? requestedTemps : parseTemps(DEFAULT_TEMPS);
const repeats = numberFromEnv("TSFORGE_REPEATS", "3");
// Default quiet (batch). Set TSFORGE_STREAM=1 to watch the model live.
const stream = process.env.TSFORGE_STREAM === "1";
const qualityTarget = numberFromEnv("TSFORGE_QUALITY_TARGET", "5");
const qualityAttempts = numberFromEnv("TSFORGE_QUALITY_ATTEMPTS", "2");
27
+
28
+ /** Feature variants to sweep: a cartesian product of feature dimensions.
29
+ * Example: `ttsr,hashline` → generates [ttsr=on×hashline=on, ttsr=on×hashline=off,
30
+ * ttsr=off×hashline=on, ttsr=off×hashline=off]. Each dimension toggles via env var. */
31
+ type IFeatureVariant = Record<string, string>;
32
+
33
/**
 * Expand `TSFORGE_FEATURE_VARIANTS` (a comma-separated list of feature
 * dimensions) into every on/off combination of those dimensions.
 *
 * Each returned variant maps a dimension name to `"1"` (on) or `"0"` (off).
 * With no dimensions requested the result is a single empty variant.
 *
 * Repeated names are collapsed: a dimension listed twice would otherwise
 * double the number of variants while adding only duplicate configurations.
 */
function parseFeatureVariants(): IFeatureVariant[] {
  const requested = (process.env.TSFORGE_FEATURE_VARIANTS ?? "")
    .split(",")
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
  // Set keeps first-seen order, so the bit ↔ dimension mapping stays stable.
  const featureDims = [...new Set(requested)];

  if (featureDims.length === 0) {
    return [{}]; // No features to sweep → one baseline variant
  }

  // Cartesian product: each dimension has 2 states, so 2^n combinations.
  const numVariants = 2 ** featureDims.length;

  return Array.from({ length: numVariants }, (_, i) => {
    const variant: IFeatureVariant = {};

    featureDims.forEach((dim, d) => {
      // Bit d of the combination index i is the state of dimension d.
      variant[dim] = ((i >> d) & 1) === 1 ? "1" : "0";
    });

    return variant;
  });
}
65
+
66
/**
 * Translate a feature variant into the environment variables that toggle it.
 *
 * Only the dimensions `ttsr`, `hashline` and `lsp_write_feedback` are
 * recognised; any other dimension is left out of the result. A state of `"1"`
 * becomes `"1"`, every other state becomes `"0"`.
 */
function variantToEnvVars(variant: IFeatureVariant): Record<string, string> {
  const result: Record<string, string> = {};

  for (const dim of Object.keys(variant)) {
    const flag = variant[dim] === "1" ? "1" : "0";

    switch (dim) {
      case "ttsr":
        result.TSFORGE_TTSR = flag;
        break;
      case "hashline":
        result.TSFORGE_HASHLINE = flag;
        break;
      case "lsp_write_feedback":
        result.TSFORGE_LSP_WRITE_FEEDBACK = flag;
        break;
      default:
        // Unknown dimension: nothing to set.
        break;
    }
  }

  return result;
}
83
+
84
/**
 * Human-readable label for a variant, e.g. "hashline=off,ttsr=on"; an empty
 * variant is labelled "baseline". Parts are sorted by dimension name.
 *
 * Accepts either dimension names (`ttsr`) or the env-var names produced by
 * `variantToEnvVars` (`TSFORGE_TTSR`): a `TSFORGE_` prefix is stripped and the
 * remainder lower-cased. Both forms therefore yield the same label, which
 * keeps records for one variant grouped under a single label no matter which
 * map the caller has at hand.
 */
function variantLabel(variant: IFeatureVariant): string {
  const envPrefix = "TSFORGE_";

  const parts = Object.entries(variant)
    .map(([key, state]): [string, string] => [
      key.startsWith(envPrefix)
        ? key.slice(envPrefix.length).toLowerCase()
        : key,
      state,
    ])
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([dim, state]) => `${dim}=${state === "1" ? "on" : "off"}`);

  return parts.length > 0 ? parts.join(",") : "baseline";
}
92
+
93
const featureVariants = parseFeatureVariants();

const evalsRoot = join(import.meta.dir, "..", "..", "..", "evals");
// A local working copy (evals/<seed>) wins when it contains the seed's spec
// file; otherwise use the committed corpus (evals/corpus/<seed>), so checked-in
// seeds run without a manual copy step.
const localSeedDir = join(evalsRoot, seed);
const hasLocalSpec = await Bun.file(
  join(localSeedDir, `${seed}.spec.md`)
).exists();
const seedDir = hasLocalSpec ? localSeedDir : join(evalsRoot, "corpus", seed);
// Listed recursively so seeds with nested directories (e.g. a React app under
// `src/`) are copied whole; a flat seed directory lists the same either way.
const seedFiles = await readdir(seedDir, { recursive: true });
105
+
106
// Model resolution mirrors the CLI: explicit TSFORGE_* env wins, otherwise the
// active entry from ~/.tsforge/models.json is used. (An earlier version
// hardcoded the localhost default and ignored the registry, so a sweep could
// silently dial an unreachable endpoint and hang with an empty run.log.)
const { entry: activeModel } = await resolveActiveModel();

const repetitionPenaltyEnv = process.env.TSFORGE_REPETITION_PENALTY;

const provider = new OpenAICompatibleProvider({
  baseUrl: activeModel.baseUrl,
  model: activeModel.model,
  apiKey: resolveApiKey(activeModel),
  // Thinking tokens count against the limit, so leave room for reasoning + code.
  maxTokens: Number(process.env.TSFORGE_MAX_TOKENS ?? "16384"),
  // Opt-in only: a repetition penalty breaks rare temp-0 loops but DEGRADES
  // algorithmic code (it made `money` write unsafe/any code that failed the
  // strict gate). Off unless the env var is set.
  repetitionPenalty:
    repetitionPenaltyEnv === undefined
      ? undefined
      : Number(repetitionPenaltyEnv),
});

// The judge scores quality. TSFORGE_JUDGE_URL / TSFORGE_JUDGE_MODEL /
// TSFORGE_JUDGE_KEY can point it at a different (e.g. flagship) model to
// measure the gap; each one that is unset falls back to the active model, so
// by default the model judges itself.
const judgeProvider = new OpenAICompatibleProvider({
  baseUrl: process.env.TSFORGE_JUDGE_URL ?? activeModel.baseUrl,
  model: process.env.TSFORGE_JUDGE_MODEL ?? activeModel.model,
  apiKey: process.env.TSFORGE_JUDGE_KEY ?? resolveApiKey(activeModel),
});
134
+
135
/**
 * Current local time as `YYYYMMDD-HHMMSS`, so names carrying the stamp sort
 * chronologically (newest last).
 */
function stamp(): string {
  const now = new Date();
  const two = (n: number): string => String(n).padStart(2, "0");

  const datePart = [
    String(now.getFullYear()),
    two(now.getMonth() + 1), // getMonth() is zero-based
    two(now.getDate()),
  ].join("");
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map(two)
    .join("");

  return `${datePart}-${timePart}`;
}
142
+
143
// One record per attempted run (successful or errored), summarised afterwards.
const records: IRunRecord[] = [];

for (const variant of featureVariants) {
  const vLabel = variantLabel(variant);
  const variantEnv = variantToEnvVars(variant);

  for (const temp of temps) {
    const label = `${vLabel} temp=${temp}`;
    let attempt = 0;

    while (attempt < repeats) {
      const ordinal = attempt + 1;
      const runId = `${seed}-${vLabel}-t${temp}-${stamp()}-${ordinal}`;
      const runDir = join(evalsRoot, "runs", runId);

      // A single failing run (e.g. a request timing out) must not abort the
      // whole sweep: log it, record it as a non-passing run, and move on.
      try {
        await runOne(runId, runDir, temp, attempt, variantEnv);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);

        records.push({ label, passed: false, cycles: 0, ms: 0 });
        process.stdout.write(
          ` ${seed} ${label} #${ordinal}: ERRORED (${reason}) → ${runId}\n`
        );
      }

      attempt += 1;
    }
  }
}
174
+
175
/**
 * Apply a variant's env vars to `process.env` and return a function that puts
 * the environment back exactly as it was.
 *
 * @param variant env-var name → value to set for the duration of a run
 * @returns restore callback: keys that had a value get it back, keys that were
 *   unset before are removed again
 */
function setVariantEnv(variant: Record<string, string>): () => void {
  const saved: Record<string, string | undefined> = {};

  for (const [key, value] of Object.entries(variant)) {
    saved[key] = process.env[key];
    process.env[key] = value;
  }

  return () => {
    for (const [key, value] of Object.entries(saved)) {
      if (value === undefined) {
        // The key did not exist before we set it, so it has to be removed —
        // skipping it would leave the variant's value behind. (Assigning
        // `undefined` is not an option: process.env stores strings.)
        Reflect.deleteProperty(process.env, key);
      } else {
        process.env[key] = value;
      }
    }
  };
}
196
+
197
/**
 * Create the run directory and copy every seed file into it under the same
 * relative path. Directory entries in the seed listing are skipped.
 */
async function setupRunDir(dir: string): Promise<void> {
  await mkdir(dir, { recursive: true });

  for (const relPath of seedFiles) {
    const from = join(seedDir, relPath);
    const info = await stat(from);

    if (!info.isDirectory()) {
      await Bun.write(join(dir, relPath), Bun.file(from));
    }
  }
}
211
+
212
/**
 * Delete every file named by the spec's tasks from the run directory, unless
 * the spec's mode is "existing", in which case nothing is removed.
 */
async function startRed(
  dir: string,
  spec: ReturnType<typeof parseSpec>
): Promise<void> {
  if (spec.mode === "existing") {
    return;
  }

  for (const { files } of spec.tasks) {
    for (const relPath of files) {
      // force: a file that is not there is not an error
      await rm(join(dir, relPath), { force: true });
    }
  }
}
225
+
226
/**
 * Execute one sweep run: prepare the run directory, run the gated spec,
 * optionally drive a quality-repair pass, then write `result.json`, append a
 * record to `records` and print a one-line result.
 *
 * @param runId identifier written to result.json and the printed result line
 * @param runDir directory the seed is copied into and the run executes in
 * @param temp sampling temperature passed to the agent and to `runSpec`
 * @param i zero-based repeat index (printed as `#i+1`)
 * @param variantEnv env vars applied for the duration of the run and restored
 *   afterwards, whether the run succeeds or throws
 */
async function runOne(
  runId: string,
  runDir: string,
  temp: number,
  i: number,
  variantEnv: Record<string, string> = {}
): Promise<void> {
  const restore = setVariantEnv(variantEnv);

  try {
    await setupRunDir(runDir);

    const spec = parseSpec(
      await Bun.file(join(runDir, `${seed}.spec.md`)).text()
    );

    await startRed(runDir, spec);

    // Apply tsforge's STRICT FLOOR (bundled tsc-strict + eslint) to the eval
    // gate — the SAME gate the interactive CLI builds. Eval mode otherwise
    // trusts the spec's `accept` verbatim, so an error the tests don't execute
    // (an unguarded index access, an `as any`) slipped through as GREEN. Every
    // task and the whole-spec verify must clear the strict floor BEFORE its
    // functional tests count.
    // prettier --write runs FIRST (auto-format), then tsc-strict + eslint: the
    // model never hand-formats, but the gate still enforces type-safety + idioms.
    const strictGate = `${prettierWriteCommand()} && ${(await buildGate(runDir)).command}`;
    const gatedSpec = {
      ...spec,
      tasks: spec.tasks.map((t) => ({
        ...t,
        accept: `${strictGate} && ${t.accept}`,
      })),
      verify:
        spec.verify.length > 0 ? `${strictGate} && ${spec.verify}` : strictGate,
    };

    // Optional cap on reasoning per call (trims turn time) — A/B the sweet spot
    // via env. Shared by the agent and the runSpec options.
    const thinkingBudget =
      process.env.TSFORGE_THINKING_BUDGET === undefined
        ? {}
        : { thinkingTokenBudget: Number(process.env.TSFORGE_THINKING_BUDGET) };

    // Every run gets a full transcript at <runDir>/run.log; it is streamed to
    // the terminal too when TSFORGE_STREAM=1.
    const log = Bun.file(join(runDir, "run.log")).writer();

    let result: Awaited<ReturnType<typeof runSpec>>;
    let ms = 0;
    let cycles = 0;
    let passed = false;
    let quality: number | undefined;
    let judgeNotes = "";

    try {
      const onEvent = (e: ILoopEvent): void => {
        void log.write(renderEvent(e, { color: false }));
        // Flush per event — otherwise Bun's FileSink buffers and `tail -f`
        // shows nothing until the run ends. The log must be live.
        void log.flush();

        if (stream) {
          process.stdout.write(renderEvent(e, { color: true }));
        }
      };

      const agent = modelAgent(provider, {
        temperature: temp,
        ...thinkingBudget,
      });
      const started = performance.now();

      result = await runSpec(gatedSpec, runDir, provider, {
        onEvent,
        temperature: temp,
        ...thinkingBudget,
      });

      ms = Math.round(performance.now() - started);
      cycles = result.results.reduce((acc, r) => acc + r.cycles, 0);
      passed = result.status === "done";

      // Once green, drive QUALITY up: judge → improve-per-critique → re-judge.
      const firstTask = spec.tasks[0];

      if (passed && firstTask !== undefined) {
        const specText = await Bun.file(
          join(runDir, `${seed}.spec.md`)
        ).text();

        // The judge is a MEASUREMENT, not part of the build. If it fails (e.g.
        // the server times out), the implement result still stands — degrade
        // to "quality unknown" rather than erroring out a successful run.
        try {
          const qr = await qualityRepair(
            firstTask,
            runDir,
            agent,
            judgeProvider,
            { goal: spec.title, criteria: specText },
            { target: qualityTarget, maxAttempts: qualityAttempts, onEvent }
          );

          quality = qr.quality;
          judgeNotes = qr.notes;
        } catch (err) {
          judgeNotes = `judge unavailable: ${err instanceof Error ? err.message : String(err)}`;
        }
      }
    } finally {
      // End the transcript on every path, including when the run throws, so
      // the sink is not left open for the remainder of the sweep.
      await log.end();
    }

    // Structured per-run artifact for comparison alongside run.log + the code.
    // Include the feature variant so analysis can reconstruct the conditions.
    await Bun.write(
      join(runDir, "result.json"),
      JSON.stringify(
        {
          seed,
          runId,
          temperature: temp,
          features: variantEnv,
          status: result.status,
          cycles,
          ms,
          quality,
          judgeNotes,
          tasks: result.results,
        },
        null,
        2
      )
    );

    const edits = result.results.reduce((a, r) => a + (r.edits ?? 0), 0);
    const regressions = result.results.reduce(
      (a, r) => a + (r.regressions ?? 0),
      0
    );

    const vLabel = variantLabel(variantEnv);

    records.push({
      label: `${vLabel} temp=${temp}`,
      passed,
      cycles,
      ms,
      quality,
    });
    process.stdout.write(
      ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : "blocked"} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
    );
  } finally {
    restore();
  }
}
372
+
373
// Aggregate the per-run records and print one line per summary entry.
const summaries = summarize(records);

process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);

// Size the label column to the longest label (never narrower than 10). Labels
// are of the form "<variant> temp=<t>", which is always longer than 10
// characters, so a fixed padEnd(10) never pads and the columns drift.
const labelWidth = summaries.reduce(
  (width, s) => Math.max(width, s.label.length),
  10
);

for (const s of summaries) {
  process.stdout.write(
    `${s.label.padEnd(labelWidth)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms\n`
  );
}

// Persist the raw records and the summaries next to the run directories.
const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);

await Bun.write(
  outPath,
  JSON.stringify({ seed, temps, repeats, records, summaries }, null, 2)
);
process.stdout.write(`\nsaved ${outPath}\n`);
package/src/cli.ts CHANGED
@@ -11,6 +11,7 @@ import {
11
11
  } from "./loop";
12
12
  import {
13
13
  PROVIDER_LIMITS,
14
+ PROVIDER_DEFAULTS,
14
15
  OpenAICompatibleProvider,
15
16
  type IOpenAICompatibleConfig,
16
17
  } from "./inference";
@@ -261,7 +262,12 @@ async function detectContextWindow(
261
262
 
262
263
  const entries = data.data.filter(isRecord);
263
264
  const match = entries.find((e) => e.id === entry.model) ?? entries[0];
264
- const len = match?.max_model_len;
265
+ // vLLM uses `max_model_len`; other servers expose `context_window` or
266
+ // `max_position_embeddings` — accept whichever is present.
267
+ const len =
268
+ match?.max_model_len ??
269
+ match?.context_window ??
270
+ match?.max_position_embeddings;
265
271
 
266
272
  return typeof len === "number" && Number.isFinite(len) ? len : undefined;
267
273
  } catch {
@@ -328,6 +334,16 @@ export function providerConfig(entry: IModelEntry): IOpenAICompatibleConfig {
328
334
  // instead of emitting tool calls (→ no files written). The StreamGuard is
329
335
  // the targeted loop protection. Opt in only to experiment.
330
336
  ...(repetitionPenalty === undefined ? {} : { repetitionPenalty }),
337
+ // Provider dialect + escape hatches — passed straight through so any
338
+ // OpenAI-ish endpoint (DeepSeek, OpenAI o-series, custom gateways) works.
339
+ ...(entry.reasoning === undefined ? {} : { reasoning: entry.reasoning }),
340
+ ...(entry.reasoningEffort === undefined
341
+ ? {}
342
+ : { reasoningEffort: entry.reasoningEffort }),
343
+ ...(entry.extraBody === undefined ? {} : { extraBody: entry.extraBody }),
344
+ ...(entry.extraHeaders === undefined
345
+ ? {}
346
+ : { extraHeaders: entry.extraHeaders }),
331
347
  };
332
348
  }
333
349
 
@@ -335,6 +351,26 @@ function makeProvider(entry: IModelEntry): OpenAICompatibleProvider {
335
351
  return new OpenAICompatibleProvider(providerConfig(entry));
336
352
  }
337
353
 
354
/**
 * Catch the common footgun: a non-local baseUrl paired with the leftover
 * default `model`, which then 400s ("model not supported") on that host.
 *
 * Prints a warning to stdout when the entry's model equals
 * `PROVIDER_DEFAULTS.model` and the baseUrl host is not a loopback host.
 * Does nothing when the baseUrl cannot be parsed as a URL.
 */
function warnDefaultModelOnRemote(entry: IModelEntry): void {
  let host: string;

  try {
    host = new URL(entry.baseUrl).hostname;
  } catch {
    return;
  }

  // URL.hostname keeps the square brackets on IPv6 literals, so the loopback
  // address arrives here as "[::1]" — a comparison against bare "::1" alone
  // never matches and would flag a local IPv6 endpoint as remote.
  const loopbackHosts = ["localhost", "127.0.0.1", "[::1]", "::1"];
  const remote = !loopbackHosts.includes(host);

  if (remote && entry.model === PROVIDER_DEFAULTS.model) {
    process.stdout.write(
      ` ⚠ models.json: model is still "${PROVIDER_DEFAULTS.model}" (the default) but baseUrl is ${host} — set the entry's "model" to a name that host supports.\n`
    );
  }
}
373
+
338
374
  /** Print the model registry with ★ on the active one (the `/model` listing). */
339
375
  async function listModels(
340
376
  provider: OpenAICompatibleProvider,
@@ -784,6 +820,8 @@ async function repl(args: ICliArgs): Promise<number> {
784
820
  const provider = makeProvider(activeModel.entry);
785
821
  let activeName = activeModel.name;
786
822
 
823
+ warnDefaultModelOnRemote(activeModel.entry);
824
+
787
825
  // Best-effort cleanup of stale sessions on every launch.
788
826
  await pruneSessions();
789
827
 
@@ -118,6 +118,26 @@ export interface IOpenAICompatibleConfig {
118
118
  * correctness. Omitted (1.0 = off) by default; set it on code-gen providers.
119
119
  */
120
120
  repetitionPenalty?: number;
121
+ /**
122
+ * How this provider wants reasoning/thinking expressed on the wire:
123
+ * - `qwen` (default): `chat_template_kwargs.enable_thinking` + `thinking_token_budget` (vLLM).
124
+ * - `deepseek`: top-level `thinking: { type }` + `reasoning_effort`; never sends
125
+ * `tool_choice: "required"` (DeepSeek's thinking mode rejects it).
126
+ * - `openai`: `reasoning_effort`; uses `max_completion_tokens` and omits `temperature` (o-series).
127
+ * - `none`: no reasoning fields.
128
+ */
129
+ reasoning?: ReasoningStyle;
130
+ /** Reasoning effort for `deepseek`/`openai` styles (maps to `reasoning_effort`). */
131
+ reasoningEffort?: "low" | "medium" | "high";
132
+ /** Arbitrary fields merged into the request body LAST (override anything above) —
133
+ * the escape hatch for any provider-specific param. */
134
+ extraBody?: Record<string, unknown>;
135
+ /** Arbitrary request headers (e.g. Azure `api-key`, Anthropic `x-api-key`).
136
+ * `${VAR}` in values is interpolated from the environment. */
137
+ extraHeaders?: Record<string, string>;
121
138
  /** Injectable for tests; defaults to global fetch. */
122
139
  fetch?: typeof fetch;
123
140
  }
141
+
142
+ /** Provider reasoning-param dialect. */
143
+ export type ReasoningStyle = "qwen" | "deepseek" | "openai" | "none";
@@ -7,8 +7,13 @@ import type {
7
7
  } from "./inference.types";
8
8
  import { PROVIDER_LIMITS } from "./inference.constants";
9
9
  import { fetchWithRetry } from "./transport";
10
- import { toWire, parseResponse } from "./wire";
10
+ import { parseResponse } from "./wire";
11
11
  import { streamResponse } from "./stream";
12
+ import {
13
+ buildRequestBody,
14
+ buildRequestHeaders,
15
+ chatCompletionsUrl,
16
+ } from "./request";
12
17
 
13
18
  export { salvageToolCalls } from "./wire";
14
19
 
@@ -40,38 +45,10 @@ export class OpenAICompatibleProvider implements IProvider {
40
45
  ): Promise<IModelResponse> {
41
46
  const doFetch = this.cfg.fetch ?? fetch;
42
47
  const streaming = opts.onToken !== undefined;
43
- const headers: Record<string, string> = {
44
- "content-type": "application/json",
45
- };
46
-
47
- if (this.cfg.apiKey !== undefined) {
48
- headers.authorization = `Bearer ${this.cfg.apiKey}`;
49
- }
50
-
51
- const body = JSON.stringify({
52
- model: this.cfg.model,
53
- messages: messages.map(toWire),
54
- max_tokens: this.cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens,
55
- temperature: opts.temperature,
56
- ...(this.cfg.repetitionPenalty === undefined
57
- ? {}
58
- : { repetition_penalty: this.cfg.repetitionPenalty }),
59
- ...(opts.tools === undefined
60
- ? {}
61
- : { tools: opts.tools, tool_choice: opts.toolChoice ?? "auto" }),
62
- ...(opts.enableThinking === undefined
63
- ? {}
64
- : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
65
- ...(opts.thinkingTokenBudget === undefined
66
- ? {}
67
- : { thinking_token_budget: opts.thinkingTokenBudget }),
68
- // include_usage → the stream emits a final chunk carrying token `usage`
69
- // (otherwise a streamed response reports none). Non-stream replies carry it
70
- // by default.
71
- ...(streaming
72
- ? { stream: true, stream_options: { include_usage: true } }
73
- : {}),
74
- });
48
+ const headers = buildRequestHeaders(this.cfg);
49
+ const body = JSON.stringify(
50
+ buildRequestBody(this.cfg, messages, opts, streaming)
51
+ );
75
52
 
76
53
  // Retry transient CONNECTION blips (socket close / unable-to-connect) — the
77
54
  // connect happens before any stream starts, so retrying is safe for both
@@ -79,7 +56,7 @@ export class OpenAICompatibleProvider implements IProvider {
79
56
  // a network hiccup from wrecking an eval run.
80
57
  const res = await fetchWithRetry(
81
58
  doFetch,
82
- `${this.cfg.baseUrl}/chat/completions`,
59
+ chatCompletionsUrl(this.cfg.baseUrl),
83
60
  headers,
84
61
  body,
85
62
  this.cfg.timeoutMs ?? PROVIDER_LIMITS.requestTimeoutMs,