@agjs/tsforge 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agjs/tsforge",
3
3
  "type": "module",
4
- "version": "0.1.15",
4
+ "version": "0.1.16",
5
5
  "license": "MIT",
6
6
  "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
7
7
  "repository": {
package/src/cli.ts CHANGED
@@ -11,6 +11,7 @@ import {
11
11
  } from "./loop";
12
12
  import {
13
13
  PROVIDER_LIMITS,
14
+ PROVIDER_DEFAULTS,
14
15
  OpenAICompatibleProvider,
15
16
  type IOpenAICompatibleConfig,
16
17
  } from "./inference";
@@ -261,7 +262,12 @@ async function detectContextWindow(
261
262
 
262
263
  const entries = data.data.filter(isRecord);
263
264
  const match = entries.find((e) => e.id === entry.model) ?? entries[0];
264
- const len = match?.max_model_len;
265
+ // vLLM uses `max_model_len`; other servers expose `context_window` or
266
+ // `max_position_embeddings` — accept whichever is present.
267
+ const len =
268
+ match?.max_model_len ??
269
+ match?.context_window ??
270
+ match?.max_position_embeddings;
265
271
 
266
272
  return typeof len === "number" && Number.isFinite(len) ? len : undefined;
267
273
  } catch {
@@ -328,6 +334,16 @@ export function providerConfig(entry: IModelEntry): IOpenAICompatibleConfig {
328
334
  // instead of emitting tool calls (→ no files written). The StreamGuard is
329
335
  // the targeted loop protection. Opt in only to experiment.
330
336
  ...(repetitionPenalty === undefined ? {} : { repetitionPenalty }),
337
+ // Provider dialect + escape hatches — passed straight through so any
338
+ // OpenAI-ish endpoint (DeepSeek, OpenAI o-series, custom gateways) works.
339
+ ...(entry.reasoning === undefined ? {} : { reasoning: entry.reasoning }),
340
+ ...(entry.reasoningEffort === undefined
341
+ ? {}
342
+ : { reasoningEffort: entry.reasoningEffort }),
343
+ ...(entry.extraBody === undefined ? {} : { extraBody: entry.extraBody }),
344
+ ...(entry.extraHeaders === undefined
345
+ ? {}
346
+ : { extraHeaders: entry.extraHeaders }),
331
347
  };
332
348
  }
333
349
 
@@ -335,6 +351,26 @@ function makeProvider(entry: IModelEntry): OpenAICompatibleProvider {
335
351
  return new OpenAICompatibleProvider(providerConfig(entry));
336
352
  }
337
353
 
/**
 * Catch the common footgun: a cloud baseUrl paired with the leftover qwen
 * default `model`, which then 400s ("model not supported") on that host.
 *
 * Writes a one-line warning to stdout when `entry.baseUrl` parses to a
 * non-loopback host while `entry.model` still equals `PROVIDER_DEFAULTS.model`.
 * A `baseUrl` that cannot be parsed as a URL is ignored (no warning, no throw).
 *
 * @param entry - The active model registry entry to inspect.
 */
function warnDefaultModelOnRemote(entry: IModelEntry): void {
  let host: string;

  try {
    host = new URL(entry.baseUrl).hostname;
  } catch {
    return;
  }

  // WHATWG `URL.hostname` keeps the square brackets on IPv6 literals, so the
  // IPv6 loopback shows up as "[::1]", never "::1". The bare form is still
  // accepted so the previous comparison keeps matching wherever it did.
  const isLoopback =
    host === "localhost" ||
    host === "127.0.0.1" ||
    host === "[::1]" ||
    host === "::1";

  if (!isLoopback && entry.model === PROVIDER_DEFAULTS.model) {
    process.stdout.write(
      ` ⚠ models.json: model is still "${PROVIDER_DEFAULTS.model}" (the default) but baseUrl is ${host} — set the entry's "model" to a name that host supports.\n`
    );
  }
}
373
+
338
374
  /** Print the model registry with ★ on the active one (the `/model` listing). */
339
375
  async function listModels(
340
376
  provider: OpenAICompatibleProvider,
@@ -784,6 +820,8 @@ async function repl(args: ICliArgs): Promise<number> {
784
820
  const provider = makeProvider(activeModel.entry);
785
821
  let activeName = activeModel.name;
786
822
 
823
+ warnDefaultModelOnRemote(activeModel.entry);
824
+
787
825
  // Best-effort cleanup of stale sessions on every launch.
788
826
  await pruneSessions();
789
827
 
@@ -118,6 +118,26 @@ export interface IOpenAICompatibleConfig {
118
118
  * correctness. Omitted (1.0 = off) by default; set it on code-gen providers.
119
119
  */
120
120
  repetitionPenalty?: number;
121
+ /**
122
+ * How this provider wants reasoning/thinking expressed on the wire:
123
+ * - `qwen` (default): `chat_template_kwargs.enable_thinking` + `thinking_token_budget` (vLLM).
124
+ * - `deepseek`: top-level `thinking: { type }` + `reasoning_effort`; never sends
125
+ * `tool_choice: "required"` (DeepSeek's thinking mode rejects it).
126
+ * - `openai`: `reasoning_effort`; uses `max_completion_tokens` and omits `temperature` (o-series).
127
+ * - `none`: no reasoning fields.
128
+ */
129
+ reasoning?: ReasoningStyle;
130
+ /** Reasoning effort for `deepseek`/`openai` styles (maps to `reasoning_effort`). */
131
+ reasoningEffort?: "low" | "medium" | "high";
132
+ /** Arbitrary fields merged into the request body LAST (override anything above) —
133
+ * the escape hatch for any provider-specific param. */
134
+ extraBody?: Record<string, unknown>;
135
+ /** Arbitrary request headers (e.g. Azure `api-key`, Anthropic `x-api-key`).
136
+ * `${VAR}` in values is interpolated from the environment. */
137
+ extraHeaders?: Record<string, string>;
121
138
  /** Injectable for tests; defaults to global fetch. */
122
139
  fetch?: typeof fetch;
123
140
  }
141
+
/**
 * Provider reasoning-param dialect: selects which reasoning/thinking fields
 * are put on the wire for a provider (`none` sends no reasoning fields).
 */
export type ReasoningStyle =
  | "qwen"
  | "deepseek"
  | "openai"
  | "none";
@@ -7,8 +7,13 @@ import type {
7
7
  } from "./inference.types";
8
8
  import { PROVIDER_LIMITS } from "./inference.constants";
9
9
  import { fetchWithRetry } from "./transport";
10
- import { toWire, parseResponse } from "./wire";
10
+ import { parseResponse } from "./wire";
11
11
  import { streamResponse } from "./stream";
12
+ import {
13
+ buildRequestBody,
14
+ buildRequestHeaders,
15
+ chatCompletionsUrl,
16
+ } from "./request";
12
17
 
13
18
  export { salvageToolCalls } from "./wire";
14
19
 
@@ -40,38 +45,10 @@ export class OpenAICompatibleProvider implements IProvider {
40
45
  ): Promise<IModelResponse> {
41
46
  const doFetch = this.cfg.fetch ?? fetch;
42
47
  const streaming = opts.onToken !== undefined;
43
- const headers: Record<string, string> = {
44
- "content-type": "application/json",
45
- };
46
-
47
- if (this.cfg.apiKey !== undefined) {
48
- headers.authorization = `Bearer ${this.cfg.apiKey}`;
49
- }
50
-
51
- const body = JSON.stringify({
52
- model: this.cfg.model,
53
- messages: messages.map(toWire),
54
- max_tokens: this.cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens,
55
- temperature: opts.temperature,
56
- ...(this.cfg.repetitionPenalty === undefined
57
- ? {}
58
- : { repetition_penalty: this.cfg.repetitionPenalty }),
59
- ...(opts.tools === undefined
60
- ? {}
61
- : { tools: opts.tools, tool_choice: opts.toolChoice ?? "auto" }),
62
- ...(opts.enableThinking === undefined
63
- ? {}
64
- : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
65
- ...(opts.thinkingTokenBudget === undefined
66
- ? {}
67
- : { thinking_token_budget: opts.thinkingTokenBudget }),
68
- // include_usage → the stream emits a final chunk carrying token `usage`
69
- // (otherwise a streamed response reports none). Non-stream replies carry it
70
- // by default.
71
- ...(streaming
72
- ? { stream: true, stream_options: { include_usage: true } }
73
- : {}),
74
- });
48
+ const headers = buildRequestHeaders(this.cfg);
49
+ const body = JSON.stringify(
50
+ buildRequestBody(this.cfg, messages, opts, streaming)
51
+ );
75
52
 
76
53
  // Retry transient CONNECTION blips (socket close / unable-to-connect) — the
77
54
  // connect happens before any stream starts, so retrying is safe for both
@@ -79,7 +56,7 @@ export class OpenAICompatibleProvider implements IProvider {
79
56
  // a network hiccup from wrecking an eval run.
80
57
  const res = await fetchWithRetry(
81
58
  doFetch,
82
- `${this.cfg.baseUrl}/chat/completions`,
59
+ chatCompletionsUrl(this.cfg.baseUrl),
83
60
  headers,
84
61
  body,
85
62
  this.cfg.timeoutMs ?? PROVIDER_LIMITS.requestTimeoutMs,
@@ -0,0 +1,148 @@
1
+ import type {
2
+ IChatMessage,
3
+ ICompleteOptions,
4
+ IOpenAICompatibleConfig,
5
+ ReasoningStyle,
6
+ } from "./inference.types";
7
+ import { PROVIDER_LIMITS } from "./inference.constants";
8
+ import { toWire } from "./wire";
9
+
/**
 * Expand every `${VAR}` reference in `value` using `env`.
 *
 * Only names made of ASCII letters, digits and underscores are recognised;
 * anything else (e.g. `${a-b}` or `${}`) is left untouched. A recognised name
 * that is absent from `env` expands to the empty string.
 *
 * @param value - Text that may contain `${VAR}` placeholders.
 * @param env - Lookup table for variable values.
 * @returns `value` with all recognised placeholders substituted.
 */
function interpolateEnv(
  value: string,
  env: Readonly<Record<string, string | undefined>>
): string {
  const placeholder = /\$\{([A-Za-z0-9_]+)\}/g;

  const resolve = (_whole: string, variable: string): string => {
    const found = env[variable];

    return found ?? "";
  };

  return value.replace(placeholder, resolve);
}
20
+
/** Resolve the reasoning dialect for `cfg`, falling back to `"qwen"` when the
 * config does not set one. */
function style(cfg: IOpenAICompatibleConfig): ReasoningStyle {
  const fallback: ReasoningStyle = "qwen";
  const configured = cfg.reasoning;

  return configured ?? fallback;
}
24
+
/**
 * Provider-specific reasoning/thinking fields for the request body.
 *
 * - `qwen`: `chat_template_kwargs.enable_thinking` and `thinking_token_budget`,
 *   each only when the matching option is set.
 * - `deepseek`: `thinking: { type: "enabled" | "disabled" }` when
 *   `opts.enableThinking` is set, plus `reasoning_effort` when configured.
 * - `openai`: `reasoning_effort` when configured.
 * - `none`: no fields.
 *
 * @param cfg - Provider config (dialect and reasoning effort are read).
 * @param opts - Per-call options (thinking toggle and budget are read).
 * @returns The fields to spread into the request body.
 */
function reasoningFields(
  cfg: IOpenAICompatibleConfig,
  opts: ICompleteOptions
): Record<string, unknown> {
  const fields: Record<string, unknown> = {};
  const effort = cfg.reasoningEffort;

  switch (style(cfg)) {
    case "qwen":
      if (opts.enableThinking !== undefined) {
        fields.chat_template_kwargs = { enable_thinking: opts.enableThinking };
      }

      if (opts.thinkingTokenBudget !== undefined) {
        fields.thinking_token_budget = opts.thinkingTokenBudget;
      }

      return fields;
    case "deepseek":
      if (opts.enableThinking !== undefined) {
        fields.thinking = {
          type: opts.enableThinking ? "enabled" : "disabled",
        };
      }

      if (effort !== undefined) {
        fields.reasoning_effort = effort;
      }

      return fields;
    case "openai":
      if (effort !== undefined) {
        fields.reasoning_effort = effort;
      }

      return fields;
    case "none":
      return fields;
  }
}
61
+
/**
 * The output-token cap field. The `openai` dialect names it
 * `max_completion_tokens` (o-series renamed `max_tokens`); every other dialect
 * uses `max_tokens`. The value is `cfg.maxTokens`, or
 * `PROVIDER_LIMITS.maxTokens` when the config leaves it unset.
 */
function tokenCapField(cfg: IOpenAICompatibleConfig): Record<string, number> {
  const limit = cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens;

  if (style(cfg) === "openai") {
    return { max_completion_tokens: limit };
  }

  return { max_tokens: limit };
}
70
+
/**
 * Tool-choice clamped for provider constraints: DeepSeek's thinking mode
 * rejects `tool_choice: "required"`, so under the `deepseek` dialect a
 * `"required"` request is downgraded to `"auto"`. Every other combination is
 * returned unchanged.
 *
 * @param cfg - Provider config (only the dialect is consulted).
 * @param requested - The tool choice the caller asked for.
 * @returns The tool choice to put on the wire.
 */
function toolChoiceFor(
  cfg: IOpenAICompatibleConfig,
  requested: "auto" | "required" | "none"
): "auto" | "required" | "none" {
  const isDeepseek = style(cfg) === "deepseek";

  return isDeepseek && requested === "required" ? "auto" : requested;
}
83
+
/**
 * Build the request body object (pure).
 *
 * Key order is: `model`, `messages`, the token cap, `temperature`,
 * `repetition_penalty`, `tools`/`tool_choice`, the dialect's reasoning fields,
 * the streaming fields, then `extraBody`. That order keeps the qwen default
 * byte-for-byte identical, and because `extraBody` is merged last it can
 * override anything for a fully custom provider.
 *
 * @param cfg - Provider config.
 * @param messages - Conversation to send; each entry is converted with `toWire`.
 * @param opts - Per-call options (temperature, tools, thinking settings).
 * @param streaming - When true, adds `stream` and `stream_options.include_usage`.
 * @returns The plain object to be JSON-serialised as the request body.
 */
export function buildRequestBody(
  cfg: IOpenAICompatibleConfig,
  messages: IChatMessage[],
  opts: ICompleteOptions,
  streaming: boolean
): Record<string, unknown> {
  // o-series rejects `temperature` entirely; everywhere else send it only when set.
  const sendTemperature =
    style(cfg) !== "openai" && opts.temperature !== undefined;

  const head = {
    model: cfg.model,
    messages: messages.map(toWire),
    ...tokenCapField(cfg),
  };

  const sampling: Record<string, unknown> = {};

  if (sendTemperature) {
    sampling.temperature = opts.temperature;
  }

  if (cfg.repetitionPenalty !== undefined) {
    sampling.repetition_penalty = cfg.repetitionPenalty;
  }

  const tooling =
    opts.tools === undefined
      ? {}
      : {
          tools: opts.tools,
          tool_choice: toolChoiceFor(cfg, opts.toolChoice ?? "auto"),
        };

  // include_usage makes the stream end with a chunk carrying token usage.
  const streamFields = streaming
    ? { stream: true, stream_options: { include_usage: true } }
    : {};

  return {
    ...head,
    ...sampling,
    ...tooling,
    ...reasoningFields(cfg, opts),
    ...streamFields,
    ...(cfg.extraBody ?? {}),
  };
}
118
+
/**
 * Build request headers: JSON content type + Bearer auth (when a key is set) +
 * any `extraHeaders` (with `${VAR}` interpolation), which can override the
 * defaults.
 *
 * HTTP header names are case-insensitive, so an extra header replaces a
 * default regardless of letter case (e.g. `Authorization` replaces the built-in
 * `authorization`). Without that, both spellings would be sent and `fetch`
 * would join their values into one header instead of overriding. The replacing
 * entry keeps the spelling supplied in `extraHeaders`.
 *
 * @param cfg - Provider config (`apiKey` and `extraHeaders` are read).
 * @param env - Source for `${VAR}` interpolation; defaults to `process.env`.
 * @returns A fresh header record; each name appears once, case-insensitively.
 */
export function buildRequestHeaders(
  cfg: IOpenAICompatibleConfig,
  env: Readonly<Record<string, string | undefined>> = process.env
): Record<string, string> {
  // Keyed by lower-cased name so differently-cased duplicates collapse; the
  // stored pair remembers the spelling that was set last.
  const byName = new Map<string, [string, string]>();
  const put = (name: string, value: string): void => {
    byName.set(name.toLowerCase(), [name, value]);
  };

  put("content-type", "application/json");

  if (cfg.apiKey !== undefined) {
    put("authorization", `Bearer ${cfg.apiKey}`);
  }

  for (const [key, value] of Object.entries(cfg.extraHeaders ?? {})) {
    put(key, interpolateEnv(value, env));
  }

  return Object.fromEntries(byName.values());
}
139
+
/**
 * Normalize the chat-completions URL: trim trailing slashes and don't
 * double-append when the baseUrl already ends with the path.
 *
 * A query string or fragment on `baseUrl` (e.g. a gateway that requires
 * `?api-version=…`) is preserved: the path part is normalized and the
 * `?…`/`#…` suffix is re-attached after it, rather than having
 * `/chat/completions` appended behind the query.
 *
 * @param baseUrl - Provider base URL, with or without the trailing path.
 * @returns The URL to POST chat-completion requests to.
 */
export function chatCompletionsUrl(baseUrl: string): string {
  // Plain string handling (not `new URL`) so a non-absolute base never throws.
  const suffixStart = baseUrl.search(/[?#]/);
  const pathPart = suffixStart === -1 ? baseUrl : baseUrl.slice(0, suffixStart);
  const suffix = suffixStart === -1 ? "" : baseUrl.slice(suffixStart);
  const trimmed = pathPart.replace(/\/+$/, "");

  const endpoint = trimmed.endsWith("/chat/completions")
    ? trimmed
    : `${trimmed}/chat/completions`;

  return `${endpoint}${suffix}`;
}
@@ -3,6 +3,7 @@ import { join } from "node:path";
3
3
  import { mkdir, readFile, writeFile, chmod } from "node:fs/promises";
4
4
  import { isRecord } from "./lib/guards";
5
5
  import { PROVIDER_DEFAULTS } from "./inference/inference.constants";
6
+ import type { ReasoningStyle } from "./inference/inference.types";
6
7
 
7
8
  /**
8
9
  * The model registry — `~/.tsforge/models.json`, the central place a user
@@ -28,6 +29,18 @@ export interface IModelEntry {
28
29
  thinking?: boolean;
29
30
  /** Per-response token cap override. */
30
31
  maxTokens?: number;
32
+ /** Provider reasoning dialect: how thinking/reasoning is expressed on the wire.
33
+ * `qwen` (default) | `deepseek` | `openai` | `none`. Set `deepseek` for the
34
+ * DeepSeek API, `openai` for OpenAI o-series. */
35
+ reasoning?: ReasoningStyle;
36
+ /** Reasoning effort for `deepseek`/`openai` styles. */
37
+ reasoningEffort?: "low" | "medium" | "high";
38
+ /** Arbitrary fields merged into the request body (override built-ins) — the
39
+ * escape hatch for any provider-specific param. */
40
+ extraBody?: Record<string, unknown>;
41
+ /** Arbitrary request headers (e.g. a non-Bearer auth scheme); `${VAR}` values
42
+ * are interpolated from the environment. */
43
+ extraHeaders?: Record<string, string>;
31
44
  }
32
45
 
33
46
  export interface IModelsConfig {