npm - @agjs/tsforge - Versions diffs - 0.1.15 → 0.1.16 - Mend

@agjs/tsforge 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/src/cli.ts +39 -1
package/src/inference/inference.types.ts +20 -0
package/src/inference/openai-compatible.ts +11 -34
package/src/inference/request.ts +148 -0
package/src/models-config.ts +13 -0

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@agjs/tsforge",
   "type": "module",
-  "version": "0.1.15",
+  "version": "0.1.16",
   "license": "MIT",
   "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
   "repository": {

package/src/cli.ts CHANGED

@@ -11,6 +11,7 @@ import {
 } from "./loop";
 import {
   PROVIDER_LIMITS,
+  PROVIDER_DEFAULTS,
   OpenAICompatibleProvider,
   type IOpenAICompatibleConfig,
 } from "./inference";
@@ -261,7 +262,12 @@ async function detectContextWindow(
     const entries = data.data.filter(isRecord);
     const match = entries.find((e) => e.id === entry.model) ?? entries[0];
-    const len = match?.max_model_len;
+    // vLLM uses `max_model_len`; other servers expose `context_window` or
+    // `max_position_embeddings` — accept whichever is present.
+    const len =
+      match?.max_model_len ??
+      match?.context_window ??
+      match?.max_position_embeddings;
     return typeof len === "number" && Number.isFinite(len) ? len : undefined;
   } catch {
@@ -328,6 +334,16 @@ export function providerConfig(entry: IModelEntry): IOpenAICompatibleConfig {
     // instead of emitting tool calls (→ no files written). The StreamGuard is
     // the targeted loop protection. Opt in only to experiment.
     ...(repetitionPenalty === undefined ? {} : { repetitionPenalty }),
+    // Provider dialect + escape hatches — passed straight through so any
+    // OpenAI-ish endpoint (DeepSeek, OpenAI o-series, custom gateways) works.
+    ...(entry.reasoning === undefined ? {} : { reasoning: entry.reasoning }),
+    ...(entry.reasoningEffort === undefined
+      ? {}
+      : { reasoningEffort: entry.reasoningEffort }),
+    ...(entry.extraBody === undefined ? {} : { extraBody: entry.extraBody }),
+    ...(entry.extraHeaders === undefined
+      ? {}
+      : { extraHeaders: entry.extraHeaders }),
   };
 }
@@ -335,6 +351,26 @@ function makeProvider(entry: IModelEntry): OpenAICompatibleProvider {
   return new OpenAICompatibleProvider(providerConfig(entry));
 }
+/** Catch the common footgun: a cloud baseUrl paired with the leftover qwen
+ *  default `model`, which then 400s ("model not supported") on that host. */
+function warnDefaultModelOnRemote(entry: IModelEntry): void {
+  let host: string;
+  try {
+    host = new URL(entry.baseUrl).hostname;
+  } catch {
+    return;
+  }
+  const remote = host !== "localhost" && host !== "127.0.0.1" && host !== "::1";
+  if (remote && entry.model === PROVIDER_DEFAULTS.model) {
+    process.stdout.write(
+      `  ⚠ models.json: model is still "${PROVIDER_DEFAULTS.model}" (the default) but baseUrl is ${host} — set the entry's "model" to a name that host supports.\n`
+    );
+  }
+}
 /** Print the model registry with ★ on the active one (the `/model` listing). */
 async function listModels(
   provider: OpenAICompatibleProvider,
@@ -784,6 +820,8 @@ async function repl(args: ICliArgs): Promise<number> {
   const provider = makeProvider(activeModel.entry);
   let activeName = activeModel.name;
+  warnDefaultModelOnRemote(activeModel.entry);
   // Best-effort cleanup of stale sessions on every launch.
   await pruneSessions();

package/src/inference/inference.types.ts CHANGED

@@ -118,6 +118,26 @@ export interface IOpenAICompatibleConfig {
    * correctness. Omitted (1.0 = off) by default; set it on code-gen providers.
    */
   repetitionPenalty?: number;
+  /**
+   * How this provider wants reasoning/thinking expressed on the wire:
+   *  - `qwen` (default): `chat_template_kwargs.enable_thinking` + `thinking_token_budget` (vLLM).
+   *  - `deepseek`: top-level `thinking: { type }` + `reasoning_effort`; never sends
+   *    `tool_choice: "required"` (DeepSeek's thinking mode rejects it).
+   *  - `openai`: `reasoning_effort`; uses `max_completion_tokens` and omits `temperature` (o-series).
+   *  - `none`: no reasoning fields.
+   */
+  reasoning?: ReasoningStyle;
+  /** Reasoning effort for `deepseek`/`openai` styles (maps to `reasoning_effort`). */
+  reasoningEffort?: "low" | "medium" | "high";
+  /** Arbitrary fields merged into the request body LAST (override anything above) —
+   *  the escape hatch for any provider-specific param. */
+  extraBody?: Record<string, unknown>;
+  /** Arbitrary request headers (e.g. Azure `api-key`, Anthropic `x-api-key`).
+   *  `${VAR}` in values is interpolated from the environment. */
+  extraHeaders?: Record<string, string>;
   /** Injectable for tests; defaults to global fetch. */
   fetch?: typeof fetch;
 }
+/** Provider reasoning-param dialect. */
+export type ReasoningStyle = "qwen" | "deepseek" | "openai" | "none";

package/src/inference/openai-compatible.ts CHANGED

@@ -7,8 +7,13 @@ import type {
 } from "./inference.types";
 import { PROVIDER_LIMITS } from "./inference.constants";
 import { fetchWithRetry } from "./transport";
-import { toWire, parseResponse } from "./wire";
+import { parseResponse } from "./wire";
 import { streamResponse } from "./stream";
+import {
+  buildRequestBody,
+  buildRequestHeaders,
+  chatCompletionsUrl,
+} from "./request";
 export { salvageToolCalls } from "./wire";
@@ -40,38 +45,10 @@ export class OpenAICompatibleProvider implements IProvider {
   ): Promise<IModelResponse> {
     const doFetch = this.cfg.fetch ?? fetch;
     const streaming = opts.onToken !== undefined;
-    const headers: Record<string, string> = {
-      "content-type": "application/json",
-    };
-    if (this.cfg.apiKey !== undefined) {
-      headers.authorization = `Bearer ${this.cfg.apiKey}`;
-    }
-    const body = JSON.stringify({
-      model: this.cfg.model,
-      messages: messages.map(toWire),
-      max_tokens: this.cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens,
-      temperature: opts.temperature,
-      ...(this.cfg.repetitionPenalty === undefined
-        ? {}
-        : { repetition_penalty: this.cfg.repetitionPenalty }),
-      ...(opts.tools === undefined
-        ? {}
-        : { tools: opts.tools, tool_choice: opts.toolChoice ?? "auto" }),
-      ...(opts.enableThinking === undefined
-        ? {}
-        : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
-      ...(opts.thinkingTokenBudget === undefined
-        ? {}
-        : { thinking_token_budget: opts.thinkingTokenBudget }),
-      // include_usage → the stream emits a final chunk carrying token `usage`
-      // (otherwise a streamed response reports none). Non-stream replies carry it
-      // by default.
-      ...(streaming
-        ? { stream: true, stream_options: { include_usage: true } }
-        : {}),
-    });
+    const headers = buildRequestHeaders(this.cfg);
+    const body = JSON.stringify(
+      buildRequestBody(this.cfg, messages, opts, streaming)
+    );
     // Retry transient CONNECTION blips (socket close / unable-to-connect) — the
     // connect happens before any stream starts, so retrying is safe for both
@@ -79,7 +56,7 @@ export class OpenAICompatibleProvider implements IProvider {
     // a network hiccup from wrecking an eval run.
     const res = await fetchWithRetry(
       doFetch,
-      `${this.cfg.baseUrl}/chat/completions`,
+      chatCompletionsUrl(this.cfg.baseUrl),
       headers,
       body,
       this.cfg.timeoutMs ?? PROVIDER_LIMITS.requestTimeoutMs,

package/src/inference/request.ts ADDED

@@ -0,0 +1,148 @@
+import type {
+  IChatMessage,
+  ICompleteOptions,
+  IOpenAICompatibleConfig,
+  ReasoningStyle,
+} from "./inference.types";
+import { PROVIDER_LIMITS } from "./inference.constants";
+import { toWire } from "./wire";
+/** Interpolate `${VAR}` references from `env` into a string (missing → ""). */
+function interpolateEnv(
+  value: string,
+  env: Readonly<Record<string, string | undefined>>
+): string {
+  return value.replace(
+    /\$\{([A-Za-z0-9_]+)\}/g,
+    (_m: string, name: string) => env[name] ?? ""
+  );
+}
+function style(cfg: IOpenAICompatibleConfig): ReasoningStyle {
+  return cfg.reasoning ?? "qwen";
+}
+/** Provider-specific reasoning/thinking fields for the request body. */
+function reasoningFields(
+  cfg: IOpenAICompatibleConfig,
+  opts: ICompleteOptions
+): Record<string, unknown> {
+  switch (style(cfg)) {
+    case "qwen":
+      return {
+        ...(opts.enableThinking === undefined
+          ? {}
+          : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
+        ...(opts.thinkingTokenBudget === undefined
+          ? {}
+          : { thinking_token_budget: opts.thinkingTokenBudget }),
+      };
+    case "deepseek":
+      return {
+        ...(opts.enableThinking === undefined
+          ? {}
+          : {
+              thinking: {
+                type: opts.enableThinking ? "enabled" : "disabled",
+              },
+            }),
+        ...(cfg.reasoningEffort === undefined
+          ? {}
+          : { reasoning_effort: cfg.reasoningEffort }),
+      };
+    case "openai":
+      return cfg.reasoningEffort === undefined
+        ? {}
+        : { reasoning_effort: cfg.reasoningEffort };
+    case "none":
+      return {};
+  }
+}
+/** The output-token cap field — o-series renamed `max_tokens` → `max_completion_tokens`. */
+function tokenCapField(cfg: IOpenAICompatibleConfig): Record<string, number> {
+  const max = cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens;
+  return style(cfg) === "openai"
+    ? { max_completion_tokens: max }
+    : { max_tokens: max };
+}
+/** Tool-choice clamped for provider constraints: DeepSeek's thinking mode rejects
+ *  `tool_choice: "required"`, so downgrade it to `"auto"` there. */
+function toolChoiceFor(
+  cfg: IOpenAICompatibleConfig,
+  requested: "auto" | "required" | "none"
+): "auto" | "required" | "none" {
+  if (style(cfg) === "deepseek" && requested === "required") {
+    return "auto";
+  }
+  return requested;
+}
+/** Build the request body object (pure). Field order keeps the qwen default
+ *  byte-for-byte identical; `extraBody` is merged last so it can override
+ *  anything for a fully custom provider. */
+export function buildRequestBody(
+  cfg: IOpenAICompatibleConfig,
+  messages: IChatMessage[],
+  opts: ICompleteOptions,
+  streaming: boolean
+): Record<string, unknown> {
+  // o-series rejects `temperature` entirely; everywhere else send it only when set.
+  const omitTemperature =
+    style(cfg) === "openai" || opts.temperature === undefined;
+  return {
+    model: cfg.model,
+    messages: messages.map(toWire),
+    ...tokenCapField(cfg),
+    ...(omitTemperature ? {} : { temperature: opts.temperature }),
+    ...(cfg.repetitionPenalty === undefined
+      ? {}
+      : { repetition_penalty: cfg.repetitionPenalty }),
+    ...(opts.tools === undefined
+      ? {}
+      : {
+          tools: opts.tools,
+          tool_choice: toolChoiceFor(cfg, opts.toolChoice ?? "auto"),
+        }),
+    ...reasoningFields(cfg, opts),
+    ...(streaming
+      ? { stream: true, stream_options: { include_usage: true } }
+      : {}),
+    ...(cfg.extraBody ?? {}),
+  };
+}
+/** Build request headers: JSON + Bearer auth (when a key is set) + any
+ *  `extraHeaders` (with `${VAR}` interpolation), which can override the defaults. */
+export function buildRequestHeaders(
+  cfg: IOpenAICompatibleConfig,
+  env: Readonly<Record<string, string | undefined>> = process.env
+): Record<string, string> {
+  const headers: Record<string, string> = {
+    "content-type": "application/json",
+  };
+  if (cfg.apiKey !== undefined) {
+    headers.authorization = `Bearer ${cfg.apiKey}`;
+  }
+  for (const [key, value] of Object.entries(cfg.extraHeaders ?? {})) {
+    headers[key] = interpolateEnv(value, env);
+  }
+  return headers;
+}
+/** Normalize the chat-completions URL: trim trailing slashes and don't
+ *  double-append when the baseUrl already ends with the path. */
+export function chatCompletionsUrl(baseUrl: string): string {
+  const trimmed = baseUrl.replace(/\/+$/, "");
+  return trimmed.endsWith("/chat/completions")
+    ? trimmed
+    : `${trimmed}/chat/completions`;
+}

package/src/models-config.ts CHANGED

@@ -3,6 +3,7 @@ import { join } from "node:path";
 import { mkdir, readFile, writeFile, chmod } from "node:fs/promises";
 import { isRecord } from "./lib/guards";
 import { PROVIDER_DEFAULTS } from "./inference/inference.constants";
+import type { ReasoningStyle } from "./inference/inference.types";
 /**
  * The model registry — `~/.tsforge/models.json`, the central place a user
@@ -28,6 +29,18 @@ export interface IModelEntry {
   thinking?: boolean;
   /** Per-response token cap override. */
   maxTokens?: number;
+  /** Provider reasoning dialect: how thinking/reasoning is expressed on the wire.
+   *  `qwen` (default) | `deepseek` | `openai` | `none`. Set `deepseek` for the
+   *  DeepSeek API, `openai` for OpenAI o-series. */
+  reasoning?: ReasoningStyle;
+  /** Reasoning effort for `deepseek`/`openai` styles. */
+  reasoningEffort?: "low" | "medium" | "high";
+  /** Arbitrary fields merged into the request body (override built-ins) — the
+   *  escape hatch for any provider-specific param. */
+  extraBody?: Record<string, unknown>;
+  /** Arbitrary request headers (e.g. a non-Bearer auth scheme); `${VAR}` values
+   *  are interpolated from the environment. */
+  extraHeaders?: Record<string, string>;
 }
 export interface IModelsConfig {