npm - @proposit/proposit-core - Versions diffs - 1.5.1 → 1.7.0 - Mend

@proposit/proposit-core 1.5.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/dist/extensions/ollama/errors.js ADDED Viewed

@@ -0,0 +1,228 @@
+// Ollama-provider error classes + the `classifyOllamaError` mapping.
+//
+// The framework's `llmStage` retry policy classifies provider errors
+// by inspecting a `retryReason` tag on the thrown object (see
+// `src/lib/pipelines/stage-helpers.ts` #classifyError) — NOT by class
+// identity. The failure *codes* live in the SDK-free
+// `src/lib/pipelines/failure-codes.ts`. So these Ollama classes carry
+// the same `retryReason` tags as the OpenAI provider's classes and set
+// their `code` from the lib constants. They deliberately do NOT import
+// from `extensions/openai/` and do NOT touch `src/lib/`.
+//
+// Mapping for the framework's default retry policy
+// (`retryOn: ["schema_validation", "transient"]`):
+//
+//   * `TransientLlmError` — `retryReason: "transient"`. Genuinely
+//     transient local hiccups: mid-stream `ECONNRESET`/socket drop,
+//     undici timeout cause-codes (`UND_ERR_HEADERS_TIMEOUT` /
+//     `UND_ERR_BODY_TIMEOUT` / `UND_ERR_CONNECT_TIMEOUT` — a long local
+//     thinking-model generation that outran the dispatcher timeout),
+//     cold-model-load 5xx (model pulled but still loading into VRAM),
+//     generic 5xx. Retried by the default policy.
+//   * `RateLimitLlmError` — `retryReason: "rate_limit"`. A local
+//     daemon rarely rate-limits, but a remote-Ollama / proxy setup can
+//     return 429; mapped for contract parity. Not in the default
+//     `retryOn`, so it fails fast (callers can opt in).
+//   * `SchemaValidationLlmError` — tagged `transient`; the framework's
+//     schema-retry path handles it. Thrown only for genuinely
+//     malformed / non-parseable JSON in the model's reply — NOT for a
+//     context-overflow (which is deterministic; see below).
+//   * `NonRetryableLlmError` — no tag; framework surfaces it
+//     immediately as `LLM_NON_RETRYABLE_ERROR`. Used for: daemon
+//     unreachable (`ECONNREFUSED`), model-not-pulled (404), and
+//     context-length / eval errors (deterministic — a retry re-fails).
+//   * `ToolLoopExhaustedError` — surfaces from the function-tool loop
+//     when the round cap is hit. Non-retryable (no tag).
+import { LLM_NON_RETRYABLE_ERROR, LLM_RATE_LIMITED, LLM_TRANSIENT_ERROR, } from "../../lib/pipelines/failure-codes.js";
+/**
+ * Provider error for failures that may clear up on a later attempt.
+ * Carries the `retryReason: "transient"` tag and the
+ * `LLM_TRANSIENT_ERROR` failure code.
+ *
+ * @param args - `message` becomes the error message; `status` is
+ *   stored as-is on the instance (may be `undefined`).
+ */
+export class TransientLlmError extends Error {
+    retryReason = "transient";
+    code = LLM_TRANSIENT_ERROR;
+    status;
+    constructor(args) {
+        const { message, status } = args;
+        super(message);
+        this.status = status;
+        this.name = "TransientLlmError";
+    }
+}
+/**
+ * Provider error for a rate-limited request. Carries the
+ * `retryReason: "rate_limit"` tag and the `LLM_RATE_LIMITED` failure
+ * code.
+ *
+ * @param args - `message` becomes the error message; `status` is
+ *   stored as-is on the instance (may be `undefined`).
+ */
+export class RateLimitLlmError extends Error {
+    retryReason = "rate_limit";
+    code = LLM_RATE_LIMITED;
+    status;
+    constructor(args) {
+        const { message, status } = args;
+        super(message);
+        this.status = status;
+        this.name = "RateLimitLlmError";
+    }
+}
+/**
+ * Raised when the structured-output reply cannot be used as JSON
+ * (malformed or non-parseable). The instance carries
+ * `retryReason: "transient"`, so a retry policy that retries transient
+ * failures gives the model another attempt.
+ *
+ * A context-length overflow must **not** be reported with this class:
+ * the same oversized prompt fails again on retry. `classifyOllamaError`
+ * maps overflow to {@link NonRetryableLlmError} instead.
+ *
+ * @param args - `message` becomes the error message; `status` is
+ *   stored as-is on the instance (may be `undefined`).
+ */
+export class SchemaValidationLlmError extends Error {
+    retryReason = "transient";
+    status;
+    constructor(args) {
+        const { message, status } = args;
+        super(message);
+        this.status = status;
+        this.name = "SchemaValidationLlmError";
+    }
+}
+/**
+ * Provider error for failures that should not be retried. It carries
+ * the `LLM_NON_RETRYABLE_ERROR` failure code and, unlike the retryable
+ * classes in this module, no `retryReason` tag.
+ *
+ * @param args - `message` becomes the error message; `status` is
+ *   stored as-is on the instance (may be `undefined`).
+ */
+export class NonRetryableLlmError extends Error {
+    code = LLM_NON_RETRYABLE_ERROR;
+    status;
+    constructor(args) {
+        const { message, status } = args;
+        super(message);
+        this.status = status;
+        this.name = "NonRetryableLlmError";
+    }
+}
+/**
+ * Raised when the function-tool loop ends without a final response.
+ * Carries no `retryReason` tag.
+ *
+ * @param args - `message` becomes the error message; `rounds` is
+ *   stored as-is on the instance.
+ */
+export class ToolLoopExhaustedError extends Error {
+    rounds;
+    constructor(args) {
+        const { message, rounds } = args;
+        super(message);
+        this.rounds = rounds;
+        this.name = "ToolLoopExhaustedError";
+    }
+}
+// -- error-shape probes ---------------------------------------------------
+//
+// The `ollama` SDK throws plain `Error`s (sometimes a `ResponseError`
+// carrying a `status_code`) and lets low-level `fetch` failures bubble
+// up. undici wraps a connection refusal as `TypeError: fetch failed`
+// with a `.cause` carrying the Node `code`. We probe both the error
+// itself and one level of `.cause` for the Node-style `code` and an
+// HTTP-ish `status` / `status_code`.
+/**
+ * Find a string `code` on the error itself or, failing that, on its
+ * direct `.cause`. Only one level of `.cause` is inspected.
+ *
+ * @param err - Any thrown value.
+ * @returns The string code, or `undefined` when neither location has one.
+ */
+function nodeCodeOf(err) {
+    const isObject = typeof err === "object" && err !== null;
+    // `readCode` only ever yields a string or `undefined`, so `??` falls
+    // through to the cause exactly when the error has no string code.
+    return readCode(err) ?? (isObject ? readCode(err.cause) : undefined);
+}
+/**
+ * Read a string-valued `code` property from a value.
+ *
+ * @param value - Any value; non-objects and `null` yield `undefined`.
+ * @returns `value.code` when it is a string, otherwise `undefined`.
+ */
+function readCode(value) {
+    if (value === null || typeof value !== "object") {
+        return undefined;
+    }
+    const { code } = value;
+    if (typeof code === "string") {
+        return code;
+    }
+    return undefined;
+}
+/**
+ * Read a numeric HTTP-style status from an error-like object.
+ *
+ * `status` is checked first, then `status_code` (the snake_case name
+ * is the `ollama` SDK's ResponseError wire field). Only the object
+ * itself is inspected.
+ *
+ * @param err - Any thrown value.
+ * @returns The first of the two fields that holds a number, otherwise
+ *   `undefined`.
+ */
+function statusOf(err) {
+    if (err === null || typeof err !== "object") {
+        return undefined;
+    }
+    for (const field of ["status", "status_code"]) {
+        const candidate = err[field];
+        if (typeof candidate === "number") {
+            return candidate;
+        }
+    }
+    return undefined;
+}
+/**
+ * Produce a message for any thrown value.
+ *
+ * @param err - Any thrown value.
+ * @returns `err.message` for `Error` instances; otherwise the value
+ *   converted with `String()` (which returns strings unchanged).
+ */
+function messageOf(err) {
+    return err instanceof Error ? err.message : String(err);
+}
+// Case-insensitive probe for error messages that mention a context
+// length/window limit, too many / too long tokens, or `num_ctx`.
+// `classifyOllamaError` maps a match to a non-retryable error.
+const CONTEXT_OVERFLOW_PATTERN = /context (?:length|window)|maximum context|exceeds the (?:maximum )?context|too (?:many|long).*token|num_ctx/i;
+// Case-insensitive probe for error messages saying the requested model
+// is missing or should be pulled. `classifyOllamaError` treats a match
+// the same way as an HTTP 404.
+const MODEL_NOT_FOUND_PATTERN = /model .* not found|not found, try pulling|no such model|pull(?:ing)? it/i;
+/**
+ * Map an error surfaced by the `ollama` SDK (or an underlying `fetch`
+ * failure) to one of the framework-recognized provider error classes.
+ *
+ * Checks run in this order; the first match wins:
+ *
+ *   1. `ECONNREFUSED`            → NonRetryable (daemon down; a retry
+ *                                  won't bring it up within backoff).
+ *   2. `ECONNRESET` / `ETIMEDOUT` / `EPIPE` / undici timeout codes
+ *      (`UND_ERR_HEADERS_TIMEOUT` / `UND_ERR_BODY_TIMEOUT` /
+ *      `UND_ERR_CONNECT_TIMEOUT`) → Transient.
+ *   3. 404 or a model-not-found message → NonRetryable
+ *                                  (`ollama pull` hint).
+ *   4. context-overflow message  → NonRetryable (deterministic; never
+ *                                  SchemaValidationLlmError).
+ *   5. 429                       → RateLimit (remote/proxy setups).
+ *   6. any status >= 500         → Transient (cold load / generic 5xx).
+ *   7. anything else             → NonRetryable (safe fail-fast default).
+ *
+ * The original thrown value is attached to the returned error as a
+ * non-enumerable `cause`, so the SDK/undici stack and fields stay
+ * reachable for debugging.
+ *
+ * @param err - The value thrown by the SDK call; may be of any type.
+ * @returns A new `NonRetryableLlmError`, `TransientLlmError` or
+ *   `RateLimitLlmError`. The function returns the error; it never
+ *   throws it.
+ */
+export function classifyOllamaError(err) {
+    const code = nodeCodeOf(err);
+    const status = statusOf(err);
+    const message = messageOf(err);
+    // Attach the original failure the way the native `Error` constructor
+    // does for its `cause` option: an own, non-enumerable data property.
+    // Non-enumerable keeps `Object.keys` / JSON output of the classified
+    // error unchanged.
+    const withCause = (classified) => {
+        Object.defineProperty(classified, "cause", {
+            value: err,
+            writable: true,
+            configurable: true,
+            enumerable: false,
+        });
+        return classified;
+    };
+    // Daemon unreachable — a connection refusal is the canonical
+    // "`ollama serve` isn't running on :11434" failure. A retry within
+    // the framework's short backoff won't bring it back up.
+    if (code === "ECONNREFUSED") {
+        return withCause(new NonRetryableLlmError({
+            message: `Cannot reach the Ollama daemon (${code}). Is \`ollama serve\` running on :11434? Original error: ${message}`,
+        }));
+    }
+    // Mid-stream socket drop / transient connection loss — retryable.
+    // Includes undici's timeout cause-codes: a long local thinking-model
+    // generation that outruns the dispatcher's headers/body timeout (or a
+    // connect timeout) is transient against a still-working daemon, NOT a
+    // deterministic failure. The framework's default retry policy lists
+    // `"transient"` in `retryOn`, so it retries instead of dying
+    // `LLM_NON_RETRYABLE_ERROR`. undici wraps these as a
+    // `TypeError: fetch failed` whose `.cause.code` is the `UND_ERR_*`
+    // value — `nodeCodeOf` already probes one level of `.cause`.
+    if (code === "ECONNRESET" ||
+        code === "ETIMEDOUT" ||
+        code === "EPIPE" ||
+        code === "UND_ERR_HEADERS_TIMEOUT" ||
+        code === "UND_ERR_BODY_TIMEOUT" ||
+        code === "UND_ERR_CONNECT_TIMEOUT") {
+        return withCause(new TransientLlmError({
+            message: `Transient connection error talking to the Ollama daemon (${code}): ${message}`,
+        }));
+    }
+    // Model not pulled — Ollama returns a 404 whose body says the model
+    // was not found. Deterministic until the user pulls it.
+    if (status === 404 || MODEL_NOT_FOUND_PATTERN.test(message)) {
+        return withCause(new NonRetryableLlmError({
+            message: `Ollama model not found. Run \`ollama pull <model>\` to download it first. Original error: ${message}`,
+            status,
+        }));
+    }
+    // Context-length overflow / model-eval error — DETERMINISTIC. Must
+    // be NonRetryable: routing this to SchemaValidationLlmError (tagged
+    // `transient`) would burn a guaranteed-failing second attempt on
+    // the same oversized prompt.
+    if (CONTEXT_OVERFLOW_PATTERN.test(message)) {
+        return withCause(new NonRetryableLlmError({
+            message: `Ollama request exceeded the model's context window (deterministic — retrying the same prompt will re-fail). Shorten the input or raise \`num_ctx\`. Original error: ${message}`,
+            status,
+        }));
+    }
+    if (status === 429) {
+        return withCause(new RateLimitLlmError({
+            message: `Ollama returned 429 (rate-limited; typical of a remote/proxied daemon): ${message}`,
+            status,
+        }));
+    }
+    // Cold-model-load (model pulled but still loading into VRAM) and
+    // any other 5xx are transient — a retry after backoff can succeed
+    // once the model is resident.
+    if (status !== undefined && status >= 500) {
+        return withCause(new TransientLlmError({
+            message: `Ollama returned ${status.toString()} (possibly a cold model load into VRAM): ${message}`,
+            status,
+        }));
+    }
+    // Safe default: fail fast rather than retry an unrecognized error.
+    return withCause(new NonRetryableLlmError({
+        message: `Unclassified Ollama error: ${message}`,
+        status,
+    }));
+}
+//# sourceMappingURL=errors.js.map

package/dist/extensions/ollama/errors.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"errors.js","sourceRoot":"","sources":["../../../src/extensions/ollama/errors.ts"],"names":[],"mappings":"AAAA,qEAAqE;AACrE,EAAE;AACF,qEAAqE;AACrE,8DAA8D;AAC9D,sEAAsE;AACtE,qDAAqD;AACrD,sEAAsE;AACtE,uEAAuE;AACvE,uEAAuE;AACvE,yDAAyD;AACzD,EAAE;AACF,mDAAmD;AACnD,mDAAmD;AACnD,EAAE;AACF,kEAAkE;AAClE,oEAAoE;AACpE,8DAA8D;AAC9D,wEAAwE;AACxE,qEAAqE;AACrE,sEAAsE;AACtE,kDAAkD;AAClD,iEAAiE;AACjE,uEAAuE;AACvE,iEAAiE;AACjE,wDAAwD;AACxD,uEAAuE;AACvE,8DAA8D;AAC9D,sEAAsE;AACtE,4DAA4D;AAC5D,6DAA6D;AAC7D,iEAAiE;AACjE,gEAAgE;AAChE,uEAAuE;AACvE,sEAAsE;AACtE,yDAAyD;AAEzD,OAAO,EACH,uBAAuB,EACvB,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,sCAAsC,CAAA;AAE7C,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IACxB,WAAW,GAAG,WAAoB,CAAA;IAClC,IAAI,GAAG,mBAAmB,CAAA;IAC1B,MAAM,CAAS;IAE/B,YAAY,IAA0C;QAClD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACnB,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAA;QAC/B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC7B,CAAC;CACJ;AAED,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IACxB,WAAW,GAAG,YAAqB,CAAA;IACnC,IAAI,GAAG,gBAAgB,CAAA;IACvB,MAAM,CAAS;IAE/B,YAAY,IAA0C;QAClD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACnB,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAA;QAC/B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC7B,CAAC;CACJ;AAED;;;;;;;;;GASG;AACH,MAAM,OAAO,wBAAyB,SAAQ,KAAK;IAC/B,WAAW,GAAG,WAAoB,CAAA;IAClC,MAAM,CAAS;IAE/B,YAAY,IAA0C;QAClD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACnB,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAA;QACtC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC7B,CAAC;CACJ;AAED,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC3B,IAAI,GAAG,uBAAuB,CAAA;IAC9B,MAAM,CAAS;IAE/B,YAAY,IAA0C;QAClD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACnB,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAA;QAClC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC7B,CAAC;CACJ;AAED,MAAM,OAAO,sBAAuB,SAAQ,KAAK;IAC7B,MAAM,CAAQ;IAE9B,YAAY,IAAyC;QACjD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACnB,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAA;QACpC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC7B,CAAC;CACJ;AAED,4EAA4E;AAC5E,EAAE;AACF,sEAAsE;AACtE,uEAAuE;AACvE,qEAAqE;AACrE,oEAAoE;AACpE,oEAAoE;AACpE,qCAAqC;AAErC,SAAS,UAAU,CAAC,GAAY;IAC5B,MAAM,MAAM
,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,MAAM,CAAA;IACvC,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;QAC1C,OAAO,QAAQ,CAAE,GAA2B,CAAC,KAAK,CAAC,CAAA;IACvD,CAAC;IACD,OAAO,SAAS,CAAA;AACpB,CAAC;AAED,SAAS,QAAQ,CAAC,KAAc;IAC5B,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACjE,MAAM,IAAI,GAAI,KAA4B,CAAC,IAAI,CAAA;IAC/C,OAAO,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAA;AACtD,CAAC;AAED,SAAS,QAAQ,CAAC,GAAY;IAC1B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IAC7D,oEAAoE;IACpE,4DAA4D;IAC5D,yDAAyD;IACzD,MAAM,CAAC,GAAG,GAAkD,CAAA;IAC5D,IAAI,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAC,MAAM,CAAA;IACjD,IAAI,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAC,WAAW,CAAA;IAC3D,wDAAwD;IACxD,OAAO,SAAS,CAAA;AACpB,CAAC;AAED,SAAS,SAAS,CAAC,GAAY;IAC3B,IAAI,GAAG,YAAY,KAAK;QAAE,OAAO,GAAG,CAAC,OAAO,CAAA;IAC5C,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,GAAG,CAAA;IACvC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAA;AACtB,CAAC;AAED,MAAM,wBAAwB,GAC1B,8GAA8G,CAAA;AAElH,MAAM,uBAAuB,GACzB,0EAA0E,CAAA;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAY;IAC5C,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;IAC5B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC5B,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAA;IAE9B,6DAA6D;IAC7D,mEAAmE;IACnE,wDAAwD;IACxD,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;QAC1B,OAAO,IAAI,oBAAoB,CAAC;YAC5B,OAAO,EAAE,mCAAmC,IAAI,6DAA6D,OAAO,EAAE;SACzH,CAAC,CAAA;IACN,CAAC;IAED,kEAAkE;IAClE,qEAAqE;IACrE,sEAAsE;IACtE,sEAAsE;IACtE,0EAA0E;IAC1E,wEAAwE;IACxE,kEAAkE;IAClE,yEAAyE;IACzE,IACI,IAAI,KAAK,YAAY;QACrB,IAAI,KAAK,WAAW;QACpB,IAAI,KAAK,OAAO;QAChB,IAAI,KAAK,yBAAyB;QAClC,IAAI,KAAK,sBAAsB;QAC/B,IAAI,KAAK,yBAAyB,EACpC,CAAC;QACC,OAAO,IAAI,iBAAiB,CAAC;YACzB,OAAO,EAAE,4DAA4D,IAAI,MAAM,OAAO,EAAE;SAC3F,CAAC,CAAA;IACN,CAAC;IAED,oEAAoE;IACpE,wDAAwD;IACxD,IAAI,MAAM,KAAK,GAAG,IAAI,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,oBAAoB,CAAC;YAC5B,OAAO,EAAE,6FAA6F,OAAO,EAAE;YAC/G,MAAM;SACT,CAAC,CAAA;IACN,CAAC;IAED,mEAAmE;IACnE,oEAAoE;IAC
pE,iEAAiE;IACjE,6BAA6B;IAC7B,IAAI,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,OAAO,IAAI,oBAAoB,CAAC;YAC5B,OAAO,EAAE,uKAAuK,OAAO,EAAE;YACzL,MAAM;SACT,CAAC,CAAA;IACN,CAAC;IAED,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,OAAO,IAAI,iBAAiB,CAAC;YACzB,OAAO,EAAE,2EAA2E,OAAO,EAAE;YAC7F,MAAM;SACT,CAAC,CAAA;IACN,CAAC;IAED,iEAAiE;IACjE,kEAAkE;IAClE,8BAA8B;IAC9B,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;QACxC,OAAO,IAAI,iBAAiB,CAAC;YACzB,OAAO,EAAE,mBAAmB,MAAM,CAAC,QAAQ,EAAE,4CAA4C,OAAO,EAAE;YAClG,MAAM;SACT,CAAC,CAAA;IACN,CAAC;IAED,mEAAmE;IACnE,OAAO,IAAI,oBAAoB,CAAC;QAC5B,OAAO,EAAE,8BAA8B,OAAO,EAAE;QAChD,MAAM;KACT,CAAC,CAAA;AACN,CAAC"}

package/dist/extensions/ollama/index.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export { OllamaProvider } from "./provider.js";
+export type { TOllamaProviderConfig, TOllamaClient, TOllamaChatRequest, TOllamaChatResponse, } from "./types.js";
+export { typeboxToJsonSchema } from "./structured-output.js";
+export type { TOllamaJsonSchema } from "./structured-output.js";
+export { NonRetryableLlmError, RateLimitLlmError, SchemaValidationLlmError, ToolLoopExhaustedError, TransientLlmError, classifyOllamaError, } from "./errors.js";
+//# sourceMappingURL=index.d.ts.map

package/dist/extensions/ollama/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/extensions/ollama/index.ts"],"names":[],"mappings":"AAcA,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAA;AAC9C,YAAY,EACR,qBAAqB,EACrB,aAAa,EACb,kBAAkB,EAClB,mBAAmB,GACtB,MAAM,YAAY,CAAA;AACnB,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAC5D,YAAY,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAA;AAC/D,OAAO,EACH,oBAAoB,EACpB,iBAAiB,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,iBAAiB,EACjB,mBAAmB,GACtB,MAAM,aAAa,CAAA"}

package/dist/extensions/ollama/index.js ADDED Viewed

@@ -0,0 +1,17 @@
+// Barrel for the Ollama provider extension.
+//
+// Public surface consumed via the `@proposit/proposit-core/extensions/ollama`
+// subpath export: the provider constructor + its config type + the
+// standard-JSON-schema converter + the error classes (which callers may
+// `instanceof`-match for finer-grained observability).
+//
+// NOTE: the error class names (`NonRetryableLlmError`, …) intentionally
+// mirror the OpenAI provider's names but are *distinct* classes living
+// in this extension. They are surfaced only from this subpath (NOT the
+// package root) to avoid colliding with the root-exported OpenAI error
+// classes. The framework classifies by the `retryReason` tag, not class
+// identity, so the duplication is intentional and harmless.
+export { OllamaProvider } from "./provider.js";
+export { typeboxToJsonSchema } from "./structured-output.js";
+export { NonRetryableLlmError, RateLimitLlmError, SchemaValidationLlmError, ToolLoopExhaustedError, TransientLlmError, classifyOllamaError, } from "./errors.js";
+//# sourceMappingURL=index.js.map

package/dist/extensions/ollama/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/extensions/ollama/index.ts"],"names":[],"mappings":"AAAA,4CAA4C;AAC5C,EAAE;AACF,8EAA8E;AAC9E,mEAAmE;AACnE,wEAAwE;AACxE,uDAAuD;AACvD,EAAE;AACF,wEAAwE;AACxE,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,wEAAwE;AACxE,4DAA4D;AAE5D,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAA;AAO9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAE5D,OAAO,EACH,oBAAoB,EACpB,iBAAiB,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,iBAAiB,EACjB,mBAAmB,GACtB,MAAM,aAAa,CAAA"}

package/dist/extensions/ollama/provider.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { TLlmProvider, TLlmRequest, TLlmResponse } from "../../lib/llm/types.js";
+import type { TOllamaProviderConfig } from "./types.js";
+/**
+ * Type declaration for the Ollama-backed implementation of
+ * `TLlmProvider`. All state is private; the public surface is the
+ * constructor and `respond`.
+ */
+export declare class OllamaProvider implements TLlmProvider {
+    /** Provider configuration captured at construction time. */
+    private readonly config;
+    /** Cached promise for the SDK client; see `getClient`. */
+    private clientPromise;
+    /** Upper bound on rounds of the function-tool loop. */
+    private readonly maxToolRounds;
+    /** Context-window size sent to the daemon as `num_ctx`. */
+    private readonly numCtx;
+    /** Per-request timeout, in milliseconds. */
+    private readonly requestTimeoutMs;
+    /** Value sent as the chat request's `stream` flag. */
+    private readonly stream;
+    /**
+     * @param config - Optional provider configuration.
+     */
+    constructor(config?: TOllamaProviderConfig);
+    /**
+     * Run one LLM request and resolve with its typed response.
+     *
+     * @param req - The request to execute.
+     */
+    respond<T>(req: TLlmRequest<T>): Promise<TLlmResponse<T>>;
+    /** Internal chat / tool-call loop used by `respond`. */
+    private runChatLoop;
+    /**
+     * Resolve the SDK client: the injected one, or a freshly imported
+     * `Ollama` instance. Memoized so the dynamic import + construction
+     * runs at most once.
+     */
+    private getClient;
+    /** Internal helper that imports the SDK and builds the client. */
+    private importAndConstructClient;
+}
+//# sourceMappingURL=provider.d.ts.map

package/dist/extensions/ollama/provider.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../src/extensions/ollama/provider.ts"],"names":[],"mappings":"AAqCA,OAAO,KAAK,EACR,YAAY,EACZ,WAAW,EACX,YAAY,EAGf,MAAM,wBAAwB,CAAA;AAc/B,OAAO,KAAK,EAOR,qBAAqB,EAExB,MAAM,YAAY,CAAA;AAkBnB,qBAAa,cAAe,YAAW,YAAY;IAC/C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;IAC9C,OAAO,CAAC,aAAa,CAAsC;IAC3D,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAQ;IACtC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAQ;IAC/B,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAQ;IACzC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;gBAEpB,MAAM,CAAC,EAAE,qBAAqB;IAUpC,OAAO,CAAC,CAAC,EAAE,GAAG,EAAE,WAAW,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAmCjD,WAAW;IAiJzB;;;;OAIG;IACH,OAAO,CAAC,SAAS;YAQH,wBAAwB;CAiCzC"}

package/dist/extensions/ollama/provider.js ADDED Viewed

@@ -0,0 +1,375 @@
+// Concrete `TLlmProvider` backed by a local Ollama daemon via the
+// official `ollama` npm SDK.
+//
+// Dev/test only — production stays on OpenAI. The provider exists so a
+// developer can run the entire LLM-backed stack (notably the v2
+// argument-ingestion pipeline) against a self-hosted model
+// (`qwen3.6:latest`) with zero API cost.
+//
+// Deliberate divergences from the in-repo OpenAI provider:
+//   * Uses the official `ollama` SDK (an optional peer) rather than raw
+//     `fetch`. A missing package surfaces as an actionable error at
+//     construction time (dynamic-import-or-throw).
+//   * Structured output goes through the Ollama provider's own
+//     standard-JSON-schema converter (`./structured-output.ts`), not
+//     the OpenAI strict-mode converter.
+//   * `reasoningEffort` is ignored (no Ollama analogue);
+//     `maxOutputTokens` maps to `options.num_predict` (positive values
+//     only — never 0; -1/-2 are Ollama sentinels we never emit).
+//   * Thinking is left ON (the SDK/model default) — a prior finding
+//     showed `think: false` degrades structured-output fidelity (the
+//     model drops the required object wrapper → bare array, failing
+//     `Value.Check`). This trades latency (thinking-on stages can run
+//     several minutes) for correctness; the generous `requestTimeoutMs`
+//     default (see below) accommodates the latency.
+//   * A generous per-request timeout (`requestTimeoutMs`, default 20 min)
+//     is applied via a PER-PROVIDER undici `Agent` passed as the SDK
+//     client's `fetch` — never `setGlobalDispatcher`; a library must not
+//     mutate global state. See `./timeout-fetch.ts`.
+//   * Errors are classified by `./errors.ts` #classifyOllamaError, which
+//     carries the same `retryReason` tags + lib failure-codes as the
+//     OpenAI provider. No `ollama → openai` dependency, no lib change.
+//
+// `AbortSignal` is honored by registering an abort listener that calls
+// the SDK client's `abort()`; the SDK then rejects the in-flight
+// `chat()` with an `AbortError`, which the provider surfaces as a fresh
+// `AbortError` of its own (not the SDK's original object) so
+// `llmStage`'s mid-flight-abort detector marks the stage `skipped`.
+import { debugLlmFailure, debugLlmRequest, debugLlmResponse, } from "../../lib/pipelines/debug-log.js";
+import { typeboxToJsonSchema } from "./structured-output.js";
+import { buildTimeoutFetch } from "./timeout-fetch.js";
+import { NonRetryableLlmError, SchemaValidationLlmError, ToolLoopExhaustedError, classifyOllamaError, } from "./errors.js";
+// Matches an HTML-comment marker of the form `<!-- stage-id: NAME -->`.
+// Capture group 1 is the stage id; `respond()` runs this against the
+// system prompt and uses the id to label debug-log entries.
+const STAGE_ID_MARKER = /<!--\s*stage-id:\s*([^\s>]+)\s*-->/;
+// Daemon address used when the config supplies no `baseUrl`.
+const DEFAULT_BASE_URL = "http://localhost:11434";
+// Cap on chat rounds in the function-tool loop when the config supplies
+// no `maxToolCallRounds`.
+const DEFAULT_MAX_TOOL_ROUNDS = 6;
+// Generous default context window. Ollama silently truncates prompts
+// longer than `num_ctx` (no error — the model emits schema-valid JSON
+// from a truncated prompt), and its per-model default is often ~4096,
+// well under a real multi-KB ingestion prompt. See `TOllamaProviderConfig.numCtx`.
+const DEFAULT_NUM_CTX = 32768;
+// Generous per-request timeout for local thinking models. undici's 300s
+// default aborts long structured-extraction generations with
+// UND_ERR_HEADERS_TIMEOUT; 20 min gives qwen3.6-with-thinking room. The
+// timeout is applied via a PER-PROVIDER undici Agent (never global state)
+// — see ./timeout-fetch.ts and TOllamaProviderConfig.requestTimeoutMs.
+// Value is in milliseconds (1_200_000 ms = 20 min).
+const DEFAULT_REQUEST_TIMEOUT_MS = 1_200_000;
+/**
+ * `TLlmProvider` implementation that talks to an Ollama daemon through
+ * the `ollama` SDK client. The client is either injected via
+ * `config.client` or imported lazily on first use.
+ *
+ * Defaults resolved in the constructor when the config omits a field:
+ *   - `maxToolCallRounds` → `DEFAULT_MAX_TOOL_ROUNDS`
+ *   - `numCtx`            → `DEFAULT_NUM_CTX`
+ *   - `requestTimeoutMs`  → `DEFAULT_REQUEST_TIMEOUT_MS`
+ *   - `stream`            → `true`
+ */
+export class OllamaProvider {
+    config;
+    clientPromise = null;
+    maxToolRounds;
+    numCtx;
+    requestTimeoutMs;
+    stream;
+    /**
+     * @param config - Optional provider configuration; an omitted
+     *   config is treated as `{}`.
+     */
+    constructor(config) {
+        this.config = config ?? {};
+        this.maxToolRounds =
+            this.config.maxToolCallRounds ?? DEFAULT_MAX_TOOL_ROUNDS;
+        this.numCtx = this.config.numCtx ?? DEFAULT_NUM_CTX;
+        this.requestTimeoutMs =
+            this.config.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
+        this.stream = this.config.stream ?? true;
+    }
+    /**
+     * Execute one request against the daemon and resolve with the parsed
+     * structured output plus token usage.
+     *
+     * @param req - The LLM request. Fields read here and in the chat
+     *   loop: `model`, `systemPrompt`, `userMessage`, `outputSchema`,
+     *   `tools`, `maxOutputTokens`, `reasoningEffort` (debug log only)
+     *   and `signal`.
+     * @returns `{ output, tokenUsage, rawResponseId: undefined }`.
+     * @throws An `AbortError`-named `Error` when `req.signal` is (or
+     *   becomes) aborted; otherwise whatever the chat loop throws.
+     */
+    async respond(req) {
+        // Already-aborted short-circuit — don't even construct the
+        // client or call the daemon.
+        if (req.signal?.aborted) {
+            throw abortError();
+        }
+        const client = await this.getClient();
+        // The signal may have fired while the client was being resolved
+        // (first use performs a dynamic import). An "abort" listener
+        // registered after the event has been dispatched never runs, so
+        // without this re-check the whole chat would execute un-aborted.
+        if (req.signal?.aborted) {
+            throw abortError();
+        }
+        const convertedSchema = typeboxToJsonSchema(req.outputSchema);
+        const tools = req.tools ? translateTools(req.tools) : undefined;
+        const stageIdMatch = STAGE_ID_MARKER.exec(req.systemPrompt);
+        const debugStageId = stageIdMatch ? stageIdMatch[1] : null;
+        // Wire the AbortSignal to the SDK client's abort(). The SDK
+        // rejects the in-flight chat() with an AbortError when this
+        // fires.
+        const onAbort = () => {
+            client.abort();
+        };
+        req.signal?.addEventListener("abort", onAbort, { once: true });
+        try {
+            return await this.runChatLoop({
+                client,
+                req,
+                convertedSchema,
+                tools,
+                debugStageId,
+            });
+        }
+        finally {
+            // Always detach so a long-lived signal does not retain this
+            // request's closure.
+            req.signal?.removeEventListener("abort", onAbort);
+        }
+    }
+    /**
+     * Drive the chat / tool-call loop until the model produces a final
+     * (tool-call-free) reply or the round cap is reached.
+     *
+     * @param args - `{ client, req, convertedSchema, tools, debugStageId }`
+     *   as prepared by `respond()`.
+     * @returns The parsed output and accumulated token usage.
+     * @throws An `AbortError`-named `Error` on abort; the result of
+     *   `classifyOllamaError` for SDK failures; `NonRetryableLlmError`
+     *   for an unknown tool name; `SchemaValidationLlmError` for empty
+     *   or malformed reply text; `ToolLoopExhaustedError` when the
+     *   round cap is hit.
+     */
+    async runChatLoop(args) {
+        const { client, req, convertedSchema, tools, debugStageId } = args;
+        // Running message array. Tool-call rounds append the model's
+        // tool_calls echo + the tool result before re-calling.
+        const messages = [
+            { role: "system", content: req.systemPrompt },
+            { role: "user", content: req.userMessage },
+        ];
+        let lastUsage = { input: 0, output: 0 };
+        for (let round = 0; round < this.maxToolRounds; round += 1) {
+            // An abort that fires while tool handlers run has no
+            // in-flight request to cancel, so `client.abort()` alone
+            // would not stop the next round. Check before every call.
+            if (req.signal?.aborted) {
+                throw abortError();
+            }
+            const chatRequest = {
+                model: req.model,
+                messages,
+                // Build `format` from the single converted object so the
+                // schema can't drift from any prompt-grounding copy.
+                format: convertedSchema,
+                stream: this.stream,
+            };
+            if (tools) {
+                chatRequest.tools = tools;
+            }
+            // `temperature: 0` for deterministic structured output;
+            // `num_ctx` set generously so Ollama doesn't silently
+            // truncate a real multi-KB ingestion prompt (its per-model
+            // default is often ~4096). `maxOutputTokens` → num_predict,
+            // positive only: 0 means "generate nothing"; -1/-2 are Ollama
+            // sentinels we never emit.
+            const options = {
+                temperature: 0,
+                num_ctx: this.numCtx,
+            };
+            if (req.maxOutputTokens !== undefined && req.maxOutputTokens > 0) {
+                options.num_predict = req.maxOutputTokens;
+            }
+            chatRequest.options = options;
+            debugLlmRequest({
+                stageId: debugStageId,
+                model: req.model,
+                maxOutputTokens: req.maxOutputTokens,
+                reasoningEffort: req.reasoningEffort,
+                systemPromptLen: req.systemPrompt.length,
+                userMessageLen: req.userMessage.length,
+                systemPromptHead: req.systemPrompt,
+                userMessageHead: req.userMessage,
+            });
+            let response;
+            try {
+                const raw = await client.chat(chatRequest);
+                response = isAsyncIterable(raw) ? await collectStream(raw) : raw;
+            }
+            catch (err) {
+                // Mid-flight abort: the SDK rejects with an AbortError
+                // when our signal listener called client.abort().
+                // Surface a normalized AbortError (a fresh object, not
+                // the SDK's) so llmStage marks the stage skipped.
+                if (isAbortError(err) || req.signal?.aborted) {
+                    throw abortError();
+                }
+                const classified = classifyOllamaError(err);
+                debugLlmFailure({
+                    stageId: debugStageId,
+                    model: req.model,
+                    errorName: classified.name,
+                    errorMessage: classified.message,
+                    tokenUsage: lastUsage,
+                });
+                throw classified;
+            }
+            lastUsage = mergeUsage(lastUsage, {
+                input: response.prompt_eval_count ?? 0,
+                output: response.eval_count ?? 0,
+            });
+            const toolCalls = response.message.tool_calls ?? [];
+            if (toolCalls.length > 0) {
+                // Echo the assistant tool-call message, then append one
+                // tool-result message per call before looping.
+                messages.push({
+                    role: "assistant",
+                    content: response.message.content,
+                    tool_calls: toolCalls,
+                });
+                for (const call of toolCalls) {
+                    const handler = findFunctionHandler(req.tools, call.function.name);
+                    if (!handler) {
+                        throw new NonRetryableLlmError({
+                            message: `Ollama requested unknown function tool "${call.function.name}".`,
+                        });
+                    }
+                    const handlerResult = await handler.handler(call.function.arguments);
+                    messages.push({
+                        role: "tool",
+                        // Tool results travel as text: strings pass
+                        // through, everything else is JSON-encoded.
+                        content: typeof handlerResult === "string"
+                            ? handlerResult
+                            : JSON.stringify(handlerResult),
+                    });
+                }
+                continue;
+            }
+            const text = response.message.content;
+            if (text === undefined || text === "") {
+                throw new SchemaValidationLlmError({
+                    message: "Ollama chat response carried no assistant text content.",
+                });
+            }
+            const parsed = safeParseJson(text);
+            debugLlmResponse({
+                stageId: debugStageId,
+                outputTextLen: text.length,
+                tokenUsage: lastUsage,
+            });
+            return {
+                output: parsed,
+                tokenUsage: lastUsage,
+                // The Ollama chat response is not request-id-bearing;
+                // `rawResponseId` is optional, so leaving it undefined is
+                // contract-legal. Do not fabricate one.
+                rawResponseId: undefined,
+            };
+        }
+        throw new ToolLoopExhaustedError({
+            message: `Function-tool agent loop exceeded ${this.maxToolRounds.toString()} rounds without a final response.`,
+            rounds: this.maxToolRounds,
+        });
+    }
+    /**
+     * Resolve the SDK client: the injected one, or a freshly imported
+     * `Ollama` instance. Memoized so the dynamic import + construction
+     * runs at most once.
+     *
+     * @returns A promise for the client. The memoized promise is reused
+     *   as-is, including when it rejected.
+     */
+    getClient() {
+        if (this.config.client) {
+            return Promise.resolve(this.config.client);
+        }
+        this.clientPromise ??= this.importAndConstructClient();
+        return this.clientPromise;
+    }
+    /**
+     * Import the `ollama` package (or call the injected `importOllama`)
+     * and construct a client bound to the configured base URL.
+     *
+     * @returns A new `Ollama` client instance.
+     * @throws `Error` with an installation hint when the import fails;
+     *   the original import error is attached as `cause`.
+     */
+    async importAndConstructClient() {
+        const baseUrl = this.config.baseUrl ?? DEFAULT_BASE_URL;
+        const importOllama = this.config.importOllama ??
+            (() => import("ollama"));
+        let mod;
+        try {
+            mod = await importOllama();
+        }
+        catch (err) {
+            throw new Error("OllamaProvider: the optional `ollama` package is not installed. " +
+                "Run `pnpm add ollama` (it is declared as an optional peerDependency) " +
+                "or pass a pre-built `client` via the provider config. " +
+                `Original import error: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
+        }
+        // Per-provider raised-timeout fetch (no global mutation). Falls
+        // back to the SDK default fetch when undici is unavailable or the
+        // caller set requestTimeoutMs to 0.
+        const timeoutFetch = await buildTimeoutFetch(this.requestTimeoutMs, this.config.importUndici);
+        const sdkConfig = {
+            host: baseUrl,
+        };
+        if (timeoutFetch) {
+            sdkConfig.fetch = timeoutFetch;
+        }
+        return new mod.Ollama(sdkConfig);
+    }
+}
+// -- helpers --------------------------------------------------------------
+function abortError() {
+    const e = new Error("The Ollama request was aborted.");
+    e.name = "AbortError";
+    return e;
+}
+function isAbortError(err) {
+    return (typeof err === "object" &&
+        err !== null &&
+        err.name === "AbortError");
+}
+function safeParseJson(raw) {
+    try {
+        return JSON.parse(raw);
+    }
+    catch (err) {
+        throw new SchemaValidationLlmError({
+            message: `Ollama returned malformed JSON in structured-output content: ${err instanceof Error ? err.message : String(err)}`,
+        });
+    }
+}
+function isAsyncIterable(value) {
+    return (typeof value === "object" &&
+        value !== null &&
+        Symbol.asyncIterator in value);
+}
+/**
+ * Consume a streamed `chat()` generation and synthesize a single
+ * `TOllamaChatResponse`: concatenated `message.content`, tool_calls
+ * captured from any chunk that carries them, and the eval counts from
+ * the final (`done: true`) chunk. The synthesized response feeds the
+ * existing one-shot processing path unchanged, so `respond()`'s
+ * contract is preserved.
+ */
+async function collectStream(iterable) {
+    let content = "";
+    let role = "assistant";
+    let toolCalls;
+    let promptEvalCount = 0;
+    let evalCount = 0;
+    for await (const chunk of iterable) {
+        const msg = chunk.message;
+        if (msg) {
+            content += msg.content ?? "";
+            if (msg.role)
+                role = msg.role;
+            // Ollama emits tool_calls complete within a single chunk
+            // (not OpenAI-style per-index deltas), so take the latest
+            // chunk that carries them — concatenating would DUPLICATE
+            // calls. Ingestion is tool-free; only tool-using callers
+            // exercise this path.
+            if (msg.tool_calls && msg.tool_calls.length > 0) {
+                toolCalls = msg.tool_calls;
+            }
+        }
+        // Last-wins: the synthesized response carries the FINAL chunk's
+        // single-round eval counts, NOT a cumulative sum across chunks.
+        // The terminal chunk reports this round's complete terminal
+        // counts, so taking the last value is the correct per-round
+        // figure. `runChatLoop`'s `mergeUsage` then SUMS these per-round
+        // terminal counts across tool-call rounds — summing the chunk
+        // values here instead would double-count within a round.
+        if (chunk.prompt_eval_count !== undefined) {
+            promptEvalCount = chunk.prompt_eval_count;
+        }
+        if (chunk.eval_count !== undefined) {
+            evalCount = chunk.eval_count;
+        }
+    }
+    return {
+        message: { role, content, tool_calls: toolCalls },
+        done: true,
+        prompt_eval_count: promptEvalCount,
+        eval_count: evalCount,
+    };
+}
+function mergeUsage(accumulated, next) {
+    return {
+        input: accumulated.input + next.input,
+        output: accumulated.output + next.output,
+    };
+}
+function translateTools(tools) {
+    return tools.map((tool) => {
+        if (tool.kind === "function") {
+            return {
+                type: "function",
+                function: {
+                    name: tool.name,
+                    description: tool.description,
+                    parameters: typeboxToJsonSchema(tool.parameters),
+                },
+            };
+        }
+        // Hosted-tool kinds (web_search / file_search / mcp) have no
+        // local Ollama equivalent. Fail fast and legibly.
+        throw new NonRetryableLlmError({
+            message: `Tool kind "${tool.kind}" is not supported by the Ollama provider. Only kind "function" (local handler) is supported.`,
+        });
+    });
+}
+function findFunctionHandler(tools, name) {
+    if (!tools)
+        return undefined;
+    for (const tool of tools) {
+        if (tool.kind === "function" && tool.name === name) {
+            return tool;
+        }
+    }
+    return undefined;
+}
+//# sourceMappingURL=provider.js.map