npm - modelfusion - Versions diffs - 0.109.0 → 0.111.0 - Mend

modelfusion 0.109.0 → 0.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (230) hide show

package/model-provider/huggingface/HuggingFaceFacade.d.ts CHANGED Viewed

@@ -1,5 +1,14 @@
+import { PartialBaseUrlPartsApiConfigurationOptions } from "../../core/api/BaseUrlApiConfiguration.js";
+import { HuggingFaceApiConfiguration } from "./HuggingFaceApiConfiguration.js";
 import { HuggingFaceTextEmbeddingModel, HuggingFaceTextEmbeddingModelSettings } from "./HuggingFaceTextEmbeddingModel.js";
 import { HuggingFaceTextGenerationModel, HuggingFaceTextGenerationModelSettings } from "./HuggingFaceTextGenerationModel.js";
+/**
+ * Creates an API configuration for the HuggingFace API.
+ * It calls the API at https://api-inference.huggingface.co/models and uses the `HUGGINGFACE_API_KEY` env variable by default.
+ */
+export declare function Api(settings: PartialBaseUrlPartsApiConfigurationOptions & {
+    apiKey?: string;
+}): HuggingFaceApiConfiguration;
 /**
  * Create a text generation model that calls a Hugging Face Inference API Text Generation Task.
  *

package/model-provider/huggingface/HuggingFaceFacade.js CHANGED Viewed

@@ -1,5 +1,13 @@
+import { HuggingFaceApiConfiguration } from "./HuggingFaceApiConfiguration.js";
 import { HuggingFaceTextEmbeddingModel, } from "./HuggingFaceTextEmbeddingModel.js";
 import { HuggingFaceTextGenerationModel, } from "./HuggingFaceTextGenerationModel.js";
+/**
+ * Creates an API configuration for the HuggingFace API.
+ * It calls the API at https://api-inference.huggingface.co/models and uses the `HUGGINGFACE_API_KEY` env variable by default.
+ */
+export function Api(settings) {
+    return new HuggingFaceApiConfiguration(settings);
+}
 /**
  * Create a text generation model that calls a Hugging Face Inference API Text Generation Task.
  *

package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.cjs CHANGED Viewed

@@ -4,6 +4,7 @@ exports.HuggingFaceTextEmbeddingModel = void 0;
 const zod_1 = require("zod");
 const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
 const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const ZodSchema_js_1 = require("../../core/schema/ZodSchema.cjs");
 const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
 const HuggingFaceApiConfiguration_js_1 = require("./HuggingFaceApiConfiguration.cjs");
 const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
@@ -83,17 +84,24 @@ class HuggingFaceTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
         if (texts.length > this.maxValuesPerCall) {
             throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxValuesPerCall} texts per API call.`);
         }
+        const api = this.settings.api ?? new HuggingFaceApiConfiguration_js_1.HuggingFaceApiConfiguration();
+        const abortSignal = options?.run?.abortSignal;
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
-            retry: this.settings.api?.retry,
-            throttle: this.settings.api?.throttle,
-            call: async () => callHuggingFaceTextGenerationAPI({
-                options: {
-                    useCache: true,
-                    waitForModel: true,
+            retry: api.retry,
+            throttle: api.throttle,
+            call: async () => (0, postToApi_js_1.postJsonToApi)({
+                url: api.assembleUrl(`/${this.settings.model}`),
+                headers: api.headers,
+                body: {
+                    inputs: texts,
+                    options: {
+                        use_cache: this.settings.options?.useCache ?? true,
+                        wait_for_model: this.settings.options?.waitForModel ?? true,
+                    },
                 },
-                ...this.settings,
-                abortSignal: options?.run?.abortSignal,
-                inputs: texts,
+                failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+                successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)((0, ZodSchema_js_1.zodSchema)(huggingFaceTextEmbeddingResponseSchema)),
+                abortSignal,
             }),
         });
     }
@@ -116,21 +124,3 @@ class HuggingFaceTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
 }
 exports.HuggingFaceTextEmbeddingModel = HuggingFaceTextEmbeddingModel;
 const huggingFaceTextEmbeddingResponseSchema = zod_1.z.array(zod_1.z.array(zod_1.z.number()));
-async function callHuggingFaceTextGenerationAPI({ api = new HuggingFaceApiConfiguration_js_1.HuggingFaceApiConfiguration(), abortSignal, model, inputs, options, }) {
-    return (0, postToApi_js_1.postJsonToApi)({
-        url: api.assembleUrl(`/${model}`),
-        headers: api.headers,
-        body: {
-            inputs,
-            options: options
-                ? {
-                    use_cache: options?.useCache,
-                    wait_for_model: options?.waitForModel,
-                }
-                : {},
-        },
-        failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
-        successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceTextEmbeddingResponseSchema),
-        abortSignal,
-    });
-}

package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { z } from "zod";
 import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
 import { createJsonResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
+import { zodSchema } from "../../core/schema/ZodSchema.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
 import { HuggingFaceApiConfiguration } from "./HuggingFaceApiConfiguration.js";
 import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
@@ -80,17 +81,24 @@ export class HuggingFaceTextEmbeddingModel extends AbstractModel {
         if (texts.length > this.maxValuesPerCall) {
             throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxValuesPerCall} texts per API call.`);
         }
+        const api = this.settings.api ?? new HuggingFaceApiConfiguration();
+        const abortSignal = options?.run?.abortSignal;
         return callWithRetryAndThrottle({
-            retry: this.settings.api?.retry,
-            throttle: this.settings.api?.throttle,
-            call: async () => callHuggingFaceTextGenerationAPI({
-                options: {
-                    useCache: true,
-                    waitForModel: true,
+            retry: api.retry,
+            throttle: api.throttle,
+            call: async () => postJsonToApi({
+                url: api.assembleUrl(`/${this.settings.model}`),
+                headers: api.headers,
+                body: {
+                    inputs: texts,
+                    options: {
+                        use_cache: this.settings.options?.useCache ?? true,
+                        wait_for_model: this.settings.options?.waitForModel ?? true,
+                    },
                 },
-                ...this.settings,
-                abortSignal: options?.run?.abortSignal,
-                inputs: texts,
+                failedResponseHandler: failedHuggingFaceCallResponseHandler,
+                successfulResponseHandler: createJsonResponseHandler(zodSchema(huggingFaceTextEmbeddingResponseSchema)),
+                abortSignal,
             }),
         });
     }
@@ -112,21 +120,3 @@ export class HuggingFaceTextEmbeddingModel extends AbstractModel {
     }
 }
 const huggingFaceTextEmbeddingResponseSchema = z.array(z.array(z.number()));
-async function callHuggingFaceTextGenerationAPI({ api = new HuggingFaceApiConfiguration(), abortSignal, model, inputs, options, }) {
-    return postJsonToApi({
-        url: api.assembleUrl(`/${model}`),
-        headers: api.headers,
-        body: {
-            inputs,
-            options: options
-                ? {
-                    use_cache: options?.useCache,
-                    wait_for_model: options?.waitForModel,
-                }
-                : {},
-        },
-        failedResponseHandler: failedHuggingFaceCallResponseHandler,
-        successfulResponseHandler: createJsonResponseHandler(huggingFaceTextEmbeddingResponseSchema),
-        abortSignal,
-    });
-}

package/model-provider/huggingface/HuggingFaceTextGenerationModel.cjs CHANGED Viewed

@@ -4,6 +4,7 @@ exports.HuggingFaceTextGenerationModel = void 0;
 const zod_1 = require("zod");
 const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
 const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const ZodSchema_js_1 = require("../../core/schema/ZodSchema.cjs");
 const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
 const PromptTemplateTextGenerationModel_js_1 = require("../../model-function/generate-text/PromptTemplateTextGenerationModel.cjs");
 const TextGenerationModel_js_1 = require("../../model-function/generate-text/TextGenerationModel.cjs");
@@ -64,30 +65,28 @@ class HuggingFaceTextGenerationModel extends AbstractModel_js_1.AbstractModel {
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: api.retry,
             throttle: api.throttle,
-            call: async () => {
-                return (0, postToApi_js_1.postJsonToApi)({
-                    url: api.assembleUrl(`/${this.settings.model}`),
-                    headers: api.headers,
-                    body: {
-                        inputs: prompt,
-                        top_k: this.settings.topK,
-                        top_p: this.settings.topP,
-                        temperature: this.settings.temperature,
-                        repetition_penalty: this.settings.repetitionPenalty,
-                        max_new_tokens: this.settings.maxGenerationTokens,
-                        max_time: this.settings.maxTime,
-                        num_return_sequences: this.settings.numberOfGenerations,
-                        do_sample: this.settings.doSample,
-                        options: {
-                            use_cache: true,
-                            wait_for_model: true,
-                        },
+            call: async () => (0, postToApi_js_1.postJsonToApi)({
+                url: api.assembleUrl(`/${this.settings.model}`),
+                headers: api.headers,
+                body: {
+                    inputs: prompt,
+                    top_k: this.settings.topK,
+                    top_p: this.settings.topP,
+                    temperature: this.settings.temperature,
+                    repetition_penalty: this.settings.repetitionPenalty,
+                    max_new_tokens: this.settings.maxGenerationTokens,
+                    max_time: this.settings.maxTime,
+                    num_return_sequences: this.settings.numberOfGenerations,
+                    do_sample: this.settings.doSample,
+                    options: {
+                        use_cache: true,
+                        wait_for_model: true,
                     },
-                    failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
-                    successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceTextGenerationResponseSchema),
-                    abortSignal,
-                });
-            },
+                },
+                failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+                successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)((0, ZodSchema_js_1.zodSchema)(huggingFaceTextGenerationResponseSchema)),
+                abortSignal,
+            }),
         });
     }
     get settingsForEvent() {

package/model-provider/huggingface/HuggingFaceTextGenerationModel.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { z } from "zod";
 import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
 import { createJsonResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
+import { zodSchema } from "../../core/schema/ZodSchema.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
 import { PromptTemplateTextGenerationModel } from "../../model-function/generate-text/PromptTemplateTextGenerationModel.js";
 import { textGenerationModelProperties, } from "../../model-function/generate-text/TextGenerationModel.js";
@@ -61,30 +62,28 @@ export class HuggingFaceTextGenerationModel extends AbstractModel {
         return callWithRetryAndThrottle({
             retry: api.retry,
             throttle: api.throttle,
-            call: async () => {
-                return postJsonToApi({
-                    url: api.assembleUrl(`/${this.settings.model}`),
-                    headers: api.headers,
-                    body: {
-                        inputs: prompt,
-                        top_k: this.settings.topK,
-                        top_p: this.settings.topP,
-                        temperature: this.settings.temperature,
-                        repetition_penalty: this.settings.repetitionPenalty,
-                        max_new_tokens: this.settings.maxGenerationTokens,
-                        max_time: this.settings.maxTime,
-                        num_return_sequences: this.settings.numberOfGenerations,
-                        do_sample: this.settings.doSample,
-                        options: {
-                            use_cache: true,
-                            wait_for_model: true,
-                        },
+            call: async () => postJsonToApi({
+                url: api.assembleUrl(`/${this.settings.model}`),
+                headers: api.headers,
+                body: {
+                    inputs: prompt,
+                    top_k: this.settings.topK,
+                    top_p: this.settings.topP,
+                    temperature: this.settings.temperature,
+                    repetition_penalty: this.settings.repetitionPenalty,
+                    max_new_tokens: this.settings.maxGenerationTokens,
+                    max_time: this.settings.maxTime,
+                    num_return_sequences: this.settings.numberOfGenerations,
+                    do_sample: this.settings.doSample,
+                    options: {
+                        use_cache: true,
+                        wait_for_model: true,
                     },
-                    failedResponseHandler: failedHuggingFaceCallResponseHandler,
-                    successfulResponseHandler: createJsonResponseHandler(huggingFaceTextGenerationResponseSchema),
-                    abortSignal,
-                });
-            },
+                },
+                failedResponseHandler: failedHuggingFaceCallResponseHandler,
+                successfulResponseHandler: createJsonResponseHandler(zodSchema(huggingFaceTextGenerationResponseSchema)),
+                abortSignal,
+            }),
         });
     }
     get settingsForEvent() {

package/model-provider/huggingface/index.cjs CHANGED Viewed

@@ -26,10 +26,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
     return result;
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.huggingface = exports.HuggingFaceError = void 0;
+exports.huggingface = void 0;
 __exportStar(require("./HuggingFaceApiConfiguration.cjs"), exports);
-var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
-Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
 exports.huggingface = __importStar(require("./HuggingFaceFacade.cjs"));
 __exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
 __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);

package/model-provider/huggingface/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 export * from "./HuggingFaceApiConfiguration.js";
-export { HuggingFaceError, HuggingFaceErrorData } from "./HuggingFaceError.js";
+export { HuggingFaceErrorData } from "./HuggingFaceError.js";
 export * as huggingface from "./HuggingFaceFacade.js";
 export * from "./HuggingFaceTextEmbeddingModel.js";
 export * from "./HuggingFaceTextGenerationModel.js";

package/model-provider/huggingface/index.js CHANGED Viewed

@@ -1,5 +1,4 @@
 export * from "./HuggingFaceApiConfiguration.js";
-export { HuggingFaceError } from "./HuggingFaceError.js";
 export * as huggingface from "./HuggingFaceFacade.js";
 export * from "./HuggingFaceTextEmbeddingModel.js";
 export * from "./HuggingFaceTextGenerationModel.js";

package/model-provider/llamacpp/LlamaCppApiConfiguration.cjs CHANGED Viewed

@@ -2,13 +2,20 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.LlamaCppApiConfiguration = void 0;
 const BaseUrlApiConfiguration_js_1 = require("../../core/api/BaseUrlApiConfiguration.cjs");
-class LlamaCppApiConfiguration extends BaseUrlApiConfiguration_js_1.BaseUrlApiConfiguration {
-    constructor({ baseUrl = "http://127.0.0.1:8080", retry, throttle, } = {}) {
+/**
+ * Creates an API configuration for the Llama.cpp server.
+ * It calls the API at http://127.0.0.1:8080 by default.
+ */
+class LlamaCppApiConfiguration extends BaseUrlApiConfiguration_js_1.BaseUrlApiConfigurationWithDefaults {
+    constructor(settings = {}) {
         super({
-            baseUrl,
-            headers: {},
-            retry,
-            throttle,
+            ...settings,
+            baseUrlDefaults: {
+                protocol: "http",
+                host: "127.0.0.1",
+                port: "8080",
+                path: "",
+            },
         });
     }
 }

package/model-provider/llamacpp/LlamaCppApiConfiguration.d.ts CHANGED Viewed

@@ -1,10 +1,8 @@
-import { BaseUrlApiConfiguration } from "../../core/api/BaseUrlApiConfiguration.js";
-import { RetryFunction } from "../../core/api/RetryFunction.js";
-import { ThrottleFunction } from "../../core/api/ThrottleFunction.js";
-export declare class LlamaCppApiConfiguration extends BaseUrlApiConfiguration {
-    constructor({ baseUrl, retry, throttle, }?: {
-        baseUrl?: string;
-        retry?: RetryFunction;
-        throttle?: ThrottleFunction;
-    });
+import { BaseUrlApiConfigurationWithDefaults, PartialBaseUrlPartsApiConfigurationOptions } from "../../core/api/BaseUrlApiConfiguration.js";
+/**
+ * Creates an API configuration for the Llama.cpp server.
+ * It calls the API at http://127.0.0.1:8080 by default.
+ */
+export declare class LlamaCppApiConfiguration extends BaseUrlApiConfigurationWithDefaults {
+    constructor(settings?: PartialBaseUrlPartsApiConfigurationOptions);
 }

package/model-provider/llamacpp/LlamaCppApiConfiguration.js CHANGED Viewed

@@ -1,11 +1,18 @@
-import { BaseUrlApiConfiguration } from "../../core/api/BaseUrlApiConfiguration.js";
-export class LlamaCppApiConfiguration extends BaseUrlApiConfiguration {
-    constructor({ baseUrl = "http://127.0.0.1:8080", retry, throttle, } = {}) {
+import { BaseUrlApiConfigurationWithDefaults, } from "../../core/api/BaseUrlApiConfiguration.js";
+/**
+ * Creates an API configuration for the Llama.cpp server.
+ * It calls the API at http://127.0.0.1:8080 by default.
+ */
+export class LlamaCppApiConfiguration extends BaseUrlApiConfigurationWithDefaults {
+    constructor(settings = {}) {
         super({
-            baseUrl,
-            headers: {},
-            retry,
-            throttle,
+            ...settings,
+            baseUrlDefaults: {
+                protocol: "http",
+                host: "127.0.0.1",
+                port: "8080",
+                path: "",
+            },
         });
     }
 }

package/model-provider/llamacpp/LlamaCppCompletionModel.cjs CHANGED Viewed

@@ -245,13 +245,13 @@ const llamaCppTextGenerationResponseSchema = zod_1.z.object({
     tokens_predicted: zod_1.z.number(),
     truncated: zod_1.z.boolean(),
 });
-const llamaCppTextStreamChunkSchema = (0, ZodSchema_js_1.zodSchema)(zod_1.z.discriminatedUnion("stop", [
+const llamaCppTextStreamChunkSchema = zod_1.z.discriminatedUnion("stop", [
     zod_1.z.object({
         content: zod_1.z.string(),
         stop: zod_1.z.literal(false),
     }),
     llamaCppTextGenerationResponseSchema,
-]));
+]);
 async function createLlamaCppFullDeltaIterableQueue(stream) {
     const queue = new AsyncQueue_js_1.AsyncQueue();
     // process the stream asynchronously (no 'await' on purpose):
@@ -262,7 +262,7 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
                 const data = event.data;
                 const eventData = (0, parseJSON_js_1.parseJSON)({
                     text: data,
-                    schema: llamaCppTextStreamChunkSchema,
+                    schema: (0, ZodSchema_js_1.zodSchema)(llamaCppTextStreamChunkSchema),
                 });
                 queue.push({ type: "delta", deltaValue: eventData });
                 if (eventData.stop) {
@@ -287,7 +287,7 @@ exports.LlamaCppCompletionResponseFormat = {
      */
     json: {
         stream: false,
-        handler: (0, postToApi_js_1.createJsonResponseHandler)(llamaCppTextGenerationResponseSchema),
+        handler: (0, postToApi_js_1.createJsonResponseHandler)((0, ZodSchema_js_1.zodSchema)(llamaCppTextGenerationResponseSchema)),
     },
     /**
      * Returns an async iterable over the full deltas (all choices, including full current state at time of event)

package/model-provider/llamacpp/LlamaCppCompletionModel.d.ts CHANGED Viewed

@@ -479,7 +479,130 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     truncated: boolean;
 }>;
 export type LlamaCppTextGenerationResponse = z.infer<typeof llamaCppTextGenerationResponseSchema>;
-declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema.js").ZodSchema<{
+declare const llamaCppTextStreamChunkSchema: z.ZodDiscriminatedUnion<"stop", [z.ZodObject<{
+    content: z.ZodString;
+    stop: z.ZodLiteral<false>;
+}, "strip", z.ZodTypeAny, {
+    stop: false;
+    content: string;
+}, {
+    stop: false;
+    content: string;
+}>, z.ZodObject<{
+    content: z.ZodString;
+    stop: z.ZodLiteral<true>;
+    generation_settings: z.ZodObject<{
+        frequency_penalty: z.ZodNumber;
+        ignore_eos: z.ZodBoolean;
+        logit_bias: z.ZodArray<z.ZodNumber, "many">;
+        mirostat: z.ZodNumber;
+        mirostat_eta: z.ZodNumber;
+        mirostat_tau: z.ZodNumber;
+        model: z.ZodString;
+        n_ctx: z.ZodNumber;
+        n_keep: z.ZodNumber;
+        n_predict: z.ZodNumber;
+        n_probs: z.ZodNumber;
+        penalize_nl: z.ZodBoolean;
+        presence_penalty: z.ZodNumber;
+        repeat_last_n: z.ZodNumber;
+        repeat_penalty: z.ZodNumber;
+        seed: z.ZodNumber;
+        stop: z.ZodArray<z.ZodString, "many">;
+        stream: z.ZodBoolean;
+        temperature: z.ZodOptional<z.ZodNumber>;
+        tfs_z: z.ZodNumber;
+        top_k: z.ZodNumber;
+        top_p: z.ZodNumber;
+        typical_p: z.ZodNumber;
+    }, "strip", z.ZodTypeAny, {
+        model: string;
+        stream: boolean;
+        stop: string[];
+        seed: number;
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+        temperature?: number | undefined;
+    }, {
+        model: string;
+        stream: boolean;
+        stop: string[];
+        seed: number;
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+        temperature?: number | undefined;
+    }>;
+    model: z.ZodString;
+    prompt: z.ZodString;
+    stopped_eos: z.ZodBoolean;
+    stopped_limit: z.ZodBoolean;
+    stopped_word: z.ZodBoolean;
+    stopping_word: z.ZodString;
+    timings: z.ZodObject<{
+        predicted_ms: z.ZodNumber;
+        predicted_n: z.ZodNumber;
+        predicted_per_second: z.ZodNullable<z.ZodNumber>;
+        predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
+        prompt_ms: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
+        prompt_n: z.ZodNumber;
+        prompt_per_second: z.ZodNullable<z.ZodNumber>;
+        prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
+    }, "strip", z.ZodTypeAny, {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
+    }, {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
+    }>;
+    tokens_cached: z.ZodNumber;
+    tokens_evaluated: z.ZodNumber;
+    tokens_predicted: z.ZodNumber;
+    truncated: z.ZodBoolean;
+}, "strip", z.ZodTypeAny, {
     model: string;
     stop: true;
     content: string;
@@ -527,11 +650,56 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
     tokens_evaluated: number;
     tokens_predicted: number;
     truncated: boolean;
-} | {
-    stop: false;
+}, {
+    model: string;
+    stop: true;
     content: string;
-}>;
-export type LlamaCppTextStreamChunk = (typeof llamaCppTextStreamChunkSchema)["_type"];
+    prompt: string;
+    generation_settings: {
+        model: string;
+        stream: boolean;
+        stop: string[];
+        seed: number;
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+        temperature?: number | undefined;
+    };
+    stopped_eos: boolean;
+    stopped_limit: boolean;
+    stopped_word: boolean;
+    stopping_word: string;
+    timings: {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
+    };
+    tokens_cached: number;
+    tokens_evaluated: number;
+    tokens_predicted: number;
+    truncated: boolean;
+}>]>;
+export type LlamaCppTextStreamChunk = z.infer<typeof llamaCppTextStreamChunkSchema>;
 export type LlamaCppCompletionResponseFormatType<T> = {
     stream: boolean;
     handler: ResponseHandler<T>;

package/model-provider/llamacpp/LlamaCppCompletionModel.js CHANGED Viewed

@@ -241,13 +241,13 @@ const llamaCppTextGenerationResponseSchema = z.object({
     tokens_predicted: z.number(),
     truncated: z.boolean(),
 });
-const llamaCppTextStreamChunkSchema = zodSchema(z.discriminatedUnion("stop", [
+const llamaCppTextStreamChunkSchema = z.discriminatedUnion("stop", [
     z.object({
         content: z.string(),
         stop: z.literal(false),
     }),
     llamaCppTextGenerationResponseSchema,
-]));
+]);
 async function createLlamaCppFullDeltaIterableQueue(stream) {
     const queue = new AsyncQueue();
     // process the stream asynchronously (no 'await' on purpose):
@@ -258,7 +258,7 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
                 const data = event.data;
                 const eventData = parseJSON({
                     text: data,
-                    schema: llamaCppTextStreamChunkSchema,
+                    schema: zodSchema(llamaCppTextStreamChunkSchema),
                 });
                 queue.push({ type: "delta", deltaValue: eventData });
                 if (eventData.stop) {
@@ -283,7 +283,7 @@ export const LlamaCppCompletionResponseFormat = {
      */
     json: {
         stream: false,
-        handler: createJsonResponseHandler(llamaCppTextGenerationResponseSchema),
+        handler: createJsonResponseHandler(zodSchema(llamaCppTextGenerationResponseSchema)),
     },
     /**
      * Returns an async iterable over the full deltas (all choices, including full current state at time of event)