modelfusion 0.104.0 → 0.106.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/README.md +8 -10
- package/core/DefaultRun.cjs +0 -4
- package/core/DefaultRun.d.ts +0 -2
- package/core/DefaultRun.js +0 -4
- package/core/ExtensionFunctionEvent.d.ts +11 -0
- package/core/FunctionEvent.d.ts +2 -2
- package/extension/index.cjs +22 -3
- package/extension/index.d.ts +5 -1
- package/extension/index.js +4 -1
- package/index.cjs +0 -3
- package/index.d.ts +0 -3
- package/index.js +0 -3
- package/model-function/Delta.d.ts +1 -2
- package/model-function/executeStreamCall.cjs +6 -4
- package/model-function/executeStreamCall.d.ts +2 -2
- package/model-function/executeStreamCall.js +6 -4
- package/model-function/generate-speech/streamSpeech.cjs +1 -2
- package/model-function/generate-speech/streamSpeech.js +1 -2
- package/model-function/generate-structure/StructureFromTextStreamingModel.cjs +25 -29
- package/model-function/generate-structure/StructureFromTextStreamingModel.d.ts +3 -1
- package/model-function/generate-structure/StructureFromTextStreamingModel.js +25 -29
- package/model-function/generate-structure/StructureGenerationModel.d.ts +2 -0
- package/model-function/generate-structure/jsonStructurePrompt.cjs +42 -6
- package/model-function/generate-structure/jsonStructurePrompt.d.ts +12 -1
- package/model-function/generate-structure/jsonStructurePrompt.js +42 -5
- package/model-function/generate-structure/streamStructure.cjs +7 -8
- package/model-function/generate-structure/streamStructure.d.ts +1 -1
- package/model-function/generate-structure/streamStructure.js +7 -8
- package/model-function/generate-text/PromptTemplateFullTextModel.cjs +35 -0
- package/model-function/generate-text/PromptTemplateFullTextModel.d.ts +41 -0
- package/model-function/generate-text/PromptTemplateFullTextModel.js +31 -0
- package/model-function/generate-text/PromptTemplateTextGenerationModel.d.ts +2 -1
- package/model-function/generate-text/PromptTemplateTextGenerationModel.js +1 -1
- package/model-function/generate-text/PromptTemplateTextStreamingModel.cjs +3 -0
- package/model-function/generate-text/PromptTemplateTextStreamingModel.d.ts +2 -1
- package/model-function/generate-text/PromptTemplateTextStreamingModel.js +3 -0
- package/model-function/generate-text/TextGenerationModel.d.ts +2 -1
- package/model-function/generate-text/index.cjs +1 -0
- package/model-function/generate-text/index.d.ts +1 -0
- package/model-function/generate-text/index.js +1 -0
- package/model-function/generate-text/prompt-template/AlpacaPromptTemplate.cjs +2 -2
- package/model-function/generate-text/prompt-template/AlpacaPromptTemplate.js +1 -1
- package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.cjs +8 -5
- package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.js +7 -4
- package/model-function/generate-text/prompt-template/ChatPrompt.cjs +42 -0
- package/model-function/generate-text/prompt-template/ChatPrompt.d.ts +27 -5
- package/model-function/generate-text/prompt-template/ChatPrompt.js +41 -1
- package/model-function/generate-text/prompt-template/{Content.cjs → ContentPart.cjs} +1 -1
- package/model-function/generate-text/prompt-template/ContentPart.d.ts +30 -0
- package/model-function/generate-text/prompt-template/{Content.js → ContentPart.js} +1 -1
- package/model-function/generate-text/prompt-template/InstructionPrompt.d.ts +3 -2
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.cjs +8 -5
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.d.ts +1 -1
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.js +6 -3
- package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.cjs +8 -4
- package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.js +6 -2
- package/model-function/generate-text/prompt-template/TextPromptTemplate.cjs +8 -4
- package/model-function/generate-text/prompt-template/TextPromptTemplate.js +6 -2
- package/model-function/generate-text/prompt-template/VicunaPromptTemplate.cjs +7 -3
- package/model-function/generate-text/prompt-template/VicunaPromptTemplate.js +6 -2
- package/model-function/generate-text/prompt-template/index.cjs +1 -1
- package/model-function/generate-text/prompt-template/index.d.ts +1 -1
- package/model-function/generate-text/prompt-template/index.js +1 -1
- package/model-function/generate-text/streamText.cjs +27 -28
- package/model-function/generate-text/streamText.d.ts +1 -0
- package/model-function/generate-text/streamText.js +27 -28
- package/model-function/index.cjs +0 -1
- package/model-function/index.d.ts +0 -1
- package/model-function/index.js +0 -1
- package/model-provider/anthropic/AnthropicPromptTemplate.cjs +7 -3
- package/model-provider/anthropic/AnthropicPromptTemplate.js +5 -1
- package/model-provider/anthropic/AnthropicTextGenerationModel.cjs +8 -14
- package/model-provider/anthropic/AnthropicTextGenerationModel.d.ts +11 -2
- package/model-provider/anthropic/AnthropicTextGenerationModel.js +8 -14
- package/model-provider/anthropic/AnthropicTextGenerationModel.test.cjs +44 -0
- package/model-provider/anthropic/AnthropicTextGenerationModel.test.js +42 -0
- package/model-provider/cohere/CohereTextEmbeddingModel.d.ts +3 -3
- package/model-provider/cohere/CohereTextGenerationModel.cjs +6 -44
- package/model-provider/cohere/CohereTextGenerationModel.d.ts +49 -15
- package/model-provider/cohere/CohereTextGenerationModel.js +7 -45
- package/model-provider/cohere/CohereTextGenerationModel.test.cjs +33 -0
- package/model-provider/cohere/CohereTextGenerationModel.test.js +31 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.cjs +1 -2
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.js +1 -2
- package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.cjs +6 -1
- package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.js +6 -1
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs +7 -14
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts +171 -20
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.js +8 -15
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.test.cjs +37 -0
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.test.js +35 -0
- package/model-provider/mistral/MistralChatModel.cjs +30 -104
- package/model-provider/mistral/MistralChatModel.d.ts +47 -14
- package/model-provider/mistral/MistralChatModel.js +30 -104
- package/model-provider/mistral/MistralChatModel.test.cjs +51 -0
- package/model-provider/mistral/MistralChatModel.test.js +49 -0
- package/model-provider/mistral/MistralPromptTemplate.cjs +11 -4
- package/model-provider/mistral/MistralPromptTemplate.js +9 -2
- package/model-provider/mistral/MistralTextEmbeddingModel.d.ts +13 -13
- package/model-provider/ollama/OllamaChatModel.cjs +7 -43
- package/model-provider/ollama/OllamaChatModel.d.ts +67 -14
- package/model-provider/ollama/OllamaChatModel.js +8 -44
- package/model-provider/ollama/OllamaChatModel.test.cjs +27 -0
- package/model-provider/ollama/OllamaChatModel.test.js +25 -0
- package/model-provider/ollama/OllamaChatPromptTemplate.cjs +34 -4
- package/model-provider/ollama/OllamaChatPromptTemplate.js +34 -4
- package/model-provider/ollama/OllamaCompletionModel.cjs +22 -43
- package/model-provider/ollama/OllamaCompletionModel.d.ts +67 -10
- package/model-provider/ollama/OllamaCompletionModel.js +24 -45
- package/model-provider/ollama/OllamaCompletionModel.test.cjs +95 -13
- package/model-provider/ollama/OllamaCompletionModel.test.js +72 -13
- package/model-provider/openai/{chat/AbstractOpenAIChatModel.cjs → AbstractOpenAIChatModel.cjs} +71 -15
- package/model-provider/openai/{chat/AbstractOpenAIChatModel.d.ts → AbstractOpenAIChatModel.d.ts} +273 -19
- package/model-provider/openai/{chat/AbstractOpenAIChatModel.js → AbstractOpenAIChatModel.js} +71 -15
- package/model-provider/openai/{chat/OpenAIChatFunctionCallStructureGenerationModel.cjs → OpenAIChatFunctionCallStructureGenerationModel.cjs} +18 -2
- package/model-provider/openai/{chat/OpenAIChatFunctionCallStructureGenerationModel.d.ts → OpenAIChatFunctionCallStructureGenerationModel.d.ts} +41 -11
- package/model-provider/openai/{chat/OpenAIChatFunctionCallStructureGenerationModel.js → OpenAIChatFunctionCallStructureGenerationModel.js} +18 -2
- package/model-provider/openai/{chat/OpenAIChatMessage.d.ts → OpenAIChatMessage.d.ts} +3 -3
- package/model-provider/openai/{chat/OpenAIChatModel.cjs → OpenAIChatModel.cjs} +5 -5
- package/model-provider/openai/{chat/OpenAIChatModel.d.ts → OpenAIChatModel.d.ts} +12 -12
- package/model-provider/openai/{chat/OpenAIChatModel.js → OpenAIChatModel.js} +5 -5
- package/model-provider/openai/OpenAIChatModel.test.cjs +94 -0
- package/model-provider/openai/OpenAIChatModel.test.js +92 -0
- package/model-provider/openai/OpenAIChatPromptTemplate.cjs +114 -0
- package/model-provider/openai/{chat/OpenAIChatPromptTemplate.d.ts → OpenAIChatPromptTemplate.d.ts} +3 -3
- package/model-provider/openai/OpenAIChatPromptTemplate.js +107 -0
- package/model-provider/openai/OpenAICompletionModel.cjs +32 -84
- package/model-provider/openai/OpenAICompletionModel.d.ts +27 -10
- package/model-provider/openai/OpenAICompletionModel.js +33 -85
- package/model-provider/openai/OpenAICompletionModel.test.cjs +53 -0
- package/model-provider/openai/OpenAICompletionModel.test.js +51 -0
- package/model-provider/openai/OpenAIFacade.cjs +2 -2
- package/model-provider/openai/OpenAIFacade.d.ts +3 -3
- package/model-provider/openai/OpenAIFacade.js +2 -2
- package/model-provider/openai/OpenAITextEmbeddingModel.d.ts +12 -12
- package/model-provider/openai/OpenAITranscriptionModel.d.ts +17 -17
- package/model-provider/openai/TikTokenTokenizer.d.ts +1 -1
- package/model-provider/openai/{chat/countOpenAIChatMessageTokens.cjs → countOpenAIChatMessageTokens.cjs} +2 -2
- package/model-provider/openai/{chat/countOpenAIChatMessageTokens.js → countOpenAIChatMessageTokens.js} +2 -2
- package/model-provider/openai/index.cjs +6 -7
- package/model-provider/openai/index.d.ts +5 -7
- package/model-provider/openai/index.js +5 -6
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.cjs +4 -4
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.d.ts +6 -6
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.js +4 -4
- package/model-provider/stability/StabilityImageGenerationModel.d.ts +5 -5
- package/package.json +13 -24
- package/test/JsonTestServer.cjs +33 -0
- package/test/JsonTestServer.d.ts +7 -0
- package/test/JsonTestServer.js +29 -0
- package/test/StreamingTestServer.cjs +55 -0
- package/test/StreamingTestServer.d.ts +7 -0
- package/test/StreamingTestServer.js +51 -0
- package/test/arrayFromAsync.cjs +13 -0
- package/test/arrayFromAsync.d.ts +1 -0
- package/test/arrayFromAsync.js +9 -0
- package/tool/generate-tool-call/TextGenerationToolCallModel.cjs +1 -1
- package/tool/generate-tool-call/TextGenerationToolCallModel.d.ts +1 -1
- package/tool/generate-tool-call/TextGenerationToolCallModel.js +1 -1
- package/tool/generate-tool-calls-or-text/TextGenerationToolCallsOrGenerateTextModel.d.ts +1 -11
- package/tool/generate-tool-calls-or-text/ToolCallsOrGenerateTextPromptTemplate.d.ts +12 -0
- package/tool/generate-tool-calls-or-text/ToolCallsOrGenerateTextPromptTemplate.js +1 -0
- package/tool/generate-tool-calls-or-text/index.cjs +1 -0
- package/tool/generate-tool-calls-or-text/index.d.ts +1 -0
- package/tool/generate-tool-calls-or-text/index.js +1 -0
- package/util/index.cjs +0 -1
- package/util/index.d.ts +0 -1
- package/util/index.js +0 -1
- package/util/streaming/createEventSourceResponseHandler.cjs +9 -0
- package/util/streaming/createEventSourceResponseHandler.d.ts +4 -0
- package/util/streaming/createEventSourceResponseHandler.js +5 -0
- package/util/streaming/createJsonStreamResponseHandler.cjs +9 -0
- package/util/streaming/createJsonStreamResponseHandler.d.ts +4 -0
- package/util/streaming/createJsonStreamResponseHandler.js +5 -0
- package/util/streaming/parseEventSourceStreamAsAsyncIterable.cjs +52 -0
- package/util/streaming/parseEventSourceStreamAsAsyncIterable.d.ts +6 -0
- package/util/streaming/parseEventSourceStreamAsAsyncIterable.js +48 -0
- package/util/streaming/parseJsonStreamAsAsyncIterable.cjs +21 -0
- package/util/streaming/parseJsonStreamAsAsyncIterable.d.ts +6 -0
- package/util/streaming/parseJsonStreamAsAsyncIterable.js +17 -0
- package/browser/MediaSourceAppender.cjs +0 -54
- package/browser/MediaSourceAppender.d.ts +0 -11
- package/browser/MediaSourceAppender.js +0 -50
- package/browser/convertAudioChunksToBase64.cjs +0 -8
- package/browser/convertAudioChunksToBase64.d.ts +0 -4
- package/browser/convertAudioChunksToBase64.js +0 -4
- package/browser/convertBlobToBase64.cjs +0 -23
- package/browser/convertBlobToBase64.d.ts +0 -1
- package/browser/convertBlobToBase64.js +0 -19
- package/browser/index.cjs +0 -22
- package/browser/index.d.ts +0 -6
- package/browser/index.js +0 -6
- package/browser/invokeFlow.cjs +0 -23
- package/browser/invokeFlow.d.ts +0 -8
- package/browser/invokeFlow.js +0 -19
- package/browser/readEventSource.cjs +0 -29
- package/browser/readEventSource.d.ts +0 -9
- package/browser/readEventSource.js +0 -25
- package/browser/readEventSourceStream.cjs +0 -35
- package/browser/readEventSourceStream.d.ts +0 -7
- package/browser/readEventSourceStream.js +0 -31
- package/composed-function/index.cjs +0 -19
- package/composed-function/index.d.ts +0 -3
- package/composed-function/index.js +0 -3
- package/composed-function/summarize/SummarizationFunction.d.ts +0 -4
- package/composed-function/summarize/summarizeRecursively.cjs +0 -19
- package/composed-function/summarize/summarizeRecursively.d.ts +0 -11
- package/composed-function/summarize/summarizeRecursively.js +0 -15
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +0 -25
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts +0 -24
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +0 -21
- package/cost/Cost.cjs +0 -38
- package/cost/Cost.d.ts +0 -16
- package/cost/Cost.js +0 -34
- package/cost/CostCalculator.d.ts +0 -8
- package/cost/calculateCost.cjs +0 -28
- package/cost/calculateCost.d.ts +0 -7
- package/cost/calculateCost.js +0 -24
- package/cost/index.cjs +0 -19
- package/cost/index.d.ts +0 -3
- package/cost/index.js +0 -3
- package/guard/GuardEvent.cjs +0 -2
- package/guard/GuardEvent.d.ts +0 -7
- package/guard/fixStructure.cjs +0 -75
- package/guard/fixStructure.d.ts +0 -64
- package/guard/fixStructure.js +0 -71
- package/guard/guard.cjs +0 -79
- package/guard/guard.d.ts +0 -29
- package/guard/guard.js +0 -75
- package/guard/index.cjs +0 -19
- package/guard/index.d.ts +0 -3
- package/guard/index.js +0 -3
- package/model-function/SuccessfulModelCall.cjs +0 -10
- package/model-function/SuccessfulModelCall.d.ts +0 -12
- package/model-function/SuccessfulModelCall.js +0 -6
- package/model-function/generate-text/prompt-template/Content.d.ts +0 -25
- package/model-provider/openai/OpenAICostCalculator.cjs +0 -89
- package/model-provider/openai/OpenAICostCalculator.d.ts +0 -6
- package/model-provider/openai/OpenAICostCalculator.js +0 -85
- package/model-provider/openai/chat/OpenAIChatModel.test.cjs +0 -61
- package/model-provider/openai/chat/OpenAIChatModel.test.js +0 -59
- package/model-provider/openai/chat/OpenAIChatPromptTemplate.cjs +0 -70
- package/model-provider/openai/chat/OpenAIChatPromptTemplate.js +0 -63
- package/model-provider/openai/chat/OpenAIChatStreamIterable.cjs +0 -156
- package/model-provider/openai/chat/OpenAIChatStreamIterable.d.ts +0 -19
- package/model-provider/openai/chat/OpenAIChatStreamIterable.js +0 -152
- package/server/fastify/AssetStorage.cjs +0 -2
- package/server/fastify/AssetStorage.d.ts +0 -17
- package/server/fastify/DefaultFlow.cjs +0 -22
- package/server/fastify/DefaultFlow.d.ts +0 -16
- package/server/fastify/DefaultFlow.js +0 -18
- package/server/fastify/FileSystemAssetStorage.cjs +0 -60
- package/server/fastify/FileSystemAssetStorage.d.ts +0 -19
- package/server/fastify/FileSystemAssetStorage.js +0 -56
- package/server/fastify/FileSystemLogger.cjs +0 -49
- package/server/fastify/FileSystemLogger.d.ts +0 -18
- package/server/fastify/FileSystemLogger.js +0 -45
- package/server/fastify/Flow.cjs +0 -2
- package/server/fastify/Flow.d.ts +0 -9
- package/server/fastify/FlowRun.cjs +0 -71
- package/server/fastify/FlowRun.d.ts +0 -28
- package/server/fastify/FlowRun.js +0 -67
- package/server/fastify/FlowSchema.cjs +0 -2
- package/server/fastify/FlowSchema.d.ts +0 -5
- package/server/fastify/Logger.cjs +0 -2
- package/server/fastify/Logger.d.ts +0 -13
- package/server/fastify/PathProvider.cjs +0 -34
- package/server/fastify/PathProvider.d.ts +0 -12
- package/server/fastify/PathProvider.js +0 -30
- package/server/fastify/index.cjs +0 -24
- package/server/fastify/index.d.ts +0 -8
- package/server/fastify/index.js +0 -8
- package/server/fastify/modelFusionFlowPlugin.cjs +0 -103
- package/server/fastify/modelFusionFlowPlugin.d.ts +0 -12
- package/server/fastify/modelFusionFlowPlugin.js +0 -99
- package/util/getAudioFileExtension.cjs +0 -29
- package/util/getAudioFileExtension.d.ts +0 -1
- package/util/getAudioFileExtension.js +0 -25
- /package/{composed-function/summarize/SummarizationFunction.cjs → core/ExtensionFunctionEvent.cjs} +0 -0
- /package/{composed-function/summarize/SummarizationFunction.js → core/ExtensionFunctionEvent.js} +0 -0
- /package/{cost/CostCalculator.js → model-provider/anthropic/AnthropicTextGenerationModel.test.d.ts} +0 -0
- /package/{guard/GuardEvent.js → model-provider/cohere/CohereTextGenerationModel.test.d.ts} +0 -0
- /package/model-provider/{openai/chat/OpenAIChatModel.test.d.ts → llamacpp/LlamaCppTextGenerationModel.test.d.ts} +0 -0
- /package/{server/fastify/AssetStorage.js → model-provider/mistral/MistralChatModel.test.d.ts} +0 -0
- /package/{server/fastify/Flow.js → model-provider/ollama/OllamaChatModel.test.d.ts} +0 -0
- /package/model-provider/openai/{chat/OpenAIChatMessage.cjs → OpenAIChatMessage.cjs} +0 -0
- /package/model-provider/openai/{chat/OpenAIChatMessage.js → OpenAIChatMessage.js} +0 -0
- /package/{server/fastify/FlowSchema.js → model-provider/openai/OpenAIChatModel.test.d.ts} +0 -0
- /package/{server/fastify/Logger.js → model-provider/openai/OpenAICompletionModel.test.d.ts} +0 -0
- /package/model-provider/openai/{chat/countOpenAIChatMessageTokens.d.ts → countOpenAIChatMessageTokens.d.ts} +0 -0
- /package/{cost/CostCalculator.cjs → tool/generate-tool-calls-or-text/ToolCallsOrGenerateTextPromptTemplate.cjs} +0 -0
package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.cjs

```diff
@@ -1,6 +1,8 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.chat = exports.instruction = void 0;
+const ContentPart_js_1 = require("../../model-function/generate-text/prompt-template/ContentPart.cjs");
+const InvalidPromptError_js_1 = require("../../model-function/generate-text/prompt-template/InvalidPromptError.cjs");
 // default Vicuna 1 system message
 const DEFAULT_SYSTEM_MESSAGE = "A chat between a curious user and an artificial intelligence assistant. " +
     "The assistant gives helpful, detailed, and polite answers to the user's questions.";
@@ -78,9 +80,12 @@ function chat() {
                 break;
             }
             case "assistant": {
-                text += `ASSISTANT: ${content}`;
+                text += `ASSISTANT: ${(0, ContentPart_js_1.validateContentIsString)(content, prompt)}`;
                 break;
             }
+            case "tool": {
+                throw new InvalidPromptError_js_1.InvalidPromptError("Tool messages are not supported.", prompt);
+            }
             default: {
                 const _exhaustiveCheck = role;
                 throw new Error(`Unsupported role: ${_exhaustiveCheck}`);
```
package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.js

```diff
@@ -1,3 +1,5 @@
+import { validateContentIsString } from "../../model-function/generate-text/prompt-template/ContentPart.js";
+import { InvalidPromptError } from "../../model-function/generate-text/prompt-template/InvalidPromptError.js";
 // default Vicuna 1 system message
 const DEFAULT_SYSTEM_MESSAGE = "A chat between a curious user and an artificial intelligence assistant. " +
     "The assistant gives helpful, detailed, and polite answers to the user's questions.";
@@ -74,9 +76,12 @@ export function chat() {
                 break;
             }
             case "assistant": {
-                text += `ASSISTANT: ${content}`;
+                text += `ASSISTANT: ${validateContentIsString(content, prompt)}`;
                 break;
             }
+            case "tool": {
+                throw new InvalidPromptError("Tool messages are not supported.", prompt);
+            }
             default: {
                 const _exhaustiveCheck = role;
                 throw new Error(`Unsupported role: ${_exhaustiveCheck}`);
```
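
Both builds of the BakLLaVA 1 template now guard the assistant branch with `validateContentIsString` and reject the new `tool` role. A minimal sketch of what such a guard can look like; the real implementation ships in `ContentPart.cjs`/`ContentPart.js`, and the part shapes below are assumptions rather than the exported types:

```ts
import { InvalidPromptError } from "./InvalidPromptError.js";

// Assumed multi-modal content parts; the actual definitions live in ContentPart.d.ts.
type TextPart = { type: "text"; text: string };
type ImagePart = { type: "image"; base64Image: string; mimeType?: string };

// Returns the content unchanged when it is a plain string, and throws an
// InvalidPromptError when it contains parts (e.g. images) that a text-only
// prompt template cannot render.
function validateContentIsString(
  content: string | Array<TextPart | ImagePart>,
  prompt: unknown
): string {
  if (typeof content === "string") {
    return content;
  }
  throw new InvalidPromptError(
    "Only string content is supported by this prompt template.",
    prompt
  );
}
```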
package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs

```diff
@@ -5,11 +5,11 @@ const zod_1 = require("zod");
 const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
 const postToApi_js_1 = require("../../core/api/postToApi.cjs");
 const ZodSchema_js_1 = require("../../core/schema/ZodSchema.cjs");
+const parseJSON_js_1 = require("../../core/schema/parseJSON.cjs");
 const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
 const PromptTemplateTextStreamingModel_js_1 = require("../../model-function/generate-text/PromptTemplateTextStreamingModel.cjs");
 const TextGenerationModel_js_1 = require("../../model-function/generate-text/TextGenerationModel.cjs");
 const AsyncQueue_js_1 = require("../../util/AsyncQueue.cjs");
-const parseJSON_js_1 = require("../../core/schema/parseJSON.cjs");
 const parseEventSourceStream_js_1 = require("../../util/streaming/parseEventSourceStream.cjs");
 const LlamaCppApiConfiguration_js_1 = require("./LlamaCppApiConfiguration.cjs");
 const LlamaCppError_js_1 = require("./LlamaCppError.cjs");
@@ -138,6 +138,9 @@ class LlamaCppTextGenerationModel extends AbstractModel_js_1.AbstractModel {
             responseFormat: exports.LlamaCppTextGenerationResponseFormat.deltaIterable,
         });
     }
+    extractTextDelta(delta) {
+        return delta.content;
+    }
     withTextPrompt() {
         return this.withPromptTemplate({
             format(prompt) {
@@ -228,7 +231,7 @@ const llamaCppTextGenerationResponseSchema = zod_1.z.object({
     tokens_predicted: zod_1.z.number(),
     truncated: zod_1.z.boolean(),
 });
-const llamaCppTextStreamingResponseSchema = new ZodSchema_js_1.ZodSchema(zod_1.z.discriminatedUnion("stop", [
+const llamaCppTextStreamChunkSchema = (0, ZodSchema_js_1.zodSchema)(zod_1.z.discriminatedUnion("stop", [
     zod_1.z.object({
         content: zod_1.z.string(),
         stop: zod_1.z.literal(false),
@@ -237,7 +240,6 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema_js_1.ZodSchema(zod_1.z
 ]));
 async function createLlamaCppFullDeltaIterableQueue(stream) {
     const queue = new AsyncQueue_js_1.AsyncQueue();
-    let content = "";
     // process the stream asynchonously (no 'await' on purpose):
     (0, parseEventSourceStream_js_1.parseEventSourceStream)({ stream })
         .then(async (events) => {
@@ -246,18 +248,9 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
             const data = event.data;
             const eventData = (0, parseJSON_js_1.parseJSON)({
                 text: data,
-                schema: llamaCppTextStreamingResponseSchema,
-            });
-            content += eventData.content;
-            queue.push({
-                type: "delta",
-                fullDelta: {
-                    content,
-                    isComplete: eventData.stop,
-                    delta: eventData.content,
-                },
-                valueDelta: eventData.content,
+                schema: llamaCppTextStreamChunkSchema,
             });
+            queue.push({ type: "delta", deltaValue: eventData });
             if (eventData.stop) {
                 queue.close();
             }
```
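
The streaming refactor above removes the accumulated `fullDelta` object: the parser now validates each server-sent chunk against `llamaCppTextStreamChunkSchema` and forwards it unchanged as `deltaValue`, while the new `extractTextDelta` method turns a chunk into its text increment. A condensed sketch of the resulting contract; the `Delta<T>` shape here is an assumption simplified from `model-function/Delta.d.ts` (which changes by +1 -2 in this release):

```ts
// Condensed from the diff: stream chunks are a discriminated union on `stop`.
type LlamaCppTextStreamChunk =
  | { stop: false; content: string }
  | { stop: true; content: string; model: string; prompt: string /* + final stats */ };

// Assumed simplified Delta<T> after this release: the raw chunk travels as
// `deltaValue` instead of a pre-accumulated `fullDelta`.
type Delta<T> = { type: "delta"; deltaValue: T };

// The model extracts the text increment from a raw chunk (see extractTextDelta
// in the diff above); accumulation now happens in streamText itself.
function extractTextDelta(delta: LlamaCppTextStreamChunk): string {
  return delta.content;
}
```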
package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts

```diff
@@ -58,14 +58,14 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
     countPromptTokens(prompt: LlamaCppTextGenerationPrompt): Promise<number>;
     doGenerateTexts(prompt: LlamaCppTextGenerationPrompt, options?: FunctionOptions): Promise<{
         response: {
-            stop: true;
             model: string;
-
+            stop: true;
             content: string;
+            prompt: string;
             generation_settings: {
-                stop: string[];
                 model: string;
                 stream: boolean;
+                stop: string[];
                 seed: number;
                 mirostat: number;
                 frequency_penalty: number;
@@ -116,7 +116,59 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
             totalTokens: number;
         };
     }>;
-    doStreamText(prompt: LlamaCppTextGenerationPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<
+    doStreamText(prompt: LlamaCppTextGenerationPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<{
+        model: string;
+        stop: true;
+        content: string;
+        prompt: string;
+        generation_settings: {
+            model: string;
+            stream: boolean;
+            stop: string[];
+            seed: number;
+            mirostat: number;
+            frequency_penalty: number;
+            ignore_eos: boolean;
+            logit_bias: number[];
+            mirostat_eta: number;
+            mirostat_tau: number;
+            n_ctx: number;
+            n_keep: number;
+            n_predict: number;
+            n_probs: number;
+            penalize_nl: boolean;
+            presence_penalty: number;
+            repeat_last_n: number;
+            repeat_penalty: number;
+            temp: number;
+            tfs_z: number;
+            top_k: number;
+            top_p: number;
+            typical_p: number;
+        };
+        stopped_eos: boolean;
+        stopped_limit: boolean;
+        stopped_word: boolean;
+        stopping_word: string;
+        timings: {
+            predicted_ms: number;
+            predicted_n: number;
+            predicted_per_second: number | null;
+            predicted_per_token_ms: number | null;
+            prompt_ms: number | null;
+            prompt_n: number;
+            prompt_per_second: number | null;
+            prompt_per_token_ms: number | null;
+        };
+        tokens_cached: number;
+        tokens_evaluated: number;
+        tokens_predicted: number;
+        truncated: boolean;
+    } | {
+        stop: false;
+        content: string;
+    }>>>;
+    extractTextDelta(delta: unknown): string;
     withTextPrompt(): PromptTemplateTextStreamingModel<string, LlamaCppTextGenerationPrompt, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
     /**
      * Maps the prompt for a text version of the Llama.cpp prompt template (without image support).
@@ -156,9 +208,9 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     top_p: z.ZodNumber;
     typical_p: z.ZodNumber;
 }, "strip", z.ZodTypeAny, {
-    stop: string[];
     model: string;
     stream: boolean;
+    stop: string[];
     seed: number;
     mirostat: number;
     frequency_penalty: number;
@@ -180,9 +232,9 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     top_p: number;
     typical_p: number;
 }, {
-    stop: string[];
     model: string;
     stream: boolean;
+    stop: string[];
     seed: number;
     mirostat: number;
     frequency_penalty: number;
@@ -243,14 +295,14 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     tokens_predicted: z.ZodNumber;
     truncated: z.ZodBoolean;
 }, "strip", z.ZodTypeAny, {
-    stop: true;
     model: string;
-
+    stop: true;
     content: string;
+    prompt: string;
     generation_settings: {
-        stop: string[];
         model: string;
         stream: boolean;
+        stop: string[];
         seed: number;
         mirostat: number;
         frequency_penalty: number;
@@ -291,14 +343,14 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     tokens_predicted: number;
     truncated: boolean;
 }, {
-    stop: true;
     model: string;
-
+    stop: true;
     content: string;
+    prompt: string;
     generation_settings: {
-        stop: string[];
         model: string;
         stream: boolean;
+        stop: string[];
         seed: number;
         mirostat: number;
         frequency_penalty: number;
@@ -340,11 +392,59 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     truncated: boolean;
 }>;
 export type LlamaCppTextGenerationResponse = z.infer<typeof llamaCppTextGenerationResponseSchema>;
-
+declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema.js").ZodSchema<{
+    model: string;
+    stop: true;
     content: string;
-
-
-
+    prompt: string;
+    generation_settings: {
+        model: string;
+        stream: boolean;
+        stop: string[];
+        seed: number;
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        temp: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+    };
+    stopped_eos: boolean;
+    stopped_limit: boolean;
+    stopped_word: boolean;
+    stopping_word: string;
+    timings: {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+    };
+    tokens_cached: number;
+    tokens_evaluated: number;
+    tokens_predicted: number;
+    truncated: boolean;
+} | {
+    stop: false;
+    content: string;
+}>;
+export type LlamaCppTextStreamChunk = (typeof llamaCppTextStreamChunkSchema)["_type"];
 export type LlamaCppTextGenerationResponseFormatType<T> = {
     stream: boolean;
     handler: ResponseHandler<T>;
@@ -356,14 +456,14 @@ export declare const LlamaCppTextGenerationResponseFormat: {
     json: {
         stream: false;
         handler: ResponseHandler<{
-            stop: true;
             model: string;
-
+            stop: true;
             content: string;
+            prompt: string;
             generation_settings: {
-                stop: string[];
                 model: string;
                 stream: boolean;
+                stop: string[];
                 seed: number;
                 mirostat: number;
                 frequency_penalty: number;
@@ -413,7 +513,58 @@ export declare const LlamaCppTextGenerationResponseFormat: {
         stream: true;
         handler: ({ response }: {
             response: Response;
-        }) => Promise<AsyncIterable<Delta<
+        }) => Promise<AsyncIterable<Delta<{
+            model: string;
+            stop: true;
+            content: string;
+            prompt: string;
+            generation_settings: {
+                model: string;
+                stream: boolean;
+                stop: string[];
+                seed: number;
+                mirostat: number;
+                frequency_penalty: number;
+                ignore_eos: boolean;
+                logit_bias: number[];
+                mirostat_eta: number;
+                mirostat_tau: number;
+                n_ctx: number;
+                n_keep: number;
+                n_predict: number;
+                n_probs: number;
+                penalize_nl: boolean;
+                presence_penalty: number;
+                repeat_last_n: number;
+                repeat_penalty: number;
+                temp: number;
+                tfs_z: number;
+                top_k: number;
+                top_p: number;
+                typical_p: number;
+            };
+            stopped_eos: boolean;
+            stopped_limit: boolean;
+            stopped_word: boolean;
+            stopping_word: string;
+            timings: {
+                predicted_ms: number;
+                predicted_n: number;
+                predicted_per_second: number | null;
+                predicted_per_token_ms: number | null;
+                prompt_ms: number | null;
+                prompt_n: number;
+                prompt_per_second: number | null;
+                prompt_per_token_ms: number | null;
+            };
+            tokens_cached: number;
+            tokens_evaluated: number;
+            tokens_predicted: number;
+            truncated: boolean;
+        } | {
+            stop: false;
+            content: string;
+        }>>>;
     };
 };
 export {};
```
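
The expanded `doStreamText` signature above is just the inferred type of the new `z.discriminatedUnion("stop", ...)` schema from the `.cjs`/`.js` diffs. A condensed sketch of how that discrimination plays out for a consumer; the field list is abbreviated here, and `handleChunk` is a hypothetical helper for illustration:

```ts
import { z } from "zod";

// Abbreviated version of llamaCppTextStreamChunkSchema from the diff.
const chunkSchema = z.discriminatedUnion("stop", [
  z.object({ stop: z.literal(false), content: z.string() }),
  z.object({
    stop: z.literal(true),
    content: z.string(),
    model: z.string(),
    prompt: z.string(),
    // ...generation_settings, timings, and token counts as in the .d.ts above
  }),
]);

type Chunk = z.infer<typeof chunkSchema>;

function handleChunk(chunk: Chunk) {
  if (chunk.stop) {
    // Narrowed to the final chunk: model, prompt, and stats are available.
    console.log(`finished (model: ${chunk.model})`);
  } else {
    process.stdout.write(chunk.content);
  }
}
```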
package/model-provider/llamacpp/LlamaCppTextGenerationModel.js

```diff
@@ -1,12 +1,12 @@
 import { z } from "zod";
 import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
 import { createJsonResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
-import { ZodSchema } from "../../core/schema/ZodSchema.js";
+import { zodSchema } from "../../core/schema/ZodSchema.js";
+import { parseJSON } from "../../core/schema/parseJSON.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
 import { PromptTemplateTextStreamingModel } from "../../model-function/generate-text/PromptTemplateTextStreamingModel.js";
 import { textGenerationModelProperties, } from "../../model-function/generate-text/TextGenerationModel.js";
 import { AsyncQueue } from "../../util/AsyncQueue.js";
-import { parseJSON } from "../../core/schema/parseJSON.js";
 import { parseEventSourceStream } from "../../util/streaming/parseEventSourceStream.js";
 import { LlamaCppApiConfiguration } from "./LlamaCppApiConfiguration.js";
 import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
@@ -135,6 +135,9 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
             responseFormat: LlamaCppTextGenerationResponseFormat.deltaIterable,
         });
     }
+    extractTextDelta(delta) {
+        return delta.content;
+    }
     withTextPrompt() {
         return this.withPromptTemplate({
             format(prompt) {
@@ -224,7 +227,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
     tokens_predicted: z.number(),
     truncated: z.boolean(),
 });
-const llamaCppTextStreamingResponseSchema = new ZodSchema(z.discriminatedUnion("stop", [
+const llamaCppTextStreamChunkSchema = zodSchema(z.discriminatedUnion("stop", [
     z.object({
         content: z.string(),
         stop: z.literal(false),
@@ -233,7 +236,6 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema(z.discriminatedUnion("
 ]));
 async function createLlamaCppFullDeltaIterableQueue(stream) {
     const queue = new AsyncQueue();
-    let content = "";
     // process the stream asynchonously (no 'await' on purpose):
     parseEventSourceStream({ stream })
         .then(async (events) => {
@@ -242,18 +244,9 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
             const data = event.data;
             const eventData = parseJSON({
                 text: data,
-                schema: llamaCppTextStreamingResponseSchema,
-            });
-            content += eventData.content;
-            queue.push({
-                type: "delta",
-                fullDelta: {
-                    content,
-                    isComplete: eventData.stop,
-                    delta: eventData.content,
-                },
-                valueDelta: eventData.content,
+                schema: llamaCppTextStreamChunkSchema,
             });
+            queue.push({ type: "delta", deltaValue: eventData });
             if (eventData.stop) {
                 queue.close();
             }
```
package/model-provider/llamacpp/LlamaCppTextGenerationModel.test.cjs

```diff
@@ -0,0 +1,37 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const streamText_js_1 = require("../../model-function/generate-text/streamText.cjs");
+const StreamingTestServer_js_1 = require("../../test/StreamingTestServer.cjs");
+const arrayFromAsync_js_1 = require("../../test/arrayFromAsync.cjs");
+const LlamaCppTextGenerationModel_js_1 = require("./LlamaCppTextGenerationModel.cjs");
+describe("streamText", () => {
+    const server = new StreamingTestServer_js_1.StreamingTestServer("http://127.0.0.1:8080/completion");
+    server.setupTestEnvironment();
+    it("should return a text stream", async () => {
+        server.responseChunks = [
+            `data: {"content":"Hello","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":", ","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":"world!","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":"","generation_settings":{"frequency_penalty":0.0,"grammar":"",` +
+                `"ignore_eos":false,"logit_bias":[],"min_p":0.05000000074505806,"mirostat":0,` +
+                `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
+                `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
+                `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
+                `"stop":[],"stream":true,"temp":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
+                `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
+                `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
+                `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
+                `"predicted_per_second":65.421198743913,"predicted_per_token_ms":15.285565217391303,` +
+                `"prompt_ms":244.228,"prompt_n":5,"prompt_per_second":20.472673075978186,` +
+                `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
+                `"tokens_predicted":69,"truncated":false}\n\n`,
+        ];
+        const stream = await (0, streamText_js_1.streamText)(new LlamaCppTextGenerationModel_js_1.LlamaCppTextGenerationModel().withTextPrompt(), "hello");
+        // note: space moved to last chunk bc of trimming
+        expect(await (0, arrayFromAsync_js_1.arrayFromAsync)(stream)).toStrictEqual([
+            "Hello",
+            ",",
+            " world!",
+        ]);
+    });
+});
```
package/model-provider/llamacpp/LlamaCppTextGenerationModel.test.js

```diff
@@ -0,0 +1,35 @@
+import { streamText } from "../../model-function/generate-text/streamText.js";
+import { StreamingTestServer } from "../../test/StreamingTestServer.js";
+import { arrayFromAsync } from "../../test/arrayFromAsync.js";
+import { LlamaCppTextGenerationModel } from "./LlamaCppTextGenerationModel.js";
+describe("streamText", () => {
+    const server = new StreamingTestServer("http://127.0.0.1:8080/completion");
+    server.setupTestEnvironment();
+    it("should return a text stream", async () => {
+        server.responseChunks = [
+            `data: {"content":"Hello","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":", ","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":"world!","multimodal":false,"slot_id":0,"stop":false}\n\n`,
+            `data: {"content":"","generation_settings":{"frequency_penalty":0.0,"grammar":"",` +
+                `"ignore_eos":false,"logit_bias":[],"min_p":0.05000000074505806,"mirostat":0,` +
+                `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
+                `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
+                `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
+                `"stop":[],"stream":true,"temp":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
+                `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
+                `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
+                `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
+                `"predicted_per_second":65.421198743913,"predicted_per_token_ms":15.285565217391303,` +
+                `"prompt_ms":244.228,"prompt_n":5,"prompt_per_second":20.472673075978186,` +
+                `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
+                `"tokens_predicted":69,"truncated":false}\n\n`,
+        ];
+        const stream = await streamText(new LlamaCppTextGenerationModel().withTextPrompt(), "hello");
+        // note: space moved to last chunk bc of trimming
+        expect(await arrayFromAsync(stream)).toStrictEqual([
+            "Hello",
+            ",",
+            " world!",
+        ]);
+    });
+});
```
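
Both test files lean on the test helpers added in this release: `StreamingTestServer` fakes the SSE endpoint, and `arrayFromAsync` drains the resulting stream for assertions. A minimal sketch of the latter, assuming it mirrors the proposed `Array.fromAsync` (the shipped `package/test/arrayFromAsync.js` is only a few lines):

```ts
// Collects every value of an async iterable into an array.
export async function arrayFromAsync<T>(
  iterable: AsyncIterable<T>
): Promise<T[]> {
  const result: T[] = [];
  for await (const item of iterable) {
    result.push(item);
  }
  return result;
}
```

This is exactly how the tests assert on the chunk sequence: `expect(await arrayFromAsync(stream)).toStrictEqual([...])`.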