modelfusion 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -12
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +4 -5
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts +2 -2
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +4 -5
- package/model-function/generate-text/streamText.d.ts +1 -1
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.cjs +138 -0
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.d.ts +57 -0
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.js +131 -0
- package/model-provider/huggingface/index.cjs +1 -0
- package/model-provider/huggingface/index.d.ts +1 -0
- package/model-provider/huggingface/index.js +1 -0
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.cjs +2 -1
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.d.ts +8 -7
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.js +2 -1
- package/package.json +1 -1
- package/prompt/chat/trimChatPrompt.cjs +4 -1
- package/prompt/chat/trimChatPrompt.d.ts +3 -2
- package/prompt/chat/trimChatPrompt.js +4 -1
package/README.md
CHANGED
```diff
@@ -366,7 +366,7 @@ const { chunks } = await retrieveTextChunks(
 | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | | | |
 | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | | | |
 | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | | | |
-| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ |
+| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | ✅ | | |
 | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
 | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
 | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
@@ -406,17 +406,15 @@ Use higher level prompts that are mapped into model specific prompt formats.
 
 Examples for the individual functions and objects.
 
-### [
-
-> _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index, \_style example retrieval_, _OpenAI GPT-4_, _cost calculation_
+### [Chatbot (Terminal)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-terminal)
 
-
+> _Terminal app_, _chat_, _llama.cpp_
 
-### [
+### [Chatbot (Next.JS)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-next-js)
 
 > _Next.js app_, _OpenAI GPT-3.5-turbo_, _streaming_, _abort handling_
 
-A
+A web chat with an AI assistant, implemented as a Next.js app.
 
 ### [Image generator (Next.js)](https://github.com/lgrammel/modelfusion/tree/main/examples/image-generator-next-js)
 
@@ -430,20 +428,20 @@ Create an 19th century painting image for your input.
 
 Record audio with push-to-talk and transcribe it using Whisper, implemented as a Next.js app. The app shows a list of the transcriptions.
 
-### [BabyAGI
+### [BabyAGI Agent](https://github.com/lgrammel/modelfusion/tree/main/examples/babyagi-agent)
 
 > _terminal app_, _agent_, _BabyAGI_, _OpenAI text-davinci-003_
 
 TypeScript implementation of the classic [BabyAGI](https://github.com/yoheinakajima/babyagi/blob/main/classic/babyagi.py) by [@yoheinakajima](https://twitter.com/yoheinakajima) without embeddings.
 
-### [Middle school math](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math)
+### [Middle school math agent](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math-agent)
 
 > _terminal app_, _agent_, _tools_, _GPT-4_
 
 Small agent that solves middle school math problems. It uses a calculator tool to solve the problems.
 
-### [
+### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
 
->
+> _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index, \_style example retrieval_, _OpenAI GPT-4_, _cost calculation_
 
-
+Extracts information about a topic from a PDF and writes a tweet in your own style about it.
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs
CHANGED
```diff
@@ -9,17 +9,16 @@ const summarizeRecursively_js_1 = require("./summarizeRecursively.cjs");
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt,
+async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
     const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
     return (0, summarizeRecursively_js_1.summarizeRecursively)({
         split: (0, splitRecursively_js_1.splitRecursivelyAtTokenAsSplitFunction)({
             tokenizer: model.tokenizer,
-            maxChunkSize:
-                reservedCompletionTokens -
-                emptyPromptTokens,
+            maxChunkSize: tokenLimit - emptyPromptTokens,
         }),
         summarize: async (input) => {
-            const { text } = await (0, generateText_js_1.generateText)(model
+            const { text } = await (0, generateText_js_1.generateText)(model, await prompt(input), options);
             return text;
         },
         join,
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts
CHANGED
```diff
@@ -6,7 +6,7 @@ import { Run } from "../../run/Run.js";
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt,
+export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt, tokenLimit, join, }: {
     text: string;
     model: TextGenerationModel<PROMPT, any, any, TextGenerationModelSettings> & {
         contextWindowSize: number;
@@ -16,7 +16,7 @@ export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<
     prompt: (input: {
         text: string;
     }) => Promise<PROMPT>;
-
+    tokenLimit?: number;
     join?: (texts: Array<string>) => string;
 }, options?: {
     functionId?: string;
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js
CHANGED
```diff
@@ -6,17 +6,16 @@ import { summarizeRecursively } from "./summarizeRecursively.js";
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt,
+export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
     const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
     return summarizeRecursively({
         split: splitRecursivelyAtTokenAsSplitFunction({
             tokenizer: model.tokenizer,
-            maxChunkSize:
-                reservedCompletionTokens -
-                emptyPromptTokens,
+            maxChunkSize: tokenLimit - emptyPromptTokens,
         }),
         summarize: async (input) => {
-            const { text } = await generateText(model
+            const { text } = await generateText(model, await prompt(input), options);
             return text;
         },
         join,
```
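For orientation, here is a minimal usage sketch of the changed `summarizeRecursivelyWithTextGenerationAndTokenSplitting` signature. The root `modelfusion` import path, the OpenAI model choice, and the prompt text are illustrative assumptions; only the new optional `tokenLimit` and its default come from this diff.

```ts
import {
  OpenAITextGenerationModel,
  summarizeRecursivelyWithTextGenerationAndTokenSplitting,
} from "modelfusion";

const longText = "..."; // placeholder for a text that exceeds the context window

// tokenLimit is now optional. When omitted, it defaults to
// model.contextWindowSize - (model.maxCompletionTokens ?? model.contextWindowSize / 4),
// i.e. a quarter of the context window is reserved for the completion when the
// model does not declare maxCompletionTokens.
const summary = await summarizeRecursivelyWithTextGenerationAndTokenSplitting({
  model: new OpenAITextGenerationModel({ model: "text-davinci-003" }),
  text: longText,
  prompt: async ({ text }) => `Summarize:\n\n${text}\n\nSummary:`,
  // tokenLimit: 2048, // an explicit override is still possible
});
```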
package/model-function/generate-text/streamText.d.ts
CHANGED
```diff
@@ -1,7 +1,7 @@
 import { FunctionOptions } from "../FunctionOptions.js";
+import { CallMetadata } from "../executeCall.js";
 import { DeltaEvent } from "./DeltaEvent.js";
 import { TextGenerationModel, TextGenerationModelSettings } from "./TextGenerationModel.js";
-import { CallMetadata } from "model-function/executeCall.js";
 export declare function streamText<PROMPT, FULL_DELTA, SETTINGS extends TextGenerationModelSettings>(model: TextGenerationModel<PROMPT, unknown, FULL_DELTA, SETTINGS> & {
     generateDeltaStreamResponse: (prompt: PROMPT, options: FunctionOptions<SETTINGS>) => PromiseLike<AsyncIterable<DeltaEvent<FULL_DELTA>>>;
     extractTextDelta: (fullDelta: FULL_DELTA) => string | undefined;
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.cjs
ADDED
```diff
@@ -0,0 +1,138 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.HuggingFaceTextEmbeddingModel = void 0;
+const zod_1 = __importDefault(require("zod"));
+const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../util/api/postToApi.cjs");
+const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+class HuggingFaceTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "maxTextsPerCall", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextWindowSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "embeddingDimensions", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
+        this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
+        this.embeddingDimensions = settings.embeddingDimensions;
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get apiKey() {
+        const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
+        if (apiKey == null) {
+            throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
+        }
+        return apiKey;
+    }
+    async callAPI(texts, options) {
+        if (texts.length > this.maxTextsPerCall) {
+            throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
+        }
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = Object.assign({
+            apiKey: this.apiKey,
+            options: {
+                useCache: true,
+                waitForModel: true,
+            },
+        }, this.settings, settings, {
+            abortSignal: run?.abortSignal,
+            inputs: texts,
+        });
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callHuggingFaceTextGenerationAPI(callSettings),
+        });
+    }
+    generateEmbeddingResponse(texts, options) {
+        return this.callAPI(texts, options);
+    }
+    extractEmbeddings(response) {
+        return response;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+exports.HuggingFaceTextEmbeddingModel = HuggingFaceTextEmbeddingModel;
+const huggingFaceTextEmbeddingResponseSchema = zod_1.default.array(zod_1.default.array(zod_1.default.number()));
+async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
+    return (0, postToApi_js_1.postJsonToApi)({
+        url: `${baseUrl}/${model}`,
+        headers: {
+            Authorization: `Bearer ${apiKey}`,
+        },
+        body: {
+            inputs,
+            options: options
+                ? {
+                    use_cache: options?.useCache,
+                    wait_for_model: options?.waitForModel,
+                }
+                : undefined,
+        },
+        failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceTextEmbeddingResponseSchema),
+        abortSignal,
+    });
+}
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.d.ts
ADDED
```diff
@@ -0,0 +1,57 @@
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { FunctionOptions } from "../../model-function/FunctionOptions.js";
+import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
+import { RetryFunction } from "../../util/api/RetryFunction.js";
+import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
+export interface HuggingFaceTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
+    model: string;
+    baseUrl?: string;
+    apiKey?: string;
+    maxTextsPerCall?: number;
+    embeddingDimensions?: number;
+    retry?: RetryFunction;
+    throttle?: ThrottleFunction;
+    options?: {
+        useCache?: boolean;
+        waitForModel?: boolean;
+    };
+}
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+export declare class HuggingFaceTextEmbeddingModel extends AbstractModel<HuggingFaceTextEmbeddingModelSettings> implements TextEmbeddingModel<HuggingFaceTextEmbeddingResponse, HuggingFaceTextEmbeddingModelSettings> {
+    constructor(settings: HuggingFaceTextEmbeddingModelSettings);
+    readonly provider = "huggingface";
+    get modelName(): string;
+    readonly maxTextsPerCall: number;
+    readonly contextWindowSize: undefined;
+    readonly embeddingDimensions: number | undefined;
+    readonly tokenizer: undefined;
+    private get apiKey();
+    callAPI(texts: Array<string>, options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<HuggingFaceTextEmbeddingResponse>;
+    readonly countPromptTokens: undefined;
+    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<number[][]>;
+    extractEmbeddings(response: HuggingFaceTextEmbeddingResponse): number[][];
+    withSettings(additionalSettings: Partial<HuggingFaceTextEmbeddingModelSettings>): this;
+}
+declare const huggingFaceTextEmbeddingResponseSchema: z.ZodArray<z.ZodArray<z.ZodNumber, "many">, "many">;
+export type HuggingFaceTextEmbeddingResponse = z.infer<typeof huggingFaceTextEmbeddingResponseSchema>;
+export {};
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.js
ADDED
```diff
@@ -0,0 +1,131 @@
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
+import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
+import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+export class HuggingFaceTextEmbeddingModel extends AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "maxTextsPerCall", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextWindowSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "embeddingDimensions", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
+        this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
+        this.embeddingDimensions = settings.embeddingDimensions;
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get apiKey() {
+        const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
+        if (apiKey == null) {
+            throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
+        }
+        return apiKey;
+    }
+    async callAPI(texts, options) {
+        if (texts.length > this.maxTextsPerCall) {
+            throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
+        }
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = Object.assign({
+            apiKey: this.apiKey,
+            options: {
+                useCache: true,
+                waitForModel: true,
+            },
+        }, this.settings, settings, {
+            abortSignal: run?.abortSignal,
+            inputs: texts,
+        });
+        return callWithRetryAndThrottle({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callHuggingFaceTextGenerationAPI(callSettings),
+        });
+    }
+    generateEmbeddingResponse(texts, options) {
+        return this.callAPI(texts, options);
+    }
+    extractEmbeddings(response) {
+        return response;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+const huggingFaceTextEmbeddingResponseSchema = z.array(z.array(z.number()));
+async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
+    return postJsonToApi({
+        url: `${baseUrl}/${model}`,
+        headers: {
+            Authorization: `Bearer ${apiKey}`,
+        },
+        body: {
+            inputs,
+            options: options
+                ? {
+                    use_cache: options?.useCache,
+                    wait_for_model: options?.waitForModel,
+                }
+                : undefined,
+        },
+        failedResponseHandler: failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: createJsonResponseHandler(huggingFaceTextEmbeddingResponseSchema),
+        abortSignal,
+    });
+}
```
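Taken together, the three new files add a Hugging Face feature-extraction embedding model. The sketch below follows the `@example` in the new source; the root `modelfusion` import path and top-level `await` are assumptions made for brevity.

```ts
import { HuggingFaceTextEmbeddingModel, embedTexts } from "modelfusion";

// Calls the Hugging Face Inference API feature-extraction endpoint.
// The API key is read from HUGGINGFACE_API_KEY when not passed explicitly.
const model = new HuggingFaceTextEmbeddingModel({
  model: "intfloat/e5-base-v2",
  maxTextsPerCall: 5,
});

const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
  "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
]);
```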
package/model-provider/huggingface/index.cjs
CHANGED
```diff
@@ -17,4 +17,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.HuggingFaceError = void 0;
 var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
 Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
+__exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
 __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.cjs
CHANGED
```diff
@@ -35,7 +35,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: void 0
         });
         Object.defineProperty(this, "tokenizer", {
             enumerable: true,
@@ -48,6 +48,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             retry: this.settings.tokenizerSettings?.retry,
             throttle: this.settings.tokenizerSettings?.throttle,
         });
+        this.embeddingDimensions = this.settings.embeddingDimensions;
     }
     get modelName() {
         return null;
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.d.ts
CHANGED
```diff
@@ -4,8 +4,9 @@ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
 import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
 import { RetryFunction } from "../../util/api/RetryFunction.js";
 import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
-export interface
+export interface LlamaCppTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
     baseUrl?: string;
+    embeddingDimensions?: number;
     retry?: RetryFunction;
     throttle?: ThrottleFunction;
     tokenizerSettings?: {
@@ -13,21 +14,21 @@ export interface LlamaCppEmbeddingModelSettings extends TextEmbeddingModelSettin
         throttle?: ThrottleFunction;
     };
 }
-export declare class LlamaCppTextEmbeddingModel extends AbstractModel<
-    constructor(settings?:
+export declare class LlamaCppTextEmbeddingModel extends AbstractModel<LlamaCppTextEmbeddingModelSettings> implements TextEmbeddingModel<LlamaCppTextEmbeddingResponse, LlamaCppTextEmbeddingModelSettings> {
+    constructor(settings?: LlamaCppTextEmbeddingModelSettings);
     readonly provider: "llamacpp";
     get modelName(): null;
     readonly maxTextsPerCall = 1;
     readonly contextWindowSize: undefined;
-    readonly embeddingDimensions: undefined;
+    readonly embeddingDimensions: number | undefined;
     private readonly tokenizer;
     tokenize(text: string): Promise<number[]>;
-    callAPI(texts: Array<string>, options?: FunctionOptions<
-    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<
+    callAPI(texts: Array<string>, options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<LlamaCppTextEmbeddingResponse>;
+    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<{
         embedding: number[];
     }>;
     extractEmbeddings(response: LlamaCppTextEmbeddingResponse): number[][];
-    withSettings(additionalSettings: Partial<
+    withSettings(additionalSettings: Partial<LlamaCppTextEmbeddingModelSettings>): this;
 }
 declare const llamaCppTextEmbeddingResponseSchema: z.ZodObject<{
     embedding: z.ZodArray<z.ZodNumber, "many">;
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.js
CHANGED
```diff
@@ -29,7 +29,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: void 0
         });
         Object.defineProperty(this, "tokenizer", {
             enumerable: true,
@@ -42,6 +42,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
             retry: this.settings.tokenizerSettings?.retry,
             throttle: this.settings.tokenizerSettings?.throttle,
         });
+        this.embeddingDimensions = this.settings.embeddingDimensions;
     }
     get modelName() {
         return null;
```
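The llama.cpp change is small: `embeddingDimensions` can now be declared in the settings and is copied onto the instance (previously the property was never assigned). A sketch, assuming the root `modelfusion` exports and a llama.cpp server running locally; the dimension value depends on the model loaded into that server.

```ts
import { LlamaCppTextEmbeddingModel, embedTexts } from "modelfusion";

// embeddingDimensions is optional metadata describing the embedding vectors
// returned by the llama.cpp server; 4096 is just an illustrative value.
const model = new LlamaCppTextEmbeddingModel({
  embeddingDimensions: 4096,
});

// The llama.cpp embedding endpoint accepts one text per call (maxTextsPerCall = 1).
const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
]);

console.log(model.embeddingDimensions); // 4096 (was always undefined before 0.4.x)
```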
package/package.json
CHANGED
package/prompt/chat/trimChatPrompt.cjs
CHANGED
```diff
@@ -9,8 +9,11 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
-async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
     (0, validateChatPrompt_js_1.validateChatPrompt)(prompt);
     const startsWithSystemMessage = "system" in prompt[0];
     const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
```
package/prompt/chat/trimChatPrompt.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-import { TextGenerationModel } from "model-function/generate-text/TextGenerationModel.js";
+import { TextGenerationModel } from "../../model-function/generate-text/TextGenerationModel.js";
 import { ChatPrompt } from "./ChatPrompt.js";
 /**
  * Keeps only the most recent messages in the prompt, while leaving enough space for the completion.
@@ -7,12 +7,13 @@ import { ChatPrompt } from "./ChatPrompt.js";
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
 export declare function trimChatPrompt({ prompt, model, tokenLimit, }: {
     prompt: ChatPrompt;
     model: TextGenerationModel<ChatPrompt, any, any, any> & {
         contextWindowSize: number;
-        maxCompletionTokens: number;
         countPromptTokens: (prompt: ChatPrompt) => PromiseLike<number>;
     };
     tokenLimit?: number;
```
package/prompt/chat/trimChatPrompt.js
CHANGED
```diff
@@ -6,8 +6,11 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
-export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
     validateChatPrompt(prompt);
     const startsWithSystemMessage = "system" in prompt[0];
     const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
```
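Finally, a sketch of how the `trimChatPrompt` change looks from the caller's side. The root `modelfusion` import path is an assumption, and the chat model is left abstract because constructing and prompt-mapping a concrete model is outside this diff; only the relaxed `maxCompletionTokens` requirement and the new default formula come from the change.

```ts
import { trimChatPrompt, type ChatPrompt } from "modelfusion";

// Assumed: a text generation model that accepts ChatPrompt and exposes
// contextWindowSize and countPromptTokens (e.g. a chat model wrapped with a
// ChatPrompt mapping). maxCompletionTokens is now optional on this type.
declare const model: Parameters<typeof trimChatPrompt>[0]["model"];

const prompt: ChatPrompt = [
  { system: "You are a helpful assistant." },
  { user: "Please summarize our conversation so far." },
];

// Without an explicit tokenLimit, the default is now
//   contextWindowSize - (maxCompletionTokens ?? contextWindowSize / 4)
// so models that do not declare maxCompletionTokens still get a sensible limit.
const trimmed = await trimChatPrompt({ model, prompt });
```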