langchain 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/chat_models/bedrock.cjs +25 -4
  2. package/dist/chat_models/bedrock.d.ts +2 -1
  3. package/dist/chat_models/bedrock.js +25 -4
  4. package/dist/chat_models/llama_cpp.cjs +31 -79
  5. package/dist/chat_models/llama_cpp.d.ts +15 -58
  6. package/dist/chat_models/llama_cpp.js +32 -80
  7. package/dist/chat_models/openai.cjs +91 -6
  8. package/dist/chat_models/openai.d.ts +10 -0
  9. package/dist/chat_models/openai.js +91 -6
  10. package/dist/embeddings/hf.cjs +10 -1
  11. package/dist/embeddings/hf.d.ts +4 -2
  12. package/dist/embeddings/hf.js +10 -1
  13. package/dist/embeddings/llama_cpp.cjs +67 -0
  14. package/dist/embeddings/llama_cpp.d.ts +26 -0
  15. package/dist/embeddings/llama_cpp.js +63 -0
  16. package/dist/embeddings/ollama.cjs +7 -1
  17. package/dist/embeddings/ollama.js +7 -1
  18. package/dist/llms/bedrock.cjs +25 -3
  19. package/dist/llms/bedrock.d.ts +2 -1
  20. package/dist/llms/bedrock.js +25 -3
  21. package/dist/llms/hf.cjs +10 -1
  22. package/dist/llms/hf.d.ts +3 -0
  23. package/dist/llms/hf.js +10 -1
  24. package/dist/llms/llama_cpp.cjs +25 -65
  25. package/dist/llms/llama_cpp.d.ts +7 -43
  26. package/dist/llms/llama_cpp.js +25 -65
  27. package/dist/load/import_constants.cjs +1 -0
  28. package/dist/load/import_constants.js +1 -0
  29. package/dist/prompts/few_shot.cjs +162 -1
  30. package/dist/prompts/few_shot.d.ts +90 -2
  31. package/dist/prompts/few_shot.js +160 -0
  32. package/dist/prompts/index.cjs +2 -1
  33. package/dist/prompts/index.d.ts +1 -1
  34. package/dist/prompts/index.js +1 -1
  35. package/dist/retrievers/zep.cjs +26 -3
  36. package/dist/retrievers/zep.d.ts +11 -2
  37. package/dist/retrievers/zep.js +26 -3
  38. package/dist/util/bedrock.d.ts +2 -0
  39. package/dist/util/llama_cpp.cjs +34 -0
  40. package/dist/util/llama_cpp.d.ts +46 -0
  41. package/dist/util/llama_cpp.js +28 -0
  42. package/dist/util/openai-format-fndef.cjs +81 -0
  43. package/dist/util/openai-format-fndef.d.ts +44 -0
  44. package/dist/util/openai-format-fndef.js +77 -0
  45. package/dist/util/openapi.d.ts +2 -2
  46. package/dist/vectorstores/pinecone.cjs +5 -5
  47. package/dist/vectorstores/pinecone.d.ts +2 -2
  48. package/dist/vectorstores/pinecone.js +5 -5
  49. package/embeddings/llama_cpp.cjs +1 -0
  50. package/embeddings/llama_cpp.d.ts +1 -0
  51. package/embeddings/llama_cpp.js +1 -0
  52. package/package.json +13 -5
package/dist/chat_models/bedrock.cjs

@@ -133,6 +133,12 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  writable: true,
  value: new eventstream_codec_1.EventStreamCodec(util_utf8_1.toUtf8, util_utf8_1.fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -150,6 +156,7 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -161,10 +168,23 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(messages, options) {
+ async _call(messages, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(messages, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.message.content ?? "";
+ }
  const response = await this._signedFetch(messages, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -233,7 +253,6 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  event.headers[":content-type"].value !== "application/json") {
  throw Error(`Failed to get event chunk: got ${chunk}`);
  }
- // console.log(decoder.decode(event.body));
  const body = JSON.parse(decoder.decode(event.body));
  if (body.message) {
  throw new Error(body.message);
@@ -245,7 +264,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  text,
  message: new index_js_1.AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -256,7 +276,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  text,
  message: new index_js_1.AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
package/dist/chat_models/bedrock.d.ts

@@ -33,6 +33,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
  stopSequences?: string[];
  modelKwargs?: Record<string, unknown>;
  codec: EventStreamCodec;
+ streaming: boolean;
  get lc_secrets(): {
  [key: string]: string;
  } | undefined;
@@ -49,7 +50,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
  Example:
  response = model.call("Tell me a joke.")
  */
- _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+ _call(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
  _signedFetch(messages: BaseMessage[], options: this["ParsedCallOptions"], fields: {
  bedrockMethod: "invoke" | "invoke-with-response-stream";
  endpointHost: string;
package/dist/chat_models/bedrock.js

@@ -128,6 +128,12 @@ export class ChatBedrock extends SimpleChatModel {
  writable: true,
  value: new EventStreamCodec(toUtf8, fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -145,6 +151,7 @@ export class ChatBedrock extends SimpleChatModel {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -156,10 +163,23 @@ export class ChatBedrock extends SimpleChatModel {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(messages, options) {
+ async _call(messages, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(messages, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.message.content ?? "";
+ }
  const response = await this._signedFetch(messages, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -228,7 +248,6 @@ export class ChatBedrock extends SimpleChatModel {
  event.headers[":content-type"].value !== "application/json") {
  throw Error(`Failed to get event chunk: got ${chunk}`);
  }
- // console.log(decoder.decode(event.body));
  const body = JSON.parse(decoder.decode(event.body));
  if (body.message) {
  throw new Error(body.message);
@@ -240,7 +259,8 @@ export class ChatBedrock extends SimpleChatModel {
  text,
  message: new AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -251,7 +271,8 @@ export class ChatBedrock extends SimpleChatModel {
  text,
  message: new AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
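
The three Bedrock files above add an opt-in `streaming` flag (default `false`): when set, `_call` now accepts a `runManager`, drains `_streamResponseChunks`, concatenates the chunks, and returns the aggregated message content instead of making a single `invoke` request. A minimal usage sketch follows; the model id, region, and callback wiring are illustrative assumptions, and AWS credentials are assumed to come from the usual environment/default provider chain rather than from anything shown in this diff.

import { ChatBedrock } from "langchain/chat_models/bedrock";
import { HumanMessage } from "langchain/schema";

// Illustrative model id and region; streaming is the new field in 0.0.177.
const model = new ChatBedrock({
  model: "anthropic.claude-v2",
  region: "us-east-1",
  streaming: true, // defaults to false
  callbacks: [
    {
      // Tokens are forwarded through handleLLMNewToken as they arrive.
      handleLLMNewToken(token: string) {
        process.stdout.write(token);
      },
    },
  ],
});

// _call still resolves with the full, concatenated response text.
const response = await model.call([new HumanMessage("Tell me a joke.")]);
console.log(response.content);
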
package/dist/chat_models/llama_cpp.cjs

@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.ChatLlamaCpp = void 0;
  const node_llama_cpp_1 = require("node-llama-cpp");
  const base_js_1 = require("./base.cjs");
+ const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -15,73 +16,31 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -105,47 +64,33 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs?.batchSize;
- this.contextSize = inputs?.contextSize;
- this.embedding = inputs?.embedding;
- this.f16Kv = inputs?.f16Kv;
- this.gpuLayers = inputs?.gpuLayers;
- this.logitsAll = inputs?.logitsAll;
- this.lowVram = inputs?.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs?.seed;
- this.useMlock = inputs?.useMlock;
- this.useMmap = inputs?.useMmap;
- this.vocabOnly = inputs?.vocabOnly;
- this._model = new node_llama_cpp_1.LlamaModel(inputs);
- this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+ this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
  this._session = null;
  }
  _llmType() {
  return "llama2_cpp";
  }
- invocationParams() {
- return {
- batchSize: this.batchSize,
- contextSize: this.contextSize,
- embedding: this.embedding,
- f16Kv: this.f16Kv,
- gpuLayers: this.gpuLayers,
- logitsAll: this.logitsAll,
- lowVram: this.lowVram,
- modelPath: this.modelPath,
- seed: this.seed,
- useMlock: this.useMlock,
- useMmap: this.useMmap,
- vocabOnly: this.vocabOnly,
- };
- }
  /** @ignore */
  _combineLLMOutput() {
  return {};
  }
+ invocationParams() {
+ return {
+ maxTokens: this.maxTokens,
+ temperature: this.temperature,
+ topK: this.topK,
+ topP: this.topP,
+ trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+ };
+ }
  /** @ignore */
- async _call(messages, options) {
+ async _call(messages, _options) {
  let prompt = "";
  if (messages.length > 1) {
  // We need to build a new _session
@@ -159,8 +104,15 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  prompt = messages[0].content;
  }
  try {
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
  // @ts-expect-error - TS2531: Object is possibly 'null'.
- const completion = await this._session.prompt(prompt, options);
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
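
In the chat model above, model and context construction moves out of the constructor and into the new shared `util/llama_cpp` module (items 39-41 in the file list), while the per-prompt sampling options (`maxTokens`, `temperature`, `topK`, `topP`, `trimWhitespaceSuffix`) are gathered into `promptOptions` and passed to `session.prompt`. The util module's body is not part of this excerpt; the following is only a rough sketch of what `createLlamaModel` and `createLlamaContext` plausibly do, inferred from the removed inline constructor calls, not copied from the new file.

import { LlamaModel, LlamaContext } from "node-llama-cpp";

// Sketch only: the real LlamaBaseCppInputs (see util/llama_cpp.d.ts above)
// also carries model/context options such as batchSize, contextSize, gpuLayers, etc.
export interface LlamaBaseCppInputs {
  modelPath: string;
  maxTokens?: number;
  temperature?: number;
  topK?: number;
  topP?: number;
  trimWhitespaceSuffix?: boolean;
}

export function createLlamaModel(inputs: LlamaBaseCppInputs): LlamaModel {
  // Replaces the inline `new LlamaModel(inputs)` in ChatLlamaCpp's constructor.
  return new LlamaModel({ modelPath: inputs.modelPath });
}

export function createLlamaContext(
  model: LlamaModel,
  _inputs: LlamaBaseCppInputs
): LlamaContext {
  // Replaces the inline `new LlamaContext({ model: this._model })`;
  // the real helper presumably also forwards context options from the inputs.
  return new LlamaContext({ model });
}

Centralizing construction this way lets the chat model, the LLM wrapper, and the new llama.cpp embeddings class (all touched in this release) share one code path instead of three copies.
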
package/dist/chat_models/llama_cpp.d.ts

@@ -1,42 +1,13 @@
  import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
  import { SimpleChatModel, BaseChatModelParams } from "./base.js";
+ import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
  import { BaseLanguageModelCallOptions } from "../base_language/index.js";
  import type { BaseMessage } from "../schema/index.js";
  /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
- export interface LlamaCppInputs extends BaseChatModelParams {
- /** Prompt processing batch size. */
- batchSize?: number;
- /** Text context size. */
- contextSize?: number;
- /** Embedding mode only. */
- embedding?: boolean;
- /** Use fp16 for KV cache. */
- f16Kv?: boolean;
- /** Number of layers to store in VRAM. */
- gpuLayers?: number;
- /** The llama_eval() call computes all logits, not just the last one. */
- logitsAll?: boolean;
- /** If true, reduce VRAM usage at the cost of performance. */
- lowVram?: boolean;
- /** Path to the model on the filesystem. */
- modelPath: string;
- /** If null, a random seed will be used. */
- seed?: null | number;
- /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
- temperature?: number;
- /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
- topK?: number;
- /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
- topP?: number;
- /** Force system to keep model in RAM. */
- useMlock?: boolean;
- /** Use mmap if possible. */
- useMmap?: boolean;
- /** Only load the vocabulary, no weights. */
- vocabOnly?: boolean;
+ export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseChatModelParams {
  }
  export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
  /** The maximum number of tokens the response should contain. */
@@ -53,42 +24,28 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
  export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
  CallOptions: LlamaCppCallOptions;
  static inputs: LlamaCppInputs;
- batchSize?: number;
- contextSize?: number;
- embedding?: boolean;
- f16Kv?: boolean;
- gpuLayers?: number;
- logitsAll?: boolean;
- lowVram?: boolean;
- seed?: null | number;
- useMlock?: boolean;
- useMmap?: boolean;
- vocabOnly?: boolean;
- modelPath: string;
+ maxTokens?: number;
+ temperature?: number;
+ topK?: number;
+ topP?: number;
+ trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession | null;
  static lc_name(): string;
  constructor(inputs: LlamaCppInputs);
  _llmType(): string;
- invocationParams(): {
- batchSize: number | undefined;
- contextSize: number | undefined;
- embedding: boolean | undefined;
- f16Kv: boolean | undefined;
- gpuLayers: number | undefined;
- logitsAll: boolean | undefined;
- lowVram: boolean | undefined;
- modelPath: string;
- seed: number | null | undefined;
- useMlock: boolean | undefined;
- useMmap: boolean | undefined;
- vocabOnly: boolean | undefined;
- };
  /** @ignore */
  _combineLLMOutput(): {};
+ invocationParams(): {
+ maxTokens: number | undefined;
+ temperature: number | undefined;
+ topK: number | undefined;
+ topP: number | undefined;
+ trimWhitespaceSuffix: boolean | undefined;
+ };
  /** @ignore */
- _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+ _call(messages: BaseMessage[], _options: this["ParsedCallOptions"]): Promise<string>;
  protected _buildSession(messages: BaseMessage[]): string;
  protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
  }
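
With the declaration changes above, `ChatLlamaCpp` exposes the same construction pattern as the other chat models: a required `modelPath` (the doc comment suggests the `LLAMA_PATH` environment variable for testing) plus the new optional sampling fields, which are then forwarded to every prompt. A hedged usage sketch; the entrypoint path and the parameter values are illustrative rather than taken from this diff.

import { ChatLlamaCpp } from "langchain/chat_models/llama_cpp";
import { HumanMessage } from "langchain/schema";

// modelPath is the only required input; the sampling fields below are the
// ones moved onto the model in this release (values are arbitrary examples).
const model = new ChatLlamaCpp({
  modelPath: process.env.LLAMA_PATH!, // path to a local llama.cpp model file
  maxTokens: 256,
  temperature: 0.7,
  topK: 40,
  topP: 0.9,
  trimWhitespaceSuffix: true,
});

const response = await model.call([new HumanMessage("Tell me a joke.")]);
console.log(response.content);
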
package/dist/chat_models/llama_cpp.js

@@ -1,5 +1,6 @@
- import { LlamaModel, LlamaContext, LlamaChatSession, } from "node-llama-cpp";
+ import { LlamaChatSession, } from "node-llama-cpp";
  import { SimpleChatModel } from "./base.js";
+ import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -12,73 +13,31 @@ export class ChatLlamaCpp extends SimpleChatModel {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -102,47 +61,33 @@ export class ChatLlamaCpp extends SimpleChatModel {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs?.batchSize;
- this.contextSize = inputs?.contextSize;
- this.embedding = inputs?.embedding;
- this.f16Kv = inputs?.f16Kv;
- this.gpuLayers = inputs?.gpuLayers;
- this.logitsAll = inputs?.logitsAll;
- this.lowVram = inputs?.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs?.seed;
- this.useMlock = inputs?.useMlock;
- this.useMmap = inputs?.useMmap;
- this.vocabOnly = inputs?.vocabOnly;
- this._model = new LlamaModel(inputs);
- this._context = new LlamaContext({ model: this._model });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = createLlamaModel(inputs);
+ this._context = createLlamaContext(this._model, inputs);
  this._session = null;
  }
  _llmType() {
  return "llama2_cpp";
  }
- invocationParams() {
- return {
- batchSize: this.batchSize,
- contextSize: this.contextSize,
- embedding: this.embedding,
- f16Kv: this.f16Kv,
- gpuLayers: this.gpuLayers,
- logitsAll: this.logitsAll,
- lowVram: this.lowVram,
- modelPath: this.modelPath,
- seed: this.seed,
- useMlock: this.useMlock,
- useMmap: this.useMmap,
- vocabOnly: this.vocabOnly,
- };
- }
  /** @ignore */
  _combineLLMOutput() {
  return {};
  }
+ invocationParams() {
+ return {
+ maxTokens: this.maxTokens,
+ temperature: this.temperature,
+ topK: this.topK,
+ topP: this.topP,
+ trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+ };
+ }
  /** @ignore */
- async _call(messages, options) {
+ async _call(messages, _options) {
  let prompt = "";
  if (messages.length > 1) {
  // We need to build a new _session
@@ -156,8 +101,15 @@ export class ChatLlamaCpp extends SimpleChatModel {
  prompt = messages[0].content;
  }
  try {
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
  // @ts-expect-error - TS2531: Object is possibly 'null'.
- const completion = await this._session.prompt(prompt, options);
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {