langchain 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/chat_models/bedrock.cjs +25 -4
  2. package/dist/chat_models/bedrock.d.ts +2 -1
  3. package/dist/chat_models/bedrock.js +25 -4
  4. package/dist/chat_models/llama_cpp.cjs +31 -79
  5. package/dist/chat_models/llama_cpp.d.ts +15 -58
  6. package/dist/chat_models/llama_cpp.js +32 -80
  7. package/dist/chat_models/openai.cjs +91 -6
  8. package/dist/chat_models/openai.d.ts +10 -0
  9. package/dist/chat_models/openai.js +91 -6
  10. package/dist/embeddings/hf.cjs +10 -1
  11. package/dist/embeddings/hf.d.ts +4 -2
  12. package/dist/embeddings/hf.js +10 -1
  13. package/dist/embeddings/llama_cpp.cjs +67 -0
  14. package/dist/embeddings/llama_cpp.d.ts +26 -0
  15. package/dist/embeddings/llama_cpp.js +63 -0
  16. package/dist/embeddings/ollama.cjs +7 -1
  17. package/dist/embeddings/ollama.js +7 -1
  18. package/dist/llms/bedrock.cjs +25 -3
  19. package/dist/llms/bedrock.d.ts +2 -1
  20. package/dist/llms/bedrock.js +25 -3
  21. package/dist/llms/hf.cjs +10 -1
  22. package/dist/llms/hf.d.ts +3 -0
  23. package/dist/llms/hf.js +10 -1
  24. package/dist/llms/llama_cpp.cjs +25 -65
  25. package/dist/llms/llama_cpp.d.ts +7 -43
  26. package/dist/llms/llama_cpp.js +25 -65
  27. package/dist/load/import_constants.cjs +1 -0
  28. package/dist/load/import_constants.js +1 -0
  29. package/dist/prompts/few_shot.cjs +162 -1
  30. package/dist/prompts/few_shot.d.ts +90 -2
  31. package/dist/prompts/few_shot.js +160 -0
  32. package/dist/prompts/index.cjs +2 -1
  33. package/dist/prompts/index.d.ts +1 -1
  34. package/dist/prompts/index.js +1 -1
  35. package/dist/retrievers/zep.cjs +26 -3
  36. package/dist/retrievers/zep.d.ts +11 -2
  37. package/dist/retrievers/zep.js +26 -3
  38. package/dist/util/bedrock.d.ts +2 -0
  39. package/dist/util/llama_cpp.cjs +34 -0
  40. package/dist/util/llama_cpp.d.ts +46 -0
  41. package/dist/util/llama_cpp.js +28 -0
  42. package/dist/util/openai-format-fndef.cjs +81 -0
  43. package/dist/util/openai-format-fndef.d.ts +44 -0
  44. package/dist/util/openai-format-fndef.js +77 -0
  45. package/dist/util/openapi.d.ts +2 -2
  46. package/dist/vectorstores/pinecone.cjs +5 -5
  47. package/dist/vectorstores/pinecone.d.ts +2 -2
  48. package/dist/vectorstores/pinecone.js +5 -5
  49. package/embeddings/llama_cpp.cjs +1 -0
  50. package/embeddings/llama_cpp.d.ts +1 -0
  51. package/embeddings/llama_cpp.js +1 -0
  52. package/package.json +13 -5
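Notable in the list: new llama_cpp embeddings files (items 13-15 and 49-51), backed by the `langchain/embeddings/llama_cpp` entrypoint registered in import_constants at the end of this diff. A hedged usage sketch follows; the `LlamaCppEmbeddings` class name and its constructor shape are assumed from the file names and are not shown in the hunks below:

import { LlamaCppEmbeddings } from "langchain/embeddings/llama_cpp";

// Assumes a local model file; LLAMA_PATH mirrors the convention used by the LlamaCpp LLM further down.
const embeddings = new LlamaCppEmbeddings({
  modelPath: process.env.LLAMA_PATH ?? "/models/llama-2-7b.gguf",
});

// embedDocuments/embedQuery are the standard Embeddings methods.
const vectors = await embeddings.embedDocuments(["Hello, world"]);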
@@ -23,6 +23,7 @@ export declare class Bedrock extends LLM implements BaseBedrockInput {
  stopSequences?: string[];
  modelKwargs?: Record<string, unknown>;
  codec: EventStreamCodec;
+ streaming: boolean;
  get lc_secrets(): {
  [key: string]: string;
  } | undefined;
@@ -39,7 +40,7 @@ export declare class Bedrock extends LLM implements BaseBedrockInput {
  Example:
  response = model.call("Tell me a joke.")
  */
- _call(prompt: string, options: this["ParsedCallOptions"]): Promise<string>;
+ _call(prompt: string, options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
  _signedFetch(prompt: string, options: this["ParsedCallOptions"], fields: {
  bedrockMethod: "invoke" | "invoke-with-response-stream";
  endpointHost: string;
@@ -89,6 +89,12 @@ export class Bedrock extends LLM {
  writable: true,
  value: new EventStreamCodec(toUtf8, fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -106,6 +112,7 @@ export class Bedrock extends LLM {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -117,10 +124,23 @@ export class Bedrock extends LLM {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(prompt, options) {
+ async _call(prompt, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(prompt, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.text ?? "";
+ }
  const response = await this._signedFetch(prompt, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -201,7 +221,8 @@ export class Bedrock extends LLM {
  text,
  generationInfo: {},
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -212,7 +233,8 @@ export class Bedrock extends LLM {
  text,
  generationInfo: {},
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
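Net effect of the Bedrock hunks above: a new `streaming` constructor flag, and `_call` now receives the run manager and, when streaming is enabled, consumes `_streamResponseChunks` and returns the concatenated text. A minimal usage sketch; the import path and model id are illustrative, not taken from this diff:

import { Bedrock } from "langchain/llms/bedrock";

const model = new Bedrock({
  model: "anthropic.claude-v2", // provider prefix must be one of: ai21, anthropic, amazon
  region: "us-east-1",
  streaming: true, // new in 0.0.177: _call aggregates chunks from _streamResponseChunks
});

// Tokens are forwarded to the run manager via handleLLMNewToken as they arrive;
// the resolved value is still the full completion text.
const text = await model.call("Tell me a joke.");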
package/dist/llms/hf.cjs CHANGED
@@ -57,6 +57,12 @@ class HuggingFaceInference extends base_js_1.LLM {
  writable: true,
  value: undefined
  });
+ Object.defineProperty(this, "endpointUrl", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: undefined
+ });
  this.model = fields?.model ?? this.model;
  this.temperature = fields?.temperature ?? this.temperature;
  this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -65,6 +71,7 @@ class HuggingFaceInference extends base_js_1.LLM {
  this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
  this.apiKey =
  fields?.apiKey ?? (0, env_js_1.getEnvironmentVariable)("HUGGINGFACEHUB_API_KEY");
+ this.endpointUrl = fields?.endpointUrl;
  if (!this.apiKey) {
  throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
  }
@@ -75,7 +82,9 @@ class HuggingFaceInference extends base_js_1.LLM {
  /** @ignore */
  async _call(prompt, options) {
  const { HfInference } = await HuggingFaceInference.imports();
- const hf = new HfInference(this.apiKey);
+ const hf = this.endpointUrl
+ ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+ : new HfInference(this.apiKey);
  const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
  model: this.model,
  parameters: {
package/dist/llms/hf.d.ts CHANGED
@@ -6,6 +6,8 @@ import { LLM, BaseLLMParams } from "./base.js";
  export interface HFInput {
  /** Model to use */
  model: string;
+ /** Custom inference endpoint URL to use */
+ endpointUrl?: string;
  /** Sampling temperature to use */
  temperature?: number;
  /**
@@ -36,6 +38,7 @@ export declare class HuggingFaceInference extends LLM implements HFInput {
  topK: number | undefined;
  frequencyPenalty: number | undefined;
  apiKey: string | undefined;
+ endpointUrl: string | undefined;
  constructor(fields?: Partial<HFInput> & BaseLLMParams);
  _llmType(): string;
  /** @ignore */
package/dist/llms/hf.js CHANGED
@@ -54,6 +54,12 @@ export class HuggingFaceInference extends LLM {
  writable: true,
  value: undefined
  });
+ Object.defineProperty(this, "endpointUrl", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: undefined
+ });
  this.model = fields?.model ?? this.model;
  this.temperature = fields?.temperature ?? this.temperature;
  this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -62,6 +68,7 @@ export class HuggingFaceInference extends LLM {
  this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
  this.apiKey =
  fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY");
+ this.endpointUrl = fields?.endpointUrl;
  if (!this.apiKey) {
  throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
  }
@@ -72,7 +79,9 @@ export class HuggingFaceInference extends LLM {
  /** @ignore */
  async _call(prompt, options) {
  const { HfInference } = await HuggingFaceInference.imports();
- const hf = new HfInference(this.apiKey);
+ const hf = this.endpointUrl
+ ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+ : new HfInference(this.apiKey);
  const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
  model: this.model,
  parameters: {
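Together, the hf changes add an optional `endpointUrl` that routes requests through `HfInference(...).endpoint(...)` (for dedicated Hugging Face Inference Endpoints) instead of the default hosted API. A short usage sketch; the endpoint URL is a placeholder:

import { HuggingFaceInference } from "langchain/llms/hf";

const model = new HuggingFaceInference({
  model: "gpt2",
  apiKey: "hf_...", // or set HUGGINGFACEHUB_API_KEY in the environment
  // New in 0.0.177: when set, requests go to this endpoint instead of the hosted Inference API.
  endpointUrl: "https://YOUR-ENDPOINT.endpoints.huggingface.cloud",
});

const text = await model.call("What is the capital of France?");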
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.LlamaCpp = void 0;
- const node_llama_cpp_1 = require("node-llama-cpp");
+ const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
  const base_js_1 = require("./base.cjs");
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
@@ -15,73 +15,31 @@ class LlamaCpp extends base_js_1.LLM {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -105,29 +63,31 @@ class LlamaCpp extends base_js_1.LLM {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs.batchSize;
- this.contextSize = inputs.contextSize;
- this.embedding = inputs.embedding;
- this.f16Kv = inputs.f16Kv;
- this.gpuLayers = inputs.gpuLayers;
- this.logitsAll = inputs.logitsAll;
- this.lowVram = inputs.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs.seed;
- this.useMlock = inputs.useMlock;
- this.useMmap = inputs.useMmap;
- this.vocabOnly = inputs.vocabOnly;
- this._model = new node_llama_cpp_1.LlamaModel(inputs);
- this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
- this._session = new node_llama_cpp_1.LlamaChatSession({ context: this._context });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+ this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
+ this._session = (0, llama_cpp_js_1.createLlamaSession)(this._context);
  }
  _llmType() {
  return "llama2_cpp";
  }
  /** @ignore */
- async _call(prompt, options) {
+ async _call(prompt,
+ // @ts-expect-error - TS6133: 'options' is declared but its value is never read.
+ options) {
  try {
- const completion = await this._session.prompt(prompt, options);
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
@@ -1,40 +1,11 @@
  import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+ import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
  import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js";
  /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
- export interface LlamaCppInputs extends BaseLLMParams {
- /** Prompt processing batch size. */
- batchSize?: number;
- /** Text context size. */
- contextSize?: number;
- /** Embedding mode only. */
- embedding?: boolean;
- /** Use fp16 for KV cache. */
- f16Kv?: boolean;
- /** Number of layers to store in VRAM. */
- gpuLayers?: number;
- /** The llama_eval() call computes all logits, not just the last one. */
- logitsAll?: boolean;
- /** If true, reduce VRAM usage at the cost of performance. */
- lowVram?: boolean;
- /** Path to the model on the filesystem. */
- modelPath: string;
- /** If null, a random seed will be used. */
- seed?: null | number;
- /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
- temperature?: number;
- /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
- topK?: number;
- /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
- topP?: number;
- /** Force system to keep model in RAM. */
- useMlock?: boolean;
- /** Use mmap if possible. */
- useMmap?: boolean;
- /** Only load the vocabulary, no weights. */
- vocabOnly?: boolean;
+ export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {
  }
  export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  /** The maximum number of tokens the response should contain. */
@@ -51,18 +22,11 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
  CallOptions: LlamaCppCallOptions;
  static inputs: LlamaCppInputs;
- batchSize?: number;
- contextSize?: number;
- embedding?: boolean;
- f16Kv?: boolean;
- gpuLayers?: number;
- logitsAll?: boolean;
- lowVram?: boolean;
- seed?: null | number;
- useMlock?: boolean;
- useMmap?: boolean;
- vocabOnly?: boolean;
- modelPath: string;
+ maxTokens?: number;
+ temperature?: number;
+ topK?: number;
+ topP?: number;
+ trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession;
@@ -1,4 +1,4 @@
- import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+ import { createLlamaModel, createLlamaContext, createLlamaSession, } from "../util/llama_cpp.js";
  import { LLM } from "./base.js";
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
@@ -12,73 +12,31 @@ export class LlamaCpp extends LLM {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -102,29 +60,31 @@ export class LlamaCpp extends LLM {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs.batchSize;
- this.contextSize = inputs.contextSize;
- this.embedding = inputs.embedding;
- this.f16Kv = inputs.f16Kv;
- this.gpuLayers = inputs.gpuLayers;
- this.logitsAll = inputs.logitsAll;
- this.lowVram = inputs.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs.seed;
- this.useMlock = inputs.useMlock;
- this.useMmap = inputs.useMmap;
- this.vocabOnly = inputs.vocabOnly;
- this._model = new LlamaModel(inputs);
- this._context = new LlamaContext({ model: this._model });
- this._session = new LlamaChatSession({ context: this._context });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = createLlamaModel(inputs);
+ this._context = createLlamaContext(this._model, inputs);
+ this._session = createLlamaSession(this._context);
  }
  _llmType() {
  return "llama2_cpp";
  }
  /** @ignore */
- async _call(prompt, options) {
+ async _call(prompt,
+ // @ts-expect-error - TS6133: 'options' is declared but its value is never read.
+ options) {
  try {
- const completion = await this._session.prompt(prompt, options);
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
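The llama_cpp refactor above moves model/context/session construction into the shared util/llama_cpp helpers and replaces the low-level node-llama-cpp fields on the class with sampling parameters (`maxTokens`, `temperature`, `topK`, `topP`, `trimWhitespaceSuffix`) that are forwarded to `session.prompt()`. A sketch under the assumption that the `langchain/llms/llama_cpp` entrypoint and a local model file are available:

import { LlamaCpp } from "langchain/llms/llama_cpp";

const model = new LlamaCpp({
  modelPath: process.env.LLAMA_PATH ?? "/models/llama-2-7b-chat.gguf", // still the only required input
  maxTokens: 256,
  temperature: 0.7,
  topK: 40,
  topP: 0.9,
  trimWhitespaceSuffix: true, // trim trailing whitespace from the completion
});

const text = await model.call("Summarize what mmap does in one sentence.");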
@@ -25,6 +25,7 @@ exports.optionalImportEntrypoints = [
  "langchain/embeddings/hf_transformers",
  "langchain/embeddings/googlevertexai",
  "langchain/embeddings/googlepalm",
+ "langchain/embeddings/llama_cpp",
  "langchain/llms/load",
  "langchain/llms/cohere",
  "langchain/llms/hf",
@@ -22,6 +22,7 @@ export const optionalImportEntrypoints = [
  "langchain/embeddings/hf_transformers",
  "langchain/embeddings/googlevertexai",
  "langchain/embeddings/googlepalm",
+ "langchain/embeddings/llama_cpp",
  "langchain/llms/load",
  "langchain/llms/cohere",
  "langchain/llms/hf",