langchain 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/chat_models/bedrock.cjs +25 -4
  2. package/dist/chat_models/bedrock.d.ts +2 -1
  3. package/dist/chat_models/bedrock.js +25 -4
  4. package/dist/chat_models/llama_cpp.cjs +31 -79
  5. package/dist/chat_models/llama_cpp.d.ts +15 -58
  6. package/dist/chat_models/llama_cpp.js +32 -80
  7. package/dist/chat_models/openai.cjs +91 -6
  8. package/dist/chat_models/openai.d.ts +10 -0
  9. package/dist/chat_models/openai.js +91 -6
  10. package/dist/embeddings/hf.cjs +10 -1
  11. package/dist/embeddings/hf.d.ts +4 -2
  12. package/dist/embeddings/hf.js +10 -1
  13. package/dist/embeddings/llama_cpp.cjs +67 -0
  14. package/dist/embeddings/llama_cpp.d.ts +26 -0
  15. package/dist/embeddings/llama_cpp.js +63 -0
  16. package/dist/embeddings/ollama.cjs +7 -1
  17. package/dist/embeddings/ollama.js +7 -1
  18. package/dist/llms/bedrock.cjs +25 -3
  19. package/dist/llms/bedrock.d.ts +2 -1
  20. package/dist/llms/bedrock.js +25 -3
  21. package/dist/llms/hf.cjs +10 -1
  22. package/dist/llms/hf.d.ts +3 -0
  23. package/dist/llms/hf.js +10 -1
  24. package/dist/llms/llama_cpp.cjs +25 -65
  25. package/dist/llms/llama_cpp.d.ts +7 -43
  26. package/dist/llms/llama_cpp.js +25 -65
  27. package/dist/load/import_constants.cjs +1 -0
  28. package/dist/load/import_constants.js +1 -0
  29. package/dist/prompts/few_shot.cjs +162 -1
  30. package/dist/prompts/few_shot.d.ts +90 -2
  31. package/dist/prompts/few_shot.js +160 -0
  32. package/dist/prompts/index.cjs +2 -1
  33. package/dist/prompts/index.d.ts +1 -1
  34. package/dist/prompts/index.js +1 -1
  35. package/dist/retrievers/zep.cjs +26 -3
  36. package/dist/retrievers/zep.d.ts +11 -2
  37. package/dist/retrievers/zep.js +26 -3
  38. package/dist/util/bedrock.d.ts +2 -0
  39. package/dist/util/llama_cpp.cjs +34 -0
  40. package/dist/util/llama_cpp.d.ts +46 -0
  41. package/dist/util/llama_cpp.js +28 -0
  42. package/dist/util/openai-format-fndef.cjs +81 -0
  43. package/dist/util/openai-format-fndef.d.ts +44 -0
  44. package/dist/util/openai-format-fndef.js +77 -0
  45. package/dist/util/openapi.d.ts +2 -2
  46. package/dist/vectorstores/pinecone.cjs +5 -5
  47. package/dist/vectorstores/pinecone.d.ts +2 -2
  48. package/dist/vectorstores/pinecone.js +5 -5
  49. package/embeddings/llama_cpp.cjs +1 -0
  50. package/embeddings/llama_cpp.d.ts +1 -0
  51. package/embeddings/llama_cpp.js +1 -0
  52. package/package.json +13 -5
package/dist/retrievers/zep.js CHANGED
@@ -44,6 +44,12 @@ export class ZepRetriever extends BaseRetriever {
             writable: true,
             value: void 0
         });
+        Object.defineProperty(this, "searchScope", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         Object.defineProperty(this, "searchType", {
             enumerable: true,
             configurable: true,
@@ -64,17 +70,18 @@ export class ZepRetriever extends BaseRetriever {
         });
         this.sessionId = config.sessionId;
         this.topK = config.topK;
+        this.searchScope = config.searchScope;
         this.searchType = config.searchType;
         this.mmrLambda = config.mmrLambda;
         this.filter = config.filter;
         this.zepClientPromise = ZepClient.init(config.url, config.apiKey);
     }
     /**
-     * Converts an array of search results to an array of Document objects.
+     * Converts an array of message search results to an array of Document objects.
      * @param {MemorySearchResult[]} results - The array of search results.
      * @returns {Document[]} An array of Document objects representing the search results.
      */
-    searchResultToDoc(results) {
+    searchMessageResultToDoc(results) {
         return results
             .filter((r) => r.message)
             .map(({ message: { content, metadata: messageMetadata } = {}, dist, ...rest }) => new Document({
@@ -82,6 +89,19 @@ export class ZepRetriever extends BaseRetriever {
             metadata: { score: dist, ...messageMetadata, ...rest },
         }));
     }
+    /**
+     * Converts an array of summary search results to an array of Document objects.
+     * @param {MemorySearchResult[]} results - The array of search results.
+     * @returns {Document[]} An array of Document objects representing the search results.
+     */
+    searchSummaryResultToDoc(results) {
+        return results
+            .filter((r) => r.summary)
+            .map(({ summary: { content, metadata: summaryMetadata } = {}, dist, ...rest }) => new Document({
+            pageContent: content ?? "",
+            metadata: { score: dist, ...summaryMetadata, ...rest },
+        }));
+    }
     /**
      * Retrieves the relevant documents based on the given query.
      * @param {string} query - The query string.
@@ -91,6 +111,7 @@ export class ZepRetriever extends BaseRetriever {
         const payload = {
             text: query,
             metadata: this.filter,
+            search_scope: this.searchScope,
             search_type: this.searchType,
             mmr_lambda: this.mmrLambda,
         };
@@ -101,7 +122,9 @@ export class ZepRetriever extends BaseRetriever {
         }
         try {
            const results = await zepClient.memory.searchMemory(this.sessionId, payload, this.topK);
-            return this.searchResultToDoc(results);
+            return this.searchScope === "summary"
+                ? this.searchSummaryResultToDoc(results)
+                : this.searchMessageResultToDoc(results);
         }
         catch (error) {
             // eslint-disable-next-line no-instanceof/no-instanceof
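Net effect: ZepRetriever now accepts a searchScope option and, when it is "summary", maps summary results rather than message results into Documents. A minimal usage sketch, assuming a running Zep server and an existing session (the URL and session id below are placeholders, not from the diff):

import { ZepRetriever } from "langchain/retrievers/zep";

// Placeholder connection details; searchScope is the field added in this release.
const retriever = new ZepRetriever({
    url: "http://localhost:8000",
    sessionId: "session-123",
    topK: 3,
    searchScope: "summary", // search over session summaries instead of messages
});

const docs = await retriever.getRelevantDocuments("What was decided about pricing?");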
package/dist/util/bedrock.d.ts CHANGED
@@ -33,6 +33,8 @@ export interface BaseBedrockInput {
     stopSequences?: string[];
     /** Additional kwargs to pass to the model. */
     modelKwargs?: Record<string, unknown>;
+    /** Whether or not to stream responses */
+    streaming: boolean;
 }
 type Dict = {
     [key: string]: unknown;
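This adds a streaming flag to the shared Bedrock input type, which the chat model and LLM hunks listed above build on. A hedged sketch of how it would be set (the model id and region values are placeholders, and the constructor shape is assumed from the interface, not shown in this diff):

import { Bedrock } from "langchain/llms/bedrock";

// Placeholder model id and region; `streaming` is the new BaseBedrockInput field.
const model = new Bedrock({
    model: "anthropic.claude-v2",
    region: "us-east-1",
    streaming: true, // emit tokens incrementally instead of one final response
});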
package/dist/util/llama_cpp.cjs ADDED
@@ -0,0 +1,34 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createLlamaSession = exports.createLlamaContext = exports.createLlamaModel = void 0;
+const node_llama_cpp_1 = require("node-llama-cpp");
+function createLlamaModel(inputs) {
+    const options = {
+        gpuLayers: inputs?.gpuLayers,
+        modelPath: inputs.modelPath,
+        useMlock: inputs?.useMlock,
+        useMmap: inputs?.useMmap,
+        vocabOnly: inputs?.vocabOnly,
+    };
+    return new node_llama_cpp_1.LlamaModel(options);
+}
+exports.createLlamaModel = createLlamaModel;
+function createLlamaContext(model, inputs) {
+    const options = {
+        batchSize: inputs?.batchSize,
+        contextSize: inputs?.contextSize,
+        embedding: inputs?.embedding,
+        f16Kv: inputs?.f16Kv,
+        logitsAll: inputs?.logitsAll,
+        model,
+        prependBos: inputs?.prependBos,
+        seed: inputs?.seed,
+        threads: inputs?.threads,
+    };
+    return new node_llama_cpp_1.LlamaContext(options);
+}
+exports.createLlamaContext = createLlamaContext;
+function createLlamaSession(context) {
+    return new node_llama_cpp_1.LlamaChatSession({ context });
+}
+exports.createLlamaSession = createLlamaSession;
package/dist/util/llama_cpp.d.ts ADDED
@@ -0,0 +1,46 @@
+import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+/**
+ * Note that the modelPath is the only required parameter. For testing you
+ * can set this in the environment variable `LLAMA_PATH`.
+ */
+export interface LlamaBaseCppInputs {
+    /** Prompt processing batch size. */
+    batchSize?: number;
+    /** Text context size. */
+    contextSize?: number;
+    /** Embedding mode only. */
+    embedding?: boolean;
+    /** Use fp16 for KV cache. */
+    f16Kv?: boolean;
+    /** Number of layers to store in VRAM. */
+    gpuLayers?: number;
+    /** The llama_eval() call computes all logits, not just the last one. */
+    logitsAll?: boolean;
+    /** */
+    maxTokens?: number;
+    /** Path to the model on the filesystem. */
+    modelPath: string;
+    /** Add the begining of sentence token. */
+    prependBos?: boolean;
+    /** If null, a random seed will be used. */
+    seed?: null | number;
+    /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
+    temperature?: number;
+    /** Number of threads to use to evaluate tokens. */
+    threads?: number;
+    /** Trim whitespace from the end of the generated text Disabled by default. */
+    trimWhitespaceSuffix?: boolean;
+    /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
+    topK?: number;
+    /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
+    topP?: number;
+    /** Force system to keep model in RAM. */
+    useMlock?: boolean;
+    /** Use mmap if possible. */
+    useMmap?: boolean;
+    /** Only load the vocabulary, no weights. */
+    vocabOnly?: boolean;
+}
+export declare function createLlamaModel(inputs: LlamaBaseCppInputs): LlamaModel;
+export declare function createLlamaContext(model: LlamaModel, inputs: LlamaBaseCppInputs): LlamaContext;
+export declare function createLlamaSession(context: LlamaContext): LlamaChatSession;
package/dist/util/llama_cpp.js ADDED
@@ -0,0 +1,28 @@
+import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+export function createLlamaModel(inputs) {
+    const options = {
+        gpuLayers: inputs?.gpuLayers,
+        modelPath: inputs.modelPath,
+        useMlock: inputs?.useMlock,
+        useMmap: inputs?.useMmap,
+        vocabOnly: inputs?.vocabOnly,
+    };
+    return new LlamaModel(options);
+}
+export function createLlamaContext(model, inputs) {
+    const options = {
+        batchSize: inputs?.batchSize,
+        contextSize: inputs?.contextSize,
+        embedding: inputs?.embedding,
+        f16Kv: inputs?.f16Kv,
+        logitsAll: inputs?.logitsAll,
+        model,
+        prependBos: inputs?.prependBos,
+        seed: inputs?.seed,
+        threads: inputs?.threads,
+    };
+    return new LlamaContext(options);
+}
+export function createLlamaSession(context) {
+    return new LlamaChatSession({ context });
+}
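These three helpers wrap node-llama-cpp's model, context, and chat-session objects and are shared by the refactored llama.cpp LLM, chat model, and new embeddings class. A minimal sketch of how they compose, assuming a local GGUF model file (the path below is a placeholder; `LLAMA_PATH` is the test-time env var mentioned in the typings):

import { createLlamaModel, createLlamaContext, createLlamaSession } from "./util/llama_cpp.js"; // internal module shown above

const model = createLlamaModel({ modelPath: process.env.LLAMA_PATH ?? "/models/llama-2-7b.gguf" });
const context = createLlamaContext(model, { contextSize: 2048 });
const session = createLlamaSession(context);
// node-llama-cpp's LlamaChatSession exposes prompt() for a single chat turn.
const answer = await session.prompt("Where do llamas come from?");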
package/dist/util/openai-format-fndef.cjs ADDED
@@ -0,0 +1,81 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.formatFunctionDefinitions = void 0;
+function isAnyOfProp(prop) {
+    return (prop.anyOf !== undefined &&
+        Array.isArray(prop.anyOf));
+}
+// When OpenAI use functions in the prompt, they format them as TypeScript definitions rather than OpenAPI JSON schemas.
+// This function converts the JSON schemas into TypeScript definitions.
+function formatFunctionDefinitions(functions) {
+    const lines = ["namespace functions {", ""];
+    for (const f of functions) {
+        if (f.description) {
+            lines.push(`// ${f.description}`);
+        }
+        if (Object.keys(f.parameters.properties ?? {}).length > 0) {
+            lines.push(`type ${f.name} = (_: {`);
+            lines.push(formatObjectProperties(f.parameters, 0));
+            lines.push("}) => any;");
+        }
+        else {
+            lines.push(`type ${f.name} = () => any;`);
+        }
+        lines.push("");
+    }
+    lines.push("} // namespace functions");
+    return lines.join("\n");
+}
+exports.formatFunctionDefinitions = formatFunctionDefinitions;
+// Format just the properties of an object (not including the surrounding braces)
+function formatObjectProperties(obj, indent) {
+    const lines = [];
+    for (const [name, param] of Object.entries(obj.properties ?? {})) {
+        if (param.description && indent < 2) {
+            lines.push(`// ${param.description}`);
+        }
+        if (obj.required?.includes(name)) {
+            lines.push(`${name}: ${formatType(param, indent)},`);
+        }
+        else {
+            lines.push(`${name}?: ${formatType(param, indent)},`);
+        }
+    }
+    return lines.map((line) => " ".repeat(indent) + line).join("\n");
+}
+// Format a single property type
+function formatType(param, indent) {
+    if (isAnyOfProp(param)) {
+        return param.anyOf.map((v) => formatType(v, indent)).join(" | ");
+    }
+    switch (param.type) {
+        case "string":
+            if (param.enum) {
+                return param.enum.map((v) => `"${v}"`).join(" | ");
+            }
+            return "string";
+        case "number":
+            if (param.enum) {
+                return param.enum.map((v) => `${v}`).join(" | ");
+            }
+            return "number";
+        case "integer":
+            if (param.enum) {
+                return param.enum.map((v) => `${v}`).join(" | ");
+            }
+            return "number";
+        case "boolean":
+            return "boolean";
+        case "null":
+            return "null";
+        case "object":
+            return ["{", formatObjectProperties(param, indent + 2), "}"].join("\n");
+        case "array":
+            if (param.items) {
+                return `${formatType(param.items, indent)}[]`;
+            }
+            return "any[]";
+        default:
+            return "";
+    }
+}
package/dist/util/openai-format-fndef.d.ts ADDED
@@ -0,0 +1,44 @@
+/**
+ * Formatting function definitions for calculating openai function defination token usage.
+ *
+ * https://github.com/hmarr/openai-chat-tokens/blob/main/src/functions.ts
+ * (c) 2023 Harry Marr
+ * MIT license
+ */
+import OpenAI from "openai";
+type OpenAIFunction = OpenAI.Chat.ChatCompletionCreateParams.Function;
+export interface FunctionDef extends Omit<OpenAIFunction, "parameters"> {
+    name: string;
+    description?: string;
+    parameters: ObjectProp;
+}
+interface ObjectProp {
+    type: "object";
+    properties?: {
+        [key: string]: Prop;
+    };
+    required?: string[];
+}
+interface AnyOfProp {
+    anyOf: Prop[];
+}
+type Prop = {
+    description?: string;
+} & (AnyOfProp | ObjectProp | {
+    type: "string";
+    enum?: string[];
+} | {
+    type: "number" | "integer";
+    minimum?: number;
+    maximum?: number;
+    enum?: number[];
+} | {
+    type: "boolean";
+} | {
+    type: "null";
+} | {
+    type: "array";
+    items?: Prop;
+});
+export declare function formatFunctionDefinitions(functions: FunctionDef[]): string;
+export {};
@@ -0,0 +1,77 @@
1
+ function isAnyOfProp(prop) {
2
+ return (prop.anyOf !== undefined &&
3
+ Array.isArray(prop.anyOf));
4
+ }
5
+ // When OpenAI use functions in the prompt, they format them as TypeScript definitions rather than OpenAPI JSON schemas.
6
+ // This function converts the JSON schemas into TypeScript definitions.
7
+ export function formatFunctionDefinitions(functions) {
8
+ const lines = ["namespace functions {", ""];
9
+ for (const f of functions) {
10
+ if (f.description) {
11
+ lines.push(`// ${f.description}`);
12
+ }
13
+ if (Object.keys(f.parameters.properties ?? {}).length > 0) {
14
+ lines.push(`type ${f.name} = (_: {`);
15
+ lines.push(formatObjectProperties(f.parameters, 0));
16
+ lines.push("}) => any;");
17
+ }
18
+ else {
19
+ lines.push(`type ${f.name} = () => any;`);
20
+ }
21
+ lines.push("");
22
+ }
23
+ lines.push("} // namespace functions");
24
+ return lines.join("\n");
25
+ }
26
+ // Format just the properties of an object (not including the surrounding braces)
27
+ function formatObjectProperties(obj, indent) {
28
+ const lines = [];
29
+ for (const [name, param] of Object.entries(obj.properties ?? {})) {
30
+ if (param.description && indent < 2) {
31
+ lines.push(`// ${param.description}`);
32
+ }
33
+ if (obj.required?.includes(name)) {
34
+ lines.push(`${name}: ${formatType(param, indent)},`);
35
+ }
36
+ else {
37
+ lines.push(`${name}?: ${formatType(param, indent)},`);
38
+ }
39
+ }
40
+ return lines.map((line) => " ".repeat(indent) + line).join("\n");
41
+ }
42
+ // Format a single property type
43
+ function formatType(param, indent) {
44
+ if (isAnyOfProp(param)) {
45
+ return param.anyOf.map((v) => formatType(v, indent)).join(" | ");
46
+ }
47
+ switch (param.type) {
48
+ case "string":
49
+ if (param.enum) {
50
+ return param.enum.map((v) => `"${v}"`).join(" | ");
51
+ }
52
+ return "string";
53
+ case "number":
54
+ if (param.enum) {
55
+ return param.enum.map((v) => `${v}`).join(" | ");
56
+ }
57
+ return "number";
58
+ case "integer":
59
+ if (param.enum) {
60
+ return param.enum.map((v) => `${v}`).join(" | ");
61
+ }
62
+ return "number";
63
+ case "boolean":
64
+ return "boolean";
65
+ case "null":
66
+ return "null";
67
+ case "object":
68
+ return ["{", formatObjectProperties(param, indent + 2), "}"].join("\n");
69
+ case "array":
70
+ if (param.items) {
71
+ return `${formatType(param.items, indent)}[]`;
72
+ }
73
+ return "any[]";
74
+ default:
75
+ return "";
76
+ }
77
+ }
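formatFunctionDefinitions renders OpenAI function schemas the way OpenAI injects them into the prompt (as TypeScript declarations), which is what makes token counting for function definitions possible in the openai chat model changes above. A sketch with an invented get_weather schema to show the shape of the output:

import { formatFunctionDefinitions } from "./util/openai-format-fndef.js"; // internal module shown above

const rendered = formatFunctionDefinitions([{
    name: "get_weather",
    description: "Get the current weather",
    parameters: {
        type: "object",
        properties: {
            city: { type: "string", description: "City name" },
            unit: { type: "string", enum: ["celsius", "fahrenheit"] },
        },
        required: ["city"],
    },
}]);
// rendered:
// namespace functions {
//
// // Get the current weather
// type get_weather = (_: {
// // City name
// city: string,
// unit?: "celsius" | "fahrenheit",
// }) => any;
//
// } // namespace functions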
package/dist/util/openapi.d.ts CHANGED
@@ -7,7 +7,7 @@ export declare class OpenAPISpec {
     getParametersStrict(): Record<string, OpenAPIV3.ParameterObject | OpenAPIV3_1.ReferenceObject>;
     getSchemasStrict(): Record<string, OpenAPIV3_1.SchemaObject>;
     getRequestBodiesStrict(): Record<string, OpenAPIV3_1.ReferenceObject | OpenAPIV3_1.RequestBodyObject>;
-    getPathStrict(path: string): Omit<OpenAPIV3.PathItemObject<{}>, "servers" | "parameters"> & {
+    getPathStrict(path: string): Omit<OpenAPIV3.PathItemObject<{}>, "parameters" | "servers"> & {
         servers?: OpenAPIV3_1.ServerObject[] | undefined;
         parameters?: (OpenAPIV3.ParameterObject | OpenAPIV3_1.ReferenceObject)[] | undefined;
     } & {
@@ -59,7 +59,7 @@ export declare class OpenAPISpec {
         deprecated?: boolean | undefined;
         security?: OpenAPIV3.SecurityRequirementObject[] | undefined;
         servers?: OpenAPIV3.ServerObject[] | undefined;
-    }, "callbacks" | "servers" | "parameters" | "responses" | "requestBody"> & {
+    }, "callbacks" | "parameters" | "servers" | "responses" | "requestBody"> & {
         parameters?: (OpenAPIV3.ParameterObject | OpenAPIV3_1.ReferenceObject)[] | undefined;
         requestBody?: OpenAPIV3_1.ReferenceObject | OpenAPIV3_1.RequestBodyObject | undefined;
         responses?: OpenAPIV3_1.ResponsesObject | undefined;
package/dist/vectorstores/pinecone.cjs CHANGED
@@ -34,6 +34,7 @@ const base_js_1 = require("./base.cjs");
 const document_js_1 = require("../document.cjs");
 const async_caller_js_1 = require("../util/async_caller.cjs");
 const math_js_1 = require("../util/math.cjs");
+const chunk_js_1 = require("../util/chunk.cjs");
 /**
  * Class that extends the VectorStore class and provides methods to
  * interact with the Pinecone vector database.
@@ -138,11 +139,10 @@ class PineconeStore extends base_js_1.VectorStore {
         });
         const namespace = this.pineconeIndex.namespace(this.namespace ?? "");
         // Pinecone recommends a limit of 100 vectors per upsert request
-        const chunkSize = 50;
-        for (let i = 0; i < pineconeVectors.length; i += chunkSize) {
-            const chunk = pineconeVectors.slice(i, i + chunkSize);
-            await namespace.upsert(chunk);
-        }
+        const chunkSize = 100;
+        const chunkedVectors = (0, chunk_js_1.chunkArray)(pineconeVectors, chunkSize);
+        const batchRequests = chunkedVectors.map((chunk) => this.caller.call(async () => namespace.upsert(chunk)));
+        await Promise.all(batchRequests);
         return documentIds;
     }
     /**
package/dist/vectorstores/pinecone.d.ts CHANGED
@@ -2,9 +2,9 @@ import { RecordMetadata, Index as PineconeIndex } from "@pinecone-database/pinec
 import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js";
 import { Embeddings } from "../embeddings/base.js";
 import { Document } from "../document.js";
-import { AsyncCaller } from "../util/async_caller.js";
+import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
 type PineconeMetadata = Record<string, any>;
-export interface PineconeLibArgs {
+export interface PineconeLibArgs extends AsyncCallerParams {
     pineconeIndex: PineconeIndex;
     textKey?: string;
     namespace?: string;
package/dist/vectorstores/pinecone.js CHANGED
@@ -5,6 +5,7 @@ import { VectorStore } from "./base.js";
 import { Document } from "../document.js";
 import { AsyncCaller } from "../util/async_caller.js";
 import { maximalMarginalRelevance } from "../util/math.js";
+import { chunkArray } from "../util/chunk.js";
 /**
  * Class that extends the VectorStore class and provides methods to
  * interact with the Pinecone vector database.
@@ -109,11 +110,10 @@ export class PineconeStore extends VectorStore {
         });
         const namespace = this.pineconeIndex.namespace(this.namespace ?? "");
         // Pinecone recommends a limit of 100 vectors per upsert request
-        const chunkSize = 50;
-        for (let i = 0; i < pineconeVectors.length; i += chunkSize) {
-            const chunk = pineconeVectors.slice(i, i + chunkSize);
-            await namespace.upsert(chunk);
-        }
+        const chunkSize = 100;
+        const chunkedVectors = chunkArray(pineconeVectors, chunkSize);
+        const batchRequests = chunkedVectors.map((chunk) => this.caller.call(async () => namespace.upsert(chunk)));
+        await Promise.all(batchRequests);
         return documentIds;
     }
     /**
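The upsert path changes from sequential 50-vector chunks to 100-vector chunks issued in parallel through the store's AsyncCaller, and PineconeLibArgs now extends AsyncCallerParams, so that parallelism is tunable. A hedged sketch (`docs` and `pineconeIndex` are placeholders for prepared documents and an initialized Pinecone index):

import { PineconeStore } from "langchain/vectorstores/pinecone";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";

const store = await PineconeStore.fromDocuments(docs, new OpenAIEmbeddings(), {
    pineconeIndex,
    maxConcurrency: 5, // AsyncCallerParams option, now accepted via PineconeLibArgs
});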
package/embeddings/llama_cpp.cjs ADDED
@@ -0,0 +1 @@
+module.exports = require('../dist/embeddings/llama_cpp.cjs');
package/embeddings/llama_cpp.d.ts ADDED
@@ -0,0 +1 @@
+export * from '../dist/embeddings/llama_cpp.js'
package/embeddings/llama_cpp.js ADDED
@@ -0,0 +1 @@
+export * from '../dist/embeddings/llama_cpp.js'
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.0.176",
+  "version": "0.0.177",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {
@@ -142,6 +142,9 @@
     "embeddings/minimax.cjs",
     "embeddings/minimax.js",
     "embeddings/minimax.d.ts",
+    "embeddings/llama_cpp.cjs",
+    "embeddings/llama_cpp.js",
+    "embeddings/llama_cpp.d.ts",
     "llms/load.cjs",
     "llms/load.js",
     "llms/load.d.ts",
@@ -806,12 +809,12 @@
     "@elastic/elasticsearch": "^8.4.0",
     "@faker-js/faker": "^7.6.0",
     "@getmetal/metal-sdk": "^4.0.0",
-    "@getzep/zep-js": "^0.8.0",
+    "@getzep/zep-js": "^0.9.0",
     "@gomomento/sdk": "^1.44.1",
     "@gomomento/sdk-core": "^1.44.1",
     "@google-ai/generativelanguage": "^0.2.1",
     "@google-cloud/storage": "^6.10.1",
-    "@huggingface/inference": "^1.5.1",
+    "@huggingface/inference": "^2.6.4",
     "@jest/globals": "^29.5.0",
     "@mozilla/readability": "^0.4.4",
     "@notionhq/client": "^2.2.10",
@@ -942,13 +945,13 @@
     "@cloudflare/ai": "^1.0.12",
     "@elastic/elasticsearch": "^8.4.0",
     "@getmetal/metal-sdk": "*",
-    "@getzep/zep-js": "^0.8.0",
+    "@getzep/zep-js": "^0.9.0",
     "@gomomento/sdk": "^1.44.1",
     "@gomomento/sdk-core": "^1.44.1",
     "@gomomento/sdk-web": "^1.44.1",
     "@google-ai/generativelanguage": "^0.2.1",
     "@google-cloud/storage": "^6.10.1",
-    "@huggingface/inference": "^1.5.1",
+    "@huggingface/inference": "^2.6.4",
     "@mozilla/readability": "*",
     "@notionhq/client": "^2.2.10",
     "@opensearch-project/opensearch": "*",
@@ -1577,6 +1580,11 @@
       "import": "./embeddings/minimax.js",
       "require": "./embeddings/minimax.cjs"
     },
+    "./embeddings/llama_cpp": {
+      "types": "./embeddings/llama_cpp.d.ts",
+      "import": "./embeddings/llama_cpp.js",
+      "require": "./embeddings/llama_cpp.cjs"
+    },
     "./llms/load": {
      "types": "./llms/load.d.ts",
      "import": "./llms/load.js",