npm - langchain - Versions diffs - 0.0.196 → 0.0.197-rc.0 - Mend

langchain 0.0.196 → 0.0.197-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

package/LICENSE +21 -0
package/dist/agents/openai/index.cjs +6 -2
package/dist/agents/openai/index.js +6 -2
package/dist/agents/toolkits/conversational_retrieval/token_buffer_memory.d.ts +1 -1
package/dist/base_language/count_tokens.cjs +4 -4
package/dist/base_language/count_tokens.d.ts +1 -1
package/dist/base_language/count_tokens.js +1 -1
package/dist/base_language/index.cjs +3 -3
package/dist/base_language/index.d.ts +1 -1
package/dist/base_language/index.js +1 -1
package/dist/cache/base.cjs +1 -1
package/dist/cache/base.d.ts +1 -1
package/dist/cache/base.js +1 -1
package/dist/cache/index.cjs +2 -2
package/dist/cache/index.d.ts +1 -1
package/dist/cache/index.js +1 -1
package/dist/callbacks/handlers/console.cjs +1 -1
package/dist/callbacks/handlers/console.d.ts +1 -1
package/dist/callbacks/handlers/console.js +1 -1
package/dist/callbacks/handlers/initialize.cjs +1 -1
package/dist/callbacks/handlers/initialize.d.ts +1 -1
package/dist/callbacks/handlers/initialize.js +1 -1
package/dist/callbacks/handlers/log_stream.cjs +1 -1
package/dist/callbacks/handlers/log_stream.d.ts +1 -1
package/dist/callbacks/handlers/log_stream.js +1 -1
package/dist/callbacks/handlers/run_collector.cjs +1 -1
package/dist/callbacks/handlers/run_collector.d.ts +1 -1
package/dist/callbacks/handlers/run_collector.js +1 -1
package/dist/callbacks/handlers/tracer.cjs +1 -1
package/dist/callbacks/handlers/tracer.d.ts +1 -1
package/dist/callbacks/handlers/tracer.js +1 -1
package/dist/callbacks/handlers/tracer_langchain.cjs +1 -1
package/dist/callbacks/handlers/tracer_langchain.d.ts +1 -1
package/dist/callbacks/handlers/tracer_langchain.js +1 -1
package/dist/callbacks/handlers/tracer_langchain_v1.cjs +1 -1
package/dist/callbacks/handlers/tracer_langchain_v1.d.ts +1 -1
package/dist/callbacks/handlers/tracer_langchain_v1.js +1 -1
package/dist/chains/openai_functions/structured_output.cjs +1 -1
package/dist/chains/openai_functions/structured_output.d.ts +1 -1
package/dist/chains/openai_functions/structured_output.js +1 -1
package/dist/chat_models/anthropic.cjs +15 -348
package/dist/chat_models/anthropic.d.ts +1 -156
package/dist/chat_models/anthropic.js +1 -346
package/dist/chat_models/base.cjs +1 -1
package/dist/chat_models/base.d.ts +1 -1
package/dist/chat_models/base.js +1 -1
package/dist/chat_models/bedrock/web.cjs +21 -1
package/dist/chat_models/bedrock/web.d.ts +1 -1
package/dist/chat_models/bedrock/web.js +21 -1
package/dist/document.cjs +2 -2
package/dist/document.d.ts +1 -1
package/dist/document.js +1 -1
package/dist/document_loaders/web/azure_blob_storage_file.d.ts +1 -1
package/dist/document_loaders/web/github.cjs +105 -0
package/dist/document_loaders/web/github.d.ts +26 -0
package/dist/document_loaders/web/github.js +105 -0
package/dist/document_loaders/web/s3.d.ts +1 -1
package/dist/embeddings/base.cjs +1 -1
package/dist/embeddings/base.d.ts +1 -1
package/dist/embeddings/base.js +1 -1
package/dist/embeddings/cache_backed.cjs +1 -1
package/dist/embeddings/cache_backed.js +1 -1
package/dist/experimental/plan_and_execute/prompt.d.ts +1 -1
package/dist/llms/base.cjs +1 -1
package/dist/llms/base.d.ts +1 -1
package/dist/llms/base.js +1 -1
package/dist/llms/bedrock/web.cjs +21 -1
package/dist/llms/bedrock/web.d.ts +1 -1
package/dist/llms/bedrock/web.js +21 -1
package/dist/memory/base.cjs +2 -2
package/dist/memory/base.d.ts +2 -2
package/dist/memory/base.js +2 -2
package/dist/output_parsers/list.cjs +4 -122
package/dist/output_parsers/list.d.ts +1 -57
package/dist/output_parsers/list.js +1 -119
package/dist/output_parsers/openai_functions.cjs +1 -1
package/dist/output_parsers/openai_functions.d.ts +1 -1
package/dist/output_parsers/openai_functions.js +1 -1
package/dist/prompts/base.cjs +8 -8
package/dist/prompts/base.d.ts +3 -3
package/dist/prompts/base.js +3 -3
package/dist/prompts/chat.cjs +13 -15
package/dist/prompts/chat.d.ts +2 -1
package/dist/prompts/chat.js +2 -1
package/dist/prompts/few_shot.cjs +4 -15
package/dist/prompts/few_shot.d.ts +1 -1
package/dist/prompts/few_shot.js +1 -1
package/dist/prompts/index.cjs +2 -2
package/dist/prompts/index.d.ts +1 -1
package/dist/prompts/index.js +1 -1
package/dist/prompts/pipeline.cjs +3 -15
package/dist/prompts/pipeline.d.ts +1 -1
package/dist/prompts/pipeline.js +1 -1
package/dist/prompts/prompt.cjs +3 -15
package/dist/prompts/prompt.d.ts +1 -1
package/dist/prompts/prompt.js +1 -1
package/dist/prompts/selectors/LengthBasedExampleSelector.cjs +3 -15
package/dist/prompts/selectors/LengthBasedExampleSelector.d.ts +1 -1
package/dist/prompts/selectors/LengthBasedExampleSelector.js +1 -1
package/dist/prompts/selectors/SemanticSimilarityExampleSelector.cjs +1 -1
package/dist/prompts/selectors/SemanticSimilarityExampleSelector.d.ts +1 -1
package/dist/prompts/selectors/SemanticSimilarityExampleSelector.js +1 -1
package/dist/prompts/selectors/conditional.cjs +6 -15
package/dist/prompts/selectors/conditional.d.ts +1 -1
package/dist/prompts/selectors/conditional.js +1 -1
package/dist/prompts/serde.cjs +0 -15
package/dist/prompts/serde.d.ts +1 -1
package/dist/prompts/serde.js +1 -1
package/dist/prompts/template.cjs +9 -15
package/dist/prompts/template.d.ts +1 -1
package/dist/prompts/template.js +1 -1
package/dist/schema/document.cjs +3 -3
package/dist/schema/document.d.ts +1 -1
package/dist/schema/document.js +1 -1
package/dist/schema/index.cjs +12 -12
package/dist/schema/index.d.ts +10 -10
package/dist/schema/index.js +7 -7
package/dist/schema/output_parser.cjs +1 -1
package/dist/schema/output_parser.d.ts +1 -1
package/dist/schema/output_parser.js +1 -1
package/dist/schema/retriever.cjs +1 -1
package/dist/schema/retriever.d.ts +1 -1
package/dist/schema/retriever.js +1 -1
package/dist/schema/storage.cjs +1 -1
package/dist/schema/storage.d.ts +1 -1
package/dist/schema/storage.js +1 -1
package/dist/util/async_caller.cjs +1 -1
package/dist/util/async_caller.d.ts +1 -1
package/dist/util/async_caller.js +1 -1
package/dist/vectorstores/momento_vector_index.cjs +39 -0
package/dist/vectorstores/momento_vector_index.d.ts +17 -1
package/dist/vectorstores/momento_vector_index.js +40 -1
package/dist/vectorstores/mongodb_atlas.cjs +22 -2
package/dist/vectorstores/mongodb_atlas.d.ts +13 -0
package/dist/vectorstores/mongodb_atlas.js +22 -2
package/package.json +9 -8

package/dist/chat_models/anthropic.js CHANGED Viewed

@@ -1,346 +1 @@
-import { Anthropic, AI_PROMPT, HUMAN_PROMPT, } from "@anthropic-ai/sdk";
-import { AIMessage, AIMessageChunk, ChatGenerationChunk, ChatMessage, } from "../schema/index.js";
-import { getEnvironmentVariable } from "../util/env.js";
-import { BaseChatModel } from "./base.js";
-/**
- * Extracts the custom role of a generic chat message.
- * @param message The chat message from which to extract the custom role.
- * @returns The custom role of the chat message.
- */
-function extractGenericMessageCustomRole(message) {
-    if (message.role !== AI_PROMPT &&
-        message.role !== HUMAN_PROMPT &&
-        message.role !== "") {
-        console.warn(`Unknown message role: ${message.role}`);
-    }
-    return message.role;
-}
-/**
- * Gets the Anthropic prompt from a base message.
- * @param message The base message from which to get the Anthropic prompt.
- * @returns The Anthropic prompt from the base message.
- */
-function getAnthropicPromptFromMessage(message) {
-    const type = message._getType();
-    switch (type) {
-        case "ai":
-            return AI_PROMPT;
-        case "human":
-            return HUMAN_PROMPT;
-        case "system":
-            return "";
-        case "generic": {
-            if (!ChatMessage.isInstance(message))
-                throw new Error("Invalid generic chat message");
-            return extractGenericMessageCustomRole(message);
-        }
-        default:
-            throw new Error(`Unknown message type: ${type}`);
-    }
-}
-export const DEFAULT_STOP_SEQUENCES = [HUMAN_PROMPT];
-/**
- * Wrapper around Anthropic large language models.
- *
- * To use you should have the `@anthropic-ai/sdk` package installed, with the
- * `ANTHROPIC_API_KEY` environment variable set.
- *
- * @remarks
- * Any parameters that are valid to be passed to {@link
- * https://console.anthropic.com/docs/api/reference |
- * `anthropic.complete`} can be passed through {@link invocationKwargs},
- * even if not explicitly available on this class.
- * @example
- * ```typescript
- * const model = new ChatAnthropic({
- *   temperature: 0.9,
- *   anthropicApiKey: 'YOUR-API-KEY',
- * });
- * const res = await model.invoke({ input: 'Hello!' });
- * console.log(res);
- * ```
- */
-export class ChatAnthropic extends BaseChatModel {
-    static lc_name() {
-        return "ChatAnthropic";
-    }
-    get lc_secrets() {
-        return {
-            anthropicApiKey: "ANTHROPIC_API_KEY",
-        };
-    }
-    get lc_aliases() {
-        return {
-            modelName: "model",
-        };
-    }
-    constructor(fields) {
-        super(fields ?? {});
-        Object.defineProperty(this, "lc_serializable", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: true
-        });
-        Object.defineProperty(this, "anthropicApiKey", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "apiUrl", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "temperature", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 1
-        });
-        Object.defineProperty(this, "topK", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: -1
-        });
-        Object.defineProperty(this, "topP", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: -1
-        });
-        Object.defineProperty(this, "maxTokensToSample", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 2048
-        });
-        Object.defineProperty(this, "modelName", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: "claude-2"
-        });
-        Object.defineProperty(this, "invocationKwargs", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "stopSequences", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "streaming", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: false
-        });
-        Object.defineProperty(this, "clientOptions", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        // Used for non-streaming requests
-        Object.defineProperty(this, "batchClient", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        // Used for streaming requests
-        Object.defineProperty(this, "streamingClient", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        this.anthropicApiKey =
-            fields?.anthropicApiKey ?? getEnvironmentVariable("ANTHROPIC_API_KEY");
-        if (!this.anthropicApiKey) {
-            throw new Error("Anthropic API key not found");
-        }
-        // Support overriding the default API URL (i.e., https://api.anthropic.com)
-        this.apiUrl = fields?.anthropicApiUrl;
-        this.modelName = fields?.modelName ?? this.modelName;
-        this.invocationKwargs = fields?.invocationKwargs ?? {};
-        this.temperature = fields?.temperature ?? this.temperature;
-        this.topK = fields?.topK ?? this.topK;
-        this.topP = fields?.topP ?? this.topP;
-        this.maxTokensToSample =
-            fields?.maxTokensToSample ?? this.maxTokensToSample;
-        this.stopSequences = fields?.stopSequences ?? this.stopSequences;
-        this.streaming = fields?.streaming ?? false;
-        this.clientOptions = fields?.clientOptions ?? {};
-    }
-    /**
-     * Get the parameters used to invoke the model
-     */
-    invocationParams(options) {
-        return {
-            model: this.modelName,
-            temperature: this.temperature,
-            top_k: this.topK,
-            top_p: this.topP,
-            stop_sequences: options?.stop?.concat(DEFAULT_STOP_SEQUENCES) ??
-                this.stopSequences ??
-                DEFAULT_STOP_SEQUENCES,
-            max_tokens_to_sample: this.maxTokensToSample,
-            stream: this.streaming,
-            ...this.invocationKwargs,
-        };
-    }
-    /** @ignore */
-    _identifyingParams() {
-        return {
-            model_name: this.modelName,
-            ...this.invocationParams(),
-        };
-    }
-    /**
-     * Get the identifying parameters for the model
-     */
-    identifyingParams() {
-        return {
-            model_name: this.modelName,
-            ...this.invocationParams(),
-        };
-    }
-    async *_streamResponseChunks(messages, options, runManager) {
-        const params = this.invocationParams(options);
-        const stream = await this.createStreamWithRetry({
-            ...params,
-            prompt: this.formatMessagesAsPrompt(messages),
-        });
-        let modelSent = false;
-        let stopReasonSent = false;
-        for await (const data of stream) {
-            if (options.signal?.aborted) {
-                stream.controller.abort();
-                throw new Error("AbortError: User aborted the request.");
-            }
-            const additional_kwargs = {};
-            if (data.model && !modelSent) {
-                additional_kwargs.model = data.model;
-                modelSent = true;
-            }
-            else if (data.stop_reason && !stopReasonSent) {
-                additional_kwargs.stop_reason = data.stop_reason;
-                stopReasonSent = true;
-            }
-            const delta = data.completion ?? "";
-            yield new ChatGenerationChunk({
-                message: new AIMessageChunk({
-                    content: delta,
-                    additional_kwargs,
-                }),
-                text: delta,
-            });
-            await runManager?.handleLLMNewToken(delta);
-            if (data.stop_reason) {
-                break;
-            }
-        }
-    }
-    /**
-     * Formats messages as a prompt for the model.
-     * @param messages The base messages to format as a prompt.
-     * @returns The formatted prompt.
-     */
-    formatMessagesAsPrompt(messages) {
-        return (messages
-            .map((message) => {
-            const messagePrompt = getAnthropicPromptFromMessage(message);
-            return `${messagePrompt} ${message.content}`;
-        })
-            .join("") + AI_PROMPT);
-    }
-    /** @ignore */
-    async _generate(messages, options, runManager) {
-        if (this.stopSequences && options.stop) {
-            throw new Error(`"stopSequence" parameter found in input and default params`);
-        }
-        const params = this.invocationParams(options);
-        let response;
-        if (params.stream) {
-            response = {
-                completion: "",
-                model: "",
-                stop_reason: "",
-            };
-            const stream = await this._streamResponseChunks(messages, options, runManager);
-            for await (const chunk of stream) {
-                response.completion += chunk.message.content;
-                response.model =
-                    chunk.message.additional_kwargs.model ?? response.model;
-                response.stop_reason =
-                    chunk.message.additional_kwargs.stop_reason ??
-                        response.stop_reason;
-            }
-        }
-        else {
-            response = await this.completionWithRetry({
-                ...params,
-                prompt: this.formatMessagesAsPrompt(messages),
-            }, { signal: options.signal });
-        }
-        const generations = (response.completion ?? "")
-            .split(AI_PROMPT)
-            .map((message) => ({
-            text: message,
-            message: new AIMessage(message),
-        }));
-        return {
-            generations,
-        };
-    }
-    /**
-     * Creates a streaming request with retry.
-     * @param request The parameters for creating a completion.
-     * @returns A streaming request.
-     */
-    async createStreamWithRetry(request) {
-        if (!this.streamingClient) {
-            const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined;
-            this.streamingClient = new Anthropic({
-                ...this.clientOptions,
-                ...options,
-                apiKey: this.anthropicApiKey,
-                maxRetries: 0,
-            });
-        }
-        const makeCompletionRequest = async () => this.streamingClient.completions.create({ ...request, stream: true }, { headers: request.headers });
-        return this.caller.call(makeCompletionRequest);
-    }
-    /** @ignore */
-    async completionWithRetry(request, options) {
-        if (!this.anthropicApiKey) {
-            throw new Error("Missing Anthropic API key.");
-        }
-        if (!this.batchClient) {
-            const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined;
-            this.batchClient = new Anthropic({
-                ...this.clientOptions,
-                ...options,
-                apiKey: this.anthropicApiKey,
-                maxRetries: 0,
-            });
-        }
-        const makeCompletionRequest = async () => this.batchClient.completions.create({ ...request, stream: false }, { headers: request.headers });
-        return this.caller.callWithOptions({ signal: options.signal }, makeCompletionRequest);
-    }
-    _llmType() {
-        return "anthropic";
-    }
-    /** @ignore */
-    _combineLLMOutput() {
-        return [];
-    }
-}
+export * from "@langchain/anthropic";

package/dist/chat_models/base.cjs CHANGED Viewed

@@ -14,4 +14,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-__exportStar(require("langchain-core/chat_model"), exports);
+__exportStar(require("langchain-core/language_models/chat_models"), exports);

package/dist/chat_models/base.d.ts CHANGED Viewed

@@ -1 +1 @@
-export * from "langchain-core/chat_model";
+export * from "langchain-core/language_models/chat_models";

package/dist/chat_models/base.js CHANGED Viewed

@@ -1 +1 @@
-export * from "langchain-core/chat_model";
+export * from "langchain-core/language_models/chat_models";

package/dist/chat_models/bedrock/web.cjs CHANGED Viewed

@@ -321,11 +321,31 @@ class BedrockChat extends base_js_1.SimpleChatModel {
     }
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     _readChunks(reader) {
+        function _concatChunks(a, b) {
+            const newBuffer = new Uint8Array(a.length + b.length);
+            newBuffer.set(a);
+            newBuffer.set(b, a.length);
+            return newBuffer;
+        }
+        function getMessageLength(buffer) {
+            if (buffer.byteLength === 0)
+                return 0;
+            const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
+            return view.getUint32(0, false);
+        }
         return {
             async *[Symbol.asyncIterator]() {
                 let readResult = await reader.read();
+                let buffer = new Uint8Array(0);
                 while (!readResult.done) {
-                    yield readResult.value;
+                    const chunk = readResult.value;
+                    buffer = _concatChunks(buffer, chunk);
+                    let messageLength = getMessageLength(buffer);
+                    while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) {
+                        yield buffer.slice(0, messageLength);
+                        buffer = buffer.slice(messageLength);
+                        messageLength = getMessageLength(buffer);
+                    }
                     readResult = await reader.read();
                 }
             },

package/dist/chat_models/bedrock/web.d.ts CHANGED Viewed

@@ -72,7 +72,7 @@ export declare class BedrockChat extends SimpleChatModel implements BaseBedrockI
     }): Promise<Response>;
     _streamResponseChunks(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
     _readChunks(reader: any): {
-        [Symbol.asyncIterator](): AsyncGenerator<any, void, unknown>;
+        [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
     };
     _combineLLMOutput(): {};
 }

package/dist/chat_models/bedrock/web.js CHANGED Viewed

@@ -316,11 +316,31 @@ export class BedrockChat extends SimpleChatModel {
     }
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     _readChunks(reader) {
+        function _concatChunks(a, b) {
+            const newBuffer = new Uint8Array(a.length + b.length);
+            newBuffer.set(a);
+            newBuffer.set(b, a.length);
+            return newBuffer;
+        }
+        function getMessageLength(buffer) {
+            if (buffer.byteLength === 0)
+                return 0;
+            const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
+            return view.getUint32(0, false);
+        }
         return {
             async *[Symbol.asyncIterator]() {
                 let readResult = await reader.read();
+                let buffer = new Uint8Array(0);
                 while (!readResult.done) {
-                    yield readResult.value;
+                    const chunk = readResult.value;
+                    buffer = _concatChunks(buffer, chunk);
+                    let messageLength = getMessageLength(buffer);
+                    while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) {
+                        yield buffer.slice(0, messageLength);
+                        buffer = buffer.slice(messageLength);
+                        messageLength = getMessageLength(buffer);
+                    }
                     readResult = await reader.read();
                 }
             },

package/dist/document.cjs CHANGED Viewed

@@ -1,5 +1,5 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Document = void 0;
-var document_1 = require("langchain-core/schema/document");
-Object.defineProperty(exports, "Document", { enumerable: true, get: function () { return document_1.Document; } });
+var documents_1 = require("langchain-core/documents");
+Object.defineProperty(exports, "Document", { enumerable: true, get: function () { return documents_1.Document; } });

package/dist/document.d.ts CHANGED Viewed

@@ -1 +1 @@
-export { type DocumentInput, Document } from "langchain-core/schema/document";
+export { type DocumentInput, Document } from "langchain-core/documents";

package/dist/document.js CHANGED Viewed

@@ -1 +1 @@
-export { Document } from "langchain-core/schema/document";
+export { Document } from "langchain-core/documents";

package/dist/document_loaders/web/azure_blob_storage_file.d.ts CHANGED Viewed

@@ -47,6 +47,6 @@ export declare class AzureBlobStorageFileLoader extends BaseDocumentLoader {
      * are returned, and the temporary directory is deleted.
      * @returns An array of documents loaded from the file in Azure Blob Storage.
      */
-    load(): Promise<import("langchain-core/schema/document").Document<Record<string, any>>[]>;
+    load(): Promise<import("langchain-core/documents").Document<Record<string, any>>[]>;
 }
 export {};

package/dist/document_loaders/web/github.cjs CHANGED Viewed

@@ -216,6 +216,22 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
         }
         return documents;
     }
+    /**
+     * Asynchronously streams documents from the entire GitHub repository.
+     * It is suitable for situations where processing large repositories in a memory-efficient manner is required.
+     * @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
+     */
+    async *loadAsStream() {
+        this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
+        yield* await this.processRepoAsStream(this.initialPath);
+        if (!this.processSubmodules) {
+            return;
+        }
+        await this.getSubmoduleInfo();
+        for (const submoduleInfo of this.submoduleInfos) {
+            yield* await this.loadSubmoduleAsStream(submoduleInfo);
+        }
+    }
     /**
      * Loads the information about Git submodules from the repository, if available.
      */
@@ -323,6 +339,37 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
             }).load();
         }
     }
+    /**
+     * Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
+     * @param submoduleInfo the info about the submodule to be loaded
+     * @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
+     */
+    async *loadSubmoduleAsStream(submoduleInfo) {
+        if (!submoduleInfo.url.startsWith(this.baseUrl)) {
+            this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
+            yield* [];
+        }
+        if (!submoduleInfo.path.startsWith(this.initialPath)) {
+            this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
+            yield* [];
+        }
+        this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
+        const submoduleLoader = new GithubRepoLoader(submoduleInfo.url, {
+            accessToken: this.accessToken,
+            baseUrl: this.baseUrl,
+            apiUrl: this.apiUrl,
+            branch: submoduleInfo.ref,
+            recursive: this.recursive,
+            processSubmodules: this.processSubmodules,
+            unknown: this.unknown,
+            ignoreFiles: this.ignoreFiles,
+            ignorePaths: this.ignorePaths,
+            verbose: this.verbose,
+            maxConcurrency: this.maxConcurrency,
+            maxRetries: this.maxRetries,
+        });
+        yield* await submoduleLoader.processRepoAsStream(submoduleInfo.path);
+    }
     /**
      * Determines whether a file or directory should be ignored based on its
      * path and type.
@@ -414,6 +461,35 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
             return Promise.reject(error);
         }
     }
+    /**
+     * Asynchronously processes the contents of the entire GitHub repository,
+     * streaming each file as a Document object.
+     * @param path The path of the directory to process.
+     * @yields Yields a Promise that resolves to a Document object for each file found in the repository.
+     */
+    async *processRepoAsStream(path) {
+        const files = await this.fetchRepoFiles(path);
+        for (const file of files) {
+            if (this.shouldIgnore(file.path, file.type)) {
+                continue;
+            }
+            if (file.type === "file") {
+                try {
+                    const fileResponse = await this.fetchFileContentWrapper(file);
+                    yield new document_js_1.Document({
+                        pageContent: fileResponse.contents,
+                        metadata: fileResponse.metadata,
+                    });
+                }
+                catch (error) {
+                    this.handleError(`Failed to fetch file content: ${file.path}, ${error}`);
+                }
+            }
+            else if (this.recursive) {
+                yield* await this.processDirectoryAsStream(file.path);
+            }
+        }
+    }
     /**
      * Fetches the contents of a directory and maps the file / directory paths
      * to promises that will fetch the file / directory contents.
@@ -430,6 +506,35 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
             return Promise.reject(error);
         }
     }
+    /**
+     * Asynchronously processes the contents of a given directory in the GitHub repository,
+     * streaming each file as a Document object.
+     * @param path The path of the directory to process.
+     * @yields Yields a Promise that resolves to a Document object for each file in the directory.
+     */
+    async *processDirectoryAsStream(path) {
+        const files = await this.fetchRepoFiles(path);
+        for (const file of files) {
+            if (this.shouldIgnore(file.path, file.type)) {
+                continue;
+            }
+            if (file.type === "file") {
+                try {
+                    const fileResponse = await this.fetchFileContentWrapper(file);
+                    yield new document_js_1.Document({
+                        pageContent: fileResponse.contents,
+                        metadata: fileResponse.metadata,
+                    });
+                }
+                catch {
+                    this.handleError(`Failed to fetch file content: ${file.path}`);
+                }
+            }
+            else if (this.recursive) {
+                yield* await this.processDirectoryAsStream(file.path);
+            }
+        }
+    }
     /**
      * Fetches the files from a GitHub repository.
      * If the path denotes a single file, the resulting array contains only one element.

package/dist/document_loaders/web/github.d.ts CHANGED Viewed

@@ -99,6 +99,12 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
      * @returns A promise that resolves to an array of Document instances.
      */
     load(): Promise<Document[]>;
+    /**
+     * Asynchronously streams documents from the entire GitHub repository.
+     * It is suitable for situations where processing large repositories in a memory-efficient manner is required.
+     * @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
+     */
+    loadAsStream(): AsyncGenerator<Document, void, undefined>;
     /**
      * Loads the information about Git submodules from the repository, if available.
      */
@@ -115,6 +121,12 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
      * @param submoduleInfo the info about the submodule to be loaded
      */
     private loadSubmodule;
+    /**
+     * Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
+     * @param submoduleInfo the info about the submodule to be loaded
+     * @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
+     */
+    private loadSubmoduleAsStream;
     /**
      * Determines whether a file or directory should be ignored based on its
      * path and type.
@@ -137,6 +149,13 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
      * Begins the process of fetching the contents of the repository
      */
     private processRepo;
+    /**
+     * Asynchronously processes the contents of the entire GitHub repository,
+     * streaming each file as a Document object.
+     * @param path The path of the directory to process.
+     * @yields Yields a Promise that resolves to a Document object for each file found in the repository.
+     */
+    private processRepoAsStream;
     /**
      * Fetches the contents of a directory and maps the file / directory paths
      * to promises that will fetch the file / directory contents.
@@ -144,6 +163,13 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
      * @returns A promise that resolves to an array of promises that will fetch the file / directory contents.
      */
     private processDirectory;
+    /**
+     * Asynchronously processes the contents of a given directory in the GitHub repository,
+     * streaming each file as a Document object.
+     * @param path The path of the directory to process.
+     * @yields Yields a Promise that resolves to a Document object for each file in the directory.
+     */
+    private processDirectoryAsStream;
     /**
      * Fetches the files from a GitHub repository.
      * If the path denotes a single file, the resulting array contains only one element.