langchain 0.0.196 → 0.0.197-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agents/openai/index.cjs +6 -2
  3. package/dist/agents/openai/index.js +6 -2
  4. package/dist/agents/toolkits/conversational_retrieval/token_buffer_memory.d.ts +1 -1
  5. package/dist/base_language/count_tokens.cjs +4 -4
  6. package/dist/base_language/count_tokens.d.ts +1 -1
  7. package/dist/base_language/count_tokens.js +1 -1
  8. package/dist/base_language/index.cjs +3 -3
  9. package/dist/base_language/index.d.ts +1 -1
  10. package/dist/base_language/index.js +1 -1
  11. package/dist/cache/base.cjs +1 -1
  12. package/dist/cache/base.d.ts +1 -1
  13. package/dist/cache/base.js +1 -1
  14. package/dist/cache/index.cjs +2 -2
  15. package/dist/cache/index.d.ts +1 -1
  16. package/dist/cache/index.js +1 -1
  17. package/dist/callbacks/handlers/console.cjs +1 -1
  18. package/dist/callbacks/handlers/console.d.ts +1 -1
  19. package/dist/callbacks/handlers/console.js +1 -1
  20. package/dist/callbacks/handlers/initialize.cjs +1 -1
  21. package/dist/callbacks/handlers/initialize.d.ts +1 -1
  22. package/dist/callbacks/handlers/initialize.js +1 -1
  23. package/dist/callbacks/handlers/log_stream.cjs +1 -1
  24. package/dist/callbacks/handlers/log_stream.d.ts +1 -1
  25. package/dist/callbacks/handlers/log_stream.js +1 -1
  26. package/dist/callbacks/handlers/run_collector.cjs +1 -1
  27. package/dist/callbacks/handlers/run_collector.d.ts +1 -1
  28. package/dist/callbacks/handlers/run_collector.js +1 -1
  29. package/dist/callbacks/handlers/tracer.cjs +1 -1
  30. package/dist/callbacks/handlers/tracer.d.ts +1 -1
  31. package/dist/callbacks/handlers/tracer.js +1 -1
  32. package/dist/callbacks/handlers/tracer_langchain.cjs +1 -1
  33. package/dist/callbacks/handlers/tracer_langchain.d.ts +1 -1
  34. package/dist/callbacks/handlers/tracer_langchain.js +1 -1
  35. package/dist/callbacks/handlers/tracer_langchain_v1.cjs +1 -1
  36. package/dist/callbacks/handlers/tracer_langchain_v1.d.ts +1 -1
  37. package/dist/callbacks/handlers/tracer_langchain_v1.js +1 -1
  38. package/dist/chains/openai_functions/structured_output.cjs +1 -1
  39. package/dist/chains/openai_functions/structured_output.d.ts +1 -1
  40. package/dist/chains/openai_functions/structured_output.js +1 -1
  41. package/dist/chat_models/anthropic.cjs +15 -348
  42. package/dist/chat_models/anthropic.d.ts +1 -156
  43. package/dist/chat_models/anthropic.js +1 -346
  44. package/dist/chat_models/base.cjs +1 -1
  45. package/dist/chat_models/base.d.ts +1 -1
  46. package/dist/chat_models/base.js +1 -1
  47. package/dist/chat_models/bedrock/web.cjs +21 -1
  48. package/dist/chat_models/bedrock/web.d.ts +1 -1
  49. package/dist/chat_models/bedrock/web.js +21 -1
  50. package/dist/document.cjs +2 -2
  51. package/dist/document.d.ts +1 -1
  52. package/dist/document.js +1 -1
  53. package/dist/document_loaders/web/azure_blob_storage_file.d.ts +1 -1
  54. package/dist/document_loaders/web/github.cjs +105 -0
  55. package/dist/document_loaders/web/github.d.ts +26 -0
  56. package/dist/document_loaders/web/github.js +105 -0
  57. package/dist/document_loaders/web/s3.d.ts +1 -1
  58. package/dist/embeddings/base.cjs +1 -1
  59. package/dist/embeddings/base.d.ts +1 -1
  60. package/dist/embeddings/base.js +1 -1
  61. package/dist/embeddings/cache_backed.cjs +1 -1
  62. package/dist/embeddings/cache_backed.js +1 -1
  63. package/dist/experimental/plan_and_execute/prompt.d.ts +1 -1
  64. package/dist/llms/base.cjs +1 -1
  65. package/dist/llms/base.d.ts +1 -1
  66. package/dist/llms/base.js +1 -1
  67. package/dist/llms/bedrock/web.cjs +21 -1
  68. package/dist/llms/bedrock/web.d.ts +1 -1
  69. package/dist/llms/bedrock/web.js +21 -1
  70. package/dist/memory/base.cjs +2 -2
  71. package/dist/memory/base.d.ts +2 -2
  72. package/dist/memory/base.js +2 -2
  73. package/dist/output_parsers/list.cjs +4 -122
  74. package/dist/output_parsers/list.d.ts +1 -57
  75. package/dist/output_parsers/list.js +1 -119
  76. package/dist/output_parsers/openai_functions.cjs +1 -1
  77. package/dist/output_parsers/openai_functions.d.ts +1 -1
  78. package/dist/output_parsers/openai_functions.js +1 -1
  79. package/dist/prompts/base.cjs +8 -8
  80. package/dist/prompts/base.d.ts +3 -3
  81. package/dist/prompts/base.js +3 -3
  82. package/dist/prompts/chat.cjs +13 -15
  83. package/dist/prompts/chat.d.ts +2 -1
  84. package/dist/prompts/chat.js +2 -1
  85. package/dist/prompts/few_shot.cjs +4 -15
  86. package/dist/prompts/few_shot.d.ts +1 -1
  87. package/dist/prompts/few_shot.js +1 -1
  88. package/dist/prompts/index.cjs +2 -2
  89. package/dist/prompts/index.d.ts +1 -1
  90. package/dist/prompts/index.js +1 -1
  91. package/dist/prompts/pipeline.cjs +3 -15
  92. package/dist/prompts/pipeline.d.ts +1 -1
  93. package/dist/prompts/pipeline.js +1 -1
  94. package/dist/prompts/prompt.cjs +3 -15
  95. package/dist/prompts/prompt.d.ts +1 -1
  96. package/dist/prompts/prompt.js +1 -1
  97. package/dist/prompts/selectors/LengthBasedExampleSelector.cjs +3 -15
  98. package/dist/prompts/selectors/LengthBasedExampleSelector.d.ts +1 -1
  99. package/dist/prompts/selectors/LengthBasedExampleSelector.js +1 -1
  100. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.cjs +1 -1
  101. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.d.ts +1 -1
  102. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.js +1 -1
  103. package/dist/prompts/selectors/conditional.cjs +6 -15
  104. package/dist/prompts/selectors/conditional.d.ts +1 -1
  105. package/dist/prompts/selectors/conditional.js +1 -1
  106. package/dist/prompts/serde.cjs +0 -15
  107. package/dist/prompts/serde.d.ts +1 -1
  108. package/dist/prompts/serde.js +1 -1
  109. package/dist/prompts/template.cjs +9 -15
  110. package/dist/prompts/template.d.ts +1 -1
  111. package/dist/prompts/template.js +1 -1
  112. package/dist/schema/document.cjs +3 -3
  113. package/dist/schema/document.d.ts +1 -1
  114. package/dist/schema/document.js +1 -1
  115. package/dist/schema/index.cjs +12 -12
  116. package/dist/schema/index.d.ts +10 -10
  117. package/dist/schema/index.js +7 -7
  118. package/dist/schema/output_parser.cjs +1 -1
  119. package/dist/schema/output_parser.d.ts +1 -1
  120. package/dist/schema/output_parser.js +1 -1
  121. package/dist/schema/retriever.cjs +1 -1
  122. package/dist/schema/retriever.d.ts +1 -1
  123. package/dist/schema/retriever.js +1 -1
  124. package/dist/schema/storage.cjs +1 -1
  125. package/dist/schema/storage.d.ts +1 -1
  126. package/dist/schema/storage.js +1 -1
  127. package/dist/util/async_caller.cjs +1 -1
  128. package/dist/util/async_caller.d.ts +1 -1
  129. package/dist/util/async_caller.js +1 -1
  130. package/dist/vectorstores/momento_vector_index.cjs +39 -0
  131. package/dist/vectorstores/momento_vector_index.d.ts +17 -1
  132. package/dist/vectorstores/momento_vector_index.js +40 -1
  133. package/dist/vectorstores/mongodb_atlas.cjs +22 -2
  134. package/dist/vectorstores/mongodb_atlas.d.ts +13 -0
  135. package/dist/vectorstores/mongodb_atlas.js +22 -2
  136. package/package.json +9 -8
@@ -1,346 +1 @@
1
- import { Anthropic, AI_PROMPT, HUMAN_PROMPT, } from "@anthropic-ai/sdk";
2
- import { AIMessage, AIMessageChunk, ChatGenerationChunk, ChatMessage, } from "../schema/index.js";
3
- import { getEnvironmentVariable } from "../util/env.js";
4
- import { BaseChatModel } from "./base.js";
5
- /**
6
- * Extracts the custom role of a generic chat message.
7
- * @param message The chat message from which to extract the custom role.
8
- * @returns The custom role of the chat message.
9
- */
10
- function extractGenericMessageCustomRole(message) {
11
- if (message.role !== AI_PROMPT &&
12
- message.role !== HUMAN_PROMPT &&
13
- message.role !== "") {
14
- console.warn(`Unknown message role: ${message.role}`);
15
- }
16
- return message.role;
17
- }
18
- /**
19
- * Gets the Anthropic prompt from a base message.
20
- * @param message The base message from which to get the Anthropic prompt.
21
- * @returns The Anthropic prompt from the base message.
22
- */
23
- function getAnthropicPromptFromMessage(message) {
24
- const type = message._getType();
25
- switch (type) {
26
- case "ai":
27
- return AI_PROMPT;
28
- case "human":
29
- return HUMAN_PROMPT;
30
- case "system":
31
- return "";
32
- case "generic": {
33
- if (!ChatMessage.isInstance(message))
34
- throw new Error("Invalid generic chat message");
35
- return extractGenericMessageCustomRole(message);
36
- }
37
- default:
38
- throw new Error(`Unknown message type: ${type}`);
39
- }
40
- }
41
- export const DEFAULT_STOP_SEQUENCES = [HUMAN_PROMPT];
42
- /**
43
- * Wrapper around Anthropic large language models.
44
- *
45
- * To use you should have the `@anthropic-ai/sdk` package installed, with the
46
- * `ANTHROPIC_API_KEY` environment variable set.
47
- *
48
- * @remarks
49
- * Any parameters that are valid to be passed to {@link
50
- * https://console.anthropic.com/docs/api/reference |
51
- * `anthropic.complete`} can be passed through {@link invocationKwargs},
52
- * even if not explicitly available on this class.
53
- * @example
54
- * ```typescript
55
- * const model = new ChatAnthropic({
56
- * temperature: 0.9,
57
- * anthropicApiKey: 'YOUR-API-KEY',
58
- * });
59
- * const res = await model.invoke({ input: 'Hello!' });
60
- * console.log(res);
61
- * ```
62
- */
63
- export class ChatAnthropic extends BaseChatModel {
64
- static lc_name() {
65
- return "ChatAnthropic";
66
- }
67
- get lc_secrets() {
68
- return {
69
- anthropicApiKey: "ANTHROPIC_API_KEY",
70
- };
71
- }
72
- get lc_aliases() {
73
- return {
74
- modelName: "model",
75
- };
76
- }
77
- constructor(fields) {
78
- super(fields ?? {});
79
- Object.defineProperty(this, "lc_serializable", {
80
- enumerable: true,
81
- configurable: true,
82
- writable: true,
83
- value: true
84
- });
85
- Object.defineProperty(this, "anthropicApiKey", {
86
- enumerable: true,
87
- configurable: true,
88
- writable: true,
89
- value: void 0
90
- });
91
- Object.defineProperty(this, "apiUrl", {
92
- enumerable: true,
93
- configurable: true,
94
- writable: true,
95
- value: void 0
96
- });
97
- Object.defineProperty(this, "temperature", {
98
- enumerable: true,
99
- configurable: true,
100
- writable: true,
101
- value: 1
102
- });
103
- Object.defineProperty(this, "topK", {
104
- enumerable: true,
105
- configurable: true,
106
- writable: true,
107
- value: -1
108
- });
109
- Object.defineProperty(this, "topP", {
110
- enumerable: true,
111
- configurable: true,
112
- writable: true,
113
- value: -1
114
- });
115
- Object.defineProperty(this, "maxTokensToSample", {
116
- enumerable: true,
117
- configurable: true,
118
- writable: true,
119
- value: 2048
120
- });
121
- Object.defineProperty(this, "modelName", {
122
- enumerable: true,
123
- configurable: true,
124
- writable: true,
125
- value: "claude-2"
126
- });
127
- Object.defineProperty(this, "invocationKwargs", {
128
- enumerable: true,
129
- configurable: true,
130
- writable: true,
131
- value: void 0
132
- });
133
- Object.defineProperty(this, "stopSequences", {
134
- enumerable: true,
135
- configurable: true,
136
- writable: true,
137
- value: void 0
138
- });
139
- Object.defineProperty(this, "streaming", {
140
- enumerable: true,
141
- configurable: true,
142
- writable: true,
143
- value: false
144
- });
145
- Object.defineProperty(this, "clientOptions", {
146
- enumerable: true,
147
- configurable: true,
148
- writable: true,
149
- value: void 0
150
- });
151
- // Used for non-streaming requests
152
- Object.defineProperty(this, "batchClient", {
153
- enumerable: true,
154
- configurable: true,
155
- writable: true,
156
- value: void 0
157
- });
158
- // Used for streaming requests
159
- Object.defineProperty(this, "streamingClient", {
160
- enumerable: true,
161
- configurable: true,
162
- writable: true,
163
- value: void 0
164
- });
165
- this.anthropicApiKey =
166
- fields?.anthropicApiKey ?? getEnvironmentVariable("ANTHROPIC_API_KEY");
167
- if (!this.anthropicApiKey) {
168
- throw new Error("Anthropic API key not found");
169
- }
170
- // Support overriding the default API URL (i.e., https://api.anthropic.com)
171
- this.apiUrl = fields?.anthropicApiUrl;
172
- this.modelName = fields?.modelName ?? this.modelName;
173
- this.invocationKwargs = fields?.invocationKwargs ?? {};
174
- this.temperature = fields?.temperature ?? this.temperature;
175
- this.topK = fields?.topK ?? this.topK;
176
- this.topP = fields?.topP ?? this.topP;
177
- this.maxTokensToSample =
178
- fields?.maxTokensToSample ?? this.maxTokensToSample;
179
- this.stopSequences = fields?.stopSequences ?? this.stopSequences;
180
- this.streaming = fields?.streaming ?? false;
181
- this.clientOptions = fields?.clientOptions ?? {};
182
- }
183
- /**
184
- * Get the parameters used to invoke the model
185
- */
186
- invocationParams(options) {
187
- return {
188
- model: this.modelName,
189
- temperature: this.temperature,
190
- top_k: this.topK,
191
- top_p: this.topP,
192
- stop_sequences: options?.stop?.concat(DEFAULT_STOP_SEQUENCES) ??
193
- this.stopSequences ??
194
- DEFAULT_STOP_SEQUENCES,
195
- max_tokens_to_sample: this.maxTokensToSample,
196
- stream: this.streaming,
197
- ...this.invocationKwargs,
198
- };
199
- }
200
- /** @ignore */
201
- _identifyingParams() {
202
- return {
203
- model_name: this.modelName,
204
- ...this.invocationParams(),
205
- };
206
- }
207
- /**
208
- * Get the identifying parameters for the model
209
- */
210
- identifyingParams() {
211
- return {
212
- model_name: this.modelName,
213
- ...this.invocationParams(),
214
- };
215
- }
216
- async *_streamResponseChunks(messages, options, runManager) {
217
- const params = this.invocationParams(options);
218
- const stream = await this.createStreamWithRetry({
219
- ...params,
220
- prompt: this.formatMessagesAsPrompt(messages),
221
- });
222
- let modelSent = false;
223
- let stopReasonSent = false;
224
- for await (const data of stream) {
225
- if (options.signal?.aborted) {
226
- stream.controller.abort();
227
- throw new Error("AbortError: User aborted the request.");
228
- }
229
- const additional_kwargs = {};
230
- if (data.model && !modelSent) {
231
- additional_kwargs.model = data.model;
232
- modelSent = true;
233
- }
234
- else if (data.stop_reason && !stopReasonSent) {
235
- additional_kwargs.stop_reason = data.stop_reason;
236
- stopReasonSent = true;
237
- }
238
- const delta = data.completion ?? "";
239
- yield new ChatGenerationChunk({
240
- message: new AIMessageChunk({
241
- content: delta,
242
- additional_kwargs,
243
- }),
244
- text: delta,
245
- });
246
- await runManager?.handleLLMNewToken(delta);
247
- if (data.stop_reason) {
248
- break;
249
- }
250
- }
251
- }
252
- /**
253
- * Formats messages as a prompt for the model.
254
- * @param messages The base messages to format as a prompt.
255
- * @returns The formatted prompt.
256
- */
257
- formatMessagesAsPrompt(messages) {
258
- return (messages
259
- .map((message) => {
260
- const messagePrompt = getAnthropicPromptFromMessage(message);
261
- return `${messagePrompt} ${message.content}`;
262
- })
263
- .join("") + AI_PROMPT);
264
- }
265
- /** @ignore */
266
- async _generate(messages, options, runManager) {
267
- if (this.stopSequences && options.stop) {
268
- throw new Error(`"stopSequence" parameter found in input and default params`);
269
- }
270
- const params = this.invocationParams(options);
271
- let response;
272
- if (params.stream) {
273
- response = {
274
- completion: "",
275
- model: "",
276
- stop_reason: "",
277
- };
278
- const stream = await this._streamResponseChunks(messages, options, runManager);
279
- for await (const chunk of stream) {
280
- response.completion += chunk.message.content;
281
- response.model =
282
- chunk.message.additional_kwargs.model ?? response.model;
283
- response.stop_reason =
284
- chunk.message.additional_kwargs.stop_reason ??
285
- response.stop_reason;
286
- }
287
- }
288
- else {
289
- response = await this.completionWithRetry({
290
- ...params,
291
- prompt: this.formatMessagesAsPrompt(messages),
292
- }, { signal: options.signal });
293
- }
294
- const generations = (response.completion ?? "")
295
- .split(AI_PROMPT)
296
- .map((message) => ({
297
- text: message,
298
- message: new AIMessage(message),
299
- }));
300
- return {
301
- generations,
302
- };
303
- }
304
- /**
305
- * Creates a streaming request with retry.
306
- * @param request The parameters for creating a completion.
307
- * @returns A streaming request.
308
- */
309
- async createStreamWithRetry(request) {
310
- if (!this.streamingClient) {
311
- const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined;
312
- this.streamingClient = new Anthropic({
313
- ...this.clientOptions,
314
- ...options,
315
- apiKey: this.anthropicApiKey,
316
- maxRetries: 0,
317
- });
318
- }
319
- const makeCompletionRequest = async () => this.streamingClient.completions.create({ ...request, stream: true }, { headers: request.headers });
320
- return this.caller.call(makeCompletionRequest);
321
- }
322
- /** @ignore */
323
- async completionWithRetry(request, options) {
324
- if (!this.anthropicApiKey) {
325
- throw new Error("Missing Anthropic API key.");
326
- }
327
- if (!this.batchClient) {
328
- const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined;
329
- this.batchClient = new Anthropic({
330
- ...this.clientOptions,
331
- ...options,
332
- apiKey: this.anthropicApiKey,
333
- maxRetries: 0,
334
- });
335
- }
336
- const makeCompletionRequest = async () => this.batchClient.completions.create({ ...request, stream: false }, { headers: request.headers });
337
- return this.caller.callWithOptions({ signal: options.signal }, makeCompletionRequest);
338
- }
339
- _llmType() {
340
- return "anthropic";
341
- }
342
- /** @ignore */
343
- _combineLLMOutput() {
344
- return [];
345
- }
346
- }
1
+ export * from "@langchain/anthropic";
@@ -14,4 +14,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- __exportStar(require("langchain-core/chat_model"), exports);
17
+ __exportStar(require("langchain-core/language_models/chat_models"), exports);
@@ -1 +1 @@
1
- export * from "langchain-core/chat_model";
1
+ export * from "langchain-core/language_models/chat_models";
@@ -1 +1 @@
1
- export * from "langchain-core/chat_model";
1
+ export * from "langchain-core/language_models/chat_models";
@@ -321,11 +321,31 @@ class BedrockChat extends base_js_1.SimpleChatModel {
321
321
  }
322
322
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
323
323
  _readChunks(reader) {
324
+ function _concatChunks(a, b) {
325
+ const newBuffer = new Uint8Array(a.length + b.length);
326
+ newBuffer.set(a);
327
+ newBuffer.set(b, a.length);
328
+ return newBuffer;
329
+ }
330
+ function getMessageLength(buffer) {
331
+ if (buffer.byteLength === 0)
332
+ return 0;
333
+ const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
334
+ return view.getUint32(0, false);
335
+ }
324
336
  return {
325
337
  async *[Symbol.asyncIterator]() {
326
338
  let readResult = await reader.read();
339
+ let buffer = new Uint8Array(0);
327
340
  while (!readResult.done) {
328
- yield readResult.value;
341
+ const chunk = readResult.value;
342
+ buffer = _concatChunks(buffer, chunk);
343
+ let messageLength = getMessageLength(buffer);
344
+ while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) {
345
+ yield buffer.slice(0, messageLength);
346
+ buffer = buffer.slice(messageLength);
347
+ messageLength = getMessageLength(buffer);
348
+ }
329
349
  readResult = await reader.read();
330
350
  }
331
351
  },
@@ -72,7 +72,7 @@ export declare class BedrockChat extends SimpleChatModel implements BaseBedrockI
72
72
  }): Promise<Response>;
73
73
  _streamResponseChunks(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
74
74
  _readChunks(reader: any): {
75
- [Symbol.asyncIterator](): AsyncGenerator<any, void, unknown>;
75
+ [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
76
76
  };
77
77
  _combineLLMOutput(): {};
78
78
  }
@@ -316,11 +316,31 @@ export class BedrockChat extends SimpleChatModel {
316
316
  }
317
317
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
318
318
  _readChunks(reader) {
319
+ function _concatChunks(a, b) {
320
+ const newBuffer = new Uint8Array(a.length + b.length);
321
+ newBuffer.set(a);
322
+ newBuffer.set(b, a.length);
323
+ return newBuffer;
324
+ }
325
+ function getMessageLength(buffer) {
326
+ if (buffer.byteLength === 0)
327
+ return 0;
328
+ const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
329
+ return view.getUint32(0, false);
330
+ }
319
331
  return {
320
332
  async *[Symbol.asyncIterator]() {
321
333
  let readResult = await reader.read();
334
+ let buffer = new Uint8Array(0);
322
335
  while (!readResult.done) {
323
- yield readResult.value;
336
+ const chunk = readResult.value;
337
+ buffer = _concatChunks(buffer, chunk);
338
+ let messageLength = getMessageLength(buffer);
339
+ while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) {
340
+ yield buffer.slice(0, messageLength);
341
+ buffer = buffer.slice(messageLength);
342
+ messageLength = getMessageLength(buffer);
343
+ }
324
344
  readResult = await reader.read();
325
345
  }
326
346
  },
package/dist/document.cjs CHANGED
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.Document = void 0;
4
- var document_1 = require("langchain-core/schema/document");
5
- Object.defineProperty(exports, "Document", { enumerable: true, get: function () { return document_1.Document; } });
4
+ var documents_1 = require("langchain-core/documents");
5
+ Object.defineProperty(exports, "Document", { enumerable: true, get: function () { return documents_1.Document; } });
@@ -1 +1 @@
1
- export { type DocumentInput, Document } from "langchain-core/schema/document";
1
+ export { type DocumentInput, Document } from "langchain-core/documents";
package/dist/document.js CHANGED
@@ -1 +1 @@
1
- export { Document } from "langchain-core/schema/document";
1
+ export { Document } from "langchain-core/documents";
@@ -47,6 +47,6 @@ export declare class AzureBlobStorageFileLoader extends BaseDocumentLoader {
47
47
  * are returned, and the temporary directory is deleted.
48
48
  * @returns An array of documents loaded from the file in Azure Blob Storage.
49
49
  */
50
- load(): Promise<import("langchain-core/schema/document").Document<Record<string, any>>[]>;
50
+ load(): Promise<import("langchain-core/documents").Document<Record<string, any>>[]>;
51
51
  }
52
52
  export {};
@@ -216,6 +216,22 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
216
216
  }
217
217
  return documents;
218
218
  }
219
+ /**
220
+ * Asynchronously streams documents from the entire GitHub repository.
221
+ * It is suitable for situations where processing large repositories in a memory-efficient manner is required.
222
+ * @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
223
+ */
224
+ async *loadAsStream() {
225
+ this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
226
+ yield* await this.processRepoAsStream(this.initialPath);
227
+ if (!this.processSubmodules) {
228
+ return;
229
+ }
230
+ await this.getSubmoduleInfo();
231
+ for (const submoduleInfo of this.submoduleInfos) {
232
+ yield* await this.loadSubmoduleAsStream(submoduleInfo);
233
+ }
234
+ }
219
235
  /**
220
236
  * Loads the information about Git submodules from the repository, if available.
221
237
  */
@@ -323,6 +339,37 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
323
339
  }).load();
324
340
  }
325
341
  }
342
+ /**
343
+ * Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
344
+ * @param submoduleInfo the info about the submodule to be loaded
345
+ * @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
346
+ */
347
+ async *loadSubmoduleAsStream(submoduleInfo) {
348
+ if (!submoduleInfo.url.startsWith(this.baseUrl)) {
349
+ this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
350
+ yield* [];
351
+ }
352
+ if (!submoduleInfo.path.startsWith(this.initialPath)) {
353
+ this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
354
+ yield* [];
355
+ }
356
+ this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
357
+ const submoduleLoader = new GithubRepoLoader(submoduleInfo.url, {
358
+ accessToken: this.accessToken,
359
+ baseUrl: this.baseUrl,
360
+ apiUrl: this.apiUrl,
361
+ branch: submoduleInfo.ref,
362
+ recursive: this.recursive,
363
+ processSubmodules: this.processSubmodules,
364
+ unknown: this.unknown,
365
+ ignoreFiles: this.ignoreFiles,
366
+ ignorePaths: this.ignorePaths,
367
+ verbose: this.verbose,
368
+ maxConcurrency: this.maxConcurrency,
369
+ maxRetries: this.maxRetries,
370
+ });
371
+ yield* await submoduleLoader.processRepoAsStream(submoduleInfo.path);
372
+ }
326
373
  /**
327
374
  * Determines whether a file or directory should be ignored based on its
328
375
  * path and type.
@@ -414,6 +461,35 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
414
461
  return Promise.reject(error);
415
462
  }
416
463
  }
464
+ /**
465
+ * Asynchronously processes the contents of the entire GitHub repository,
466
+ * streaming each file as a Document object.
467
+ * @param path The path of the directory to process.
468
+ * @yields Yields a Promise that resolves to a Document object for each file found in the repository.
469
+ */
470
+ async *processRepoAsStream(path) {
471
+ const files = await this.fetchRepoFiles(path);
472
+ for (const file of files) {
473
+ if (this.shouldIgnore(file.path, file.type)) {
474
+ continue;
475
+ }
476
+ if (file.type === "file") {
477
+ try {
478
+ const fileResponse = await this.fetchFileContentWrapper(file);
479
+ yield new document_js_1.Document({
480
+ pageContent: fileResponse.contents,
481
+ metadata: fileResponse.metadata,
482
+ });
483
+ }
484
+ catch (error) {
485
+ this.handleError(`Failed to fetch file content: ${file.path}, ${error}`);
486
+ }
487
+ }
488
+ else if (this.recursive) {
489
+ yield* await this.processDirectoryAsStream(file.path);
490
+ }
491
+ }
492
+ }
417
493
  /**
418
494
  * Fetches the contents of a directory and maps the file / directory paths
419
495
  * to promises that will fetch the file / directory contents.
@@ -430,6 +506,35 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
430
506
  return Promise.reject(error);
431
507
  }
432
508
  }
509
+ /**
510
+ * Asynchronously processes the contents of a given directory in the GitHub repository,
511
+ * streaming each file as a Document object.
512
+ * @param path The path of the directory to process.
513
+ * @yields Yields a Promise that resolves to a Document object for each file in the directory.
514
+ */
515
+ async *processDirectoryAsStream(path) {
516
+ const files = await this.fetchRepoFiles(path);
517
+ for (const file of files) {
518
+ if (this.shouldIgnore(file.path, file.type)) {
519
+ continue;
520
+ }
521
+ if (file.type === "file") {
522
+ try {
523
+ const fileResponse = await this.fetchFileContentWrapper(file);
524
+ yield new document_js_1.Document({
525
+ pageContent: fileResponse.contents,
526
+ metadata: fileResponse.metadata,
527
+ });
528
+ }
529
+ catch {
530
+ this.handleError(`Failed to fetch file content: ${file.path}`);
531
+ }
532
+ }
533
+ else if (this.recursive) {
534
+ yield* await this.processDirectoryAsStream(file.path);
535
+ }
536
+ }
537
+ }
433
538
  /**
434
539
  * Fetches the files from a GitHub repository.
435
540
  * If the path denotes a single file, the resulting array contains only one element.
@@ -99,6 +99,12 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
99
99
  * @returns A promise that resolves to an array of Document instances.
100
100
  */
101
101
  load(): Promise<Document[]>;
102
+ /**
103
+ * Asynchronously streams documents from the entire GitHub repository.
104
+ * It is suitable for situations where processing large repositories in a memory-efficient manner is required.
105
+ * @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
106
+ */
107
+ loadAsStream(): AsyncGenerator<Document, void, undefined>;
102
108
  /**
103
109
  * Loads the information about Git submodules from the repository, if available.
104
110
  */
@@ -115,6 +121,12 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
115
121
  * @param submoduleInfo the info about the submodule to be loaded
116
122
  */
117
123
  private loadSubmodule;
124
+ /**
125
+ * Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
126
+ * @param submoduleInfo the info about the submodule to be loaded
127
+ * @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
128
+ */
129
+ private loadSubmoduleAsStream;
118
130
  /**
119
131
  * Determines whether a file or directory should be ignored based on its
120
132
  * path and type.
@@ -137,6 +149,13 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
137
149
  * Begins the process of fetching the contents of the repository
138
150
  */
139
151
  private processRepo;
152
+ /**
153
+ * Asynchronously processes the contents of the entire GitHub repository,
154
+ * streaming each file as a Document object.
155
+ * @param path The path of the directory to process.
156
+ * @yields Yields a Promise that resolves to a Document object for each file found in the repository.
157
+ */
158
+ private processRepoAsStream;
140
159
  /**
141
160
  * Fetches the contents of a directory and maps the file / directory paths
142
161
  * to promises that will fetch the file / directory contents.
@@ -144,6 +163,13 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
144
163
  * @returns A promise that resolves to an array of promises that will fetch the file / directory contents.
145
164
  */
146
165
  private processDirectory;
166
+ /**
167
+ * Asynchronously processes the contents of a given directory in the GitHub repository,
168
+ * streaming each file as a Document object.
169
+ * @param path The path of the directory to process.
170
+ * @yields Yields a Promise that resolves to a Document object for each file in the directory.
171
+ */
172
+ private processDirectoryAsStream;
147
173
  /**
148
174
  * Fetches the files from a GitHub repository.
149
175
  * If the path denotes a single file, the resulting array contains only one element.