langchain 0.0.142 → 0.0.144
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/callbacks/handlers/llmonitor.cjs +1 -0
- package/callbacks/handlers/llmonitor.d.ts +1 -0
- package/callbacks/handlers/llmonitor.js +1 -0
- package/dist/agents/mrkl/outputParser.cjs +1 -1
- package/dist/agents/mrkl/outputParser.js +1 -1
- package/dist/base_language/index.cjs +2 -1
- package/dist/base_language/index.d.ts +7 -2
- package/dist/base_language/index.js +2 -1
- package/dist/callbacks/handlers/llmonitor.cjs +223 -0
- package/dist/callbacks/handlers/llmonitor.d.ts +35 -0
- package/dist/callbacks/handlers/llmonitor.js +215 -0
- package/dist/chains/openai_functions/extraction.d.ts +4 -4
- package/dist/chains/openai_functions/openapi.d.ts +3 -3
- package/dist/chains/openai_functions/structured_output.d.ts +5 -4
- package/dist/chains/openai_functions/tagging.d.ts +4 -4
- package/dist/chains/openai_moderation.cjs +1 -0
- package/dist/chains/openai_moderation.js +1 -0
- package/dist/chat_models/base.cjs +4 -3
- package/dist/chat_models/base.d.ts +3 -3
- package/dist/chat_models/base.js +5 -4
- package/dist/chat_models/minimax.d.ts +6 -28
- package/dist/chat_models/openai.cjs +1 -0
- package/dist/chat_models/openai.d.ts +2 -3
- package/dist/chat_models/openai.js +1 -0
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +32 -0
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +11 -0
- package/dist/document_loaders/fs/openai_whisper_audio.js +28 -0
- package/dist/document_loaders/web/github.cjs +210 -24
- package/dist/document_loaders/web/github.d.ts +44 -1
- package/dist/document_loaders/web/github.js +210 -24
- package/dist/document_loaders/web/recursive_url.cjs +13 -0
- package/dist/document_loaders/web/recursive_url.js +13 -0
- package/dist/embeddings/hf_transformers.cjs +71 -0
- package/dist/embeddings/hf_transformers.d.ts +29 -0
- package/dist/embeddings/hf_transformers.js +67 -0
- package/dist/embeddings/openai.cjs +2 -1
- package/dist/embeddings/openai.js +2 -1
- package/dist/experimental/chat_models/anthropic_functions.d.ts +2 -5
- package/dist/llms/openai-chat.cjs +1 -0
- package/dist/llms/openai-chat.js +1 -0
- package/dist/llms/openai.cjs +1 -0
- package/dist/llms/openai.js +1 -0
- package/dist/load/import_constants.cjs +3 -0
- package/dist/load/import_constants.js +3 -0
- package/dist/prompts/chat.cjs +27 -1
- package/dist/prompts/chat.d.ts +3 -2
- package/dist/prompts/chat.js +28 -2
- package/dist/schema/index.cjs +44 -1
- package/dist/schema/index.d.ts +10 -0
- package/dist/schema/index.js +41 -0
- package/dist/tools/serpapi.cjs +108 -13
- package/dist/tools/serpapi.js +108 -13
- package/dist/vectorstores/redis.cjs +12 -4
- package/dist/vectorstores/redis.d.ts +8 -0
- package/dist/vectorstores/redis.js +12 -4
- package/dist/vectorstores/tigris.cjs +2 -0
- package/dist/vectorstores/tigris.d.ts +2 -3
- package/dist/vectorstores/tigris.js +2 -0
- package/dist/vectorstores/vectara.cjs +30 -12
- package/dist/vectorstores/vectara.d.ts +1 -1
- package/dist/vectorstores/vectara.js +30 -12
- package/document_loaders/fs/openai_whisper_audio.cjs +1 -0
- package/document_loaders/fs/openai_whisper_audio.d.ts +1 -0
- package/document_loaders/fs/openai_whisper_audio.js +1 -0
- package/embeddings/hf_transformers.cjs +1 -0
- package/embeddings/hf_transformers.d.ts +1 -0
- package/embeddings/hf_transformers.js +1 -0
- package/package.json +36 -6
|
@@ -110,6 +110,7 @@ class BaseChatModel extends index_js_2.BaseLanguageModel {
|
|
|
110
110
|
else {
|
|
111
111
|
parsedOptions = options;
|
|
112
112
|
}
|
|
113
|
+
const baseMessages = messages.map((messageList) => messageList.map(index_js_1.coerceMessageLikeToMessage));
|
|
113
114
|
const [runnableConfig, callOptions] = this._separateRunnableConfigFromCallOptions(parsedOptions);
|
|
114
115
|
// create callback manager and start run
|
|
115
116
|
const callbackManager_ = await manager_js_1.CallbackManager.configure(runnableConfig.callbacks ?? callbacks, this.callbacks, runnableConfig.tags, this.tags, runnableConfig.metadata, this.metadata, { verbose: this.verbose });
|
|
@@ -117,9 +118,9 @@ class BaseChatModel extends index_js_2.BaseLanguageModel {
|
|
|
117
118
|
options: callOptions,
|
|
118
119
|
invocation_params: this?.invocationParams(parsedOptions),
|
|
119
120
|
};
|
|
120
|
-
const runManagers = await callbackManager_?.handleChatModelStart(this.toJSON(),
|
|
121
|
+
const runManagers = await callbackManager_?.handleChatModelStart(this.toJSON(), baseMessages, undefined, undefined, extra);
|
|
121
122
|
// generate results
|
|
122
|
-
const results = await Promise.allSettled(
|
|
123
|
+
const results = await Promise.allSettled(baseMessages.map((messageList, i) => this._generate(messageList, { ...callOptions, promptIndex: i }, runManagers?.[i])));
|
|
123
124
|
// handle results
|
|
124
125
|
const generations = [];
|
|
125
126
|
const llmOutputs = [];
|
|
@@ -183,7 +184,7 @@ class BaseChatModel extends index_js_2.BaseLanguageModel {
|
|
|
183
184
|
* @returns A Promise that resolves to a BaseMessage.
|
|
184
185
|
*/
|
|
185
186
|
async call(messages, options, callbacks) {
|
|
186
|
-
const result = await this.generate([messages], options, callbacks);
|
|
187
|
+
const result = await this.generate([messages.map(index_js_1.coerceMessageLikeToMessage)], options, callbacks);
|
|
187
188
|
const generations = result.generations;
|
|
188
189
|
return generations[0][0].message;
|
|
189
190
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { BaseMessage, BasePromptValue, ChatResult, BaseMessageChunk, LLMResult, ChatGenerationChunk } from "../schema/index.js";
|
|
1
|
+
import { BaseMessage, BasePromptValue, ChatResult, BaseMessageChunk, LLMResult, ChatGenerationChunk, BaseMessageLike } from "../schema/index.js";
|
|
2
2
|
import { BaseLanguageModel, BaseLanguageModelCallOptions, BaseLanguageModelInput, BaseLanguageModelParams } from "../base_language/index.js";
|
|
3
3
|
import { CallbackManagerForLLMRun, Callbacks } from "../callbacks/manager.js";
|
|
4
4
|
import { RunnableConfig } from "../schema/runnable.js";
|
|
@@ -56,7 +56,7 @@ export declare abstract class BaseChatModel<CallOptions extends BaseChatModelCal
|
|
|
56
56
|
* @param callbacks The callbacks for the language model.
|
|
57
57
|
* @returns A Promise that resolves to an LLMResult.
|
|
58
58
|
*/
|
|
59
|
-
generate(messages:
|
|
59
|
+
generate(messages: BaseMessageLike[][], options?: string[] | CallOptions, callbacks?: Callbacks): Promise<LLMResult>;
|
|
60
60
|
/**
|
|
61
61
|
* Get the parameters used to invoke the model
|
|
62
62
|
*/
|
|
@@ -79,7 +79,7 @@ export declare abstract class BaseChatModel<CallOptions extends BaseChatModelCal
|
|
|
79
79
|
* @param callbacks The callbacks for the language model.
|
|
80
80
|
* @returns A Promise that resolves to a BaseMessage.
|
|
81
81
|
*/
|
|
82
|
-
call(messages:
|
|
82
|
+
call(messages: BaseMessageLike[], options?: string[] | CallOptions, callbacks?: Callbacks): Promise<BaseMessage>;
|
|
83
83
|
/**
|
|
84
84
|
* Makes a single call to the chat model with a prompt value.
|
|
85
85
|
* @param promptValue The value of the prompt.
|
package/dist/chat_models/base.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AIMessage, HumanMessage, RUN_KEY, } from "../schema/index.js";
|
|
1
|
+
import { AIMessage, HumanMessage, RUN_KEY, coerceMessageLikeToMessage, } from "../schema/index.js";
|
|
2
2
|
import { BaseLanguageModel, } from "../base_language/index.js";
|
|
3
3
|
import { CallbackManager, } from "../callbacks/manager.js";
|
|
4
4
|
/**
|
|
@@ -106,6 +106,7 @@ export class BaseChatModel extends BaseLanguageModel {
|
|
|
106
106
|
else {
|
|
107
107
|
parsedOptions = options;
|
|
108
108
|
}
|
|
109
|
+
const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage));
|
|
109
110
|
const [runnableConfig, callOptions] = this._separateRunnableConfigFromCallOptions(parsedOptions);
|
|
110
111
|
// create callback manager and start run
|
|
111
112
|
const callbackManager_ = await CallbackManager.configure(runnableConfig.callbacks ?? callbacks, this.callbacks, runnableConfig.tags, this.tags, runnableConfig.metadata, this.metadata, { verbose: this.verbose });
|
|
@@ -113,9 +114,9 @@ export class BaseChatModel extends BaseLanguageModel {
|
|
|
113
114
|
options: callOptions,
|
|
114
115
|
invocation_params: this?.invocationParams(parsedOptions),
|
|
115
116
|
};
|
|
116
|
-
const runManagers = await callbackManager_?.handleChatModelStart(this.toJSON(),
|
|
117
|
+
const runManagers = await callbackManager_?.handleChatModelStart(this.toJSON(), baseMessages, undefined, undefined, extra);
|
|
117
118
|
// generate results
|
|
118
|
-
const results = await Promise.allSettled(
|
|
119
|
+
const results = await Promise.allSettled(baseMessages.map((messageList, i) => this._generate(messageList, { ...callOptions, promptIndex: i }, runManagers?.[i])));
|
|
119
120
|
// handle results
|
|
120
121
|
const generations = [];
|
|
121
122
|
const llmOutputs = [];
|
|
@@ -179,7 +180,7 @@ export class BaseChatModel extends BaseLanguageModel {
|
|
|
179
180
|
* @returns A Promise that resolves to a BaseMessage.
|
|
180
181
|
*/
|
|
181
182
|
async call(messages, options, callbacks) {
|
|
182
|
-
const result = await this.generate([messages], options, callbacks);
|
|
183
|
+
const result = await this.generate([messages.map(coerceMessageLikeToMessage)], options, callbacks);
|
|
183
184
|
const generations = result.generations;
|
|
184
185
|
return generations[0][0].message;
|
|
185
186
|
}
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import type { OpenAI as OpenAIClient } from "openai";
|
|
1
2
|
import { BaseChatModel, BaseChatModelParams } from "./base.js";
|
|
2
3
|
import { BaseMessage, ChatResult } from "../schema/index.js";
|
|
3
4
|
import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
|
|
4
5
|
import { StructuredTool } from "../tools/index.js";
|
|
5
|
-
import {
|
|
6
|
+
import { BaseFunctionCallOptions } from "../base_language/index.js";
|
|
6
7
|
/**
|
|
7
8
|
* Type representing the sender_type of a message in the Minimax chat model.
|
|
8
9
|
*/
|
|
@@ -15,28 +16,6 @@ interface MinimaxChatCompletionRequestMessage {
|
|
|
15
16
|
sender_name?: string;
|
|
16
17
|
text: string;
|
|
17
18
|
}
|
|
18
|
-
export interface MinimaxChatCompletionRequestFunctions {
|
|
19
|
-
/**
|
|
20
|
-
* The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
|
|
21
|
-
* @type {string}
|
|
22
|
-
* @memberof MinimaxChatCompletionRequestFunctions
|
|
23
|
-
*/
|
|
24
|
-
name: string;
|
|
25
|
-
/**
|
|
26
|
-
* The description of what the function does.
|
|
27
|
-
* @type {string}
|
|
28
|
-
* @memberof MinimaxChatCompletionRequestFunctions
|
|
29
|
-
*/
|
|
30
|
-
description?: string;
|
|
31
|
-
/**
|
|
32
|
-
* The parameters the functions accepts, described as a JSON Schema object.
|
|
33
|
-
* @type {{ [key: string]: any; }}
|
|
34
|
-
* @memberof MinimaxChatCompletionRequestFunctions
|
|
35
|
-
*/
|
|
36
|
-
parameters?: {
|
|
37
|
-
[key: string]: any;
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
19
|
/**
|
|
41
20
|
* Interface representing a request for a chat completion.
|
|
42
21
|
*/
|
|
@@ -58,9 +37,9 @@ interface MinimaxChatCompletionRequest {
|
|
|
58
37
|
sample_messages?: MinimaxChatCompletionRequestMessage[];
|
|
59
38
|
/**
|
|
60
39
|
* A list of functions the model may generate JSON inputs for.
|
|
61
|
-
* @type {Array<
|
|
40
|
+
* @type {Array<OpenAIClient.Chat.ChatCompletionCreateParams.Function[]>}
|
|
62
41
|
*/
|
|
63
|
-
functions?:
|
|
42
|
+
functions?: OpenAIClient.Chat.ChatCompletionCreateParams.Function[];
|
|
64
43
|
plugins?: string[];
|
|
65
44
|
}
|
|
66
45
|
interface RoleMeta {
|
|
@@ -191,8 +170,7 @@ declare interface MinimaxChatInputPro extends MinimaxChatInputBase {
|
|
|
191
170
|
replyConstraints?: ReplyConstraints;
|
|
192
171
|
}
|
|
193
172
|
type MinimaxChatInput = MinimaxChatInputNormal & MinimaxChatInputPro;
|
|
194
|
-
export interface ChatMinimaxCallOptions extends
|
|
195
|
-
functions?: MinimaxChatCompletionRequestFunctions[];
|
|
173
|
+
export interface ChatMinimaxCallOptions extends BaseFunctionCallOptions {
|
|
196
174
|
tools?: StructuredTool[];
|
|
197
175
|
defaultUserName?: string;
|
|
198
176
|
defaultBotName?: string;
|
|
@@ -252,7 +230,7 @@ export declare class ChatMinimax extends BaseChatModel<ChatMinimaxCallOptions> i
|
|
|
252
230
|
identifyingParams(): {
|
|
253
231
|
prompt?: string | undefined;
|
|
254
232
|
stream?: boolean | undefined;
|
|
255
|
-
functions?:
|
|
233
|
+
functions?: OpenAIClient.Chat.Completions.ChatCompletionCreateParams.Function[] | undefined;
|
|
256
234
|
model: string;
|
|
257
235
|
temperature?: number | undefined;
|
|
258
236
|
top_p?: number | undefined;
|
|
@@ -325,6 +325,7 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
325
325
|
if (!this.azureOpenAIApiVersion) {
|
|
326
326
|
throw new Error("Azure OpenAI API version not found");
|
|
327
327
|
}
|
|
328
|
+
this.openAIApiKey = this.openAIApiKey ?? "";
|
|
328
329
|
}
|
|
329
330
|
this.clientConfig = {
|
|
330
331
|
apiKey: this.openAIApiKey,
|
|
@@ -4,6 +4,7 @@ import { BaseMessage, ChatGenerationChunk, ChatResult } from "../schema/index.js
|
|
|
4
4
|
import { StructuredTool } from "../tools/base.js";
|
|
5
5
|
import { AzureOpenAIInput, OpenAICallOptions, OpenAIChatInput, OpenAICoreRequestOptions, LegacyOpenAIInput } from "../types/openai-types.js";
|
|
6
6
|
import { BaseChatModel, BaseChatModelParams } from "./base.js";
|
|
7
|
+
import { BaseFunctionCallOptions } from "../base_language/index.js";
|
|
7
8
|
export { AzureOpenAIInput, OpenAICallOptions, OpenAIChatInput };
|
|
8
9
|
interface TokenUsage {
|
|
9
10
|
completionTokens?: number;
|
|
@@ -13,9 +14,7 @@ interface TokenUsage {
|
|
|
13
14
|
interface OpenAILLMOutput {
|
|
14
15
|
tokenUsage: TokenUsage;
|
|
15
16
|
}
|
|
16
|
-
export interface ChatOpenAICallOptions extends OpenAICallOptions {
|
|
17
|
-
function_call?: OpenAIClient.Chat.ChatCompletionCreateParams.FunctionCallOption;
|
|
18
|
-
functions?: OpenAIClient.Chat.ChatCompletionCreateParams.Function[];
|
|
17
|
+
export interface ChatOpenAICallOptions extends OpenAICallOptions, BaseFunctionCallOptions {
|
|
19
18
|
tools?: StructuredTool[];
|
|
20
19
|
promptIndex?: number;
|
|
21
20
|
}
|
|
@@ -322,6 +322,7 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
322
322
|
if (!this.azureOpenAIApiVersion) {
|
|
323
323
|
throw new Error("Azure OpenAI API version not found");
|
|
324
324
|
}
|
|
325
|
+
this.openAIApiKey = this.openAIApiKey ?? "";
|
|
325
326
|
}
|
|
326
327
|
this.clientConfig = {
|
|
327
328
|
apiKey: this.openAIApiKey,
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.OpenAIWhisperAudio = void 0;
|
|
4
|
+
const openai_1 = require("openai");
|
|
5
|
+
const document_js_1 = require("../../document.cjs");
|
|
6
|
+
const buffer_js_1 = require("./buffer.cjs");
|
|
7
|
+
const MODEL_NAME = "whisper-1";
|
|
8
|
+
class OpenAIWhisperAudio extends buffer_js_1.BufferLoader {
|
|
9
|
+
constructor(filePathOrBlob, fields) {
|
|
10
|
+
super(filePathOrBlob);
|
|
11
|
+
Object.defineProperty(this, "openAIClient", {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
configurable: true,
|
|
14
|
+
writable: true,
|
|
15
|
+
value: void 0
|
|
16
|
+
});
|
|
17
|
+
this.openAIClient = new openai_1.OpenAI(fields?.clientOptions);
|
|
18
|
+
}
|
|
19
|
+
async parse(raw, metadata) {
|
|
20
|
+
const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
21
|
+
const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
|
|
22
|
+
file: await (0, openai_1.toFile)(raw, fileName),
|
|
23
|
+
model: MODEL_NAME,
|
|
24
|
+
});
|
|
25
|
+
const document = new document_js_1.Document({
|
|
26
|
+
pageContent: transcriptionResponse.text,
|
|
27
|
+
metadata,
|
|
28
|
+
});
|
|
29
|
+
return [document];
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
exports.OpenAIWhisperAudio = OpenAIWhisperAudio;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
+
import { type ClientOptions } from "openai";
|
|
3
|
+
import { Document } from "../../document.js";
|
|
4
|
+
import { BufferLoader } from "./buffer.js";
|
|
5
|
+
export declare class OpenAIWhisperAudio extends BufferLoader {
|
|
6
|
+
private readonly openAIClient;
|
|
7
|
+
constructor(filePathOrBlob: string | Blob, fields?: {
|
|
8
|
+
clientOptions?: ClientOptions;
|
|
9
|
+
});
|
|
10
|
+
protected parse(raw: Buffer, metadata: Record<string, string>): Promise<Document[]>;
|
|
11
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { OpenAI as OpenAIClient, toFile } from "openai";
|
|
2
|
+
import { Document } from "../../document.js";
|
|
3
|
+
import { BufferLoader } from "./buffer.js";
|
|
4
|
+
const MODEL_NAME = "whisper-1";
|
|
5
|
+
export class OpenAIWhisperAudio extends BufferLoader {
|
|
6
|
+
constructor(filePathOrBlob, fields) {
|
|
7
|
+
super(filePathOrBlob);
|
|
8
|
+
Object.defineProperty(this, "openAIClient", {
|
|
9
|
+
enumerable: true,
|
|
10
|
+
configurable: true,
|
|
11
|
+
writable: true,
|
|
12
|
+
value: void 0
|
|
13
|
+
});
|
|
14
|
+
this.openAIClient = new OpenAIClient(fields?.clientOptions);
|
|
15
|
+
}
|
|
16
|
+
async parse(raw, metadata) {
|
|
17
|
+
const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
18
|
+
const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
|
|
19
|
+
file: await toFile(raw, fileName),
|
|
20
|
+
model: MODEL_NAME,
|
|
21
|
+
});
|
|
22
|
+
const document = new Document({
|
|
23
|
+
pageContent: transcriptionResponse.text,
|
|
24
|
+
metadata,
|
|
25
|
+
});
|
|
26
|
+
return [document];
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -28,8 +28,20 @@ function isBinaryPath(name) {
|
|
|
28
28
|
* loading files from a GitHub repository.
|
|
29
29
|
*/
|
|
30
30
|
class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
31
|
-
constructor(githubUrl, { accessToken = (0, env_js_1.getEnvironmentVariable)("GITHUB_ACCESS_TOKEN"), branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
|
|
31
|
+
constructor(githubUrl, { accessToken = (0, env_js_1.getEnvironmentVariable)("GITHUB_ACCESS_TOKEN"), baseUrl = "https://github.com", apiUrl = "https://api.github.com", branch = "main", recursive = true, processSubmodules = false, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
|
|
32
32
|
super();
|
|
33
|
+
Object.defineProperty(this, "baseUrl", {
|
|
34
|
+
enumerable: true,
|
|
35
|
+
configurable: true,
|
|
36
|
+
writable: true,
|
|
37
|
+
value: void 0
|
|
38
|
+
});
|
|
39
|
+
Object.defineProperty(this, "apiUrl", {
|
|
40
|
+
enumerable: true,
|
|
41
|
+
configurable: true,
|
|
42
|
+
writable: true,
|
|
43
|
+
value: void 0
|
|
44
|
+
});
|
|
33
45
|
Object.defineProperty(this, "owner", {
|
|
34
46
|
enumerable: true,
|
|
35
47
|
configurable: true,
|
|
@@ -66,6 +78,12 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
66
78
|
writable: true,
|
|
67
79
|
value: void 0
|
|
68
80
|
});
|
|
81
|
+
Object.defineProperty(this, "processSubmodules", {
|
|
82
|
+
enumerable: true,
|
|
83
|
+
configurable: true,
|
|
84
|
+
writable: true,
|
|
85
|
+
value: void 0
|
|
86
|
+
});
|
|
69
87
|
Object.defineProperty(this, "unknown", {
|
|
70
88
|
enumerable: true,
|
|
71
89
|
configurable: true,
|
|
@@ -96,22 +114,55 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
96
114
|
writable: true,
|
|
97
115
|
value: void 0
|
|
98
116
|
});
|
|
117
|
+
Object.defineProperty(this, "maxConcurrency", {
|
|
118
|
+
enumerable: true,
|
|
119
|
+
configurable: true,
|
|
120
|
+
writable: true,
|
|
121
|
+
value: void 0
|
|
122
|
+
});
|
|
123
|
+
Object.defineProperty(this, "maxRetries", {
|
|
124
|
+
enumerable: true,
|
|
125
|
+
configurable: true,
|
|
126
|
+
writable: true,
|
|
127
|
+
value: void 0
|
|
128
|
+
});
|
|
99
129
|
Object.defineProperty(this, "caller", {
|
|
100
130
|
enumerable: true,
|
|
101
131
|
configurable: true,
|
|
102
132
|
writable: true,
|
|
103
133
|
value: void 0
|
|
104
134
|
});
|
|
135
|
+
Object.defineProperty(this, "ignorePaths", {
|
|
136
|
+
enumerable: true,
|
|
137
|
+
configurable: true,
|
|
138
|
+
writable: true,
|
|
139
|
+
value: void 0
|
|
140
|
+
});
|
|
141
|
+
Object.defineProperty(this, "submoduleInfos", {
|
|
142
|
+
enumerable: true,
|
|
143
|
+
configurable: true,
|
|
144
|
+
writable: true,
|
|
145
|
+
value: void 0
|
|
146
|
+
});
|
|
147
|
+
this.baseUrl = baseUrl;
|
|
148
|
+
this.apiUrl = apiUrl;
|
|
105
149
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
106
150
|
this.owner = owner;
|
|
107
151
|
this.repo = repo;
|
|
108
152
|
this.initialPath = path;
|
|
109
153
|
this.branch = branch;
|
|
110
154
|
this.recursive = recursive;
|
|
155
|
+
// processing submodules without processing contents of other directories makes no sense
|
|
156
|
+
if (processSubmodules && !recursive) {
|
|
157
|
+
throw new Error(`Input property "recursive" must be true if "processSubmodules" is true.`);
|
|
158
|
+
}
|
|
159
|
+
this.processSubmodules = processSubmodules;
|
|
111
160
|
this.unknown = unknown;
|
|
112
161
|
this.accessToken = accessToken;
|
|
113
162
|
this.ignoreFiles = ignoreFiles;
|
|
114
163
|
this.verbose = verbose;
|
|
164
|
+
this.maxConcurrency = maxConcurrency;
|
|
165
|
+
this.maxRetries = maxRetries;
|
|
115
166
|
this.headers = {
|
|
116
167
|
"User-Agent": "langchain",
|
|
117
168
|
};
|
|
@@ -120,6 +171,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
120
171
|
maxRetries,
|
|
121
172
|
...rest,
|
|
122
173
|
});
|
|
174
|
+
this.ignorePaths = ignorePaths;
|
|
123
175
|
if (ignorePaths) {
|
|
124
176
|
this.ignore = ignore_1.default.default().add(ignorePaths);
|
|
125
177
|
}
|
|
@@ -136,7 +188,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
136
188
|
* @returns An object containing the owner, repository, and path extracted from the GitHub URL.
|
|
137
189
|
*/
|
|
138
190
|
extractOwnerAndRepoAndPath(url) {
|
|
139
|
-
const match = url.match(/
|
|
191
|
+
const match = url.match(new RegExp(`${this.baseUrl}/([^/]+)/([^/]+)(/tree/[^/]+/(.+))?`, "i"));
|
|
140
192
|
if (!match) {
|
|
141
193
|
throw new Error("Invalid GitHub URL format.");
|
|
142
194
|
}
|
|
@@ -149,10 +201,127 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
149
201
|
* @returns A promise that resolves to an array of Document instances.
|
|
150
202
|
*/
|
|
151
203
|
async load() {
|
|
152
|
-
|
|
204
|
+
this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
|
|
205
|
+
// process repository without submodules
|
|
206
|
+
const documents = (await this.processRepo()).map((fileResponse) => new document_js_1.Document({
|
|
153
207
|
pageContent: fileResponse.contents,
|
|
154
208
|
metadata: fileResponse.metadata,
|
|
155
209
|
}));
|
|
210
|
+
if (this.processSubmodules) {
|
|
211
|
+
// process submodules
|
|
212
|
+
await this.getSubmoduleInfo();
|
|
213
|
+
for (const submoduleInfo of this.submoduleInfos) {
|
|
214
|
+
documents.push(...(await this.loadSubmodule(submoduleInfo)));
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
return documents;
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Loads the information about Git submodules from the repository, if available.
|
|
221
|
+
*/
|
|
222
|
+
async getSubmoduleInfo() {
|
|
223
|
+
this.log("Loading info about submodules...");
|
|
224
|
+
// we have to fetch the files of the root directory to get the download url of the .gitmodules file
|
|
225
|
+
// however, we cannot reuse the files retrieved in processRepo() as initialPath may be != ""
|
|
226
|
+
// so it may be that we end up fetching this file list twice
|
|
227
|
+
const repoFiles = await this.fetchRepoFiles("");
|
|
228
|
+
const gitmodulesFile = repoFiles.filter(({ name }) => name === ".gitmodules")?.[0];
|
|
229
|
+
if (gitmodulesFile) {
|
|
230
|
+
const gitmodulesContent = await this.fetchFileContent({
|
|
231
|
+
download_url: gitmodulesFile.download_url,
|
|
232
|
+
});
|
|
233
|
+
this.submoduleInfos = await this.parseGitmodules(gitmodulesContent);
|
|
234
|
+
}
|
|
235
|
+
else {
|
|
236
|
+
this.submoduleInfos = [];
|
|
237
|
+
}
|
|
238
|
+
this.log(`Found ${this.submoduleInfos.length} submodules:`);
|
|
239
|
+
for (const submoduleInfo of this.submoduleInfos) {
|
|
240
|
+
this.log(JSON.stringify(submoduleInfo));
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Parses the given content of a .gitmodules file. Furthermore, queries the current SHA ref of all submodules.
|
|
245
|
+
* Returns the submodule information as array.
|
|
246
|
+
* @param gitmodulesContent the content of a .gitmodules file
|
|
247
|
+
*/
|
|
248
|
+
async parseGitmodules(gitmodulesContent) {
|
|
249
|
+
// catches the initial line of submodule entries
|
|
250
|
+
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
|
|
251
|
+
// catches the properties of a submodule
|
|
252
|
+
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
|
|
253
|
+
const submoduleInfos = [];
|
|
254
|
+
for (const [, name, propertyLines] of gitmodulesContent.matchAll(submodulePattern)) {
|
|
255
|
+
if (!name || !propertyLines) {
|
|
256
|
+
throw new Error("Could not parse submodule entry");
|
|
257
|
+
}
|
|
258
|
+
const submodulePropertyLines = propertyLines.matchAll(keyValuePattern);
|
|
259
|
+
let path;
|
|
260
|
+
let url;
|
|
261
|
+
for (const [, key, value] of submodulePropertyLines) {
|
|
262
|
+
if (!key || !value) {
|
|
263
|
+
throw new Error(`Could not parse key/value pairs for submodule ${name}`);
|
|
264
|
+
}
|
|
265
|
+
switch (key) {
|
|
266
|
+
case "path":
|
|
267
|
+
path = value;
|
|
268
|
+
break;
|
|
269
|
+
case "url":
|
|
270
|
+
url = value;
|
|
271
|
+
if (url.endsWith(".git")) {
|
|
272
|
+
url = url.substring(0, url.length - 4);
|
|
273
|
+
}
|
|
274
|
+
break;
|
|
275
|
+
default:
|
|
276
|
+
// ignoring unused keys
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
if (!path || !url) {
|
|
280
|
+
throw new Error(`Missing properties for submodule ${name}`);
|
|
281
|
+
}
|
|
282
|
+
// fetch the current ref of the submodule
|
|
283
|
+
const files = await this.fetchRepoFiles(path);
|
|
284
|
+
const submoduleInfo = {
|
|
285
|
+
name,
|
|
286
|
+
path,
|
|
287
|
+
url,
|
|
288
|
+
ref: files[0].sha,
|
|
289
|
+
};
|
|
290
|
+
submoduleInfos.push(submoduleInfo);
|
|
291
|
+
}
|
|
292
|
+
return submoduleInfos;
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Loads the documents of the given submodule. Uses the same parameters as for the current repository.
|
|
296
|
+
* External submodules, i.e. submodules pointing to another GitHub instance, are ignored.
|
|
297
|
+
* @param submoduleInfo the info about the submodule to be loaded
|
|
298
|
+
*/
|
|
299
|
+
async loadSubmodule(submoduleInfo) {
|
|
300
|
+
if (!submoduleInfo.url.startsWith(this.baseUrl)) {
|
|
301
|
+
this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
|
|
302
|
+
return [];
|
|
303
|
+
}
|
|
304
|
+
else if (!submoduleInfo.path.startsWith(this.initialPath)) {
|
|
305
|
+
this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
|
|
306
|
+
return [];
|
|
307
|
+
}
|
|
308
|
+
else {
|
|
309
|
+
this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
|
|
310
|
+
return new GithubRepoLoader(submoduleInfo.url, {
|
|
311
|
+
accessToken: this.accessToken,
|
|
312
|
+
apiUrl: this.apiUrl,
|
|
313
|
+
baseUrl: this.baseUrl,
|
|
314
|
+
branch: submoduleInfo.ref,
|
|
315
|
+
recursive: this.recursive,
|
|
316
|
+
processSubmodules: this.processSubmodules,
|
|
317
|
+
unknown: this.unknown,
|
|
318
|
+
ignoreFiles: this.ignoreFiles,
|
|
319
|
+
ignorePaths: this.ignorePaths,
|
|
320
|
+
verbose: this.verbose,
|
|
321
|
+
maxConcurrency: this.maxConcurrency,
|
|
322
|
+
maxRetries: this.maxRetries,
|
|
323
|
+
}).load();
|
|
324
|
+
}
|
|
156
325
|
}
|
|
157
326
|
/**
|
|
158
327
|
* Determines whether a file or directory should be ignored based on its
|
|
@@ -192,7 +361,11 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
192
361
|
});
|
|
193
362
|
return {
|
|
194
363
|
contents: fileContent || "",
|
|
195
|
-
metadata: {
|
|
364
|
+
metadata: {
|
|
365
|
+
source: file.path,
|
|
366
|
+
repository: `${this.baseUrl}/${this.owner}/${this.repo}`,
|
|
367
|
+
branch: this.branch,
|
|
368
|
+
},
|
|
196
369
|
};
|
|
197
370
|
}
|
|
198
371
|
/**
|
|
@@ -203,19 +376,24 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
203
376
|
// Directories have nested files / directories, which is why this is a list of promises of promises
|
|
204
377
|
const currentDirectoryDirectoryPromises = [];
|
|
205
378
|
for (const file of files) {
|
|
206
|
-
if (
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
379
|
+
if (this.shouldIgnore(file.path, file.type)) {
|
|
380
|
+
continue;
|
|
381
|
+
}
|
|
382
|
+
if (file.type === "file" && file.size === 0) {
|
|
383
|
+
// this is a submodule. ignoring for the moment. submodule processing is done separately
|
|
384
|
+
continue;
|
|
385
|
+
}
|
|
386
|
+
if (file.type !== "dir") {
|
|
387
|
+
try {
|
|
388
|
+
currentDirectoryFilePromises.push(this.fetchFileContentWrapper(file));
|
|
214
389
|
}
|
|
215
|
-
|
|
216
|
-
|
|
390
|
+
catch (e) {
|
|
391
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
217
392
|
}
|
|
218
393
|
}
|
|
394
|
+
else if (this.recursive) {
|
|
395
|
+
currentDirectoryDirectoryPromises.push(this.processDirectory(file.path));
|
|
396
|
+
}
|
|
219
397
|
}
|
|
220
398
|
const curDirDirectories = await Promise.all(currentDirectoryDirectoryPromises);
|
|
221
399
|
return [...currentDirectoryFilePromises, ...curDirDirectories.flat()];
|
|
@@ -254,24 +432,25 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
254
432
|
}
|
|
255
433
|
/**
|
|
256
434
|
* Fetches the files from a GitHub repository.
|
|
435
|
+
* If the path denotes a single file, the resulting array contains only one element.
|
|
257
436
|
* @param path The path of the repository to fetch the files from.
|
|
258
437
|
* @returns A promise that resolves to an array of GithubFile instances.
|
|
259
438
|
*/
|
|
260
439
|
async fetchRepoFiles(path) {
|
|
261
|
-
const url =
|
|
440
|
+
const url = `${this.apiUrl}/repos/${this.owner}/${this.repo}/contents/${path}?ref=${this.branch}`;
|
|
262
441
|
return this.caller.call(async () => {
|
|
263
|
-
|
|
264
|
-
console.log("Fetching", url);
|
|
265
|
-
}
|
|
442
|
+
this.log(`Fetching ${url}`);
|
|
266
443
|
const response = await fetch(url, { headers: this.headers });
|
|
267
444
|
const data = await response.json();
|
|
268
445
|
if (!response.ok) {
|
|
269
446
|
throw new Error(`Unable to fetch repository files: ${response.status} ${JSON.stringify(data)}`);
|
|
270
447
|
}
|
|
271
|
-
if (
|
|
272
|
-
|
|
448
|
+
if (Array.isArray(data)) {
|
|
449
|
+
return data;
|
|
450
|
+
}
|
|
451
|
+
else {
|
|
452
|
+
return [data];
|
|
273
453
|
}
|
|
274
|
-
return data;
|
|
275
454
|
});
|
|
276
455
|
}
|
|
277
456
|
/**
|
|
@@ -281,9 +460,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
281
460
|
*/
|
|
282
461
|
async fetchFileContent(file) {
|
|
283
462
|
return this.caller.call(async () => {
|
|
284
|
-
|
|
285
|
-
console.log("Fetching", file.download_url);
|
|
286
|
-
}
|
|
463
|
+
this.log(`Fetching ${file.download_url}`);
|
|
287
464
|
const response = await fetch(file.download_url, {
|
|
288
465
|
headers: this.headers,
|
|
289
466
|
});
|
|
@@ -308,5 +485,14 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
308
485
|
throw new Error(`Unknown unknown handling: ${this.unknown}`);
|
|
309
486
|
}
|
|
310
487
|
}
|
|
488
|
+
/**
|
|
489
|
+
* Logs the given message to the console, if parameter 'verbose' is set to true.
|
|
490
|
+
* @param message the message to be logged.
|
|
491
|
+
*/
|
|
492
|
+
log(message) {
|
|
493
|
+
if (this.verbose) {
|
|
494
|
+
console.log(message);
|
|
495
|
+
}
|
|
496
|
+
}
|
|
311
497
|
}
|
|
312
498
|
exports.GithubRepoLoader = GithubRepoLoader;
|