langchain 0.0.143 → 0.0.144
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/callbacks/handlers/llmonitor.cjs +1 -0
- package/callbacks/handlers/llmonitor.d.ts +1 -0
- package/callbacks/handlers/llmonitor.js +1 -0
- package/dist/agents/mrkl/outputParser.cjs +1 -1
- package/dist/agents/mrkl/outputParser.js +1 -1
- package/dist/base_language/index.cjs +2 -1
- package/dist/base_language/index.d.ts +7 -2
- package/dist/base_language/index.js +2 -1
- package/dist/callbacks/handlers/llmonitor.cjs +223 -0
- package/dist/callbacks/handlers/llmonitor.d.ts +35 -0
- package/dist/callbacks/handlers/llmonitor.js +215 -0
- package/dist/chains/openai_functions/extraction.d.ts +4 -4
- package/dist/chains/openai_functions/openapi.d.ts +3 -3
- package/dist/chains/openai_functions/structured_output.d.ts +5 -4
- package/dist/chains/openai_functions/tagging.d.ts +4 -4
- package/dist/chains/openai_moderation.cjs +1 -0
- package/dist/chains/openai_moderation.js +1 -0
- package/dist/chat_models/base.cjs +4 -3
- package/dist/chat_models/base.d.ts +3 -3
- package/dist/chat_models/base.js +5 -4
- package/dist/chat_models/minimax.d.ts +6 -28
- package/dist/chat_models/openai.d.ts +2 -3
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +32 -0
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +11 -0
- package/dist/document_loaders/fs/openai_whisper_audio.js +28 -0
- package/dist/document_loaders/web/github.cjs +210 -24
- package/dist/document_loaders/web/github.d.ts +44 -1
- package/dist/document_loaders/web/github.js +210 -24
- package/dist/document_loaders/web/recursive_url.cjs +13 -0
- package/dist/document_loaders/web/recursive_url.js +13 -0
- package/dist/embeddings/hf_transformers.cjs +71 -0
- package/dist/embeddings/hf_transformers.d.ts +29 -0
- package/dist/embeddings/hf_transformers.js +67 -0
- package/dist/experimental/chat_models/anthropic_functions.d.ts +2 -5
- package/dist/load/import_constants.cjs +3 -0
- package/dist/load/import_constants.js +3 -0
- package/dist/prompts/chat.cjs +27 -1
- package/dist/prompts/chat.d.ts +3 -2
- package/dist/prompts/chat.js +28 -2
- package/dist/schema/index.cjs +44 -1
- package/dist/schema/index.d.ts +10 -0
- package/dist/schema/index.js +41 -0
- package/dist/tools/serpapi.cjs +108 -13
- package/dist/tools/serpapi.js +108 -13
- package/dist/vectorstores/redis.cjs +12 -4
- package/dist/vectorstores/redis.d.ts +8 -0
- package/dist/vectorstores/redis.js +12 -4
- package/dist/vectorstores/tigris.cjs +2 -0
- package/dist/vectorstores/tigris.d.ts +2 -3
- package/dist/vectorstores/tigris.js +2 -0
- package/dist/vectorstores/vectara.cjs +30 -12
- package/dist/vectorstores/vectara.d.ts +1 -1
- package/dist/vectorstores/vectara.js +30 -12
- package/document_loaders/fs/openai_whisper_audio.cjs +1 -0
- package/document_loaders/fs/openai_whisper_audio.d.ts +1 -0
- package/document_loaders/fs/openai_whisper_audio.js +1 -0
- package/embeddings/hf_transformers.cjs +1 -0
- package/embeddings/hf_transformers.d.ts +1 -0
- package/embeddings/hf_transformers.js +1 -0
- package/package.json +36 -6
|
@@ -29,8 +29,22 @@ export interface GithubFile {
|
|
|
29
29
|
* properties specific to the GitHub repository loader.
|
|
30
30
|
*/
|
|
31
31
|
export interface GithubRepoLoaderParams extends AsyncCallerParams {
|
|
32
|
+
/**
|
|
33
|
+
* The base URL of the GitHub instance.
|
|
34
|
+
* To be used when you are not targeting github.com, e.g. a GitHub Enterprise instance.
|
|
35
|
+
*/
|
|
36
|
+
baseUrl?: string;
|
|
37
|
+
/**
|
|
38
|
+
* The API endpoint URL of the GitHub instance.
|
|
39
|
+
* To be used when you are not targeting github.com, e.g. a GitHub Enterprise instance.
|
|
40
|
+
*/
|
|
41
|
+
apiUrl?: string;
|
|
32
42
|
branch?: string;
|
|
33
43
|
recursive?: boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Set to true to recursively process submodules. Is only effective, when recursive=true.
|
|
46
|
+
*/
|
|
47
|
+
processSubmodules?: boolean;
|
|
34
48
|
unknown?: UnknownHandling;
|
|
35
49
|
accessToken?: string;
|
|
36
50
|
ignoreFiles?: (string | RegExp)[];
|
|
@@ -52,19 +66,26 @@ export interface GithubRepoLoaderParams extends AsyncCallerParams {
|
|
|
52
66
|
* loading files from a GitHub repository.
|
|
53
67
|
*/
|
|
54
68
|
export declare class GithubRepoLoader extends BaseDocumentLoader implements GithubRepoLoaderParams {
|
|
69
|
+
baseUrl: string;
|
|
70
|
+
apiUrl: string;
|
|
55
71
|
private readonly owner;
|
|
56
72
|
private readonly repo;
|
|
57
73
|
private readonly initialPath;
|
|
58
74
|
private headers;
|
|
59
75
|
branch: string;
|
|
60
76
|
recursive: boolean;
|
|
77
|
+
processSubmodules: boolean;
|
|
61
78
|
unknown: UnknownHandling;
|
|
62
79
|
accessToken?: string;
|
|
63
80
|
ignoreFiles: (string | RegExp)[];
|
|
64
81
|
ignore?: Ignore;
|
|
65
82
|
verbose?: boolean;
|
|
83
|
+
maxConcurrency?: number;
|
|
84
|
+
maxRetries?: number;
|
|
66
85
|
protected caller: AsyncCaller;
|
|
67
|
-
|
|
86
|
+
ignorePaths?: string[];
|
|
87
|
+
private submoduleInfos;
|
|
88
|
+
constructor(githubUrl: string, { accessToken, baseUrl, apiUrl, branch, recursive, processSubmodules, unknown, ignoreFiles, ignorePaths, verbose, maxConcurrency, maxRetries, ...rest }?: GithubRepoLoaderParams);
|
|
68
89
|
/**
|
|
69
90
|
* Extracts the owner, repository, and path from a GitHub URL.
|
|
70
91
|
* @param url The GitHub URL to extract information from.
|
|
@@ -78,6 +99,22 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
78
99
|
* @returns A promise that resolves to an array of Document instances.
|
|
79
100
|
*/
|
|
80
101
|
load(): Promise<Document[]>;
|
|
102
|
+
/**
|
|
103
|
+
* Loads the information about Git submodules from the repository, if available.
|
|
104
|
+
*/
|
|
105
|
+
private getSubmoduleInfo;
|
|
106
|
+
/**
|
|
107
|
+
* Parses the given content of a .gitmodules file. Furthermore, queries the current SHA ref of all submodules.
|
|
108
|
+
* Returns the submodule information as array.
|
|
109
|
+
* @param gitmodulesContent the content of a .gitmodules file
|
|
110
|
+
*/
|
|
111
|
+
private parseGitmodules;
|
|
112
|
+
/**
|
|
113
|
+
* Loads the documents of the given submodule. Uses the same parameters as for the current repository.
|
|
114
|
+
* External submodules, i.e. submodules pointing to another GitHub instance, are ignored.
|
|
115
|
+
* @param submoduleInfo the info about the submodule to be loaded
|
|
116
|
+
*/
|
|
117
|
+
private loadSubmodule;
|
|
81
118
|
/**
|
|
82
119
|
* Determines whether a file or directory should be ignored based on its
|
|
83
120
|
* path and type.
|
|
@@ -109,6 +146,7 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
109
146
|
private processDirectory;
|
|
110
147
|
/**
|
|
111
148
|
* Fetches the files from a GitHub repository.
|
|
149
|
+
* If the path denotes a single file, the resulting array contains only one element.
|
|
112
150
|
* @param path The path of the repository to fetch the files from.
|
|
113
151
|
* @returns A promise that resolves to an array of GithubFile instances.
|
|
114
152
|
*/
|
|
@@ -125,4 +163,9 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
125
163
|
* @returns void
|
|
126
164
|
*/
|
|
127
165
|
private handleError;
|
|
166
|
+
/**
|
|
167
|
+
* Logs the given message to the console, if parameter 'verbose' is set to true.
|
|
168
|
+
* @param message the message to be logged.
|
|
169
|
+
*/
|
|
170
|
+
private log;
|
|
128
171
|
}
|
|
@@ -22,8 +22,20 @@ function isBinaryPath(name) {
|
|
|
22
22
|
* loading files from a GitHub repository.
|
|
23
23
|
*/
|
|
24
24
|
export class GithubRepoLoader extends BaseDocumentLoader {
|
|
25
|
-
constructor(githubUrl, { accessToken = getEnvironmentVariable("GITHUB_ACCESS_TOKEN"), branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
|
|
25
|
+
constructor(githubUrl, { accessToken = getEnvironmentVariable("GITHUB_ACCESS_TOKEN"), baseUrl = "https://github.com", apiUrl = "https://api.github.com", branch = "main", recursive = true, processSubmodules = false, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
|
|
26
26
|
super();
|
|
27
|
+
Object.defineProperty(this, "baseUrl", {
|
|
28
|
+
enumerable: true,
|
|
29
|
+
configurable: true,
|
|
30
|
+
writable: true,
|
|
31
|
+
value: void 0
|
|
32
|
+
});
|
|
33
|
+
Object.defineProperty(this, "apiUrl", {
|
|
34
|
+
enumerable: true,
|
|
35
|
+
configurable: true,
|
|
36
|
+
writable: true,
|
|
37
|
+
value: void 0
|
|
38
|
+
});
|
|
27
39
|
Object.defineProperty(this, "owner", {
|
|
28
40
|
enumerable: true,
|
|
29
41
|
configurable: true,
|
|
@@ -60,6 +72,12 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
60
72
|
writable: true,
|
|
61
73
|
value: void 0
|
|
62
74
|
});
|
|
75
|
+
Object.defineProperty(this, "processSubmodules", {
|
|
76
|
+
enumerable: true,
|
|
77
|
+
configurable: true,
|
|
78
|
+
writable: true,
|
|
79
|
+
value: void 0
|
|
80
|
+
});
|
|
63
81
|
Object.defineProperty(this, "unknown", {
|
|
64
82
|
enumerable: true,
|
|
65
83
|
configurable: true,
|
|
@@ -90,22 +108,55 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
90
108
|
writable: true,
|
|
91
109
|
value: void 0
|
|
92
110
|
});
|
|
111
|
+
Object.defineProperty(this, "maxConcurrency", {
|
|
112
|
+
enumerable: true,
|
|
113
|
+
configurable: true,
|
|
114
|
+
writable: true,
|
|
115
|
+
value: void 0
|
|
116
|
+
});
|
|
117
|
+
Object.defineProperty(this, "maxRetries", {
|
|
118
|
+
enumerable: true,
|
|
119
|
+
configurable: true,
|
|
120
|
+
writable: true,
|
|
121
|
+
value: void 0
|
|
122
|
+
});
|
|
93
123
|
Object.defineProperty(this, "caller", {
|
|
94
124
|
enumerable: true,
|
|
95
125
|
configurable: true,
|
|
96
126
|
writable: true,
|
|
97
127
|
value: void 0
|
|
98
128
|
});
|
|
129
|
+
Object.defineProperty(this, "ignorePaths", {
|
|
130
|
+
enumerable: true,
|
|
131
|
+
configurable: true,
|
|
132
|
+
writable: true,
|
|
133
|
+
value: void 0
|
|
134
|
+
});
|
|
135
|
+
Object.defineProperty(this, "submoduleInfos", {
|
|
136
|
+
enumerable: true,
|
|
137
|
+
configurable: true,
|
|
138
|
+
writable: true,
|
|
139
|
+
value: void 0
|
|
140
|
+
});
|
|
141
|
+
this.baseUrl = baseUrl;
|
|
142
|
+
this.apiUrl = apiUrl;
|
|
99
143
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
100
144
|
this.owner = owner;
|
|
101
145
|
this.repo = repo;
|
|
102
146
|
this.initialPath = path;
|
|
103
147
|
this.branch = branch;
|
|
104
148
|
this.recursive = recursive;
|
|
149
|
+
// processing submodules without processing contents of other directories makes no sense
|
|
150
|
+
if (processSubmodules && !recursive) {
|
|
151
|
+
throw new Error(`Input property "recursive" must be true if "processSubmodules" is true.`);
|
|
152
|
+
}
|
|
153
|
+
this.processSubmodules = processSubmodules;
|
|
105
154
|
this.unknown = unknown;
|
|
106
155
|
this.accessToken = accessToken;
|
|
107
156
|
this.ignoreFiles = ignoreFiles;
|
|
108
157
|
this.verbose = verbose;
|
|
158
|
+
this.maxConcurrency = maxConcurrency;
|
|
159
|
+
this.maxRetries = maxRetries;
|
|
109
160
|
this.headers = {
|
|
110
161
|
"User-Agent": "langchain",
|
|
111
162
|
};
|
|
@@ -114,6 +165,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
114
165
|
maxRetries,
|
|
115
166
|
...rest,
|
|
116
167
|
});
|
|
168
|
+
this.ignorePaths = ignorePaths;
|
|
117
169
|
if (ignorePaths) {
|
|
118
170
|
this.ignore = ignore.default().add(ignorePaths);
|
|
119
171
|
}
|
|
@@ -130,7 +182,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
130
182
|
* @returns An object containing the owner, repository, and path extracted from the GitHub URL.
|
|
131
183
|
*/
|
|
132
184
|
extractOwnerAndRepoAndPath(url) {
|
|
133
|
-
const match = url.match(/
|
|
185
|
+
const match = url.match(new RegExp(`${this.baseUrl}/([^/]+)/([^/]+)(/tree/[^/]+/(.+))?`, "i"));
|
|
134
186
|
if (!match) {
|
|
135
187
|
throw new Error("Invalid GitHub URL format.");
|
|
136
188
|
}
|
|
@@ -143,10 +195,127 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
143
195
|
* @returns A promise that resolves to an array of Document instances.
|
|
144
196
|
*/
|
|
145
197
|
async load() {
|
|
146
|
-
|
|
198
|
+
this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
|
|
199
|
+
// process repository without submodules
|
|
200
|
+
const documents = (await this.processRepo()).map((fileResponse) => new Document({
|
|
147
201
|
pageContent: fileResponse.contents,
|
|
148
202
|
metadata: fileResponse.metadata,
|
|
149
203
|
}));
|
|
204
|
+
if (this.processSubmodules) {
|
|
205
|
+
// process submodules
|
|
206
|
+
await this.getSubmoduleInfo();
|
|
207
|
+
for (const submoduleInfo of this.submoduleInfos) {
|
|
208
|
+
documents.push(...(await this.loadSubmodule(submoduleInfo)));
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return documents;
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Loads the information about Git submodules from the repository, if available.
|
|
215
|
+
*/
|
|
216
|
+
async getSubmoduleInfo() {
|
|
217
|
+
this.log("Loading info about submodules...");
|
|
218
|
+
// we have to fetch the files of the root directory to get the download url of the .gitmodules file
|
|
219
|
+
// however, we cannot reuse the files retrieved in processRepo() as initialPath may be != ""
|
|
220
|
+
// so it may be that we end up fetching this file list twice
|
|
221
|
+
const repoFiles = await this.fetchRepoFiles("");
|
|
222
|
+
const gitmodulesFile = repoFiles.filter(({ name }) => name === ".gitmodules")?.[0];
|
|
223
|
+
if (gitmodulesFile) {
|
|
224
|
+
const gitmodulesContent = await this.fetchFileContent({
|
|
225
|
+
download_url: gitmodulesFile.download_url,
|
|
226
|
+
});
|
|
227
|
+
this.submoduleInfos = await this.parseGitmodules(gitmodulesContent);
|
|
228
|
+
}
|
|
229
|
+
else {
|
|
230
|
+
this.submoduleInfos = [];
|
|
231
|
+
}
|
|
232
|
+
this.log(`Found ${this.submoduleInfos.length} submodules:`);
|
|
233
|
+
for (const submoduleInfo of this.submoduleInfos) {
|
|
234
|
+
this.log(JSON.stringify(submoduleInfo));
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Parses the given content of a .gitmodules file. Furthermore, queries the current SHA ref of all submodules.
|
|
239
|
+
* Returns the submodule information as array.
|
|
240
|
+
* @param gitmodulesContent the content of a .gitmodules file
|
|
241
|
+
*/
|
|
242
|
+
async parseGitmodules(gitmodulesContent) {
|
|
243
|
+
// catches the initial line of submodule entries
|
|
244
|
+
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
|
|
245
|
+
// catches the properties of a submodule
|
|
246
|
+
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
|
|
247
|
+
const submoduleInfos = [];
|
|
248
|
+
for (const [, name, propertyLines] of gitmodulesContent.matchAll(submodulePattern)) {
|
|
249
|
+
if (!name || !propertyLines) {
|
|
250
|
+
throw new Error("Could not parse submodule entry");
|
|
251
|
+
}
|
|
252
|
+
const submodulePropertyLines = propertyLines.matchAll(keyValuePattern);
|
|
253
|
+
let path;
|
|
254
|
+
let url;
|
|
255
|
+
for (const [, key, value] of submodulePropertyLines) {
|
|
256
|
+
if (!key || !value) {
|
|
257
|
+
throw new Error(`Could not parse key/value pairs for submodule ${name}`);
|
|
258
|
+
}
|
|
259
|
+
switch (key) {
|
|
260
|
+
case "path":
|
|
261
|
+
path = value;
|
|
262
|
+
break;
|
|
263
|
+
case "url":
|
|
264
|
+
url = value;
|
|
265
|
+
if (url.endsWith(".git")) {
|
|
266
|
+
url = url.substring(0, url.length - 4);
|
|
267
|
+
}
|
|
268
|
+
break;
|
|
269
|
+
default:
|
|
270
|
+
// ignoring unused keys
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
if (!path || !url) {
|
|
274
|
+
throw new Error(`Missing properties for submodule ${name}`);
|
|
275
|
+
}
|
|
276
|
+
// fetch the current ref of the submodule
|
|
277
|
+
const files = await this.fetchRepoFiles(path);
|
|
278
|
+
const submoduleInfo = {
|
|
279
|
+
name,
|
|
280
|
+
path,
|
|
281
|
+
url,
|
|
282
|
+
ref: files[0].sha,
|
|
283
|
+
};
|
|
284
|
+
submoduleInfos.push(submoduleInfo);
|
|
285
|
+
}
|
|
286
|
+
return submoduleInfos;
|
|
287
|
+
}
|
|
288
|
+
/**
|
|
289
|
+
* Loads the documents of the given submodule. Uses the same parameters as for the current repository.
|
|
290
|
+
* External submodules, i.e. submodules pointing to another GitHub instance, are ignored.
|
|
291
|
+
* @param submoduleInfo the info about the submodule to be loaded
|
|
292
|
+
*/
|
|
293
|
+
async loadSubmodule(submoduleInfo) {
|
|
294
|
+
if (!submoduleInfo.url.startsWith(this.baseUrl)) {
|
|
295
|
+
this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
|
|
296
|
+
return [];
|
|
297
|
+
}
|
|
298
|
+
else if (!submoduleInfo.path.startsWith(this.initialPath)) {
|
|
299
|
+
this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
|
|
300
|
+
return [];
|
|
301
|
+
}
|
|
302
|
+
else {
|
|
303
|
+
this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
|
|
304
|
+
return new GithubRepoLoader(submoduleInfo.url, {
|
|
305
|
+
accessToken: this.accessToken,
|
|
306
|
+
apiUrl: this.apiUrl,
|
|
307
|
+
baseUrl: this.baseUrl,
|
|
308
|
+
branch: submoduleInfo.ref,
|
|
309
|
+
recursive: this.recursive,
|
|
310
|
+
processSubmodules: this.processSubmodules,
|
|
311
|
+
unknown: this.unknown,
|
|
312
|
+
ignoreFiles: this.ignoreFiles,
|
|
313
|
+
ignorePaths: this.ignorePaths,
|
|
314
|
+
verbose: this.verbose,
|
|
315
|
+
maxConcurrency: this.maxConcurrency,
|
|
316
|
+
maxRetries: this.maxRetries,
|
|
317
|
+
}).load();
|
|
318
|
+
}
|
|
150
319
|
}
|
|
151
320
|
/**
|
|
152
321
|
* Determines whether a file or directory should be ignored based on its
|
|
@@ -186,7 +355,11 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
186
355
|
});
|
|
187
356
|
return {
|
|
188
357
|
contents: fileContent || "",
|
|
189
|
-
metadata: {
|
|
358
|
+
metadata: {
|
|
359
|
+
source: file.path,
|
|
360
|
+
repository: `${this.baseUrl}/${this.owner}/${this.repo}`,
|
|
361
|
+
branch: this.branch,
|
|
362
|
+
},
|
|
190
363
|
};
|
|
191
364
|
}
|
|
192
365
|
/**
|
|
@@ -197,19 +370,24 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
197
370
|
// Directories have nested files / directories, which is why this is a list of promises of promises
|
|
198
371
|
const currentDirectoryDirectoryPromises = [];
|
|
199
372
|
for (const file of files) {
|
|
200
|
-
if (
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
373
|
+
if (this.shouldIgnore(file.path, file.type)) {
|
|
374
|
+
continue;
|
|
375
|
+
}
|
|
376
|
+
if (file.type === "file" && file.size === 0) {
|
|
377
|
+
// this is a submodule. ignoring for the moment. submodule processing is done separately
|
|
378
|
+
continue;
|
|
379
|
+
}
|
|
380
|
+
if (file.type !== "dir") {
|
|
381
|
+
try {
|
|
382
|
+
currentDirectoryFilePromises.push(this.fetchFileContentWrapper(file));
|
|
208
383
|
}
|
|
209
|
-
|
|
210
|
-
|
|
384
|
+
catch (e) {
|
|
385
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
211
386
|
}
|
|
212
387
|
}
|
|
388
|
+
else if (this.recursive) {
|
|
389
|
+
currentDirectoryDirectoryPromises.push(this.processDirectory(file.path));
|
|
390
|
+
}
|
|
213
391
|
}
|
|
214
392
|
const curDirDirectories = await Promise.all(currentDirectoryDirectoryPromises);
|
|
215
393
|
return [...currentDirectoryFilePromises, ...curDirDirectories.flat()];
|
|
@@ -248,24 +426,25 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
248
426
|
}
|
|
249
427
|
/**
|
|
250
428
|
* Fetches the files from a GitHub repository.
|
|
429
|
+
* If the path denotes a single file, the resulting array contains only one element.
|
|
251
430
|
* @param path The path of the repository to fetch the files from.
|
|
252
431
|
* @returns A promise that resolves to an array of GithubFile instances.
|
|
253
432
|
*/
|
|
254
433
|
async fetchRepoFiles(path) {
|
|
255
|
-
const url =
|
|
434
|
+
const url = `${this.apiUrl}/repos/${this.owner}/${this.repo}/contents/${path}?ref=${this.branch}`;
|
|
256
435
|
return this.caller.call(async () => {
|
|
257
|
-
|
|
258
|
-
console.log("Fetching", url);
|
|
259
|
-
}
|
|
436
|
+
this.log(`Fetching ${url}`);
|
|
260
437
|
const response = await fetch(url, { headers: this.headers });
|
|
261
438
|
const data = await response.json();
|
|
262
439
|
if (!response.ok) {
|
|
263
440
|
throw new Error(`Unable to fetch repository files: ${response.status} ${JSON.stringify(data)}`);
|
|
264
441
|
}
|
|
265
|
-
if (
|
|
266
|
-
|
|
442
|
+
if (Array.isArray(data)) {
|
|
443
|
+
return data;
|
|
444
|
+
}
|
|
445
|
+
else {
|
|
446
|
+
return [data];
|
|
267
447
|
}
|
|
268
|
-
return data;
|
|
269
448
|
});
|
|
270
449
|
}
|
|
271
450
|
/**
|
|
@@ -275,9 +454,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
275
454
|
*/
|
|
276
455
|
async fetchFileContent(file) {
|
|
277
456
|
return this.caller.call(async () => {
|
|
278
|
-
|
|
279
|
-
console.log("Fetching", file.download_url);
|
|
280
|
-
}
|
|
457
|
+
this.log(`Fetching ${file.download_url}`);
|
|
281
458
|
const response = await fetch(file.download_url, {
|
|
282
459
|
headers: this.headers,
|
|
283
460
|
});
|
|
@@ -302,4 +479,13 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
302
479
|
throw new Error(`Unknown unknown handling: ${this.unknown}`);
|
|
303
480
|
}
|
|
304
481
|
}
|
|
482
|
+
/**
|
|
483
|
+
* Logs the given message to the console, if parameter 'verbose' is set to true.
|
|
484
|
+
* @param message the message to be logged.
|
|
485
|
+
*/
|
|
486
|
+
log(message) {
|
|
487
|
+
if (this.verbose) {
|
|
488
|
+
console.log(message);
|
|
489
|
+
}
|
|
490
|
+
}
|
|
305
491
|
}
|
|
@@ -84,6 +84,19 @@ class RecursiveUrlLoader extends base_js_1.BaseDocumentLoader {
|
|
|
84
84
|
if (invalidPrefixes.some((prefix) => link.startsWith(prefix)) ||
|
|
85
85
|
invalidSuffixes.some((suffix) => link.endsWith(suffix)))
|
|
86
86
|
continue;
|
|
87
|
+
let standardizedLink;
|
|
88
|
+
if (link.startsWith("http")) {
|
|
89
|
+
standardizedLink = link;
|
|
90
|
+
}
|
|
91
|
+
else if (link.startsWith("//")) {
|
|
92
|
+
const base = new URL(baseUrl);
|
|
93
|
+
standardizedLink = base.protocol + link;
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
standardizedLink = new URL(link, baseUrl).href;
|
|
97
|
+
}
|
|
98
|
+
if (this.excludeDirs.some((exDir) => standardizedLink.startsWith(exDir)))
|
|
99
|
+
continue;
|
|
87
100
|
if (link.startsWith("http")) {
|
|
88
101
|
const isAllowed = !this.preventOutside || link.startsWith(baseUrl);
|
|
89
102
|
if (isAllowed)
|
|
@@ -81,6 +81,19 @@ export class RecursiveUrlLoader extends BaseDocumentLoader {
|
|
|
81
81
|
if (invalidPrefixes.some((prefix) => link.startsWith(prefix)) ||
|
|
82
82
|
invalidSuffixes.some((suffix) => link.endsWith(suffix)))
|
|
83
83
|
continue;
|
|
84
|
+
let standardizedLink;
|
|
85
|
+
if (link.startsWith("http")) {
|
|
86
|
+
standardizedLink = link;
|
|
87
|
+
}
|
|
88
|
+
else if (link.startsWith("//")) {
|
|
89
|
+
const base = new URL(baseUrl);
|
|
90
|
+
standardizedLink = base.protocol + link;
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
standardizedLink = new URL(link, baseUrl).href;
|
|
94
|
+
}
|
|
95
|
+
if (this.excludeDirs.some((exDir) => standardizedLink.startsWith(exDir)))
|
|
96
|
+
continue;
|
|
84
97
|
if (link.startsWith("http")) {
|
|
85
98
|
const isAllowed = !this.preventOutside || link.startsWith(baseUrl);
|
|
86
99
|
if (isAllowed)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.HuggingFaceTransformersEmbeddings = void 0;
|
|
4
|
+
const transformers_1 = require("@xenova/transformers");
|
|
5
|
+
const chunk_js_1 = require("../util/chunk.cjs");
|
|
6
|
+
const base_js_1 = require("./base.cjs");
|
|
7
|
+
class HuggingFaceTransformersEmbeddings extends base_js_1.Embeddings {
|
|
8
|
+
constructor(fields) {
|
|
9
|
+
super(fields ?? {});
|
|
10
|
+
Object.defineProperty(this, "modelName", {
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true,
|
|
14
|
+
value: "Xenova/all-MiniLM-L6-v2"
|
|
15
|
+
});
|
|
16
|
+
Object.defineProperty(this, "batchSize", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: 512
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "stripNewLines", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: true
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "timeout", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: void 0
|
|
33
|
+
});
|
|
34
|
+
Object.defineProperty(this, "pipelinePromise", {
|
|
35
|
+
enumerable: true,
|
|
36
|
+
configurable: true,
|
|
37
|
+
writable: true,
|
|
38
|
+
value: void 0
|
|
39
|
+
});
|
|
40
|
+
this.modelName = fields?.modelName ?? this.modelName;
|
|
41
|
+
this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
|
|
42
|
+
this.timeout = fields?.timeout;
|
|
43
|
+
}
|
|
44
|
+
async embedDocuments(texts) {
|
|
45
|
+
const batches = (0, chunk_js_1.chunkArray)(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
|
|
46
|
+
const batchRequests = batches.map((batch) => this.runEmbedding(batch));
|
|
47
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
48
|
+
const embeddings = [];
|
|
49
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
50
|
+
const batchResponse = batchResponses[i];
|
|
51
|
+
for (let j = 0; j < batchResponse.length; j += 1) {
|
|
52
|
+
embeddings.push(batchResponse[j]);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return embeddings;
|
|
56
|
+
}
|
|
57
|
+
async embedQuery(text) {
|
|
58
|
+
const data = await this.runEmbedding([
|
|
59
|
+
this.stripNewLines ? text.replace(/\n/g, " ") : text,
|
|
60
|
+
]);
|
|
61
|
+
return data[0];
|
|
62
|
+
}
|
|
63
|
+
async runEmbedding(texts) {
|
|
64
|
+
const pipe = await (this.pipelinePromise ??= (0, transformers_1.pipeline)("feature-extraction", this.modelName));
|
|
65
|
+
return this.caller.call(async () => {
|
|
66
|
+
const output = await pipe(texts, { pooling: "mean", normalize: true });
|
|
67
|
+
return output.tolist();
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
exports.HuggingFaceTransformersEmbeddings = HuggingFaceTransformersEmbeddings;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { Embeddings, EmbeddingsParams } from "./base.js";
|
|
2
|
+
export interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams {
|
|
3
|
+
/** Model name to use */
|
|
4
|
+
modelName: string;
|
|
5
|
+
/**
|
|
6
|
+
* Timeout to use when making requests to OpenAI.
|
|
7
|
+
*/
|
|
8
|
+
timeout?: number;
|
|
9
|
+
/**
|
|
10
|
+
* The maximum number of documents to embed in a single request.
|
|
11
|
+
*/
|
|
12
|
+
batchSize?: number;
|
|
13
|
+
/**
|
|
14
|
+
* Whether to strip new lines from the input text. This is recommended by
|
|
15
|
+
* OpenAI, but may not be suitable for all use cases.
|
|
16
|
+
*/
|
|
17
|
+
stripNewLines?: boolean;
|
|
18
|
+
}
|
|
19
|
+
export declare class HuggingFaceTransformersEmbeddings extends Embeddings implements HuggingFaceTransformersEmbeddingsParams {
|
|
20
|
+
modelName: string;
|
|
21
|
+
batchSize: number;
|
|
22
|
+
stripNewLines: boolean;
|
|
23
|
+
timeout?: number;
|
|
24
|
+
private pipelinePromise;
|
|
25
|
+
constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>);
|
|
26
|
+
embedDocuments(texts: string[]): Promise<number[][]>;
|
|
27
|
+
embedQuery(text: string): Promise<number[]>;
|
|
28
|
+
private runEmbedding;
|
|
29
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { pipeline } from "@xenova/transformers";
|
|
2
|
+
import { chunkArray } from "../util/chunk.js";
|
|
3
|
+
import { Embeddings } from "./base.js";
|
|
4
|
+
export class HuggingFaceTransformersEmbeddings extends Embeddings {
|
|
5
|
+
constructor(fields) {
|
|
6
|
+
super(fields ?? {});
|
|
7
|
+
Object.defineProperty(this, "modelName", {
|
|
8
|
+
enumerable: true,
|
|
9
|
+
configurable: true,
|
|
10
|
+
writable: true,
|
|
11
|
+
value: "Xenova/all-MiniLM-L6-v2"
|
|
12
|
+
});
|
|
13
|
+
Object.defineProperty(this, "batchSize", {
|
|
14
|
+
enumerable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
writable: true,
|
|
17
|
+
value: 512
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(this, "stripNewLines", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: true
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "timeout", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "pipelinePromise", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
this.modelName = fields?.modelName ?? this.modelName;
|
|
38
|
+
this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
|
|
39
|
+
this.timeout = fields?.timeout;
|
|
40
|
+
}
|
|
41
|
+
async embedDocuments(texts) {
|
|
42
|
+
const batches = chunkArray(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
|
|
43
|
+
const batchRequests = batches.map((batch) => this.runEmbedding(batch));
|
|
44
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
45
|
+
const embeddings = [];
|
|
46
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
47
|
+
const batchResponse = batchResponses[i];
|
|
48
|
+
for (let j = 0; j < batchResponse.length; j += 1) {
|
|
49
|
+
embeddings.push(batchResponse[j]);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return embeddings;
|
|
53
|
+
}
|
|
54
|
+
async embedQuery(text) {
|
|
55
|
+
const data = await this.runEmbedding([
|
|
56
|
+
this.stripNewLines ? text.replace(/\n/g, " ") : text,
|
|
57
|
+
]);
|
|
58
|
+
return data[0];
|
|
59
|
+
}
|
|
60
|
+
async runEmbedding(texts) {
|
|
61
|
+
const pipe = await (this.pipelinePromise ??= pipeline("feature-extraction", this.modelName));
|
|
62
|
+
return this.caller.call(async () => {
|
|
63
|
+
const output = await pipe(texts, { pooling: "mean", normalize: true });
|
|
64
|
+
return output.tolist();
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
import type { OpenAI as OpenAIClient } from "openai";
|
|
2
1
|
import { BaseChatModelParams } from "../../chat_models/base.js";
|
|
3
2
|
import { CallbackManagerForLLMRun } from "../../callbacks/manager.js";
|
|
4
3
|
import { BaseMessage, ChatResult } from "../../schema/index.js";
|
|
5
4
|
import { ChatAnthropic, type AnthropicInput } from "../../chat_models/anthropic.js";
|
|
6
|
-
import {
|
|
5
|
+
import { BaseFunctionCallOptions } from "../../base_language/index.js";
|
|
7
6
|
import { StructuredTool } from "../../tools/base.js";
|
|
8
|
-
export interface ChatAnthropicFunctionsCallOptions extends
|
|
9
|
-
function_call?: OpenAIClient.Chat.ChatCompletionCreateParams.FunctionCallOption;
|
|
10
|
-
functions?: OpenAIClient.Chat.ChatCompletionCreateParams.Function[];
|
|
7
|
+
export interface ChatAnthropicFunctionsCallOptions extends BaseFunctionCallOptions {
|
|
11
8
|
tools?: StructuredTool[];
|
|
12
9
|
}
|
|
13
10
|
export declare class AnthropicFunctions extends ChatAnthropic<ChatAnthropicFunctionsCallOptions> {
|
|
@@ -18,6 +18,7 @@ exports.optionalImportEntrypoints = [
|
|
|
18
18
|
"langchain/embeddings/cohere",
|
|
19
19
|
"langchain/embeddings/tensorflow",
|
|
20
20
|
"langchain/embeddings/hf",
|
|
21
|
+
"langchain/embeddings/hf_transformers",
|
|
21
22
|
"langchain/embeddings/googlevertexai",
|
|
22
23
|
"langchain/embeddings/googlepalm",
|
|
23
24
|
"langchain/llms/load",
|
|
@@ -87,11 +88,13 @@ exports.optionalImportEntrypoints = [
|
|
|
87
88
|
"langchain/document_loaders/fs/csv",
|
|
88
89
|
"langchain/document_loaders/fs/notion",
|
|
89
90
|
"langchain/document_loaders/fs/unstructured",
|
|
91
|
+
"langchain/document_loaders/fs/openai_whisper_audio",
|
|
90
92
|
"langchain/document_transformers/html_to_text",
|
|
91
93
|
"langchain/document_transformers/mozilla_readability",
|
|
92
94
|
"langchain/chat_models/googlevertexai",
|
|
93
95
|
"langchain/chat_models/googlepalm",
|
|
94
96
|
"langchain/sql_db",
|
|
97
|
+
"langchain/callbacks/handlers/llmonitor",
|
|
95
98
|
"langchain/output_parsers/expression",
|
|
96
99
|
"langchain/retrievers/amazon_kendra",
|
|
97
100
|
"langchain/retrievers/supabase",
|