langchain 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/openai_moderation.cjs +3 -1
- package/dist/chains/openai_moderation.d.ts +2 -0
- package/dist/chains/openai_moderation.js +3 -1
- package/dist/document_loaders/fs/multi_file.cjs +98 -0
- package/dist/document_loaders/fs/multi_file.d.ts +37 -0
- package/dist/document_loaders/fs/multi_file.js +94 -0
- package/dist/document_loaders/fs/unstructured.cjs +33 -10
- package/dist/document_loaders/fs/unstructured.d.ts +8 -1
- package/dist/document_loaders/fs/unstructured.js +33 -10
- package/dist/document_loaders/tests/multi_file.test.d.ts +1 -0
- package/dist/document_loaders/tests/multi_file.test.js +49 -0
- package/dist/document_loaders/tests/unstructured.int.test.js +18 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/retrievers/multi_query.cjs +1 -1
- package/dist/retrievers/multi_query.js +1 -1
- package/document_loaders/fs/multi_file.cjs +1 -0
- package/document_loaders/fs/multi_file.d.cts +1 -0
- package/document_loaders/fs/multi_file.d.ts +1 -0
- package/document_loaders/fs/multi_file.js +1 -0
- package/package.json +18 -5
|
@@ -99,7 +99,9 @@ class OpenAIModerationChain extends base_js_1.BaseChain {
|
|
|
99
99
|
});
|
|
100
100
|
this.throwError = fields?.throwError ?? false;
|
|
101
101
|
this.openAIApiKey =
|
|
102
|
-
fields?.openAIApiKey ?? (0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
|
|
102
|
+
fields?.apiKey ??
|
|
103
|
+
fields?.openAIApiKey ??
|
|
104
|
+
(0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
|
|
103
105
|
if (!this.openAIApiKey) {
|
|
104
106
|
throw new Error("OpenAI API key not found");
|
|
105
107
|
}
|
|
@@ -6,6 +6,8 @@ import { BaseChain, ChainInputs } from "./base.js";
|
|
|
6
6
|
* Interface for the input parameters of the OpenAIModerationChain class.
|
|
7
7
|
*/
|
|
8
8
|
export interface OpenAIModerationChainInput extends ChainInputs, AsyncCallerParams {
|
|
9
|
+
apiKey?: string;
|
|
10
|
+
/** @deprecated Use "apiKey" instead. */
|
|
9
11
|
openAIApiKey?: string;
|
|
10
12
|
openAIOrganization?: string;
|
|
11
13
|
throwError?: boolean;
|
|
@@ -96,7 +96,9 @@ export class OpenAIModerationChain extends BaseChain {
|
|
|
96
96
|
});
|
|
97
97
|
this.throwError = fields?.throwError ?? false;
|
|
98
98
|
this.openAIApiKey =
|
|
99
|
-
fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY");
|
|
99
|
+
fields?.apiKey ??
|
|
100
|
+
fields?.openAIApiKey ??
|
|
101
|
+
getEnvironmentVariable("OPENAI_API_KEY");
|
|
100
102
|
if (!this.openAIApiKey) {
|
|
101
103
|
throw new Error("OpenAI API key not found");
|
|
102
104
|
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MultiFileLoader = void 0;
|
|
4
|
+
const node_path_1 = require("node:path");
|
|
5
|
+
const promises_1 = require("node:fs/promises");
|
|
6
|
+
const base_js_1 = require("../base.cjs");
|
|
7
|
+
const directory_js_1 = require("./directory.cjs");
|
|
8
|
+
/**
|
|
9
|
+
* A document loader that loads documents from multiple files. It extends the
|
|
10
|
+
* `BaseDocumentLoader` class and implements the `load()` method.
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
*
|
|
14
|
+
* const multiFileLoader = new MultiFileLoader(
|
|
15
|
+
* ["path/to/file1.pdf", "path/to/file2.txt"],
|
|
16
|
+
* {
|
|
17
|
+
* ".pdf": (path: string) => new PDFLoader(path),
|
|
18
|
+
* },
|
|
19
|
+
* );
|
|
20
|
+
*
|
|
21
|
+
* const docs = await multiFileLoader.load();
|
|
22
|
+
* console.log({ docs });
|
|
23
|
+
*
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
class MultiFileLoader extends base_js_1.BaseDocumentLoader {
|
|
27
|
+
constructor(filePaths, loaders, unknown = directory_js_1.UnknownHandling.Warn) {
|
|
28
|
+
super();
|
|
29
|
+
Object.defineProperty(this, "filePaths", {
|
|
30
|
+
enumerable: true,
|
|
31
|
+
configurable: true,
|
|
32
|
+
writable: true,
|
|
33
|
+
value: filePaths
|
|
34
|
+
});
|
|
35
|
+
Object.defineProperty(this, "loaders", {
|
|
36
|
+
enumerable: true,
|
|
37
|
+
configurable: true,
|
|
38
|
+
writable: true,
|
|
39
|
+
value: loaders
|
|
40
|
+
});
|
|
41
|
+
Object.defineProperty(this, "unknown", {
|
|
42
|
+
enumerable: true,
|
|
43
|
+
configurable: true,
|
|
44
|
+
writable: true,
|
|
45
|
+
value: unknown
|
|
46
|
+
});
|
|
47
|
+
if (Object.keys(loaders).length === 0) {
|
|
48
|
+
throw new Error("Must provide at least one loader");
|
|
49
|
+
}
|
|
50
|
+
for (const extension in loaders) {
|
|
51
|
+
if (Object.hasOwn(loaders, extension)) {
|
|
52
|
+
if (extension[0] !== ".") {
|
|
53
|
+
throw new Error(`Extension must start with a dot: ${extension}`);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Loads the documents from the provided file paths. It checks if the file
|
|
60
|
+
* is a directory and ignores it. If a file is a file, it checks if there
|
|
61
|
+
* is a corresponding loader function for the file extension in the `loaders`
|
|
62
|
+
* mapping. If there is, it loads the documents. If there is no
|
|
63
|
+
* corresponding loader function and `unknown` is set to `Warn`, it logs a
|
|
64
|
+
* warning message. If `unknown` is set to `Error`, it throws an error.
|
|
65
|
+
* @returns A promise that resolves to an array of loaded documents.
|
|
66
|
+
*/
|
|
67
|
+
async load() {
|
|
68
|
+
const documents = [];
|
|
69
|
+
for (const filePath of this.filePaths) {
|
|
70
|
+
const fullPath = (0, node_path_1.resolve)(filePath);
|
|
71
|
+
const fileStat = await (0, promises_1.stat)(fullPath);
|
|
72
|
+
if (fileStat.isDirectory()) {
|
|
73
|
+
console.warn(`Ignoring directory: ${fullPath}`);
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
const loaderFactory = this.loaders[(0, node_path_1.extname)(fullPath)];
|
|
77
|
+
if (loaderFactory) {
|
|
78
|
+
const loader = loaderFactory(fullPath);
|
|
79
|
+
documents.push(...(await loader.load()));
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
switch (this.unknown) {
|
|
83
|
+
case directory_js_1.UnknownHandling.Ignore:
|
|
84
|
+
break;
|
|
85
|
+
case directory_js_1.UnknownHandling.Warn:
|
|
86
|
+
console.warn(`Unknown file type: ${fullPath}`);
|
|
87
|
+
break;
|
|
88
|
+
case directory_js_1.UnknownHandling.Error:
|
|
89
|
+
throw new Error(`Unknown file type: ${fullPath}`);
|
|
90
|
+
default:
|
|
91
|
+
throw new Error(`Unknown unknown handling: ${this.unknown}`);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return documents;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
exports.MultiFileLoader = MultiFileLoader;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { Document } from "@langchain/core/documents";
|
|
2
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
+
import { type LoadersMapping, UnknownHandling } from "./directory.js";
|
|
4
|
+
/**
|
|
5
|
+
* A document loader that loads documents from multiple files. It extends the
|
|
6
|
+
* `BaseDocumentLoader` class and implements the `load()` method.
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
*
|
|
10
|
+
* const multiFileLoader = new MultiFileLoader(
|
|
11
|
+
* ["path/to/file1.pdf", "path/to/file2.txt"],
|
|
12
|
+
* {
|
|
13
|
+
* ".pdf": (path: string) => new PDFLoader(path),
|
|
14
|
+
* },
|
|
15
|
+
* );
|
|
16
|
+
*
|
|
17
|
+
* const docs = await multiFileLoader.load();
|
|
18
|
+
* console.log({ docs });
|
|
19
|
+
*
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
export declare class MultiFileLoader extends BaseDocumentLoader {
|
|
23
|
+
filePaths: string[];
|
|
24
|
+
loaders: LoadersMapping;
|
|
25
|
+
unknown: UnknownHandling;
|
|
26
|
+
constructor(filePaths: string[], loaders: LoadersMapping, unknown?: UnknownHandling);
|
|
27
|
+
/**
|
|
28
|
+
* Loads the documents from the provided file paths. It checks if the file
|
|
29
|
+
* is a directory and ignores it. If a file is a file, it checks if there
|
|
30
|
+
* is a corresponding loader function for the file extension in the `loaders`
|
|
31
|
+
* mapping. If there is, it loads the documents. If there is no
|
|
32
|
+
* corresponding loader function and `unknown` is set to `Warn`, it logs a
|
|
33
|
+
* warning message. If `unknown` is set to `Error`, it throws an error.
|
|
34
|
+
* @returns A promise that resolves to an array of loaded documents.
|
|
35
|
+
*/
|
|
36
|
+
load(): Promise<Document[]>;
|
|
37
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { extname, resolve } from "node:path";
|
|
2
|
+
import { stat } from "node:fs/promises";
|
|
3
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
+
import { UnknownHandling } from "./directory.js";
|
|
5
|
+
/**
|
|
6
|
+
* A document loader that loads documents from multiple files. It extends the
|
|
7
|
+
* `BaseDocumentLoader` class and implements the `load()` method.
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
*
|
|
11
|
+
* const multiFileLoader = new MultiFileLoader(
|
|
12
|
+
* ["path/to/file1.pdf", "path/to/file2.txt"],
|
|
13
|
+
* {
|
|
14
|
+
* ".pdf": (path: string) => new PDFLoader(path),
|
|
15
|
+
* },
|
|
16
|
+
* );
|
|
17
|
+
*
|
|
18
|
+
* const docs = await multiFileLoader.load();
|
|
19
|
+
* console.log({ docs });
|
|
20
|
+
*
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
export class MultiFileLoader extends BaseDocumentLoader {
|
|
24
|
+
constructor(filePaths, loaders, unknown = UnknownHandling.Warn) {
|
|
25
|
+
super();
|
|
26
|
+
Object.defineProperty(this, "filePaths", {
|
|
27
|
+
enumerable: true,
|
|
28
|
+
configurable: true,
|
|
29
|
+
writable: true,
|
|
30
|
+
value: filePaths
|
|
31
|
+
});
|
|
32
|
+
Object.defineProperty(this, "loaders", {
|
|
33
|
+
enumerable: true,
|
|
34
|
+
configurable: true,
|
|
35
|
+
writable: true,
|
|
36
|
+
value: loaders
|
|
37
|
+
});
|
|
38
|
+
Object.defineProperty(this, "unknown", {
|
|
39
|
+
enumerable: true,
|
|
40
|
+
configurable: true,
|
|
41
|
+
writable: true,
|
|
42
|
+
value: unknown
|
|
43
|
+
});
|
|
44
|
+
if (Object.keys(loaders).length === 0) {
|
|
45
|
+
throw new Error("Must provide at least one loader");
|
|
46
|
+
}
|
|
47
|
+
for (const extension in loaders) {
|
|
48
|
+
if (Object.hasOwn(loaders, extension)) {
|
|
49
|
+
if (extension[0] !== ".") {
|
|
50
|
+
throw new Error(`Extension must start with a dot: ${extension}`);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Loads the documents from the provided file paths. It checks if the file
|
|
57
|
+
* is a directory and ignores it. If a file is a file, it checks if there
|
|
58
|
+
* is a corresponding loader function for the file extension in the `loaders`
|
|
59
|
+
* mapping. If there is, it loads the documents. If there is no
|
|
60
|
+
* corresponding loader function and `unknown` is set to `Warn`, it logs a
|
|
61
|
+
* warning message. If `unknown` is set to `Error`, it throws an error.
|
|
62
|
+
* @returns A promise that resolves to an array of loaded documents.
|
|
63
|
+
*/
|
|
64
|
+
async load() {
|
|
65
|
+
const documents = [];
|
|
66
|
+
for (const filePath of this.filePaths) {
|
|
67
|
+
const fullPath = resolve(filePath);
|
|
68
|
+
const fileStat = await stat(fullPath);
|
|
69
|
+
if (fileStat.isDirectory()) {
|
|
70
|
+
console.warn(`Ignoring directory: ${fullPath}`);
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
const loaderFactory = this.loaders[extname(fullPath)];
|
|
74
|
+
if (loaderFactory) {
|
|
75
|
+
const loader = loaderFactory(fullPath);
|
|
76
|
+
documents.push(...(await loader.load()));
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
switch (this.unknown) {
|
|
80
|
+
case UnknownHandling.Ignore:
|
|
81
|
+
break;
|
|
82
|
+
case UnknownHandling.Warn:
|
|
83
|
+
console.warn(`Unknown file type: ${fullPath}`);
|
|
84
|
+
break;
|
|
85
|
+
case UnknownHandling.Error:
|
|
86
|
+
throw new Error(`Unknown file type: ${fullPath}`);
|
|
87
|
+
default:
|
|
88
|
+
throw new Error(`Unknown unknown handling: ${this.unknown}`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
return documents;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
@@ -43,7 +43,7 @@ const UNSTRUCTURED_API_FILETYPES = [
|
|
|
43
43
|
* and returns an array of Document instances.
|
|
44
44
|
*/
|
|
45
45
|
class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
46
|
-
constructor(filePathOrLegacyApiUrl, optionsOrLegacyFilePath = {}) {
|
|
46
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
|
|
47
47
|
super();
|
|
48
48
|
Object.defineProperty(this, "filePath", {
|
|
49
49
|
enumerable: true,
|
|
@@ -51,6 +51,18 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
51
51
|
writable: true,
|
|
52
52
|
value: void 0
|
|
53
53
|
});
|
|
54
|
+
Object.defineProperty(this, "buffer", {
|
|
55
|
+
enumerable: true,
|
|
56
|
+
configurable: true,
|
|
57
|
+
writable: true,
|
|
58
|
+
value: void 0
|
|
59
|
+
});
|
|
60
|
+
Object.defineProperty(this, "fileName", {
|
|
61
|
+
enumerable: true,
|
|
62
|
+
configurable: true,
|
|
63
|
+
writable: true,
|
|
64
|
+
value: void 0
|
|
65
|
+
});
|
|
54
66
|
Object.defineProperty(this, "apiUrl", {
|
|
55
67
|
enumerable: true,
|
|
56
68
|
configurable: true,
|
|
@@ -150,12 +162,19 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
150
162
|
// Temporary shim to avoid breaking existing users
|
|
151
163
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
152
164
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
153
|
-
|
|
165
|
+
const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
|
|
166
|
+
if (isMemorySyntax) {
|
|
167
|
+
this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
|
|
168
|
+
this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
|
|
169
|
+
}
|
|
170
|
+
else if (isLegacySyntax) {
|
|
154
171
|
this.filePath = optionsOrLegacyFilePath;
|
|
155
|
-
this.apiUrl =
|
|
172
|
+
this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
156
173
|
}
|
|
157
174
|
else {
|
|
158
|
-
this.filePath =
|
|
175
|
+
this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
176
|
+
}
|
|
177
|
+
if (!isLegacySyntax) {
|
|
159
178
|
const options = optionsOrLegacyFilePath;
|
|
160
179
|
this.apiKey = options.apiKey;
|
|
161
180
|
this.apiUrl = options.apiUrl ?? this.apiUrl;
|
|
@@ -176,12 +195,16 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
176
195
|
}
|
|
177
196
|
}
|
|
178
197
|
async _partition() {
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
198
|
+
let { buffer } = this;
|
|
199
|
+
let { fileName } = this;
|
|
200
|
+
if (!buffer) {
|
|
201
|
+
const { readFile, basename } = await this.imports();
|
|
202
|
+
buffer = await readFile(this.filePath);
|
|
203
|
+
fileName = basename(this.filePath);
|
|
204
|
+
// I'm aware this reads the file into memory first, but we have lots of work
|
|
205
|
+
// to do on then consuming Documents in a streaming fashion anyway, so not
|
|
206
|
+
// worried about this for now.
|
|
207
|
+
}
|
|
185
208
|
const formData = new FormData();
|
|
186
209
|
formData.append("files", new Blob([buffer]), fileName);
|
|
187
210
|
formData.append("strategy", this.strategy);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
2
|
/// <reference types="node" resolution-mode="require"/>
|
|
3
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
3
4
|
import type { basename as BasenameT } from "node:path";
|
|
4
5
|
import type { readFile as ReadFileT } from "node:fs/promises";
|
|
5
6
|
import { Document } from "@langchain/core/documents";
|
|
@@ -63,6 +64,10 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
|
|
|
63
64
|
recursive?: boolean;
|
|
64
65
|
unknown?: UnknownHandling;
|
|
65
66
|
};
|
|
67
|
+
type UnstructuredMemoryLoaderOptions = {
|
|
68
|
+
buffer: Buffer;
|
|
69
|
+
fileName: string;
|
|
70
|
+
};
|
|
66
71
|
/**
|
|
67
72
|
* @deprecated - Import from "@langchain/community/document_loaders/fs/unstructured" instead. This entrypoint will be removed in 0.3.0.
|
|
68
73
|
*
|
|
@@ -75,6 +80,8 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
|
|
|
75
80
|
*/
|
|
76
81
|
export declare class UnstructuredLoader extends BaseDocumentLoader {
|
|
77
82
|
filePath: string;
|
|
83
|
+
private buffer?;
|
|
84
|
+
private fileName?;
|
|
78
85
|
private apiUrl;
|
|
79
86
|
private apiKey?;
|
|
80
87
|
private strategy;
|
|
@@ -91,7 +98,7 @@ export declare class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
91
98
|
private combineUnderNChars?;
|
|
92
99
|
private newAfterNChars?;
|
|
93
100
|
private maxCharacters?;
|
|
94
|
-
constructor(filePathOrLegacyApiUrl: string, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
|
|
101
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer: string | UnstructuredMemoryLoaderOptions, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
|
|
95
102
|
_partition(): Promise<Element[]>;
|
|
96
103
|
load(): Promise<Document[]>;
|
|
97
104
|
imports(): Promise<{
|
|
@@ -39,7 +39,7 @@ const UNSTRUCTURED_API_FILETYPES = [
|
|
|
39
39
|
* and returns an array of Document instances.
|
|
40
40
|
*/
|
|
41
41
|
export class UnstructuredLoader extends BaseDocumentLoader {
|
|
42
|
-
constructor(filePathOrLegacyApiUrl, optionsOrLegacyFilePath = {}) {
|
|
42
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
|
|
43
43
|
super();
|
|
44
44
|
Object.defineProperty(this, "filePath", {
|
|
45
45
|
enumerable: true,
|
|
@@ -47,6 +47,18 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
47
47
|
writable: true,
|
|
48
48
|
value: void 0
|
|
49
49
|
});
|
|
50
|
+
Object.defineProperty(this, "buffer", {
|
|
51
|
+
enumerable: true,
|
|
52
|
+
configurable: true,
|
|
53
|
+
writable: true,
|
|
54
|
+
value: void 0
|
|
55
|
+
});
|
|
56
|
+
Object.defineProperty(this, "fileName", {
|
|
57
|
+
enumerable: true,
|
|
58
|
+
configurable: true,
|
|
59
|
+
writable: true,
|
|
60
|
+
value: void 0
|
|
61
|
+
});
|
|
50
62
|
Object.defineProperty(this, "apiUrl", {
|
|
51
63
|
enumerable: true,
|
|
52
64
|
configurable: true,
|
|
@@ -146,12 +158,19 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
146
158
|
// Temporary shim to avoid breaking existing users
|
|
147
159
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
148
160
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
149
|
-
|
|
161
|
+
const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
|
|
162
|
+
if (isMemorySyntax) {
|
|
163
|
+
this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
|
|
164
|
+
this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
|
|
165
|
+
}
|
|
166
|
+
else if (isLegacySyntax) {
|
|
150
167
|
this.filePath = optionsOrLegacyFilePath;
|
|
151
|
-
this.apiUrl =
|
|
168
|
+
this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
152
169
|
}
|
|
153
170
|
else {
|
|
154
|
-
this.filePath =
|
|
171
|
+
this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
172
|
+
}
|
|
173
|
+
if (!isLegacySyntax) {
|
|
155
174
|
const options = optionsOrLegacyFilePath;
|
|
156
175
|
this.apiKey = options.apiKey;
|
|
157
176
|
this.apiUrl = options.apiUrl ?? this.apiUrl;
|
|
@@ -172,12 +191,16 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
172
191
|
}
|
|
173
192
|
}
|
|
174
193
|
async _partition() {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
194
|
+
let { buffer } = this;
|
|
195
|
+
let { fileName } = this;
|
|
196
|
+
if (!buffer) {
|
|
197
|
+
const { readFile, basename } = await this.imports();
|
|
198
|
+
buffer = await readFile(this.filePath);
|
|
199
|
+
fileName = basename(this.filePath);
|
|
200
|
+
// I'm aware this reads the file into memory first, but we have lots of work
|
|
201
|
+
// to do on then consuming Documents in a streaming fashion anyway, so not
|
|
202
|
+
// worried about this for now.
|
|
203
|
+
}
|
|
181
204
|
const formData = new FormData();
|
|
182
205
|
formData.append("files", new Blob([buffer]), fileName);
|
|
183
206
|
formData.append("strategy", this.strategy);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import * as url from "node:url";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { test, expect } from "@jest/globals";
|
|
4
|
+
import { MultiFileLoader } from "../fs/multi_file.js";
|
|
5
|
+
import { CSVLoader } from "../fs/csv.js";
|
|
6
|
+
import { PDFLoader } from "../fs/pdf.js";
|
|
7
|
+
import { TextLoader } from "../fs/text.js";
|
|
8
|
+
import { JSONLoader } from "../fs/json.js";
|
|
9
|
+
import { UnknownHandling } from "../fs/directory.js";
|
|
10
|
+
test("Test MultiFileLoader", async () => {
|
|
11
|
+
const baseDirectory = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data");
|
|
12
|
+
const filePaths = [
|
|
13
|
+
path.resolve(baseDirectory, "1706.03762.pdf"),
|
|
14
|
+
path.resolve(baseDirectory, "Jacob_Lee_Resume_2023.pdf"),
|
|
15
|
+
path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.csv"),
|
|
16
|
+
path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.json"),
|
|
17
|
+
path.resolve(baseDirectory, "complex.json"),
|
|
18
|
+
path.resolve(baseDirectory, "example.txt"),
|
|
19
|
+
path.resolve(baseDirectory, "example_separator.csv"),
|
|
20
|
+
];
|
|
21
|
+
const loader = new MultiFileLoader(filePaths, {
|
|
22
|
+
".csv": (p) => {
|
|
23
|
+
if (p.includes("separator.csv")) {
|
|
24
|
+
return new CSVLoader(p, { column: "html", separator: "|" });
|
|
25
|
+
}
|
|
26
|
+
return new CSVLoader(p, "html");
|
|
27
|
+
},
|
|
28
|
+
".pdf": (p) => new PDFLoader(p),
|
|
29
|
+
".txt": (p) => new TextLoader(p),
|
|
30
|
+
".json": (p) => new JSONLoader(p),
|
|
31
|
+
}, UnknownHandling.Ignore);
|
|
32
|
+
const docs = await loader.load();
|
|
33
|
+
expect(docs.length).toBe(123);
|
|
34
|
+
const expectedSources = [
|
|
35
|
+
// PDF
|
|
36
|
+
...Array.from({ length: 15 }, (_) => path.resolve(baseDirectory, "1706.03762.pdf")),
|
|
37
|
+
path.resolve(baseDirectory, "Jacob_Lee_Resume_2023.pdf"),
|
|
38
|
+
// CSV
|
|
39
|
+
...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.csv")),
|
|
40
|
+
// JSON
|
|
41
|
+
...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.json")),
|
|
42
|
+
...Array.from({ length: 10 }, (_) => path.resolve(baseDirectory, "complex.json")),
|
|
43
|
+
// TXT
|
|
44
|
+
path.resolve(baseDirectory, "example.txt"),
|
|
45
|
+
// CSV
|
|
46
|
+
...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "example_separator.csv")),
|
|
47
|
+
];
|
|
48
|
+
expect(docs.map((d) => d.metadata.source).sort()).toEqual(expectedSources);
|
|
49
|
+
});
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
|
3
3
|
import * as url from "node:url";
|
|
4
4
|
import * as path from "node:path";
|
|
5
|
+
import { readFile } from "node:fs/promises";
|
|
5
6
|
import { test, expect } from "@jest/globals";
|
|
6
7
|
import { UnstructuredDirectoryLoader, UnstructuredLoader, UnknownHandling, } from "../fs/unstructured.js";
|
|
7
8
|
test.skip("Test Unstructured base loader", async () => {
|
|
@@ -16,6 +17,23 @@ test.skip("Test Unstructured base loader", async () => {
|
|
|
16
17
|
expect(typeof doc.pageContent).toBe("string");
|
|
17
18
|
}
|
|
18
19
|
});
|
|
20
|
+
test.skip("Test Unstructured base loader with buffer", async () => {
|
|
21
|
+
const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/example.txt");
|
|
22
|
+
const options = {
|
|
23
|
+
apiKey: process.env.UNSTRUCTURED_API_KEY,
|
|
24
|
+
};
|
|
25
|
+
const buffer = await readFile(filePath);
|
|
26
|
+
const fileName = "example.txt";
|
|
27
|
+
const loader = new UnstructuredLoader({
|
|
28
|
+
buffer,
|
|
29
|
+
fileName,
|
|
30
|
+
}, options);
|
|
31
|
+
const docs = await loader.load();
|
|
32
|
+
expect(docs.length).toBe(3);
|
|
33
|
+
for (const doc of docs) {
|
|
34
|
+
expect(typeof doc.pageContent).toBe("string");
|
|
35
|
+
}
|
|
36
|
+
});
|
|
19
37
|
test.skip("Test Unstructured base loader with fast strategy", async () => {
|
|
20
38
|
const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/1706.03762.pdf");
|
|
21
39
|
const options = {
|
|
@@ -38,6 +38,7 @@ exports.optionalImportEntrypoints = [
|
|
|
38
38
|
"langchain/document_loaders/web/couchbase",
|
|
39
39
|
"langchain/document_loaders/web/youtube",
|
|
40
40
|
"langchain/document_loaders/fs/directory",
|
|
41
|
+
"langchain/document_loaders/fs/multi_file",
|
|
41
42
|
"langchain/document_loaders/fs/buffer",
|
|
42
43
|
"langchain/document_loaders/fs/chatgpt",
|
|
43
44
|
"langchain/document_loaders/fs/text",
|
|
@@ -35,6 +35,7 @@ export const optionalImportEntrypoints = [
|
|
|
35
35
|
"langchain/document_loaders/web/couchbase",
|
|
36
36
|
"langchain/document_loaders/web/youtube",
|
|
37
37
|
"langchain/document_loaders/fs/directory",
|
|
38
|
+
"langchain/document_loaders/fs/multi_file",
|
|
38
39
|
"langchain/document_loaders/fs/buffer",
|
|
39
40
|
"langchain/document_loaders/fs/chatgpt",
|
|
40
41
|
"langchain/document_loaders/fs/text",
|
|
@@ -162,7 +162,7 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
162
162
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
163
163
|
let outputDocs = uniqueDocuments;
|
|
164
164
|
if (this.documentCompressor && uniqueDocuments.length) {
|
|
165
|
-
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
165
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question, runManager?.getChild());
|
|
166
166
|
if (this.documentCompressorFilteringFn) {
|
|
167
167
|
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
168
168
|
}
|
|
@@ -159,7 +159,7 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
159
159
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
160
160
|
let outputDocs = uniqueDocuments;
|
|
161
161
|
if (this.documentCompressor && uniqueDocuments.length) {
|
|
162
|
-
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
162
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question, runManager?.getChild());
|
|
163
163
|
if (this.documentCompressorFilteringFn) {
|
|
164
164
|
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
165
165
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../../dist/document_loaders/fs/multi_file.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/fs/multi_file.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/fs/multi_file.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/fs/multi_file.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -286,6 +286,10 @@
|
|
|
286
286
|
"document_loaders/fs/directory.js",
|
|
287
287
|
"document_loaders/fs/directory.d.ts",
|
|
288
288
|
"document_loaders/fs/directory.d.cts",
|
|
289
|
+
"document_loaders/fs/multi_file.cjs",
|
|
290
|
+
"document_loaders/fs/multi_file.js",
|
|
291
|
+
"document_loaders/fs/multi_file.d.ts",
|
|
292
|
+
"document_loaders/fs/multi_file.d.cts",
|
|
289
293
|
"document_loaders/fs/buffer.cjs",
|
|
290
294
|
"document_loaders/fs/buffer.js",
|
|
291
295
|
"document_loaders/fs/buffer.d.ts",
|
|
@@ -580,7 +584,7 @@
|
|
|
580
584
|
"clean": "rm -rf .turbo dist/",
|
|
581
585
|
"prepack": "yarn build",
|
|
582
586
|
"release": "release-it --only-version --config .release-it.json",
|
|
583
|
-
"test": "
|
|
587
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%",
|
|
584
588
|
"test:watch": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts",
|
|
585
589
|
"test:integration": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%",
|
|
586
590
|
"test:single": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000",
|
|
@@ -672,7 +676,7 @@
|
|
|
672
676
|
"sonix-speech-recognition": "^2.1.1",
|
|
673
677
|
"srt-parser-2": "^1.2.3",
|
|
674
678
|
"ts-jest": "^29.1.0",
|
|
675
|
-
"typeorm": "^0.3.
|
|
679
|
+
"typeorm": "^0.3.20",
|
|
676
680
|
"typescript": "~5.1.6",
|
|
677
681
|
"weaviate-ts-client": "^2.0.0",
|
|
678
682
|
"web-auth-library": "^1.0.3",
|
|
@@ -724,7 +728,7 @@
|
|
|
724
728
|
"redis": "^4.6.4",
|
|
725
729
|
"sonix-speech-recognition": "^2.1.1",
|
|
726
730
|
"srt-parser-2": "^1.2.3",
|
|
727
|
-
"typeorm": "^0.3.
|
|
731
|
+
"typeorm": "^0.3.20",
|
|
728
732
|
"weaviate-ts-client": "*",
|
|
729
733
|
"web-auth-library": "^1.0.3",
|
|
730
734
|
"ws": "^8.14.2",
|
|
@@ -885,7 +889,7 @@
|
|
|
885
889
|
},
|
|
886
890
|
"dependencies": {
|
|
887
891
|
"@langchain/core": "~0.2.0",
|
|
888
|
-
"@langchain/openai": "
|
|
892
|
+
"@langchain/openai": ">=0.1.0 <0.3.0",
|
|
889
893
|
"@langchain/textsplitters": "~0.0.0",
|
|
890
894
|
"binary-extensions": "^2.2.0",
|
|
891
895
|
"js-tiktoken": "^1.0.12",
|
|
@@ -1540,6 +1544,15 @@
|
|
|
1540
1544
|
"import": "./document_loaders/fs/directory.js",
|
|
1541
1545
|
"require": "./document_loaders/fs/directory.cjs"
|
|
1542
1546
|
},
|
|
1547
|
+
"./document_loaders/fs/multi_file": {
|
|
1548
|
+
"types": {
|
|
1549
|
+
"import": "./document_loaders/fs/multi_file.d.ts",
|
|
1550
|
+
"require": "./document_loaders/fs/multi_file.d.cts",
|
|
1551
|
+
"default": "./document_loaders/fs/multi_file.d.ts"
|
|
1552
|
+
},
|
|
1553
|
+
"import": "./document_loaders/fs/multi_file.js",
|
|
1554
|
+
"require": "./document_loaders/fs/multi_file.cjs"
|
|
1555
|
+
},
|
|
1543
1556
|
"./document_loaders/fs/buffer": {
|
|
1544
1557
|
"types": {
|
|
1545
1558
|
"import": "./document_loaders/fs/buffer.d.ts",
|