langchain 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/openai_moderation.cjs +3 -1
- package/dist/chains/openai_moderation.d.ts +2 -0
- package/dist/chains/openai_moderation.js +3 -1
- package/dist/document_loaders/fs/unstructured.cjs +33 -10
- package/dist/document_loaders/fs/unstructured.d.ts +8 -1
- package/dist/document_loaders/fs/unstructured.js +33 -10
- package/dist/document_loaders/tests/unstructured.int.test.js +18 -0
- package/package.json +5 -5
|
@@ -99,7 +99,9 @@ class OpenAIModerationChain extends base_js_1.BaseChain {
|
|
|
99
99
|
});
|
|
100
100
|
this.throwError = fields?.throwError ?? false;
|
|
101
101
|
this.openAIApiKey =
|
|
102
|
-
fields?.openAIApiKey ?? (0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
|
|
102
|
+
fields?.apiKey ??
|
|
103
|
+
fields?.openAIApiKey ??
|
|
104
|
+
(0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
|
|
103
105
|
if (!this.openAIApiKey) {
|
|
104
106
|
throw new Error("OpenAI API key not found");
|
|
105
107
|
}
|
|
@@ -6,6 +6,8 @@ import { BaseChain, ChainInputs } from "./base.js";
|
|
|
6
6
|
* Interface for the input parameters of the OpenAIModerationChain class.
|
|
7
7
|
*/
|
|
8
8
|
export interface OpenAIModerationChainInput extends ChainInputs, AsyncCallerParams {
|
|
9
|
+
apiKey?: string;
|
|
10
|
+
/** @deprecated Use "apiKey" instead. */
|
|
9
11
|
openAIApiKey?: string;
|
|
10
12
|
openAIOrganization?: string;
|
|
11
13
|
throwError?: boolean;
|
|
@@ -96,7 +96,9 @@ export class OpenAIModerationChain extends BaseChain {
|
|
|
96
96
|
});
|
|
97
97
|
this.throwError = fields?.throwError ?? false;
|
|
98
98
|
this.openAIApiKey =
|
|
99
|
-
fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY");
|
|
99
|
+
fields?.apiKey ??
|
|
100
|
+
fields?.openAIApiKey ??
|
|
101
|
+
getEnvironmentVariable("OPENAI_API_KEY");
|
|
100
102
|
if (!this.openAIApiKey) {
|
|
101
103
|
throw new Error("OpenAI API key not found");
|
|
102
104
|
}
|
|
@@ -43,7 +43,7 @@ const UNSTRUCTURED_API_FILETYPES = [
|
|
|
43
43
|
* and returns an array of Document instances.
|
|
44
44
|
*/
|
|
45
45
|
class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
46
|
-
constructor(
|
|
46
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
|
|
47
47
|
super();
|
|
48
48
|
Object.defineProperty(this, "filePath", {
|
|
49
49
|
enumerable: true,
|
|
@@ -51,6 +51,18 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
51
51
|
writable: true,
|
|
52
52
|
value: void 0
|
|
53
53
|
});
|
|
54
|
+
Object.defineProperty(this, "buffer", {
|
|
55
|
+
enumerable: true,
|
|
56
|
+
configurable: true,
|
|
57
|
+
writable: true,
|
|
58
|
+
value: void 0
|
|
59
|
+
});
|
|
60
|
+
Object.defineProperty(this, "fileName", {
|
|
61
|
+
enumerable: true,
|
|
62
|
+
configurable: true,
|
|
63
|
+
writable: true,
|
|
64
|
+
value: void 0
|
|
65
|
+
});
|
|
54
66
|
Object.defineProperty(this, "apiUrl", {
|
|
55
67
|
enumerable: true,
|
|
56
68
|
configurable: true,
|
|
@@ -150,12 +162,19 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
150
162
|
// Temporary shim to avoid breaking existing users
|
|
151
163
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
152
164
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
153
|
-
if (isLegacySyntax) {
|
|
165
|
+
const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
|
|
166
|
+
if (isMemorySyntax) {
|
|
167
|
+
this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
|
|
168
|
+
this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
|
|
169
|
+
}
|
|
170
|
+
else if (isLegacySyntax) {
|
|
154
171
|
this.filePath = optionsOrLegacyFilePath;
|
|
155
|
-
this.apiUrl =
|
|
172
|
+
this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
156
173
|
}
|
|
157
174
|
else {
|
|
158
|
-
this.filePath =
|
|
175
|
+
this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
176
|
+
}
|
|
177
|
+
if (!isLegacySyntax) {
|
|
159
178
|
const options = optionsOrLegacyFilePath;
|
|
160
179
|
this.apiKey = options.apiKey;
|
|
161
180
|
this.apiUrl = options.apiUrl ?? this.apiUrl;
|
|
@@ -176,12 +195,16 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
176
195
|
}
|
|
177
196
|
}
|
|
178
197
|
async _partition() {
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
198
|
+
let { buffer } = this;
|
|
199
|
+
let { fileName } = this;
|
|
200
|
+
if (!buffer) {
|
|
201
|
+
const { readFile, basename } = await this.imports();
|
|
202
|
+
buffer = await readFile(this.filePath);
|
|
203
|
+
fileName = basename(this.filePath);
|
|
204
|
+
// I'm aware this reads the file into memory first, but we have lots of work
|
|
205
|
+
// to do on then consuming Documents in a streaming fashion anyway, so not
|
|
206
|
+
// worried about this for now.
|
|
207
|
+
}
|
|
185
208
|
const formData = new FormData();
|
|
186
209
|
formData.append("files", new Blob([buffer]), fileName);
|
|
187
210
|
formData.append("strategy", this.strategy);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
2
|
/// <reference types="node" resolution-mode="require"/>
|
|
3
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
3
4
|
import type { basename as BasenameT } from "node:path";
|
|
4
5
|
import type { readFile as ReadFileT } from "node:fs/promises";
|
|
5
6
|
import { Document } from "@langchain/core/documents";
|
|
@@ -63,6 +64,10 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
|
|
|
63
64
|
recursive?: boolean;
|
|
64
65
|
unknown?: UnknownHandling;
|
|
65
66
|
};
|
|
67
|
+
type UnstructuredMemoryLoaderOptions = {
|
|
68
|
+
buffer: Buffer;
|
|
69
|
+
fileName: string;
|
|
70
|
+
};
|
|
66
71
|
/**
|
|
67
72
|
* @deprecated - Import from "@langchain/community/document_loaders/fs/unstructured" instead. This entrypoint will be removed in 0.3.0.
|
|
68
73
|
*
|
|
@@ -75,6 +80,8 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
|
|
|
75
80
|
*/
|
|
76
81
|
export declare class UnstructuredLoader extends BaseDocumentLoader {
|
|
77
82
|
filePath: string;
|
|
83
|
+
private buffer?;
|
|
84
|
+
private fileName?;
|
|
78
85
|
private apiUrl;
|
|
79
86
|
private apiKey?;
|
|
80
87
|
private strategy;
|
|
@@ -91,7 +98,7 @@ export declare class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
91
98
|
private combineUnderNChars?;
|
|
92
99
|
private newAfterNChars?;
|
|
93
100
|
private maxCharacters?;
|
|
94
|
-
constructor(
|
|
101
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer: string | UnstructuredMemoryLoaderOptions, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
|
|
95
102
|
_partition(): Promise<Element[]>;
|
|
96
103
|
load(): Promise<Document[]>;
|
|
97
104
|
imports(): Promise<{
|
|
@@ -39,7 +39,7 @@ const UNSTRUCTURED_API_FILETYPES = [
|
|
|
39
39
|
* and returns an array of Document instances.
|
|
40
40
|
*/
|
|
41
41
|
export class UnstructuredLoader extends BaseDocumentLoader {
|
|
42
|
-
constructor(
|
|
42
|
+
constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
|
|
43
43
|
super();
|
|
44
44
|
Object.defineProperty(this, "filePath", {
|
|
45
45
|
enumerable: true,
|
|
@@ -47,6 +47,18 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
47
47
|
writable: true,
|
|
48
48
|
value: void 0
|
|
49
49
|
});
|
|
50
|
+
Object.defineProperty(this, "buffer", {
|
|
51
|
+
enumerable: true,
|
|
52
|
+
configurable: true,
|
|
53
|
+
writable: true,
|
|
54
|
+
value: void 0
|
|
55
|
+
});
|
|
56
|
+
Object.defineProperty(this, "fileName", {
|
|
57
|
+
enumerable: true,
|
|
58
|
+
configurable: true,
|
|
59
|
+
writable: true,
|
|
60
|
+
value: void 0
|
|
61
|
+
});
|
|
50
62
|
Object.defineProperty(this, "apiUrl", {
|
|
51
63
|
enumerable: true,
|
|
52
64
|
configurable: true,
|
|
@@ -146,12 +158,19 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
146
158
|
// Temporary shim to avoid breaking existing users
|
|
147
159
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
148
160
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
149
|
-
if (isLegacySyntax) {
|
|
161
|
+
const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
|
|
162
|
+
if (isMemorySyntax) {
|
|
163
|
+
this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
|
|
164
|
+
this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
|
|
165
|
+
}
|
|
166
|
+
else if (isLegacySyntax) {
|
|
150
167
|
this.filePath = optionsOrLegacyFilePath;
|
|
151
|
-
this.apiUrl =
|
|
168
|
+
this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
152
169
|
}
|
|
153
170
|
else {
|
|
154
|
-
this.filePath =
|
|
171
|
+
this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
|
|
172
|
+
}
|
|
173
|
+
if (!isLegacySyntax) {
|
|
155
174
|
const options = optionsOrLegacyFilePath;
|
|
156
175
|
this.apiKey = options.apiKey;
|
|
157
176
|
this.apiUrl = options.apiUrl ?? this.apiUrl;
|
|
@@ -172,12 +191,16 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
172
191
|
}
|
|
173
192
|
}
|
|
174
193
|
async _partition() {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
194
|
+
let { buffer } = this;
|
|
195
|
+
let { fileName } = this;
|
|
196
|
+
if (!buffer) {
|
|
197
|
+
const { readFile, basename } = await this.imports();
|
|
198
|
+
buffer = await readFile(this.filePath);
|
|
199
|
+
fileName = basename(this.filePath);
|
|
200
|
+
// I'm aware this reads the file into memory first, but we have lots of work
|
|
201
|
+
// to do on then consuming Documents in a streaming fashion anyway, so not
|
|
202
|
+
// worried about this for now.
|
|
203
|
+
}
|
|
181
204
|
const formData = new FormData();
|
|
182
205
|
formData.append("files", new Blob([buffer]), fileName);
|
|
183
206
|
formData.append("strategy", this.strategy);
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
|
3
3
|
import * as url from "node:url";
|
|
4
4
|
import * as path from "node:path";
|
|
5
|
+
import { readFile } from "node:fs/promises";
|
|
5
6
|
import { test, expect } from "@jest/globals";
|
|
6
7
|
import { UnstructuredDirectoryLoader, UnstructuredLoader, UnknownHandling, } from "../fs/unstructured.js";
|
|
7
8
|
test.skip("Test Unstructured base loader", async () => {
|
|
@@ -16,6 +17,23 @@ test.skip("Test Unstructured base loader", async () => {
|
|
|
16
17
|
expect(typeof doc.pageContent).toBe("string");
|
|
17
18
|
}
|
|
18
19
|
});
|
|
20
|
+
test.skip("Test Unstructured base loader with buffer", async () => {
|
|
21
|
+
const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/example.txt");
|
|
22
|
+
const options = {
|
|
23
|
+
apiKey: process.env.UNSTRUCTURED_API_KEY,
|
|
24
|
+
};
|
|
25
|
+
const buffer = await readFile(filePath);
|
|
26
|
+
const fileName = "example.txt";
|
|
27
|
+
const loader = new UnstructuredLoader({
|
|
28
|
+
buffer,
|
|
29
|
+
fileName,
|
|
30
|
+
}, options);
|
|
31
|
+
const docs = await loader.load();
|
|
32
|
+
expect(docs.length).toBe(3);
|
|
33
|
+
for (const doc of docs) {
|
|
34
|
+
expect(typeof doc.pageContent).toBe("string");
|
|
35
|
+
}
|
|
36
|
+
});
|
|
19
37
|
test.skip("Test Unstructured base loader with fast strategy", async () => {
|
|
20
38
|
const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/1706.03762.pdf");
|
|
21
39
|
const options = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -580,7 +580,7 @@
|
|
|
580
580
|
"clean": "rm -rf .turbo dist/",
|
|
581
581
|
"prepack": "yarn build",
|
|
582
582
|
"release": "release-it --only-version --config .release-it.json",
|
|
583
|
-
"test": "
|
|
583
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%",
|
|
584
584
|
"test:watch": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts",
|
|
585
585
|
"test:integration": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%",
|
|
586
586
|
"test:single": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000",
|
|
@@ -672,7 +672,7 @@
|
|
|
672
672
|
"sonix-speech-recognition": "^2.1.1",
|
|
673
673
|
"srt-parser-2": "^1.2.3",
|
|
674
674
|
"ts-jest": "^29.1.0",
|
|
675
|
-
"typeorm": "^0.3.
|
|
675
|
+
"typeorm": "^0.3.20",
|
|
676
676
|
"typescript": "~5.1.6",
|
|
677
677
|
"weaviate-ts-client": "^2.0.0",
|
|
678
678
|
"web-auth-library": "^1.0.3",
|
|
@@ -724,7 +724,7 @@
|
|
|
724
724
|
"redis": "^4.6.4",
|
|
725
725
|
"sonix-speech-recognition": "^2.1.1",
|
|
726
726
|
"srt-parser-2": "^1.2.3",
|
|
727
|
-
"typeorm": "^0.3.
|
|
727
|
+
"typeorm": "^0.3.20",
|
|
728
728
|
"weaviate-ts-client": "*",
|
|
729
729
|
"web-auth-library": "^1.0.3",
|
|
730
730
|
"ws": "^8.14.2",
|
|
@@ -885,7 +885,7 @@
|
|
|
885
885
|
},
|
|
886
886
|
"dependencies": {
|
|
887
887
|
"@langchain/core": "~0.2.0",
|
|
888
|
-
"@langchain/openai": "
|
|
888
|
+
"@langchain/openai": ">=0.1.0 <0.3.0",
|
|
889
889
|
"@langchain/textsplitters": "~0.0.0",
|
|
890
890
|
"binary-extensions": "^2.2.0",
|
|
891
891
|
"js-tiktoken": "^1.0.12",
|