langchain 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MultiFileLoader = void 0;
4
+ const node_path_1 = require("node:path");
5
+ const promises_1 = require("node:fs/promises");
6
+ const base_js_1 = require("../base.cjs");
7
+ const directory_js_1 = require("./directory.cjs");
8
+ /**
9
+ * A document loader that loads documents from multiple files. It extends the
10
+ * `BaseDocumentLoader` class and implements the `load()` method.
11
+ * @example
12
+ * ```typescript
13
+ *
14
+ * const multiFileLoader = new MultiFileLoader(
15
+ * ["path/to/file1.pdf", "path/to/file2.txt"],
16
+ * {
17
+ * ".pdf": (path: string) => new PDFLoader(path),
18
+ * },
19
+ * );
20
+ *
21
+ * const docs = await multiFileLoader.load();
22
+ * console.log({ docs });
23
+ *
24
+ * ```
25
+ */
26
+ class MultiFileLoader extends base_js_1.BaseDocumentLoader {
27
+ constructor(filePaths, loaders, unknown = directory_js_1.UnknownHandling.Warn) {
28
+ super();
29
+ Object.defineProperty(this, "filePaths", {
30
+ enumerable: true,
31
+ configurable: true,
32
+ writable: true,
33
+ value: filePaths
34
+ });
35
+ Object.defineProperty(this, "loaders", {
36
+ enumerable: true,
37
+ configurable: true,
38
+ writable: true,
39
+ value: loaders
40
+ });
41
+ Object.defineProperty(this, "unknown", {
42
+ enumerable: true,
43
+ configurable: true,
44
+ writable: true,
45
+ value: unknown
46
+ });
47
+ if (Object.keys(loaders).length === 0) {
48
+ throw new Error("Must provide at least one loader");
49
+ }
50
+ for (const extension in loaders) {
51
+ if (Object.hasOwn(loaders, extension)) {
52
+ if (extension[0] !== ".") {
53
+ throw new Error(`Extension must start with a dot: ${extension}`);
54
+ }
55
+ }
56
+ }
57
+ }
58
+ /**
59
+ * Loads the documents from the provided file paths. Each path that is a
60
+ * directory is skipped with a warning. For any other path, it checks if there
61
+ * is a corresponding loader function for the file extension in the `loaders`
62
+ * mapping. If there is, it loads the documents. If there is no
63
+ * corresponding loader function and `unknown` is set to `Warn`, it logs a
64
+ * warning message. `Error` throws an error, and `Ignore` skips the file silently.
65
+ * @returns A promise that resolves to an array of loaded documents.
66
+ */
67
+ async load() {
68
+ const documents = [];
69
+ for (const filePath of this.filePaths) {
70
+ const fullPath = (0, node_path_1.resolve)(filePath);
71
+ const fileStat = await (0, promises_1.stat)(fullPath);
72
+ if (fileStat.isDirectory()) {
73
+ console.warn(`Ignoring directory: ${fullPath}`);
74
+ continue;
75
+ }
76
+ const loaderFactory = this.loaders[(0, node_path_1.extname)(fullPath)];
77
+ if (loaderFactory) {
78
+ const loader = loaderFactory(fullPath);
79
+ documents.push(...(await loader.load()));
80
+ }
81
+ else {
82
+ switch (this.unknown) {
83
+ case directory_js_1.UnknownHandling.Ignore:
84
+ break;
85
+ case directory_js_1.UnknownHandling.Warn:
86
+ console.warn(`Unknown file type: ${fullPath}`);
87
+ break;
88
+ case directory_js_1.UnknownHandling.Error:
89
+ throw new Error(`Unknown file type: ${fullPath}`);
90
+ default:
91
+ throw new Error(`Unknown unknown handling: ${this.unknown}`);
92
+ }
93
+ }
94
+ }
95
+ return documents;
96
+ }
97
+ }
98
+ exports.MultiFileLoader = MultiFileLoader;
@@ -0,0 +1,37 @@
1
+ import { Document } from "@langchain/core/documents";
2
+ import { BaseDocumentLoader } from "../base.js";
3
+ import { type LoadersMapping, UnknownHandling } from "./directory.js";
4
+ /**
5
+ * A document loader that loads documents from multiple files. It extends the
6
+ * `BaseDocumentLoader` class and implements the `load()` method.
7
+ * @example
8
+ * ```typescript
9
+ *
10
+ * const multiFileLoader = new MultiFileLoader(
11
+ * ["path/to/file1.pdf", "path/to/file2.txt"],
12
+ * {
13
+ * ".pdf": (path: string) => new PDFLoader(path),
14
+ * },
15
+ * );
16
+ *
17
+ * const docs = await multiFileLoader.load();
18
+ * console.log({ docs });
19
+ *
20
+ * ```
21
+ */
22
+ export declare class MultiFileLoader extends BaseDocumentLoader {
23
+ filePaths: string[];
24
+ loaders: LoadersMapping;
25
+ unknown: UnknownHandling;
26
+ constructor(filePaths: string[], loaders: LoadersMapping, unknown?: UnknownHandling);
27
+ /**
28
+ * Loads the documents from the provided file paths. Each path that is a
29
+ * directory is skipped with a warning. For any other path, it checks if there
30
+ * is a corresponding loader function for the file extension in the `loaders`
31
+ * mapping. If there is, it loads the documents. If there is no
32
+ * corresponding loader function and `unknown` is set to `Warn`, it logs a
33
+ * warning message. `Error` throws an error, and `Ignore` skips the file silently.
34
+ * @returns A promise that resolves to an array of loaded documents.
35
+ */
36
+ load(): Promise<Document[]>;
37
+ }
@@ -0,0 +1,94 @@
1
+ import { extname, resolve } from "node:path";
2
+ import { stat } from "node:fs/promises";
3
+ import { BaseDocumentLoader } from "../base.js";
4
+ import { UnknownHandling } from "./directory.js";
5
+ /**
6
+ * A document loader that loads documents from multiple files. It extends the
7
+ * `BaseDocumentLoader` class and implements the `load()` method.
8
+ * @example
9
+ * ```typescript
10
+ *
11
+ * const multiFileLoader = new MultiFileLoader(
12
+ * ["path/to/file1.pdf", "path/to/file2.txt"],
13
+ * {
14
+ * ".pdf": (path: string) => new PDFLoader(path),
15
+ * },
16
+ * );
17
+ *
18
+ * const docs = await multiFileLoader.load();
19
+ * console.log({ docs });
20
+ *
21
+ * ```
22
+ */
23
+ export class MultiFileLoader extends BaseDocumentLoader {
24
+ constructor(filePaths, loaders, unknown = UnknownHandling.Warn) {
25
+ super();
26
+ Object.defineProperty(this, "filePaths", {
27
+ enumerable: true,
28
+ configurable: true,
29
+ writable: true,
30
+ value: filePaths
31
+ });
32
+ Object.defineProperty(this, "loaders", {
33
+ enumerable: true,
34
+ configurable: true,
35
+ writable: true,
36
+ value: loaders
37
+ });
38
+ Object.defineProperty(this, "unknown", {
39
+ enumerable: true,
40
+ configurable: true,
41
+ writable: true,
42
+ value: unknown
43
+ });
44
+ if (Object.keys(loaders).length === 0) {
45
+ throw new Error("Must provide at least one loader");
46
+ }
47
+ for (const extension in loaders) {
48
+ if (Object.hasOwn(loaders, extension)) {
49
+ if (extension[0] !== ".") {
50
+ throw new Error(`Extension must start with a dot: ${extension}`);
51
+ }
52
+ }
53
+ }
54
+ }
55
+ /**
56
+ * Loads the documents from the provided file paths. Each path that is a
57
+ * directory is skipped with a warning. For any other path, it checks if there
58
+ * is a corresponding loader function for the file extension in the `loaders`
59
+ * mapping. If there is, it loads the documents. If there is no
60
+ * corresponding loader function and `unknown` is set to `Warn`, it logs a
61
+ * warning message. `Error` throws an error, and `Ignore` skips the file silently.
62
+ * @returns A promise that resolves to an array of loaded documents.
63
+ */
64
+ async load() {
65
+ const documents = [];
66
+ for (const filePath of this.filePaths) {
67
+ const fullPath = resolve(filePath);
68
+ const fileStat = await stat(fullPath);
69
+ if (fileStat.isDirectory()) {
70
+ console.warn(`Ignoring directory: ${fullPath}`);
71
+ continue;
72
+ }
73
+ const loaderFactory = this.loaders[extname(fullPath)];
74
+ if (loaderFactory) {
75
+ const loader = loaderFactory(fullPath);
76
+ documents.push(...(await loader.load()));
77
+ }
78
+ else {
79
+ switch (this.unknown) {
80
+ case UnknownHandling.Ignore:
81
+ break;
82
+ case UnknownHandling.Warn:
83
+ console.warn(`Unknown file type: ${fullPath}`);
84
+ break;
85
+ case UnknownHandling.Error:
86
+ throw new Error(`Unknown file type: ${fullPath}`);
87
+ default:
88
+ throw new Error(`Unknown unknown handling: ${this.unknown}`);
89
+ }
90
+ }
91
+ }
92
+ return documents;
93
+ }
94
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,49 @@
1
+ import * as url from "node:url";
2
+ import * as path from "node:path";
3
+ import { test, expect } from "@jest/globals";
4
+ import { MultiFileLoader } from "../fs/multi_file.js";
5
+ import { CSVLoader } from "../fs/csv.js";
6
+ import { PDFLoader } from "../fs/pdf.js";
7
+ import { TextLoader } from "../fs/text.js";
8
+ import { JSONLoader } from "../fs/json.js";
9
+ import { UnknownHandling } from "../fs/directory.js";
10
+ test("Test MultiFileLoader", async () => {
11
+ const baseDirectory = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data");
12
+ const filePaths = [
13
+ path.resolve(baseDirectory, "1706.03762.pdf"),
14
+ path.resolve(baseDirectory, "Jacob_Lee_Resume_2023.pdf"),
15
+ path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.csv"),
16
+ path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.json"),
17
+ path.resolve(baseDirectory, "complex.json"),
18
+ path.resolve(baseDirectory, "example.txt"),
19
+ path.resolve(baseDirectory, "example_separator.csv"),
20
+ ];
21
+ const loader = new MultiFileLoader(filePaths, {
22
+ ".csv": (p) => {
23
+ if (p.includes("separator.csv")) {
24
+ return new CSVLoader(p, { column: "html", separator: "|" });
25
+ }
26
+ return new CSVLoader(p, "html");
27
+ },
28
+ ".pdf": (p) => new PDFLoader(p),
29
+ ".txt": (p) => new TextLoader(p),
30
+ ".json": (p) => new JSONLoader(p),
31
+ }, UnknownHandling.Ignore);
32
+ const docs = await loader.load();
33
+ expect(docs.length).toBe(123);
34
+ const expectedSources = [
35
+ // PDF
36
+ ...Array.from({ length: 15 }, (_) => path.resolve(baseDirectory, "1706.03762.pdf")),
37
+ path.resolve(baseDirectory, "Jacob_Lee_Resume_2023.pdf"),
38
+ // CSV
39
+ ...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.csv")),
40
+ // JSON
41
+ ...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.json")),
42
+ ...Array.from({ length: 10 }, (_) => path.resolve(baseDirectory, "complex.json")),
43
+ // TXT
44
+ path.resolve(baseDirectory, "example.txt"),
45
+ // CSV
46
+ ...Array.from({ length: 32 }, (_) => path.resolve(baseDirectory, "example_separator.csv")),
47
+ ];
48
+ expect(docs.map((d) => d.metadata.source).sort()).toEqual(expectedSources);
49
+ });
@@ -38,6 +38,7 @@ exports.optionalImportEntrypoints = [
38
38
  "langchain/document_loaders/web/couchbase",
39
39
  "langchain/document_loaders/web/youtube",
40
40
  "langchain/document_loaders/fs/directory",
41
+ "langchain/document_loaders/fs/multi_file",
41
42
  "langchain/document_loaders/fs/buffer",
42
43
  "langchain/document_loaders/fs/chatgpt",
43
44
  "langchain/document_loaders/fs/text",
@@ -35,6 +35,7 @@ export const optionalImportEntrypoints = [
35
35
  "langchain/document_loaders/web/couchbase",
36
36
  "langchain/document_loaders/web/youtube",
37
37
  "langchain/document_loaders/fs/directory",
38
+ "langchain/document_loaders/fs/multi_file",
38
39
  "langchain/document_loaders/fs/buffer",
39
40
  "langchain/document_loaders/fs/chatgpt",
40
41
  "langchain/document_loaders/fs/text",
@@ -162,7 +162,7 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
162
162
  const uniqueDocuments = this._uniqueUnion(documents);
163
163
  let outputDocs = uniqueDocuments;
164
164
  if (this.documentCompressor && uniqueDocuments.length) {
165
- outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
165
+ outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question, runManager?.getChild());
166
166
  if (this.documentCompressorFilteringFn) {
167
167
  outputDocs = this.documentCompressorFilteringFn(outputDocs);
168
168
  }
@@ -159,7 +159,7 @@ export class MultiQueryRetriever extends BaseRetriever {
159
159
  const uniqueDocuments = this._uniqueUnion(documents);
160
160
  let outputDocs = uniqueDocuments;
161
161
  if (this.documentCompressor && uniqueDocuments.length) {
162
- outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
162
+ outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question, runManager?.getChild());
163
163
  if (this.documentCompressorFilteringFn) {
164
164
  outputDocs = this.documentCompressorFilteringFn(outputDocs);
165
165
  }
@@ -0,0 +1 @@
1
+ module.exports = require('../../dist/document_loaders/fs/multi_file.cjs');
@@ -0,0 +1 @@
1
+ export * from '../../dist/document_loaders/fs/multi_file.js'
@@ -0,0 +1 @@
1
+ export * from '../../dist/document_loaders/fs/multi_file.js'
@@ -0,0 +1 @@
1
+ export * from '../../dist/document_loaders/fs/multi_file.js'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langchain",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "description": "Typescript bindings for langchain",
5
5
  "type": "module",
6
6
  "engines": {
@@ -286,6 +286,10 @@
286
286
  "document_loaders/fs/directory.js",
287
287
  "document_loaders/fs/directory.d.ts",
288
288
  "document_loaders/fs/directory.d.cts",
289
+ "document_loaders/fs/multi_file.cjs",
290
+ "document_loaders/fs/multi_file.js",
291
+ "document_loaders/fs/multi_file.d.ts",
292
+ "document_loaders/fs/multi_file.d.cts",
289
293
  "document_loaders/fs/buffer.cjs",
290
294
  "document_loaders/fs/buffer.js",
291
295
  "document_loaders/fs/buffer.d.ts",
@@ -1540,6 +1544,15 @@
1540
1544
  "import": "./document_loaders/fs/directory.js",
1541
1545
  "require": "./document_loaders/fs/directory.cjs"
1542
1546
  },
1547
+ "./document_loaders/fs/multi_file": {
1548
+ "types": {
1549
+ "import": "./document_loaders/fs/multi_file.d.ts",
1550
+ "require": "./document_loaders/fs/multi_file.d.cts",
1551
+ "default": "./document_loaders/fs/multi_file.d.ts"
1552
+ },
1553
+ "import": "./document_loaders/fs/multi_file.js",
1554
+ "require": "./document_loaders/fs/multi_file.cjs"
1555
+ },
1543
1556
  "./document_loaders/fs/buffer": {
1544
1557
  "types": {
1545
1558
  "import": "./document_loaders/fs/buffer.d.ts",