langchain 0.1.34 → 0.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/history_aware_retriever.cjs +1 -2
- package/dist/chains/history_aware_retriever.d.ts +1 -2
- package/dist/chains/history_aware_retriever.js +1 -2
- package/dist/chains/openai_functions/base.cjs +2 -0
- package/dist/chains/openai_functions/base.d.ts +2 -0
- package/dist/chains/openai_functions/base.js +2 -0
- package/dist/chains/query_constructor/index.cjs +5 -8
- package/dist/chains/query_constructor/index.d.ts +5 -4
- package/dist/chains/query_constructor/index.js +3 -6
- package/dist/chains/query_constructor/ir.cjs +15 -139
- package/dist/chains/query_constructor/ir.d.ts +1 -138
- package/dist/chains/query_constructor/ir.js +1 -132
- package/dist/chains/query_constructor/prompt.cjs +2 -2
- package/dist/chains/query_constructor/prompt.d.ts +1 -1
- package/dist/chains/query_constructor/prompt.js +1 -1
- package/dist/document_loaders/web/firecrawl.cjs +88 -0
- package/dist/document_loaders/web/firecrawl.d.ts +48 -0
- package/dist/document_loaders/web/firecrawl.js +81 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/output_parsers/expression.cjs +1 -1
- package/dist/output_parsers/expression.d.ts +1 -1
- package/dist/output_parsers/expression.js +1 -1
- package/dist/retrievers/multi_query.cjs +24 -3
- package/dist/retrievers/multi_query.d.ts +6 -0
- package/dist/retrievers/multi_query.js +24 -3
- package/dist/retrievers/parent_document.cjs +20 -1
- package/dist/retrievers/parent_document.d.ts +6 -0
- package/dist/retrievers/parent_document.js +20 -1
- package/dist/retrievers/self_query/base.cjs +3 -136
- package/dist/retrievers/self_query/base.d.ts +1 -69
- package/dist/retrievers/self_query/base.js +1 -134
- package/dist/retrievers/self_query/chroma.cjs +9 -10
- package/dist/retrievers/self_query/chroma.d.ts +1 -1
- package/dist/retrievers/self_query/chroma.js +1 -2
- package/dist/retrievers/self_query/functional.cjs +2 -195
- package/dist/retrievers/self_query/functional.d.ts +1 -87
- package/dist/retrievers/self_query/functional.js +1 -194
- package/dist/retrievers/self_query/index.cjs +9 -13
- package/dist/retrievers/self_query/index.d.ts +11 -8
- package/dist/retrievers/self_query/index.js +7 -11
- package/dist/retrievers/self_query/pinecone.cjs +9 -10
- package/dist/retrievers/self_query/pinecone.d.ts +1 -1
- package/dist/retrievers/self_query/pinecone.js +1 -2
- package/dist/retrievers/self_query/supabase.cjs +28 -30
- package/dist/retrievers/self_query/supabase.d.ts +1 -2
- package/dist/retrievers/self_query/supabase.js +1 -3
- package/dist/retrievers/self_query/supabase_utils.cjs +2 -2
- package/dist/retrievers/self_query/supabase_utils.d.ts +1 -1
- package/dist/retrievers/self_query/supabase_utils.js +1 -1
- package/dist/retrievers/self_query/vectara.cjs +15 -17
- package/dist/retrievers/self_query/vectara.d.ts +1 -2
- package/dist/retrievers/self_query/vectara.js +1 -3
- package/dist/retrievers/self_query/weaviate.cjs +19 -21
- package/dist/retrievers/self_query/weaviate.d.ts +1 -2
- package/dist/retrievers/self_query/weaviate.js +1 -3
- package/dist/smith/runner_utils.cjs +18 -10
- package/dist/smith/runner_utils.js +18 -10
- package/dist/storage/in_memory.cjs +2 -81
- package/dist/storage/in_memory.d.ts +1 -49
- package/dist/storage/in_memory.js +1 -80
- package/dist/text_splitter.cjs +15 -727
- package/dist/text_splitter.d.ts +1 -77
- package/dist/text_splitter.js +1 -720
- package/document_loaders/web/firecrawl.cjs +1 -0
- package/document_loaders/web/firecrawl.d.cts +1 -0
- package/document_loaders/web/firecrawl.d.ts +1 -0
- package/document_loaders/web/firecrawl.js +1 -0
- package/package.json +22 -3
- package/dist/retrievers/self_query/utils.cjs +0 -94
- package/dist/retrievers/self_query/utils.d.ts +0 -29
- package/dist/retrievers/self_query/utils.js +0 -85
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.FireCrawlLoader = void 0;
|
|
7
|
+
const firecrawl_js_1 = __importDefault(require("@mendable/firecrawl-js"));
|
|
8
|
+
const documents_1 = require("@langchain/core/documents");
|
|
9
|
+
const env_1 = require("@langchain/core/utils/env");
|
|
10
|
+
const base_js_1 = require("../base.cjs");
|
|
11
|
+
/**
 * Document loader that fetches content through Firecrawl (firecrawl.dev)
 * and converts it into Documents. It extends the BaseDocumentLoader class.
 *
 * In "scrape" mode the single configured URL is fetched; in "crawl" mode
 * (the default) every page returned by the crawl becomes one Document.
 * @example
 * ```typescript
 * const loader = new FireCrawlLoader({
 *   url: "{url}",
 *   apiKey: "{apiKey}",
 *   mode: "crawl"
 * });
 * const docs = await loader.load();
 * ```
 */
class FireCrawlLoader extends base_js_1.BaseDocumentLoader {
    /**
     * @param loaderParams Loader options.
     * @param loaderParams.url URL to scrape or crawl.
     * @param loaderParams.apiKey Firecrawl API key. Defaults to the
     * FIRECRAWL_API_KEY environment variable.
     * @param loaderParams.mode Either "crawl" (default) or "scrape".
     * @param loaderParams.params Extra options passed as the second argument
     * to the Firecrawl client's scrapeUrl / crawlUrl call.
     * @throws An error if no API key is supplied or found in the environment.
     */
    constructor(loaderParams) {
        super();
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "url", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "mode", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "params", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        const { apiKey = (0, env_1.getEnvironmentVariable)("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
        if (!apiKey) {
            throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
        }
        this.apiKey = apiKey;
        this.url = url;
        this.mode = mode;
        this.params = params;
    }
    /**
     * Loads data from Firecrawl, scraping the configured URL or crawling
     * from it depending on `mode`.
     * @returns An array of Documents, one per page returned by Firecrawl.
     * Each Document uses the page's markdown as content (empty string when
     * absent) and the page's metadata (empty object when absent).
     * @throws An error if the scrape is reported as unsuccessful, if
     * Firecrawl returns no usable payload, or if `mode` is unrecognized.
     */
    async load() {
        const app = new firecrawl_js_1.default({ apiKey: this.apiKey });
        let firecrawlDocs;
        if (this.mode === "scrape") {
            const response = await app.scrapeUrl(this.url, this.params);
            if (!response.success) {
                throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
            }
            // A "successful" response without a payload would otherwise surface
            // as an opaque TypeError when the document is mapped below.
            if (!response.data) {
                throw new Error("Firecrawl: Scrape succeeded but returned no document data.");
            }
            firecrawlDocs = [response.data];
        }
        else if (this.mode === "crawl") {
            // Third argument `true` is passed through unchanged from the
            // original call; the result is expected to be the crawled documents.
            const response = await app.crawlUrl(this.url, this.params, true);
            if (!Array.isArray(response)) {
                throw new Error(`Firecrawl: Expected an array of documents from crawl, but received ${typeof response}.`);
            }
            firecrawlDocs = response;
        }
        else {
            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
        }
        return firecrawlDocs.map((doc) => new documents_1.Document({
            pageContent: doc.markdown || "",
            metadata: doc.metadata || {},
        }));
    }
}
|
|
88
|
+
exports.FireCrawlLoader = FireCrawlLoader;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { type DocumentInterface } from "@langchain/core/documents";
|
|
2
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
+
/**
|
|
4
|
+
* Parameters accepted by the FireCrawlLoader constructor: the URL to scrape
|
|
5
|
+
* or crawl, an optional API key, the mode, and optional `params` forwarded to the Firecrawl scrape/crawl call.
|
|
6
|
+
*/
|
|
7
|
+
interface FirecrawlLoaderParameters {
|
|
8
|
+
/**
|
|
9
|
+
* URL to scrape or crawl
|
|
10
|
+
*/
|
|
11
|
+
url: string;
|
|
12
|
+
/**
|
|
13
|
+
* API key for Firecrawl. If not provided, the default value is the value of the FIRECRAWL_API_KEY environment variable.
|
|
14
|
+
*/
|
|
15
|
+
apiKey?: string;
|
|
16
|
+
/**
|
|
17
|
+
* Mode of operation. Can be either "crawl" or "scrape". If not provided, the default value is "crawl".
|
|
18
|
+
*/
|
|
19
|
+
mode?: "crawl" | "scrape";
|
|
20
|
+
params?: Record<string, unknown>;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Class representing a document loader for loading data from
|
|
24
|
+
* Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
|
|
25
|
+
* @example
|
|
26
|
+
* ```typescript
|
|
27
|
+
* const loader = new FireCrawlLoader({
|
|
28
|
+
* url: "{url}",
|
|
29
|
+
* apiKey: "{apiKey}",
|
|
30
|
+
* mode: "crawl"
|
|
31
|
+
* });
|
|
32
|
+
* const docs = await loader.load();
|
|
33
|
+
* ```
|
|
34
|
+
*/
|
|
35
|
+
export declare class FireCrawlLoader extends BaseDocumentLoader {
|
|
36
|
+
private apiKey;
|
|
37
|
+
private url;
|
|
38
|
+
private mode;
|
|
39
|
+
private params?;
|
|
40
|
+
constructor(loaderParams: FirecrawlLoaderParameters);
|
|
41
|
+
/**
|
|
42
|
+
* Loads data from Firecrawl: scrapes the configured URL in "scrape" mode, or crawls from it in "crawl" mode.
|
|
43
|
+
* @returns A promise resolving to an array of Documents, one per page returned by Firecrawl.
|
|
44
|
+
* @throws An error if the scrape is reported as unsuccessful or the mode is unrecognized.
|
|
45
|
+
*/
|
|
46
|
+
load(): Promise<DocumentInterface[]>;
|
|
47
|
+
}
|
|
48
|
+
export {};
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import FirecrawlApp from "@mendable/firecrawl-js";
|
|
2
|
+
import { Document } from "@langchain/core/documents";
|
|
3
|
+
import { getEnvironmentVariable } from "@langchain/core/utils/env";
|
|
4
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
5
|
+
/**
 * Document loader that fetches content through Firecrawl (firecrawl.dev)
 * and converts it into Documents. It extends the BaseDocumentLoader class.
 *
 * In "scrape" mode the single configured URL is fetched; in "crawl" mode
 * (the default) every page returned by the crawl becomes one Document.
 * @example
 * ```typescript
 * const loader = new FireCrawlLoader({
 *   url: "{url}",
 *   apiKey: "{apiKey}",
 *   mode: "crawl"
 * });
 * const docs = await loader.load();
 * ```
 */
export class FireCrawlLoader extends BaseDocumentLoader {
    /**
     * @param loaderParams Loader options.
     * @param loaderParams.url URL to scrape or crawl.
     * @param loaderParams.apiKey Firecrawl API key. Defaults to the
     * FIRECRAWL_API_KEY environment variable.
     * @param loaderParams.mode Either "crawl" (default) or "scrape".
     * @param loaderParams.params Extra options passed as the second argument
     * to the Firecrawl client's scrapeUrl / crawlUrl call.
     * @throws An error if no API key is supplied or found in the environment.
     */
    constructor(loaderParams) {
        super();
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "url", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "mode", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "params", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        const { apiKey = getEnvironmentVariable("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
        if (!apiKey) {
            throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
        }
        this.apiKey = apiKey;
        this.url = url;
        this.mode = mode;
        this.params = params;
    }
    /**
     * Loads data from Firecrawl, scraping the configured URL or crawling
     * from it depending on `mode`.
     * @returns An array of Documents, one per page returned by Firecrawl.
     * Each Document uses the page's markdown as content (empty string when
     * absent) and the page's metadata (empty object when absent).
     * @throws An error if the scrape is reported as unsuccessful, if
     * Firecrawl returns no usable payload, or if `mode` is unrecognized.
     */
    async load() {
        const app = new FirecrawlApp({ apiKey: this.apiKey });
        let firecrawlDocs;
        if (this.mode === "scrape") {
            const response = await app.scrapeUrl(this.url, this.params);
            if (!response.success) {
                throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
            }
            // A "successful" response without a payload would otherwise surface
            // as an opaque TypeError when the document is mapped below.
            if (!response.data) {
                throw new Error("Firecrawl: Scrape succeeded but returned no document data.");
            }
            firecrawlDocs = [response.data];
        }
        else if (this.mode === "crawl") {
            // Third argument `true` is passed through unchanged from the
            // original call; the result is expected to be the crawled documents.
            const response = await app.crawlUrl(this.url, this.params, true);
            if (!Array.isArray(response)) {
                throw new Error(`Firecrawl: Expected an array of documents from crawl, but received ${typeof response}.`);
            }
            firecrawlDocs = response;
        }
        else {
            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
        }
        return firecrawlDocs.map((doc) => new Document({
            pageContent: doc.markdown || "",
            metadata: doc.metadata || {},
        }));
    }
}
|
|
@@ -93,6 +93,7 @@ exports.optionalImportEntrypoints = [
|
|
|
93
93
|
"langchain/document_loaders/web/hn",
|
|
94
94
|
"langchain/document_loaders/web/imsdb",
|
|
95
95
|
"langchain/document_loaders/web/figma",
|
|
96
|
+
"langchain/document_loaders/web/firecrawl",
|
|
96
97
|
"langchain/document_loaders/web/github",
|
|
97
98
|
"langchain/document_loaders/web/notiondb",
|
|
98
99
|
"langchain/document_loaders/web/notionapi",
|
|
@@ -90,6 +90,7 @@ export const optionalImportEntrypoints = [
|
|
|
90
90
|
"langchain/document_loaders/web/hn",
|
|
91
91
|
"langchain/document_loaders/web/imsdb",
|
|
92
92
|
"langchain/document_loaders/web/figma",
|
|
93
|
+
"langchain/document_loaders/web/firecrawl",
|
|
93
94
|
"langchain/document_loaders/web/github",
|
|
94
95
|
"langchain/document_loaders/web/notiondb",
|
|
95
96
|
"langchain/document_loaders/web/notionapi",
|
|
@@ -19,7 +19,7 @@ const output_parsers_1 = require("@langchain/core/output_parsers");
|
|
|
19
19
|
const factory_js_1 = require("./expression_type_handlers/factory.cjs");
|
|
20
20
|
const base_js_1 = require("./expression_type_handlers/base.cjs");
|
|
21
21
|
/**
|
|
22
|
-
*
|
|
22
|
+
* We need to be able to handle the following cases:
|
|
23
23
|
* ExpressionStatement
|
|
24
24
|
* CallExpression
|
|
25
25
|
* Identifier | MemberExpression
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { BaseOutputParser } from "@langchain/core/output_parsers";
|
|
2
2
|
import { ParsedType } from "./expression_type_handlers/types.js";
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
4
|
+
* We need to be able to handle the following cases:
|
|
5
5
|
* ExpressionStatement
|
|
6
6
|
* CallExpression
|
|
7
7
|
* Identifier | MemberExpression
|
|
@@ -2,7 +2,7 @@ import { BaseOutputParser } from "@langchain/core/output_parsers";
|
|
|
2
2
|
import { MasterHandler } from "./expression_type_handlers/factory.js";
|
|
3
3
|
import { ASTParser } from "./expression_type_handlers/base.js";
|
|
4
4
|
/**
|
|
5
|
-
*
|
|
5
|
+
* We need to be able to handle the following cases:
|
|
6
6
|
* ExpressionStatement
|
|
7
7
|
* CallExpression
|
|
8
8
|
* Identifier | MemberExpression
|
|
@@ -103,10 +103,24 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
103
103
|
writable: true,
|
|
104
104
|
value: "lines"
|
|
105
105
|
});
|
|
106
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
107
|
+
enumerable: true,
|
|
108
|
+
configurable: true,
|
|
109
|
+
writable: true,
|
|
110
|
+
value: void 0
|
|
111
|
+
});
|
|
112
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
113
|
+
enumerable: true,
|
|
114
|
+
configurable: true,
|
|
115
|
+
writable: true,
|
|
116
|
+
value: void 0
|
|
117
|
+
});
|
|
106
118
|
this.retriever = fields.retriever;
|
|
107
119
|
this.llmChain = fields.llmChain;
|
|
108
120
|
this.queryCount = fields.queryCount ?? this.queryCount;
|
|
109
121
|
this.parserKey = fields.parserKey ?? this.parserKey;
|
|
122
|
+
this.documentCompressor = fields.documentCompressor;
|
|
123
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
110
124
|
}
|
|
111
125
|
static fromLLM(fields) {
|
|
112
126
|
const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
|
|
@@ -126,10 +140,10 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
126
140
|
// Retrieve documents using the original retriever
|
|
127
141
|
async _retrieveDocuments(queries, runManager) {
|
|
128
142
|
const documents = [];
|
|
129
|
-
|
|
143
|
+
await Promise.all(queries.map(async (query) => {
|
|
130
144
|
const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
|
|
131
145
|
documents.push(...docs);
|
|
132
|
-
}
|
|
146
|
+
}));
|
|
133
147
|
return documents;
|
|
134
148
|
}
|
|
135
149
|
// Deduplicate the documents that were returned in multiple retrievals
|
|
@@ -146,7 +160,14 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
146
160
|
const queries = await this._generateQueries(question, runManager);
|
|
147
161
|
const documents = await this._retrieveDocuments(queries, runManager);
|
|
148
162
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
149
|
-
|
|
163
|
+
let outputDocs = uniqueDocuments;
|
|
164
|
+
if (this.documentCompressor && uniqueDocuments.length) {
|
|
165
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
166
|
+
if (this.documentCompressorFilteringFn) {
|
|
167
|
+
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return outputDocs;
|
|
150
171
|
}
|
|
151
172
|
}
|
|
152
173
|
exports.MultiQueryRetriever = MultiQueryRetriever;
|
|
@@ -4,14 +4,18 @@ import { Document } from "@langchain/core/documents";
|
|
|
4
4
|
import { BasePromptTemplate } from "@langchain/core/prompts";
|
|
5
5
|
import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
|
|
6
6
|
import { LLMChain } from "../chains/llm_chain.js";
|
|
7
|
+
import type { BaseDocumentCompressor } from "./document_compressors/index.js";
|
|
7
8
|
interface LineList {
|
|
8
9
|
lines: string[];
|
|
9
10
|
}
|
|
11
|
+
export type MultiDocs = Document<Record<string, any>>[];
|
|
10
12
|
export interface MultiQueryRetrieverInput extends BaseRetrieverInput {
|
|
11
13
|
retriever: BaseRetrieverInterface;
|
|
12
14
|
llmChain: LLMChain<LineList>;
|
|
13
15
|
queryCount?: number;
|
|
14
16
|
parserKey?: string;
|
|
17
|
+
documentCompressor?: BaseDocumentCompressor | undefined;
|
|
18
|
+
documentCompressorFilteringFn?: (docs: MultiDocs) => MultiDocs;
|
|
15
19
|
}
|
|
16
20
|
/**
|
|
17
21
|
* @example
|
|
@@ -33,6 +37,8 @@ export declare class MultiQueryRetriever extends BaseRetriever {
|
|
|
33
37
|
private llmChain;
|
|
34
38
|
private queryCount;
|
|
35
39
|
private parserKey;
|
|
40
|
+
documentCompressor: BaseDocumentCompressor | undefined;
|
|
41
|
+
documentCompressorFilteringFn?: MultiQueryRetrieverInput["documentCompressorFilteringFn"];
|
|
36
42
|
constructor(fields: MultiQueryRetrieverInput);
|
|
37
43
|
static fromLLM(fields: Omit<MultiQueryRetrieverInput, "llmChain"> & {
|
|
38
44
|
llm: BaseLanguageModelInterface;
|
|
@@ -100,10 +100,24 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
100
100
|
writable: true,
|
|
101
101
|
value: "lines"
|
|
102
102
|
});
|
|
103
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
104
|
+
enumerable: true,
|
|
105
|
+
configurable: true,
|
|
106
|
+
writable: true,
|
|
107
|
+
value: void 0
|
|
108
|
+
});
|
|
109
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
110
|
+
enumerable: true,
|
|
111
|
+
configurable: true,
|
|
112
|
+
writable: true,
|
|
113
|
+
value: void 0
|
|
114
|
+
});
|
|
103
115
|
this.retriever = fields.retriever;
|
|
104
116
|
this.llmChain = fields.llmChain;
|
|
105
117
|
this.queryCount = fields.queryCount ?? this.queryCount;
|
|
106
118
|
this.parserKey = fields.parserKey ?? this.parserKey;
|
|
119
|
+
this.documentCompressor = fields.documentCompressor;
|
|
120
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
107
121
|
}
|
|
108
122
|
static fromLLM(fields) {
|
|
109
123
|
const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
|
|
@@ -123,10 +137,10 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
123
137
|
// Retrieve documents using the original retriever
|
|
124
138
|
async _retrieveDocuments(queries, runManager) {
|
|
125
139
|
const documents = [];
|
|
126
|
-
|
|
140
|
+
await Promise.all(queries.map(async (query) => {
|
|
127
141
|
const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
|
|
128
142
|
documents.push(...docs);
|
|
129
|
-
}
|
|
143
|
+
}));
|
|
130
144
|
return documents;
|
|
131
145
|
}
|
|
132
146
|
// Deduplicate the documents that were returned in multiple retrievals
|
|
@@ -143,6 +157,13 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
143
157
|
const queries = await this._generateQueries(question, runManager);
|
|
144
158
|
const documents = await this._retrieveDocuments(queries, runManager);
|
|
145
159
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
146
|
-
|
|
160
|
+
let outputDocs = uniqueDocuments;
|
|
161
|
+
if (this.documentCompressor && uniqueDocuments.length) {
|
|
162
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
163
|
+
if (this.documentCompressorFilteringFn) {
|
|
164
|
+
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
return outputDocs;
|
|
147
168
|
}
|
|
148
169
|
}
|
|
@@ -111,6 +111,18 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
111
111
|
writable: true,
|
|
112
112
|
value: void 0
|
|
113
113
|
});
|
|
114
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
115
|
+
enumerable: true,
|
|
116
|
+
configurable: true,
|
|
117
|
+
writable: true,
|
|
118
|
+
value: void 0
|
|
119
|
+
});
|
|
120
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
121
|
+
enumerable: true,
|
|
122
|
+
configurable: true,
|
|
123
|
+
writable: true,
|
|
124
|
+
value: void 0
|
|
125
|
+
});
|
|
114
126
|
this.vectorstore = fields.vectorstore;
|
|
115
127
|
this.childSplitter = fields.childSplitter;
|
|
116
128
|
this.parentSplitter = fields.parentSplitter;
|
|
@@ -118,9 +130,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
118
130
|
this.childK = fields.childK;
|
|
119
131
|
this.parentK = fields.parentK;
|
|
120
132
|
this.childDocumentRetriever = fields.childDocumentRetriever;
|
|
133
|
+
this.documentCompressor = fields.documentCompressor;
|
|
134
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
121
135
|
}
|
|
122
136
|
async _getRelevantDocuments(query) {
|
|
123
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
124
137
|
let subDocs = [];
|
|
125
138
|
if (this.childDocumentRetriever) {
|
|
126
139
|
subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
|
|
@@ -128,6 +141,12 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
128
141
|
else {
|
|
129
142
|
subDocs = await this.vectorstore.similaritySearch(query, this.childK);
|
|
130
143
|
}
|
|
144
|
+
if (this.documentCompressor && subDocs.length) {
|
|
145
|
+
subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
|
|
146
|
+
if (this.documentCompressorFilteringFn) {
|
|
147
|
+
subDocs = this.documentCompressorFilteringFn(subDocs);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
131
150
|
// Maintain order
|
|
132
151
|
const parentDocIds = [];
|
|
133
152
|
for (const doc of subDocs) {
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { type VectorStoreInterface, type VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
|
|
2
2
|
import { Document } from "@langchain/core/documents";
|
|
3
|
+
import type { BaseDocumentCompressor } from "./document_compressors/index.js";
|
|
3
4
|
import { TextSplitter, TextSplitterChunkHeaderOptions } from "../text_splitter.js";
|
|
4
5
|
import { MultiVectorRetriever, type MultiVectorRetrieverInput } from "./multi_vector.js";
|
|
6
|
+
export type SubDocs = Document<Record<string, any>>[];
|
|
5
7
|
/**
|
|
6
8
|
* Interface for the fields required to initialize a
|
|
7
9
|
* ParentDocumentRetriever instance.
|
|
@@ -14,6 +16,8 @@ export type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & {
|
|
|
14
16
|
* the `.similaritySearch` method of the vectorstore.
|
|
15
17
|
*/
|
|
16
18
|
childDocumentRetriever?: VectorStoreRetrieverInterface<VectorStoreInterface>;
|
|
19
|
+
documentCompressor?: BaseDocumentCompressor | undefined;
|
|
20
|
+
documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs;
|
|
17
21
|
};
|
|
18
22
|
/**
|
|
19
23
|
* A type of document retriever that splits input documents into smaller chunks
|
|
@@ -55,6 +59,8 @@ export declare class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
55
59
|
protected childK?: number;
|
|
56
60
|
protected parentK?: number;
|
|
57
61
|
childDocumentRetriever: VectorStoreRetrieverInterface<VectorStoreInterface> | undefined;
|
|
62
|
+
documentCompressor: BaseDocumentCompressor | undefined;
|
|
63
|
+
documentCompressorFilteringFn?: ParentDocumentRetrieverFields["documentCompressorFilteringFn"];
|
|
58
64
|
constructor(fields: ParentDocumentRetrieverFields);
|
|
59
65
|
_getRelevantDocuments(query: string): Promise<Document[]>;
|
|
60
66
|
/**
|
|
@@ -85,6 +85,18 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
85
85
|
writable: true,
|
|
86
86
|
value: void 0
|
|
87
87
|
});
|
|
88
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
89
|
+
enumerable: true,
|
|
90
|
+
configurable: true,
|
|
91
|
+
writable: true,
|
|
92
|
+
value: void 0
|
|
93
|
+
});
|
|
94
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
95
|
+
enumerable: true,
|
|
96
|
+
configurable: true,
|
|
97
|
+
writable: true,
|
|
98
|
+
value: void 0
|
|
99
|
+
});
|
|
88
100
|
this.vectorstore = fields.vectorstore;
|
|
89
101
|
this.childSplitter = fields.childSplitter;
|
|
90
102
|
this.parentSplitter = fields.parentSplitter;
|
|
@@ -92,9 +104,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
92
104
|
this.childK = fields.childK;
|
|
93
105
|
this.parentK = fields.parentK;
|
|
94
106
|
this.childDocumentRetriever = fields.childDocumentRetriever;
|
|
107
|
+
this.documentCompressor = fields.documentCompressor;
|
|
108
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
95
109
|
}
|
|
96
110
|
async _getRelevantDocuments(query) {
|
|
97
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
98
111
|
let subDocs = [];
|
|
99
112
|
if (this.childDocumentRetriever) {
|
|
100
113
|
subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
|
|
@@ -102,6 +115,12 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
102
115
|
else {
|
|
103
116
|
subDocs = await this.vectorstore.similaritySearch(query, this.childK);
|
|
104
117
|
}
|
|
118
|
+
if (this.documentCompressor && subDocs.length) {
|
|
119
|
+
subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
|
|
120
|
+
if (this.documentCompressorFilteringFn) {
|
|
121
|
+
subDocs = this.documentCompressorFilteringFn(subDocs);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
105
124
|
// Maintain order
|
|
106
125
|
const parentDocIds = [];
|
|
107
126
|
for (const doc of subDocs) {
|
|
@@ -1,139 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.BasicTranslator = exports.BaseTranslator = void 0;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
* Abstract class that provides a blueprint for creating specific
|
|
8
|
-
* translator classes. Defines two abstract methods: formatFunction and
|
|
9
|
-
* mergeFilters.
|
|
10
|
-
*/
|
|
11
|
-
class BaseTranslator extends ir_js_1.Visitor {
|
|
12
|
-
}
|
|
13
|
-
exports.BaseTranslator = BaseTranslator;
|
|
14
|
-
/**
|
|
15
|
-
* Class that extends the BaseTranslator class and provides concrete
|
|
16
|
-
* implementations for the abstract methods. Also declares three types:
|
|
17
|
-
* VisitOperationOutput, VisitComparisonOutput, and
|
|
18
|
-
* VisitStructuredQueryOutput, which are used as the return types for the
|
|
19
|
-
* visitOperation, visitComparison, and visitStructuredQuery methods
|
|
20
|
-
* respectively.
|
|
21
|
-
*/
|
|
22
|
-
class BasicTranslator extends BaseTranslator {
|
|
23
|
-
constructor(opts) {
|
|
24
|
-
super();
|
|
25
|
-
Object.defineProperty(this, "allowedOperators", {
|
|
26
|
-
enumerable: true,
|
|
27
|
-
configurable: true,
|
|
28
|
-
writable: true,
|
|
29
|
-
value: void 0
|
|
30
|
-
});
|
|
31
|
-
Object.defineProperty(this, "allowedComparators", {
|
|
32
|
-
enumerable: true,
|
|
33
|
-
configurable: true,
|
|
34
|
-
writable: true,
|
|
35
|
-
value: void 0
|
|
36
|
-
});
|
|
37
|
-
this.allowedOperators = opts?.allowedOperators ?? [
|
|
38
|
-
ir_js_1.Operators.and,
|
|
39
|
-
ir_js_1.Operators.or,
|
|
40
|
-
];
|
|
41
|
-
this.allowedComparators = opts?.allowedComparators ?? [
|
|
42
|
-
ir_js_1.Comparators.eq,
|
|
43
|
-
ir_js_1.Comparators.ne,
|
|
44
|
-
ir_js_1.Comparators.gt,
|
|
45
|
-
ir_js_1.Comparators.gte,
|
|
46
|
-
ir_js_1.Comparators.lt,
|
|
47
|
-
ir_js_1.Comparators.lte,
|
|
48
|
-
];
|
|
49
|
-
}
|
|
50
|
-
formatFunction(func) {
|
|
51
|
-
if (func in ir_js_1.Comparators) {
|
|
52
|
-
if (this.allowedComparators.length > 0 &&
|
|
53
|
-
this.allowedComparators.indexOf(func) === -1) {
|
|
54
|
-
throw new Error(`Comparator ${func} not allowed. Allowed operators: ${this.allowedComparators.join(", ")}`);
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
else if (func in ir_js_1.Operators) {
|
|
58
|
-
if (this.allowedOperators.length > 0 &&
|
|
59
|
-
this.allowedOperators.indexOf(func) === -1) {
|
|
60
|
-
throw new Error(`Operator ${func} not allowed. Allowed operators: ${this.allowedOperators.join(", ")}`);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
else {
|
|
64
|
-
throw new Error("Unknown comparator or operator");
|
|
65
|
-
}
|
|
66
|
-
return `$${func}`;
|
|
67
|
-
}
|
|
68
|
-
/**
|
|
69
|
-
* Visits an operation and returns a result.
|
|
70
|
-
* @param operation The operation to visit.
|
|
71
|
-
* @returns The result of visiting the operation.
|
|
72
|
-
*/
|
|
73
|
-
visitOperation(operation) {
|
|
74
|
-
const args = operation.args?.map((arg) => arg.accept(this));
|
|
75
|
-
return {
|
|
76
|
-
[this.formatFunction(operation.operator)]: args,
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
/**
|
|
80
|
-
* Visits a comparison and returns a result.
|
|
81
|
-
* @param comparison The comparison to visit.
|
|
82
|
-
* @returns The result of visiting the comparison.
|
|
83
|
-
*/
|
|
84
|
-
visitComparison(comparison) {
|
|
85
|
-
return {
|
|
86
|
-
[comparison.attribute]: {
|
|
87
|
-
[this.formatFunction(comparison.comparator)]: (0, utils_js_1.castValue)(comparison.value),
|
|
88
|
-
},
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
/**
|
|
92
|
-
* Visits a structured query and returns a result.
|
|
93
|
-
* @param query The structured query to visit.
|
|
94
|
-
* @returns The result of visiting the structured query.
|
|
95
|
-
*/
|
|
96
|
-
visitStructuredQuery(query) {
|
|
97
|
-
let nextArg = {};
|
|
98
|
-
if (query.filter) {
|
|
99
|
-
nextArg = {
|
|
100
|
-
filter: query.filter.accept(this),
|
|
101
|
-
};
|
|
102
|
-
}
|
|
103
|
-
return nextArg;
|
|
104
|
-
}
|
|
105
|
-
mergeFilters(defaultFilter, generatedFilter, mergeType = "and", forceDefaultFilter = false) {
|
|
106
|
-
if ((0, utils_js_1.isFilterEmpty)(defaultFilter) && (0, utils_js_1.isFilterEmpty)(generatedFilter)) {
|
|
107
|
-
return undefined;
|
|
108
|
-
}
|
|
109
|
-
if ((0, utils_js_1.isFilterEmpty)(defaultFilter) || mergeType === "replace") {
|
|
110
|
-
if ((0, utils_js_1.isFilterEmpty)(generatedFilter)) {
|
|
111
|
-
return undefined;
|
|
112
|
-
}
|
|
113
|
-
return generatedFilter;
|
|
114
|
-
}
|
|
115
|
-
if ((0, utils_js_1.isFilterEmpty)(generatedFilter)) {
|
|
116
|
-
if (forceDefaultFilter) {
|
|
117
|
-
return defaultFilter;
|
|
118
|
-
}
|
|
119
|
-
if (mergeType === "and") {
|
|
120
|
-
return undefined;
|
|
121
|
-
}
|
|
122
|
-
return defaultFilter;
|
|
123
|
-
}
|
|
124
|
-
if (mergeType === "and") {
|
|
125
|
-
return {
|
|
126
|
-
$and: [defaultFilter, generatedFilter],
|
|
127
|
-
};
|
|
128
|
-
}
|
|
129
|
-
else if (mergeType === "or") {
|
|
130
|
-
return {
|
|
131
|
-
$or: [defaultFilter, generatedFilter],
|
|
132
|
-
};
|
|
133
|
-
}
|
|
134
|
-
else {
|
|
135
|
-
throw new Error("Unknown merge type");
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
exports.BasicTranslator = BasicTranslator;
|
|
4
|
+
var structured_query_1 = require("@langchain/core/structured_query");
|
|
5
|
+
Object.defineProperty(exports, "BaseTranslator", { enumerable: true, get: function () { return structured_query_1.BaseTranslator; } });
|
|
6
|
+
Object.defineProperty(exports, "BasicTranslator", { enumerable: true, get: function () { return structured_query_1.BasicTranslator; } });
|