langchain 0.1.34 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/dist/chains/history_aware_retriever.cjs +1 -2
  2. package/dist/chains/history_aware_retriever.d.ts +1 -2
  3. package/dist/chains/history_aware_retriever.js +1 -2
  4. package/dist/chains/openai_functions/base.cjs +2 -0
  5. package/dist/chains/openai_functions/base.d.ts +2 -0
  6. package/dist/chains/openai_functions/base.js +2 -0
  7. package/dist/chains/query_constructor/index.cjs +5 -8
  8. package/dist/chains/query_constructor/index.d.ts +5 -4
  9. package/dist/chains/query_constructor/index.js +3 -6
  10. package/dist/chains/query_constructor/ir.cjs +15 -139
  11. package/dist/chains/query_constructor/ir.d.ts +1 -138
  12. package/dist/chains/query_constructor/ir.js +1 -132
  13. package/dist/chains/query_constructor/prompt.cjs +2 -2
  14. package/dist/chains/query_constructor/prompt.d.ts +1 -1
  15. package/dist/chains/query_constructor/prompt.js +1 -1
  16. package/dist/document_loaders/web/firecrawl.cjs +88 -0
  17. package/dist/document_loaders/web/firecrawl.d.ts +48 -0
  18. package/dist/document_loaders/web/firecrawl.js +81 -0
  19. package/dist/load/import_constants.cjs +1 -0
  20. package/dist/load/import_constants.js +1 -0
  21. package/dist/output_parsers/expression.cjs +1 -1
  22. package/dist/output_parsers/expression.d.ts +1 -1
  23. package/dist/output_parsers/expression.js +1 -1
  24. package/dist/retrievers/multi_query.cjs +24 -3
  25. package/dist/retrievers/multi_query.d.ts +6 -0
  26. package/dist/retrievers/multi_query.js +24 -3
  27. package/dist/retrievers/parent_document.cjs +20 -1
  28. package/dist/retrievers/parent_document.d.ts +6 -0
  29. package/dist/retrievers/parent_document.js +20 -1
  30. package/dist/retrievers/self_query/base.cjs +3 -136
  31. package/dist/retrievers/self_query/base.d.ts +1 -69
  32. package/dist/retrievers/self_query/base.js +1 -134
  33. package/dist/retrievers/self_query/chroma.cjs +9 -10
  34. package/dist/retrievers/self_query/chroma.d.ts +1 -1
  35. package/dist/retrievers/self_query/chroma.js +1 -2
  36. package/dist/retrievers/self_query/functional.cjs +2 -195
  37. package/dist/retrievers/self_query/functional.d.ts +1 -87
  38. package/dist/retrievers/self_query/functional.js +1 -194
  39. package/dist/retrievers/self_query/index.cjs +9 -13
  40. package/dist/retrievers/self_query/index.d.ts +11 -8
  41. package/dist/retrievers/self_query/index.js +7 -11
  42. package/dist/retrievers/self_query/pinecone.cjs +9 -10
  43. package/dist/retrievers/self_query/pinecone.d.ts +1 -1
  44. package/dist/retrievers/self_query/pinecone.js +1 -2
  45. package/dist/retrievers/self_query/supabase.cjs +28 -30
  46. package/dist/retrievers/self_query/supabase.d.ts +1 -2
  47. package/dist/retrievers/self_query/supabase.js +1 -3
  48. package/dist/retrievers/self_query/supabase_utils.cjs +2 -2
  49. package/dist/retrievers/self_query/supabase_utils.d.ts +1 -1
  50. package/dist/retrievers/self_query/supabase_utils.js +1 -1
  51. package/dist/retrievers/self_query/vectara.cjs +15 -17
  52. package/dist/retrievers/self_query/vectara.d.ts +1 -2
  53. package/dist/retrievers/self_query/vectara.js +1 -3
  54. package/dist/retrievers/self_query/weaviate.cjs +19 -21
  55. package/dist/retrievers/self_query/weaviate.d.ts +1 -2
  56. package/dist/retrievers/self_query/weaviate.js +1 -3
  57. package/dist/smith/runner_utils.cjs +18 -10
  58. package/dist/smith/runner_utils.js +18 -10
  59. package/dist/storage/in_memory.cjs +2 -81
  60. package/dist/storage/in_memory.d.ts +1 -49
  61. package/dist/storage/in_memory.js +1 -80
  62. package/dist/text_splitter.cjs +15 -727
  63. package/dist/text_splitter.d.ts +1 -77
  64. package/dist/text_splitter.js +1 -720
  65. package/document_loaders/web/firecrawl.cjs +1 -0
  66. package/document_loaders/web/firecrawl.d.cts +1 -0
  67. package/document_loaders/web/firecrawl.d.ts +1 -0
  68. package/document_loaders/web/firecrawl.js +1 -0
  69. package/package.json +22 -3
  70. package/dist/retrievers/self_query/utils.cjs +0 -94
  71. package/dist/retrievers/self_query/utils.d.ts +0 -29
  72. package/dist/retrievers/self_query/utils.js +0 -85
@@ -0,0 +1,88 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.FireCrawlLoader = void 0;
7
+ const firecrawl_js_1 = __importDefault(require("@mendable/firecrawl-js"));
8
+ const documents_1 = require("@langchain/core/documents");
9
+ const env_1 = require("@langchain/core/utils/env");
10
+ const base_js_1 = require("../base.cjs");
11
+ /**
12
+ * Class representing a document loader for loading data from
13
+ * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
14
+ * @example
15
+ * ```typescript
16
+ * const loader = new FireCrawlLoader({
17
+ * url: "{url}",
18
+ * apiKey: "{apiKey}",
19
+ * mode: "crawl"
20
+ * });
21
+ * const docs = await loader.load();
22
+ * ```
23
+ */
24
+ class FireCrawlLoader extends base_js_1.BaseDocumentLoader {
25
+ constructor(loaderParams) {
26
+ super();
27
+ Object.defineProperty(this, "apiKey", {
28
+ enumerable: true,
29
+ configurable: true,
30
+ writable: true,
31
+ value: void 0
32
+ });
33
+ Object.defineProperty(this, "url", {
34
+ enumerable: true,
35
+ configurable: true,
36
+ writable: true,
37
+ value: void 0
38
+ });
39
+ Object.defineProperty(this, "mode", {
40
+ enumerable: true,
41
+ configurable: true,
42
+ writable: true,
43
+ value: void 0
44
+ });
45
+ Object.defineProperty(this, "params", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ const { apiKey = (0, env_1.getEnvironmentVariable)("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
52
+ if (!apiKey) {
53
+ throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
54
+ }
55
+ this.apiKey = apiKey;
56
+ this.url = url;
57
+ this.mode = mode;
58
+ this.params = params;
59
+ }
60
+ /**
61
+ * Loads the data from the Firecrawl.
62
+ * @returns An array of Documents representing the retrieved data.
63
+ * @throws An error if the data could not be loaded.
64
+ */
65
+ async load() {
66
+ const app = new firecrawl_js_1.default({ apiKey: this.apiKey });
67
+ let firecrawlDocs;
68
+ if (this.mode === "scrape") {
69
+ const response = await app.scrapeUrl(this.url, this.params);
70
+ if (!response.success) {
71
+ throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
72
+ }
73
+ firecrawlDocs = [response.data];
74
+ }
75
+ else if (this.mode === "crawl") {
76
+ const response = await app.crawlUrl(this.url, this.params, true);
77
+ firecrawlDocs = response;
78
+ }
79
+ else {
80
+ throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
81
+ }
82
+ return firecrawlDocs.map((doc) => new documents_1.Document({
83
+ pageContent: doc.markdown || "",
84
+ metadata: doc.metadata || {},
85
+ }));
86
+ }
87
+ }
88
+ exports.FireCrawlLoader = FireCrawlLoader;
@@ -0,0 +1,48 @@
1
+ import { type DocumentInterface } from "@langchain/core/documents";
2
+ import { BaseDocumentLoader } from "../base.js";
3
+ /**
4
+ * Interface representing the parameters for the Firecrawl loader. It
5
+ * includes properties such as the URL to scrape or crawl and the API key.
6
+ */
7
+ interface FirecrawlLoaderParameters {
8
+ /**
9
+ * URL to scrape or crawl
10
+ */
11
+ url: string;
12
+ /**
13
+ * API key for Firecrawl. If not provided, the default value is the value of the FIRECRAWL_API_KEY environment variable.
14
+ */
15
+ apiKey?: string;
16
+ /**
17
+ * Mode of operation. Can be either "crawl" or "scrape". If not provided, the default value is "crawl".
18
+ */
19
+ mode?: "crawl" | "scrape";
20
+ params?: Record<string, unknown>;
21
+ }
22
+ /**
23
+ * Class representing a document loader for loading data from
24
+ * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
25
+ * @example
26
+ * ```typescript
27
+ * const loader = new FireCrawlLoader({
28
+ * url: "{url}",
29
+ * apiKey: "{apiKey}",
30
+ * mode: "crawl"
31
+ * });
32
+ * const docs = await loader.load();
33
+ * ```
34
+ */
35
+ export declare class FireCrawlLoader extends BaseDocumentLoader {
36
+ private apiKey;
37
+ private url;
38
+ private mode;
39
+ private params?;
40
+ constructor(loaderParams: FirecrawlLoaderParameters);
41
+ /**
42
+ * Loads the data from the Firecrawl.
43
+ * @returns An array of Documents representing the retrieved data.
44
+ * @throws An error if the data could not be loaded.
45
+ */
46
+ load(): Promise<DocumentInterface[]>;
47
+ }
48
+ export {};
@@ -0,0 +1,81 @@
1
+ import FirecrawlApp from "@mendable/firecrawl-js";
2
+ import { Document } from "@langchain/core/documents";
3
+ import { getEnvironmentVariable } from "@langchain/core/utils/env";
4
+ import { BaseDocumentLoader } from "../base.js";
5
+ /**
6
+ * Class representing a document loader for loading data from
7
+ * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
8
+ * @example
9
+ * ```typescript
10
+ * const loader = new FireCrawlLoader({
11
+ * url: "{url}",
12
+ * apiKey: "{apiKey}",
13
+ * mode: "crawl"
14
+ * });
15
+ * const docs = await loader.load();
16
+ * ```
17
+ */
18
+ export class FireCrawlLoader extends BaseDocumentLoader {
19
+ constructor(loaderParams) {
20
+ super();
21
+ Object.defineProperty(this, "apiKey", {
22
+ enumerable: true,
23
+ configurable: true,
24
+ writable: true,
25
+ value: void 0
26
+ });
27
+ Object.defineProperty(this, "url", {
28
+ enumerable: true,
29
+ configurable: true,
30
+ writable: true,
31
+ value: void 0
32
+ });
33
+ Object.defineProperty(this, "mode", {
34
+ enumerable: true,
35
+ configurable: true,
36
+ writable: true,
37
+ value: void 0
38
+ });
39
+ Object.defineProperty(this, "params", {
40
+ enumerable: true,
41
+ configurable: true,
42
+ writable: true,
43
+ value: void 0
44
+ });
45
+ const { apiKey = getEnvironmentVariable("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
46
+ if (!apiKey) {
47
+ throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
48
+ }
49
+ this.apiKey = apiKey;
50
+ this.url = url;
51
+ this.mode = mode;
52
+ this.params = params;
53
+ }
54
+ /**
55
+ * Loads the data from the Firecrawl.
56
+ * @returns An array of Documents representing the retrieved data.
57
+ * @throws An error if the data could not be loaded.
58
+ */
59
+ async load() {
60
+ const app = new FirecrawlApp({ apiKey: this.apiKey });
61
+ let firecrawlDocs;
62
+ if (this.mode === "scrape") {
63
+ const response = await app.scrapeUrl(this.url, this.params);
64
+ if (!response.success) {
65
+ throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
66
+ }
67
+ firecrawlDocs = [response.data];
68
+ }
69
+ else if (this.mode === "crawl") {
70
+ const response = await app.crawlUrl(this.url, this.params, true);
71
+ firecrawlDocs = response;
72
+ }
73
+ else {
74
+ throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
75
+ }
76
+ return firecrawlDocs.map((doc) => new Document({
77
+ pageContent: doc.markdown || "",
78
+ metadata: doc.metadata || {},
79
+ }));
80
+ }
81
+ }
@@ -93,6 +93,7 @@ exports.optionalImportEntrypoints = [
93
93
  "langchain/document_loaders/web/hn",
94
94
  "langchain/document_loaders/web/imsdb",
95
95
  "langchain/document_loaders/web/figma",
96
+ "langchain/document_loaders/web/firecrawl",
96
97
  "langchain/document_loaders/web/github",
97
98
  "langchain/document_loaders/web/notiondb",
98
99
  "langchain/document_loaders/web/notionapi",
@@ -90,6 +90,7 @@ export const optionalImportEntrypoints = [
90
90
  "langchain/document_loaders/web/hn",
91
91
  "langchain/document_loaders/web/imsdb",
92
92
  "langchain/document_loaders/web/figma",
93
+ "langchain/document_loaders/web/firecrawl",
93
94
  "langchain/document_loaders/web/github",
94
95
  "langchain/document_loaders/web/notiondb",
95
96
  "langchain/document_loaders/web/notionapi",
@@ -19,7 +19,7 @@ const output_parsers_1 = require("@langchain/core/output_parsers");
19
19
  const factory_js_1 = require("./expression_type_handlers/factory.cjs");
20
20
  const base_js_1 = require("./expression_type_handlers/base.cjs");
21
21
  /**
22
- * okay so we need to be able to handle the following cases:
22
+ * We need to be able to handle the following cases:
23
23
  * ExpressionStatement
24
24
  * CallExpression
25
25
  * Identifier | MemberExpression
@@ -1,7 +1,7 @@
1
1
  import { BaseOutputParser } from "@langchain/core/output_parsers";
2
2
  import { ParsedType } from "./expression_type_handlers/types.js";
3
3
  /**
4
- * okay so we need to be able to handle the following cases:
4
+ * We need to be able to handle the following cases:
5
5
  * ExpressionStatement
6
6
  * CallExpression
7
7
  * Identifier | MemberExpression
@@ -2,7 +2,7 @@ import { BaseOutputParser } from "@langchain/core/output_parsers";
2
2
  import { MasterHandler } from "./expression_type_handlers/factory.js";
3
3
  import { ASTParser } from "./expression_type_handlers/base.js";
4
4
  /**
5
- * okay so we need to be able to handle the following cases:
5
+ * We need to be able to handle the following cases:
6
6
  * ExpressionStatement
7
7
  * CallExpression
8
8
  * Identifier | MemberExpression
@@ -103,10 +103,24 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
103
103
  writable: true,
104
104
  value: "lines"
105
105
  });
106
+ Object.defineProperty(this, "documentCompressor", {
107
+ enumerable: true,
108
+ configurable: true,
109
+ writable: true,
110
+ value: void 0
111
+ });
112
+ Object.defineProperty(this, "documentCompressorFilteringFn", {
113
+ enumerable: true,
114
+ configurable: true,
115
+ writable: true,
116
+ value: void 0
117
+ });
106
118
  this.retriever = fields.retriever;
107
119
  this.llmChain = fields.llmChain;
108
120
  this.queryCount = fields.queryCount ?? this.queryCount;
109
121
  this.parserKey = fields.parserKey ?? this.parserKey;
122
+ this.documentCompressor = fields.documentCompressor;
123
+ this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
110
124
  }
111
125
  static fromLLM(fields) {
112
126
  const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
@@ -126,10 +140,10 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
126
140
  // Retrieve documents using the original retriever
127
141
  async _retrieveDocuments(queries, runManager) {
128
142
  const documents = [];
129
- for (const query of queries) {
143
+ await Promise.all(queries.map(async (query) => {
130
144
  const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
131
145
  documents.push(...docs);
132
- }
146
+ }));
133
147
  return documents;
134
148
  }
135
149
  // Deduplicate the documents that were returned in multiple retrievals
@@ -146,7 +160,14 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
146
160
  const queries = await this._generateQueries(question, runManager);
147
161
  const documents = await this._retrieveDocuments(queries, runManager);
148
162
  const uniqueDocuments = this._uniqueUnion(documents);
149
- return uniqueDocuments;
163
+ let outputDocs = uniqueDocuments;
164
+ if (this.documentCompressor && uniqueDocuments.length) {
165
+ outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
166
+ if (this.documentCompressorFilteringFn) {
167
+ outputDocs = this.documentCompressorFilteringFn(outputDocs);
168
+ }
169
+ }
170
+ return outputDocs;
150
171
  }
151
172
  }
152
173
  exports.MultiQueryRetriever = MultiQueryRetriever;
@@ -4,14 +4,18 @@ import { Document } from "@langchain/core/documents";
4
4
  import { BasePromptTemplate } from "@langchain/core/prompts";
5
5
  import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
6
6
  import { LLMChain } from "../chains/llm_chain.js";
7
+ import type { BaseDocumentCompressor } from "./document_compressors/index.js";
7
8
  interface LineList {
8
9
  lines: string[];
9
10
  }
11
+ export type MultiDocs = Document<Record<string, any>>[];
10
12
  export interface MultiQueryRetrieverInput extends BaseRetrieverInput {
11
13
  retriever: BaseRetrieverInterface;
12
14
  llmChain: LLMChain<LineList>;
13
15
  queryCount?: number;
14
16
  parserKey?: string;
17
+ documentCompressor?: BaseDocumentCompressor | undefined;
18
+ documentCompressorFilteringFn?: (docs: MultiDocs) => MultiDocs;
15
19
  }
16
20
  /**
17
21
  * @example
@@ -33,6 +37,8 @@ export declare class MultiQueryRetriever extends BaseRetriever {
33
37
  private llmChain;
34
38
  private queryCount;
35
39
  private parserKey;
40
+ documentCompressor: BaseDocumentCompressor | undefined;
41
+ documentCompressorFilteringFn?: MultiQueryRetrieverInput["documentCompressorFilteringFn"];
36
42
  constructor(fields: MultiQueryRetrieverInput);
37
43
  static fromLLM(fields: Omit<MultiQueryRetrieverInput, "llmChain"> & {
38
44
  llm: BaseLanguageModelInterface;
@@ -100,10 +100,24 @@ export class MultiQueryRetriever extends BaseRetriever {
100
100
  writable: true,
101
101
  value: "lines"
102
102
  });
103
+ Object.defineProperty(this, "documentCompressor", {
104
+ enumerable: true,
105
+ configurable: true,
106
+ writable: true,
107
+ value: void 0
108
+ });
109
+ Object.defineProperty(this, "documentCompressorFilteringFn", {
110
+ enumerable: true,
111
+ configurable: true,
112
+ writable: true,
113
+ value: void 0
114
+ });
103
115
  this.retriever = fields.retriever;
104
116
  this.llmChain = fields.llmChain;
105
117
  this.queryCount = fields.queryCount ?? this.queryCount;
106
118
  this.parserKey = fields.parserKey ?? this.parserKey;
119
+ this.documentCompressor = fields.documentCompressor;
120
+ this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
107
121
  }
108
122
  static fromLLM(fields) {
109
123
  const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
@@ -123,10 +137,10 @@ export class MultiQueryRetriever extends BaseRetriever {
123
137
  // Retrieve documents using the original retriever
124
138
  async _retrieveDocuments(queries, runManager) {
125
139
  const documents = [];
126
- for (const query of queries) {
140
+ await Promise.all(queries.map(async (query) => {
127
141
  const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
128
142
  documents.push(...docs);
129
- }
143
+ }));
130
144
  return documents;
131
145
  }
132
146
  // Deduplicate the documents that were returned in multiple retrievals
@@ -143,6 +157,13 @@ export class MultiQueryRetriever extends BaseRetriever {
143
157
  const queries = await this._generateQueries(question, runManager);
144
158
  const documents = await this._retrieveDocuments(queries, runManager);
145
159
  const uniqueDocuments = this._uniqueUnion(documents);
146
- return uniqueDocuments;
160
+ let outputDocs = uniqueDocuments;
161
+ if (this.documentCompressor && uniqueDocuments.length) {
162
+ outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
163
+ if (this.documentCompressorFilteringFn) {
164
+ outputDocs = this.documentCompressorFilteringFn(outputDocs);
165
+ }
166
+ }
167
+ return outputDocs;
147
168
  }
148
169
  }
@@ -111,6 +111,18 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
111
111
  writable: true,
112
112
  value: void 0
113
113
  });
114
+ Object.defineProperty(this, "documentCompressor", {
115
+ enumerable: true,
116
+ configurable: true,
117
+ writable: true,
118
+ value: void 0
119
+ });
120
+ Object.defineProperty(this, "documentCompressorFilteringFn", {
121
+ enumerable: true,
122
+ configurable: true,
123
+ writable: true,
124
+ value: void 0
125
+ });
114
126
  this.vectorstore = fields.vectorstore;
115
127
  this.childSplitter = fields.childSplitter;
116
128
  this.parentSplitter = fields.parentSplitter;
@@ -118,9 +130,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
118
130
  this.childK = fields.childK;
119
131
  this.parentK = fields.parentK;
120
132
  this.childDocumentRetriever = fields.childDocumentRetriever;
133
+ this.documentCompressor = fields.documentCompressor;
134
+ this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
121
135
  }
122
136
  async _getRelevantDocuments(query) {
123
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
124
137
  let subDocs = [];
125
138
  if (this.childDocumentRetriever) {
126
139
  subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
@@ -128,6 +141,12 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
128
141
  else {
129
142
  subDocs = await this.vectorstore.similaritySearch(query, this.childK);
130
143
  }
144
+ if (this.documentCompressor && subDocs.length) {
145
+ subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
146
+ if (this.documentCompressorFilteringFn) {
147
+ subDocs = this.documentCompressorFilteringFn(subDocs);
148
+ }
149
+ }
131
150
  // Maintain order
132
151
  const parentDocIds = [];
133
152
  for (const doc of subDocs) {
@@ -1,7 +1,9 @@
1
1
  import { type VectorStoreInterface, type VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
2
2
  import { Document } from "@langchain/core/documents";
3
+ import type { BaseDocumentCompressor } from "./document_compressors/index.js";
3
4
  import { TextSplitter, TextSplitterChunkHeaderOptions } from "../text_splitter.js";
4
5
  import { MultiVectorRetriever, type MultiVectorRetrieverInput } from "./multi_vector.js";
6
+ export type SubDocs = Document<Record<string, any>>[];
5
7
  /**
6
8
  * Interface for the fields required to initialize a
7
9
  * ParentDocumentRetriever instance.
@@ -14,6 +16,8 @@ export type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & {
14
16
  * the `.similaritySearch` method of the vectorstore.
15
17
  */
16
18
  childDocumentRetriever?: VectorStoreRetrieverInterface<VectorStoreInterface>;
19
+ documentCompressor?: BaseDocumentCompressor | undefined;
20
+ documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs;
17
21
  };
18
22
  /**
19
23
  * A type of document retriever that splits input documents into smaller chunks
@@ -55,6 +59,8 @@ export declare class ParentDocumentRetriever extends MultiVectorRetriever {
55
59
  protected childK?: number;
56
60
  protected parentK?: number;
57
61
  childDocumentRetriever: VectorStoreRetrieverInterface<VectorStoreInterface> | undefined;
62
+ documentCompressor: BaseDocumentCompressor | undefined;
63
+ documentCompressorFilteringFn?: ParentDocumentRetrieverFields["documentCompressorFilteringFn"];
58
64
  constructor(fields: ParentDocumentRetrieverFields);
59
65
  _getRelevantDocuments(query: string): Promise<Document[]>;
60
66
  /**
@@ -85,6 +85,18 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
85
85
  writable: true,
86
86
  value: void 0
87
87
  });
88
+ Object.defineProperty(this, "documentCompressor", {
89
+ enumerable: true,
90
+ configurable: true,
91
+ writable: true,
92
+ value: void 0
93
+ });
94
+ Object.defineProperty(this, "documentCompressorFilteringFn", {
95
+ enumerable: true,
96
+ configurable: true,
97
+ writable: true,
98
+ value: void 0
99
+ });
88
100
  this.vectorstore = fields.vectorstore;
89
101
  this.childSplitter = fields.childSplitter;
90
102
  this.parentSplitter = fields.parentSplitter;
@@ -92,9 +104,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
92
104
  this.childK = fields.childK;
93
105
  this.parentK = fields.parentK;
94
106
  this.childDocumentRetriever = fields.childDocumentRetriever;
107
+ this.documentCompressor = fields.documentCompressor;
108
+ this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
95
109
  }
96
110
  async _getRelevantDocuments(query) {
97
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
98
111
  let subDocs = [];
99
112
  if (this.childDocumentRetriever) {
100
113
  subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
@@ -102,6 +115,12 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
102
115
  else {
103
116
  subDocs = await this.vectorstore.similaritySearch(query, this.childK);
104
117
  }
118
+ if (this.documentCompressor && subDocs.length) {
119
+ subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
120
+ if (this.documentCompressorFilteringFn) {
121
+ subDocs = this.documentCompressorFilteringFn(subDocs);
122
+ }
123
+ }
105
124
  // Maintain order
106
125
  const parentDocIds = [];
107
126
  for (const doc of subDocs) {
@@ -1,139 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.BasicTranslator = exports.BaseTranslator = void 0;
4
- const ir_js_1 = require("../../chains/query_constructor/ir.cjs");
5
- const utils_js_1 = require("./utils.cjs");
6
- /**
7
- * Abstract class that provides a blueprint for creating specific
8
- * translator classes. Defines two abstract methods: formatFunction and
9
- * mergeFilters.
10
- */
11
- class BaseTranslator extends ir_js_1.Visitor {
12
- }
13
- exports.BaseTranslator = BaseTranslator;
14
- /**
15
- * Class that extends the BaseTranslator class and provides concrete
16
- * implementations for the abstract methods. Also declares three types:
17
- * VisitOperationOutput, VisitComparisonOutput, and
18
- * VisitStructuredQueryOutput, which are used as the return types for the
19
- * visitOperation, visitComparison, and visitStructuredQuery methods
20
- * respectively.
21
- */
22
- class BasicTranslator extends BaseTranslator {
23
- constructor(opts) {
24
- super();
25
- Object.defineProperty(this, "allowedOperators", {
26
- enumerable: true,
27
- configurable: true,
28
- writable: true,
29
- value: void 0
30
- });
31
- Object.defineProperty(this, "allowedComparators", {
32
- enumerable: true,
33
- configurable: true,
34
- writable: true,
35
- value: void 0
36
- });
37
- this.allowedOperators = opts?.allowedOperators ?? [
38
- ir_js_1.Operators.and,
39
- ir_js_1.Operators.or,
40
- ];
41
- this.allowedComparators = opts?.allowedComparators ?? [
42
- ir_js_1.Comparators.eq,
43
- ir_js_1.Comparators.ne,
44
- ir_js_1.Comparators.gt,
45
- ir_js_1.Comparators.gte,
46
- ir_js_1.Comparators.lt,
47
- ir_js_1.Comparators.lte,
48
- ];
49
- }
50
- formatFunction(func) {
51
- if (func in ir_js_1.Comparators) {
52
- if (this.allowedComparators.length > 0 &&
53
- this.allowedComparators.indexOf(func) === -1) {
54
- throw new Error(`Comparator ${func} not allowed. Allowed operators: ${this.allowedComparators.join(", ")}`);
55
- }
56
- }
57
- else if (func in ir_js_1.Operators) {
58
- if (this.allowedOperators.length > 0 &&
59
- this.allowedOperators.indexOf(func) === -1) {
60
- throw new Error(`Operator ${func} not allowed. Allowed operators: ${this.allowedOperators.join(", ")}`);
61
- }
62
- }
63
- else {
64
- throw new Error("Unknown comparator or operator");
65
- }
66
- return `$${func}`;
67
- }
68
- /**
69
- * Visits an operation and returns a result.
70
- * @param operation The operation to visit.
71
- * @returns The result of visiting the operation.
72
- */
73
- visitOperation(operation) {
74
- const args = operation.args?.map((arg) => arg.accept(this));
75
- return {
76
- [this.formatFunction(operation.operator)]: args,
77
- };
78
- }
79
- /**
80
- * Visits a comparison and returns a result.
81
- * @param comparison The comparison to visit.
82
- * @returns The result of visiting the comparison.
83
- */
84
- visitComparison(comparison) {
85
- return {
86
- [comparison.attribute]: {
87
- [this.formatFunction(comparison.comparator)]: (0, utils_js_1.castValue)(comparison.value),
88
- },
89
- };
90
- }
91
- /**
92
- * Visits a structured query and returns a result.
93
- * @param query The structured query to visit.
94
- * @returns The result of visiting the structured query.
95
- */
96
- visitStructuredQuery(query) {
97
- let nextArg = {};
98
- if (query.filter) {
99
- nextArg = {
100
- filter: query.filter.accept(this),
101
- };
102
- }
103
- return nextArg;
104
- }
105
- mergeFilters(defaultFilter, generatedFilter, mergeType = "and", forceDefaultFilter = false) {
106
- if ((0, utils_js_1.isFilterEmpty)(defaultFilter) && (0, utils_js_1.isFilterEmpty)(generatedFilter)) {
107
- return undefined;
108
- }
109
- if ((0, utils_js_1.isFilterEmpty)(defaultFilter) || mergeType === "replace") {
110
- if ((0, utils_js_1.isFilterEmpty)(generatedFilter)) {
111
- return undefined;
112
- }
113
- return generatedFilter;
114
- }
115
- if ((0, utils_js_1.isFilterEmpty)(generatedFilter)) {
116
- if (forceDefaultFilter) {
117
- return defaultFilter;
118
- }
119
- if (mergeType === "and") {
120
- return undefined;
121
- }
122
- return defaultFilter;
123
- }
124
- if (mergeType === "and") {
125
- return {
126
- $and: [defaultFilter, generatedFilter],
127
- };
128
- }
129
- else if (mergeType === "or") {
130
- return {
131
- $or: [defaultFilter, generatedFilter],
132
- };
133
- }
134
- else {
135
- throw new Error("Unknown merge type");
136
- }
137
- }
138
- }
139
- exports.BasicTranslator = BasicTranslator;
4
+ var structured_query_1 = require("@langchain/core/structured_query");
5
+ Object.defineProperty(exports, "BaseTranslator", { enumerable: true, get: function () { return structured_query_1.BaseTranslator; } });
6
+ Object.defineProperty(exports, "BasicTranslator", { enumerable: true, get: function () { return structured_query_1.BasicTranslator; } });