langchain 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99,7 +99,9 @@ class OpenAIModerationChain extends base_js_1.BaseChain {
99
99
  });
100
100
  this.throwError = fields?.throwError ?? false;
101
101
  this.openAIApiKey =
102
- fields?.openAIApiKey ?? (0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
102
+ fields?.apiKey ??
103
+ fields?.openAIApiKey ??
104
+ (0, env_1.getEnvironmentVariable)("OPENAI_API_KEY");
103
105
  if (!this.openAIApiKey) {
104
106
  throw new Error("OpenAI API key not found");
105
107
  }
@@ -6,6 +6,8 @@ import { BaseChain, ChainInputs } from "./base.js";
6
6
  * Interface for the input parameters of the OpenAIModerationChain class.
7
7
  */
8
8
  export interface OpenAIModerationChainInput extends ChainInputs, AsyncCallerParams {
9
+ apiKey?: string;
10
+ /** @deprecated Use "apiKey" instead. */
9
11
  openAIApiKey?: string;
10
12
  openAIOrganization?: string;
11
13
  throwError?: boolean;
@@ -96,7 +96,9 @@ export class OpenAIModerationChain extends BaseChain {
96
96
  });
97
97
  this.throwError = fields?.throwError ?? false;
98
98
  this.openAIApiKey =
99
- fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY");
99
+ fields?.apiKey ??
100
+ fields?.openAIApiKey ??
101
+ getEnvironmentVariable("OPENAI_API_KEY");
100
102
  if (!this.openAIApiKey) {
101
103
  throw new Error("OpenAI API key not found");
102
104
  }
@@ -43,7 +43,7 @@ const UNSTRUCTURED_API_FILETYPES = [
43
43
  * and returns an array of Document instances.
44
44
  */
45
45
  class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
46
- constructor(filePathOrLegacyApiUrl, optionsOrLegacyFilePath = {}) {
46
+ constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
47
47
  super();
48
48
  Object.defineProperty(this, "filePath", {
49
49
  enumerable: true,
@@ -51,6 +51,18 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
51
51
  writable: true,
52
52
  value: void 0
53
53
  });
54
+ Object.defineProperty(this, "buffer", {
55
+ enumerable: true,
56
+ configurable: true,
57
+ writable: true,
58
+ value: void 0
59
+ });
60
+ Object.defineProperty(this, "fileName", {
61
+ enumerable: true,
62
+ configurable: true,
63
+ writable: true,
64
+ value: void 0
65
+ });
54
66
  Object.defineProperty(this, "apiUrl", {
55
67
  enumerable: true,
56
68
  configurable: true,
@@ -150,12 +162,19 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
150
162
  // Temporary shim to avoid breaking existing users
151
163
  // Remove when API keys are enforced by Unstructured and existing code will break anyway
152
164
  const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
153
- if (isLegacySyntax) {
165
+ const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
166
+ if (isMemorySyntax) {
167
+ this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
168
+ this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
169
+ }
170
+ else if (isLegacySyntax) {
154
171
  this.filePath = optionsOrLegacyFilePath;
155
- this.apiUrl = filePathOrLegacyApiUrl;
172
+ this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
156
173
  }
157
174
  else {
158
- this.filePath = filePathOrLegacyApiUrl;
175
+ this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
176
+ }
177
+ if (!isLegacySyntax) {
159
178
  const options = optionsOrLegacyFilePath;
160
179
  this.apiKey = options.apiKey;
161
180
  this.apiUrl = options.apiUrl ?? this.apiUrl;
@@ -176,12 +195,16 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
176
195
  }
177
196
  }
178
197
  async _partition() {
179
- const { readFile, basename } = await this.imports();
180
- const buffer = await readFile(this.filePath);
181
- const fileName = basename(this.filePath);
182
- // I'm aware this reads the file into memory first, but we have lots of work
183
- // to do on then consuming Documents in a streaming fashion anyway, so not
184
- // worried about this for now.
198
+ let { buffer } = this;
199
+ let { fileName } = this;
200
+ if (!buffer) {
201
+ const { readFile, basename } = await this.imports();
202
+ buffer = await readFile(this.filePath);
203
+ fileName = basename(this.filePath);
204
+ // I'm aware this reads the file into memory first, but we have lots of work
205
+ // to do on then consuming Documents in a streaming fashion anyway, so not
206
+ // worried about this for now.
207
+ }
185
208
  const formData = new FormData();
186
209
  formData.append("files", new Blob([buffer]), fileName);
187
210
  formData.append("strategy", this.strategy);
@@ -1,5 +1,6 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
2
  /// <reference types="node" resolution-mode="require"/>
3
+ /// <reference types="node" resolution-mode="require"/>
3
4
  import type { basename as BasenameT } from "node:path";
4
5
  import type { readFile as ReadFileT } from "node:fs/promises";
5
6
  import { Document } from "@langchain/core/documents";
@@ -63,6 +64,10 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
63
64
  recursive?: boolean;
64
65
  unknown?: UnknownHandling;
65
66
  };
67
+ type UnstructuredMemoryLoaderOptions = {
68
+ buffer: Buffer;
69
+ fileName: string;
70
+ };
66
71
  /**
67
72
  * @deprecated - Import from "@langchain/community/document_loaders/fs/unstructured" instead. This entrypoint will be removed in 0.3.0.
68
73
  *
@@ -75,6 +80,8 @@ type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
75
80
  */
76
81
  export declare class UnstructuredLoader extends BaseDocumentLoader {
77
82
  filePath: string;
83
+ private buffer?;
84
+ private fileName?;
78
85
  private apiUrl;
79
86
  private apiKey?;
80
87
  private strategy;
@@ -91,7 +98,7 @@ export declare class UnstructuredLoader extends BaseDocumentLoader {
91
98
  private combineUnderNChars?;
92
99
  private newAfterNChars?;
93
100
  private maxCharacters?;
94
- constructor(filePathOrLegacyApiUrl: string, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
101
+ constructor(filePathOrLegacyApiUrlOrMemoryBuffer: string | UnstructuredMemoryLoaderOptions, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
95
102
  _partition(): Promise<Element[]>;
96
103
  load(): Promise<Document[]>;
97
104
  imports(): Promise<{
@@ -39,7 +39,7 @@ const UNSTRUCTURED_API_FILETYPES = [
39
39
  * and returns an array of Document instances.
40
40
  */
41
41
  export class UnstructuredLoader extends BaseDocumentLoader {
42
- constructor(filePathOrLegacyApiUrl, optionsOrLegacyFilePath = {}) {
42
+ constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
43
43
  super();
44
44
  Object.defineProperty(this, "filePath", {
45
45
  enumerable: true,
@@ -47,6 +47,18 @@ export class UnstructuredLoader extends BaseDocumentLoader {
47
47
  writable: true,
48
48
  value: void 0
49
49
  });
50
+ Object.defineProperty(this, "buffer", {
51
+ enumerable: true,
52
+ configurable: true,
53
+ writable: true,
54
+ value: void 0
55
+ });
56
+ Object.defineProperty(this, "fileName", {
57
+ enumerable: true,
58
+ configurable: true,
59
+ writable: true,
60
+ value: void 0
61
+ });
50
62
  Object.defineProperty(this, "apiUrl", {
51
63
  enumerable: true,
52
64
  configurable: true,
@@ -146,12 +158,19 @@ export class UnstructuredLoader extends BaseDocumentLoader {
146
158
  // Temporary shim to avoid breaking existing users
147
159
  // Remove when API keys are enforced by Unstructured and existing code will break anyway
148
160
  const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
149
- if (isLegacySyntax) {
161
+ const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
162
+ if (isMemorySyntax) {
163
+ this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
164
+ this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
165
+ }
166
+ else if (isLegacySyntax) {
150
167
  this.filePath = optionsOrLegacyFilePath;
151
- this.apiUrl = filePathOrLegacyApiUrl;
168
+ this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
152
169
  }
153
170
  else {
154
- this.filePath = filePathOrLegacyApiUrl;
171
+ this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
172
+ }
173
+ if (!isLegacySyntax) {
155
174
  const options = optionsOrLegacyFilePath;
156
175
  this.apiKey = options.apiKey;
157
176
  this.apiUrl = options.apiUrl ?? this.apiUrl;
@@ -172,12 +191,16 @@ export class UnstructuredLoader extends BaseDocumentLoader {
172
191
  }
173
192
  }
174
193
  async _partition() {
175
- const { readFile, basename } = await this.imports();
176
- const buffer = await readFile(this.filePath);
177
- const fileName = basename(this.filePath);
178
- // I'm aware this reads the file into memory first, but we have lots of work
179
- // to do on then consuming Documents in a streaming fashion anyway, so not
180
- // worried about this for now.
194
+ let { buffer } = this;
195
+ let { fileName } = this;
196
+ if (!buffer) {
197
+ const { readFile, basename } = await this.imports();
198
+ buffer = await readFile(this.filePath);
199
+ fileName = basename(this.filePath);
200
+ // I'm aware this reads the file into memory first, but we have lots of work
201
+ // to do on then consuming Documents in a streaming fashion anyway, so not
202
+ // worried about this for now.
203
+ }
181
204
  const formData = new FormData();
182
205
  formData.append("files", new Blob([buffer]), fileName);
183
206
  formData.append("strategy", this.strategy);
@@ -2,6 +2,7 @@
2
2
  /* eslint-disable @typescript-eslint/no-non-null-assertion */
3
3
  import * as url from "node:url";
4
4
  import * as path from "node:path";
5
+ import { readFile } from "node:fs/promises";
5
6
  import { test, expect } from "@jest/globals";
6
7
  import { UnstructuredDirectoryLoader, UnstructuredLoader, UnknownHandling, } from "../fs/unstructured.js";
7
8
  test.skip("Test Unstructured base loader", async () => {
@@ -16,6 +17,23 @@ test.skip("Test Unstructured base loader", async () => {
16
17
  expect(typeof doc.pageContent).toBe("string");
17
18
  }
18
19
  });
20
+ test.skip("Test Unstructured base loader with buffer", async () => {
21
+ const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/example.txt");
22
+ const options = {
23
+ apiKey: process.env.UNSTRUCTURED_API_KEY,
24
+ };
25
+ const buffer = await readFile(filePath);
26
+ const fileName = "example.txt";
27
+ const loader = new UnstructuredLoader({
28
+ buffer,
29
+ fileName,
30
+ }, options);
31
+ const docs = await loader.load();
32
+ expect(docs.length).toBe(3);
33
+ for (const doc of docs) {
34
+ expect(typeof doc.pageContent).toBe("string");
35
+ }
36
+ });
19
37
  test.skip("Test Unstructured base loader with fast strategy", async () => {
20
38
  const filePath = path.resolve(path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/1706.03762.pdf");
21
39
  const options = {
@@ -24,8 +24,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
24
24
  return result;
25
25
  };
26
26
  Object.defineProperty(exports, "__esModule", { value: true });
27
- exports.util__math = exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.stores__doc__base = exports.retrievers__matryoshka_retriever = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.output_parsers = exports.callbacks = exports.document_transformers__openai_functions = exports.document_loaders__base = exports.memory__chat_memory = exports.memory__index = exports.memory = exports.text_splitter = exports.vectorstores__memory = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.tools = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
- exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.schema__prompt_template = exports.schema__query_constructor = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = void 0;
27
+ exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.stores__doc__base = exports.retrievers__matryoshka_retriever = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__ensemble = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.output_parsers = exports.callbacks = exports.document_transformers__openai_functions = exports.document_loaders__base = exports.memory__chat_memory = exports.memory__index = exports.memory = exports.text_splitter = exports.vectorstores__memory = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.tools = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
+ exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.schema__prompt_template = exports.schema__query_constructor = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
29
29
  exports.agents = __importStar(require("../agents/index.cjs"));
30
30
  exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
31
31
  exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
@@ -59,6 +59,7 @@ exports.callbacks = __importStar(require("../callbacks/index.cjs"));
59
59
  exports.output_parsers = __importStar(require("../output_parsers/index.cjs"));
60
60
  exports.retrievers__contextual_compression = __importStar(require("../retrievers/contextual_compression.cjs"));
61
61
  exports.retrievers__document_compressors = __importStar(require("../retrievers/document_compressors/index.cjs"));
62
+ exports.retrievers__ensemble = __importStar(require("../retrievers/ensemble.cjs"));
62
63
  exports.retrievers__multi_query = __importStar(require("../retrievers/multi_query.cjs"));
63
64
  exports.retrievers__multi_vector = __importStar(require("../retrievers/multi_vector.cjs"));
64
65
  exports.retrievers__parent_document = __importStar(require("../retrievers/parent_document.cjs"));
@@ -31,6 +31,7 @@ export * as callbacks from "../callbacks/index.js";
31
31
  export * as output_parsers from "../output_parsers/index.js";
32
32
  export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js";
33
33
  export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js";
34
+ export * as retrievers__ensemble from "../retrievers/ensemble.js";
34
35
  export * as retrievers__multi_query from "../retrievers/multi_query.js";
35
36
  export * as retrievers__multi_vector from "../retrievers/multi_vector.js";
36
37
  export * as retrievers__parent_document from "../retrievers/parent_document.js";
@@ -32,6 +32,7 @@ export * as callbacks from "../callbacks/index.js";
32
32
  export * as output_parsers from "../output_parsers/index.js";
33
33
  export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js";
34
34
  export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js";
35
+ export * as retrievers__ensemble from "../retrievers/ensemble.js";
35
36
  export * as retrievers__multi_query from "../retrievers/multi_query.js";
36
37
  export * as retrievers__multi_vector from "../retrievers/multi_vector.js";
37
38
  export * as retrievers__parent_document from "../retrievers/parent_document.js";
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.EnsembleRetriever = void 0;
4
+ const retrievers_1 = require("@langchain/core/retrievers");
5
+ /**
6
+ * Ensemble retriever that aggregates and orders the results of
7
+ * multiple retrievers by using weighted Reciprocal Rank Fusion.
8
+ */
9
+ class EnsembleRetriever extends retrievers_1.BaseRetriever {
10
+ static lc_name() {
11
+ return "EnsembleRetriever";
12
+ }
13
+ constructor(args) {
14
+ super(args);
15
+ Object.defineProperty(this, "lc_namespace", {
16
+ enumerable: true,
17
+ configurable: true,
18
+ writable: true,
19
+ value: ["langchain", "retrievers", "ensemble_retriever"]
20
+ });
21
+ Object.defineProperty(this, "retrievers", {
22
+ enumerable: true,
23
+ configurable: true,
24
+ writable: true,
25
+ value: void 0
26
+ });
27
+ Object.defineProperty(this, "weights", {
28
+ enumerable: true,
29
+ configurable: true,
30
+ writable: true,
31
+ value: void 0
32
+ });
33
+ Object.defineProperty(this, "c", {
34
+ enumerable: true,
35
+ configurable: true,
36
+ writable: true,
37
+ value: 60
38
+ });
39
+ this.retrievers = args.retrievers;
40
+ this.weights =
41
+ args.weights ||
42
+ new Array(args.retrievers.length).fill(1 / args.retrievers.length);
43
+ this.c = args.c || 60;
44
+ }
45
+ async _getRelevantDocuments(query, runManager) {
46
+ return this._rankFusion(query, runManager);
47
+ }
48
+ async _rankFusion(query, runManager) {
49
+ const retrieverDocs = await Promise.all(this.retrievers.map((retriever, i) => retriever.invoke(query, {
50
+ callbacks: runManager?.getChild(`retriever_${i + 1}`),
51
+ })));
52
+ const fusedDocs = await this._weightedReciprocalRank(retrieverDocs);
53
+ return fusedDocs;
54
+ }
55
+ async _weightedReciprocalRank(docList) {
56
+ if (docList.length !== this.weights.length) {
57
+ throw new Error("Number of retrieved document lists must be equal to the number of weights.");
58
+ }
59
+ const rrfScoreDict = docList.reduce((rffScore, retrieverDoc, idx) => {
60
+ let rank = 1;
61
+ const weight = this.weights[idx];
62
+ while (rank <= retrieverDoc.length) {
63
+ const { pageContent } = retrieverDoc[rank - 1];
64
+ if (!rffScore[pageContent]) {
65
+ // eslint-disable-next-line no-param-reassign
66
+ rffScore[pageContent] = 0;
67
+ }
68
+ // eslint-disable-next-line no-param-reassign
69
+ rffScore[pageContent] += weight / (rank + this.c);
70
+ rank += 1;
71
+ }
72
+ return rffScore;
73
+ }, {});
74
+ const uniqueDocs = this._uniqueUnion(docList.flat());
75
+ const sortedDocs = Array.from(uniqueDocs).sort((a, b) => rrfScoreDict[b.pageContent] - rrfScoreDict[a.pageContent]);
76
+ return sortedDocs;
77
+ }
78
+ _uniqueUnion(documents) {
79
+ const documentSet = new Set();
80
+ const result = [];
81
+ for (const doc of documents) {
82
+ const key = doc.pageContent;
83
+ if (!documentSet.has(key)) {
84
+ documentSet.add(key);
85
+ result.push(doc);
86
+ }
87
+ }
88
+ return result;
89
+ }
90
+ }
91
+ exports.EnsembleRetriever = EnsembleRetriever;
@@ -0,0 +1,34 @@
1
+ import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers";
2
+ import { Document, DocumentInterface } from "@langchain/core/documents";
3
+ import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
4
+ export interface EnsembleRetrieverInput extends BaseRetrieverInput {
5
+ /** A list of retrievers to ensemble. */
6
+ retrievers: BaseRetriever[];
7
+ /**
8
+ * A list of weights corresponding to the retrievers. Defaults to equal
9
+ * weighting for all retrievers.
10
+ */
11
+ weights?: number[];
12
+ /**
13
+ * A constant added to the rank, controlling the balance between the importance
14
+ * of high-ranked items and the consideration given to lower-ranked items.
15
+ * Default is 60.
16
+ */
17
+ c?: number;
18
+ }
19
+ /**
20
+ * Ensemble retriever that aggregates and orders the results of
21
+ * multiple retrievers by using weighted Reciprocal Rank Fusion.
22
+ */
23
+ export declare class EnsembleRetriever extends BaseRetriever {
24
+ static lc_name(): string;
25
+ lc_namespace: string[];
26
+ retrievers: BaseRetriever[];
27
+ weights: number[];
28
+ c: number;
29
+ constructor(args: EnsembleRetrieverInput);
30
+ _getRelevantDocuments(query: string, runManager?: CallbackManagerForRetrieverRun): Promise<Document<Record<string, any>>[]>;
31
+ _rankFusion(query: string, runManager?: CallbackManagerForRetrieverRun): Promise<Document<Record<string, any>>[]>;
32
+ _weightedReciprocalRank(docList: DocumentInterface[][]): Promise<Document<Record<string, any>>[]>;
33
+ private _uniqueUnion;
34
+ }
@@ -0,0 +1,87 @@
1
+ import { BaseRetriever } from "@langchain/core/retrievers";
2
+ /**
3
+ * Ensemble retriever that aggregates and orders the results of
4
+ * multiple retrievers by using weighted Reciprocal Rank Fusion.
5
+ */
6
+ export class EnsembleRetriever extends BaseRetriever {
7
+ static lc_name() {
8
+ return "EnsembleRetriever";
9
+ }
10
+ constructor(args) {
11
+ super(args);
12
+ Object.defineProperty(this, "lc_namespace", {
13
+ enumerable: true,
14
+ configurable: true,
15
+ writable: true,
16
+ value: ["langchain", "retrievers", "ensemble_retriever"]
17
+ });
18
+ Object.defineProperty(this, "retrievers", {
19
+ enumerable: true,
20
+ configurable: true,
21
+ writable: true,
22
+ value: void 0
23
+ });
24
+ Object.defineProperty(this, "weights", {
25
+ enumerable: true,
26
+ configurable: true,
27
+ writable: true,
28
+ value: void 0
29
+ });
30
+ Object.defineProperty(this, "c", {
31
+ enumerable: true,
32
+ configurable: true,
33
+ writable: true,
34
+ value: 60
35
+ });
36
+ this.retrievers = args.retrievers;
37
+ this.weights =
38
+ args.weights ||
39
+ new Array(args.retrievers.length).fill(1 / args.retrievers.length);
40
+ this.c = args.c || 60;
41
+ }
42
+ async _getRelevantDocuments(query, runManager) {
43
+ return this._rankFusion(query, runManager);
44
+ }
45
+ async _rankFusion(query, runManager) {
46
+ const retrieverDocs = await Promise.all(this.retrievers.map((retriever, i) => retriever.invoke(query, {
47
+ callbacks: runManager?.getChild(`retriever_${i + 1}`),
48
+ })));
49
+ const fusedDocs = await this._weightedReciprocalRank(retrieverDocs);
50
+ return fusedDocs;
51
+ }
52
+ async _weightedReciprocalRank(docList) {
53
+ if (docList.length !== this.weights.length) {
54
+ throw new Error("Number of retrieved document lists must be equal to the number of weights.");
55
+ }
56
+ const rrfScoreDict = docList.reduce((rffScore, retrieverDoc, idx) => {
57
+ let rank = 1;
58
+ const weight = this.weights[idx];
59
+ while (rank <= retrieverDoc.length) {
60
+ const { pageContent } = retrieverDoc[rank - 1];
61
+ if (!rffScore[pageContent]) {
62
+ // eslint-disable-next-line no-param-reassign
63
+ rffScore[pageContent] = 0;
64
+ }
65
+ // eslint-disable-next-line no-param-reassign
66
+ rffScore[pageContent] += weight / (rank + this.c);
67
+ rank += 1;
68
+ }
69
+ return rffScore;
70
+ }, {});
71
+ const uniqueDocs = this._uniqueUnion(docList.flat());
72
+ const sortedDocs = Array.from(uniqueDocs).sort((a, b) => rrfScoreDict[b.pageContent] - rrfScoreDict[a.pageContent]);
73
+ return sortedDocs;
74
+ }
75
+ _uniqueUnion(documents) {
76
+ const documentSet = new Set();
77
+ const result = [];
78
+ for (const doc of documents) {
79
+ const key = doc.pageContent;
80
+ if (!documentSet.has(key)) {
81
+ documentSet.add(key);
82
+ result.push(doc);
83
+ }
84
+ }
85
+ return result;
86
+ }
87
+ }
@@ -0,0 +1,74 @@
1
+ import { expect, test } from "@jest/globals";
2
+ import { CohereEmbeddings } from "@langchain/cohere";
3
+ import { MemoryVectorStore } from "../../vectorstores/memory.js";
4
+ import { EnsembleRetriever } from "../ensemble.js";
5
+ test("Should work with a question input", async () => {
6
+ const vectorstore = await MemoryVectorStore.fromTexts([
7
+ "Buildings are made out of brick",
8
+ "Buildings are made out of wood",
9
+ "Buildings are made out of stone",
10
+ "Cars are made out of metal",
11
+ "Cars are made out of plastic",
12
+ "mitochondria is the powerhouse of the cell",
13
+ "mitochondria is made of lipids",
14
+ ], [{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }], new CohereEmbeddings());
15
+ const retriever = new EnsembleRetriever({
16
+ retrievers: [vectorstore.asRetriever()],
17
+ });
18
+ const query = "What are mitochondria made of?";
19
+ const retrievedDocs = await retriever.invoke(query);
20
+ expect(retrievedDocs[0].pageContent).toContain("mitochondria");
21
+ });
22
+ test("Should work with multiple retriever", async () => {
23
+ const vectorstore = await MemoryVectorStore.fromTexts([
24
+ "Buildings are made out of brick",
25
+ "Buildings are made out of wood",
26
+ "Buildings are made out of stone",
27
+ "Cars are made out of metal",
28
+ "Cars are made out of plastic",
29
+ "mitochondria is the powerhouse of the cell",
30
+ "mitochondria is made of lipids",
31
+ ], [{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }], new CohereEmbeddings());
32
+ const vectorstore2 = await MemoryVectorStore.fromTexts([
33
+ "Buildings are made out of brick",
34
+ "Buildings are made out of wood",
35
+ "Buildings are made out of stone",
36
+ "Cars are made out of metal",
37
+ "Cars are made out of plastic",
38
+ "mitochondria is the powerhouse of the cell",
39
+ "mitochondria is made of lipids",
40
+ ], [{ id: 6 }, { id: 7 }, { id: 8 }, { id: 9 }, { id: 10 }], new CohereEmbeddings());
41
+ const retriever = new EnsembleRetriever({
42
+ retrievers: [vectorstore.asRetriever(), vectorstore2.asRetriever()],
43
+ });
44
+ const query = "cars";
45
+ const retrievedDocs = await retriever.invoke(query);
46
+ expect(retrievedDocs.filter((item) => item.pageContent.includes("Cars")).length).toBe(2);
47
+ });
48
+ test("Should work with weights", async () => {
49
+ const vectorstore = await MemoryVectorStore.fromTexts([
50
+ "Buildings are made out of brick",
51
+ "Buildings are made out of wood",
52
+ "Buildings are made out of stone",
53
+ "Cars are made out of metal",
54
+ "Cars are made out of plastic",
55
+ "mitochondria is the powerhouse of the cell",
56
+ "mitochondria is made of lipids",
57
+ ], [{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }], new CohereEmbeddings());
58
+ const vectorstore2 = await MemoryVectorStore.fromTexts([
59
+ "Buildings are made out of brick",
60
+ "Buildings are made out of wood",
61
+ "Buildings are made out of stone",
62
+ "Cars are made out of metal",
63
+ "Cars are made out of plastic",
64
+ "mitochondria is the powerhouse of the cell",
65
+ "mitochondria is made of lipids",
66
+ ], [{ id: 6 }, { id: 7 }, { id: 8 }, { id: 9 }, { id: 10 }], new CohereEmbeddings());
67
+ const retriever = new EnsembleRetriever({
68
+ retrievers: [vectorstore.asRetriever(), vectorstore2.asRetriever()],
69
+ weights: [0.5, 0.9],
70
+ });
71
+ const query = "cars";
72
+ const retrievedDocs = await retriever.invoke(query);
73
+ expect(retrievedDocs.filter((item) => item.pageContent.includes("Cars")).length).toBe(2);
74
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langchain",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "Typescript bindings for langchain",
5
5
  "type": "module",
6
6
  "engines": {
@@ -370,6 +370,10 @@
370
370
  "retrievers/document_compressors.js",
371
371
  "retrievers/document_compressors.d.ts",
372
372
  "retrievers/document_compressors.d.cts",
373
+ "retrievers/ensemble.cjs",
374
+ "retrievers/ensemble.js",
375
+ "retrievers/ensemble.d.ts",
376
+ "retrievers/ensemble.d.cts",
373
377
  "retrievers/multi_query.cjs",
374
378
  "retrievers/multi_query.js",
375
379
  "retrievers/multi_query.d.ts",
@@ -576,7 +580,7 @@
576
580
  "clean": "rm -rf .turbo dist/",
577
581
  "prepack": "yarn build",
578
582
  "release": "release-it --only-version --config .release-it.json",
579
- "test": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%",
583
+ "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%",
580
584
  "test:watch": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts",
581
585
  "test:integration": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%",
582
586
  "test:single": "yarn run build:deps && NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000",
@@ -668,7 +672,7 @@
668
672
  "sonix-speech-recognition": "^2.1.1",
669
673
  "srt-parser-2": "^1.2.3",
670
674
  "ts-jest": "^29.1.0",
671
- "typeorm": "^0.3.12",
675
+ "typeorm": "^0.3.20",
672
676
  "typescript": "~5.1.6",
673
677
  "weaviate-ts-client": "^2.0.0",
674
678
  "web-auth-library": "^1.0.3",
@@ -720,7 +724,7 @@
720
724
  "redis": "^4.6.4",
721
725
  "sonix-speech-recognition": "^2.1.1",
722
726
  "srt-parser-2": "^1.2.3",
723
- "typeorm": "^0.3.12",
727
+ "typeorm": "^0.3.20",
724
728
  "weaviate-ts-client": "*",
725
729
  "web-auth-library": "^1.0.3",
726
730
  "ws": "^8.14.2",
@@ -881,7 +885,7 @@
881
885
  },
882
886
  "dependencies": {
883
887
  "@langchain/core": "~0.2.0",
884
- "@langchain/openai": "~0.1.0",
888
+ "@langchain/openai": ">=0.1.0 <0.3.0",
885
889
  "@langchain/textsplitters": "~0.0.0",
886
890
  "binary-extensions": "^2.2.0",
887
891
  "js-tiktoken": "^1.0.12",
@@ -1725,6 +1729,15 @@
1725
1729
  "import": "./retrievers/document_compressors.js",
1726
1730
  "require": "./retrievers/document_compressors.cjs"
1727
1731
  },
1732
+ "./retrievers/ensemble": {
1733
+ "types": {
1734
+ "import": "./retrievers/ensemble.d.ts",
1735
+ "require": "./retrievers/ensemble.d.cts",
1736
+ "default": "./retrievers/ensemble.d.ts"
1737
+ },
1738
+ "import": "./retrievers/ensemble.js",
1739
+ "require": "./retrievers/ensemble.cjs"
1740
+ },
1728
1741
  "./retrievers/multi_query": {
1729
1742
  "types": {
1730
1743
  "import": "./retrievers/multi_query.d.ts",
@@ -0,0 +1 @@
1
+ module.exports = require('../dist/retrievers/ensemble.cjs');
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'