langchain 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,8 +24,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
24
24
  return result;
25
25
  };
26
26
  Object.defineProperty(exports, "__esModule", { value: true });
27
- exports.util__math = exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.stores__doc__base = exports.retrievers__matryoshka_retriever = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.output_parsers = exports.callbacks = exports.document_transformers__openai_functions = exports.document_loaders__base = exports.memory__chat_memory = exports.memory__index = exports.memory = exports.text_splitter = exports.vectorstores__memory = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.tools = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
- exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.schema__prompt_template = exports.schema__query_constructor = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = void 0;
27
+ exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.stores__doc__base = exports.retrievers__matryoshka_retriever = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__ensemble = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.output_parsers = exports.callbacks = exports.document_transformers__openai_functions = exports.document_loaders__base = exports.memory__chat_memory = exports.memory__index = exports.memory = exports.text_splitter = exports.vectorstores__memory = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.tools = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
+ exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.schema__prompt_template = exports.schema__query_constructor = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
29
29
  exports.agents = __importStar(require("../agents/index.cjs"));
30
30
  exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
31
31
  exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
@@ -59,6 +59,7 @@ exports.callbacks = __importStar(require("../callbacks/index.cjs"));
59
59
  exports.output_parsers = __importStar(require("../output_parsers/index.cjs"));
60
60
  exports.retrievers__contextual_compression = __importStar(require("../retrievers/contextual_compression.cjs"));
61
61
  exports.retrievers__document_compressors = __importStar(require("../retrievers/document_compressors/index.cjs"));
62
+ exports.retrievers__ensemble = __importStar(require("../retrievers/ensemble.cjs"));
62
63
  exports.retrievers__multi_query = __importStar(require("../retrievers/multi_query.cjs"));
63
64
  exports.retrievers__multi_vector = __importStar(require("../retrievers/multi_vector.cjs"));
64
65
  exports.retrievers__parent_document = __importStar(require("../retrievers/parent_document.cjs"));
@@ -31,6 +31,7 @@ export * as callbacks from "../callbacks/index.js";
31
31
  export * as output_parsers from "../output_parsers/index.js";
32
32
  export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js";
33
33
  export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js";
34
+ export * as retrievers__ensemble from "../retrievers/ensemble.js";
34
35
  export * as retrievers__multi_query from "../retrievers/multi_query.js";
35
36
  export * as retrievers__multi_vector from "../retrievers/multi_vector.js";
36
37
  export * as retrievers__parent_document from "../retrievers/parent_document.js";
@@ -32,6 +32,7 @@ export * as callbacks from "../callbacks/index.js";
32
32
  export * as output_parsers from "../output_parsers/index.js";
33
33
  export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js";
34
34
  export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js";
35
+ export * as retrievers__ensemble from "../retrievers/ensemble.js";
35
36
  export * as retrievers__multi_query from "../retrievers/multi_query.js";
36
37
  export * as retrievers__multi_vector from "../retrievers/multi_vector.js";
37
38
  export * as retrievers__parent_document from "../retrievers/parent_document.js";
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.EnsembleRetriever = void 0;
4
+ const retrievers_1 = require("@langchain/core/retrievers");
5
/**
 * Ensemble retriever that aggregates and orders the results of
 * multiple retrievers by using weighted Reciprocal Rank Fusion.
 *
 * Documents are identified by their `pageContent`: documents returned by
 * several retrievers with identical content are merged and their
 * per-retriever scores are summed.
 */
class EnsembleRetriever extends retrievers_1.BaseRetriever {
    static lc_name() {
        return "EnsembleRetriever";
    }
    /**
     * @param args Retriever options.
     * @param args.retrievers Retrievers whose results are fused.
     * @param [args.weights] One weight per retriever. Defaults to equal
     *   weighting (`1 / retrievers.length` each).
     * @param [args.c] Constant added to each rank. Defaults to 60; an
     *   explicit `0` is honoured.
     */
    constructor(args) {
        super(args);
        // Mirrors the compiled class-field semantics: own, enumerable,
        // configurable and writable data properties.
        const defineField = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        defineField("lc_namespace", ["langchain", "retrievers", "ensemble_retriever"]);
        defineField("retrievers", args.retrievers);
        defineField("weights", args.weights ??
            args.retrievers.map(() => 1 / args.retrievers.length));
        // `??` rather than `||` so that a caller-supplied `c: 0` is kept.
        defineField("c", args.c ?? 60);
    }
    /**
     * Entry point used by the base retriever; delegates to `_rankFusion`.
     */
    async _getRelevantDocuments(query, runManager) {
        return this._rankFusion(query, runManager);
    }
    /**
     * Invokes every retriever concurrently with `query` and fuses the
     * resulting document lists.
     *
     * @param query Query passed to each retriever.
     * @param [runManager] When present, each retriever receives a child
     *   callback manager tagged `retriever_<1-based index>`.
     * @returns The fused, de-duplicated documents, best score first.
     */
    async _rankFusion(query, runManager) {
        const retrieverDocs = await Promise.all(this.retrievers.map((retriever, i) => retriever.invoke(query, {
            callbacks: runManager?.getChild(`retriever_${i + 1}`),
        })));
        return this._weightedReciprocalRank(retrieverDocs);
    }
    /**
     * Scores every document with weighted Reciprocal Rank Fusion, i.e. the
     * sum over retrievers of `weight / (rank + c)` with 1-based ranks, and
     * returns the unique documents ordered by descending score.
     *
     * @param docList One ranked document list per retriever.
     * @returns Unique documents (by `pageContent`), highest score first.
     * @throws {Error} If the number of lists differs from the number of weights.
     */
    async _weightedReciprocalRank(docList) {
        if (docList.length !== this.weights.length) {
            throw new Error("Number of retrieved document lists must be equal to the number of weights.");
        }
        // A Map (not a plain object) so that page contents such as
        // "constructor" or "__proto__" cannot collide with inherited keys.
        const rrfScores = new Map();
        docList.forEach((rankedDocs, idx) => {
            const weight = this.weights[idx];
            for (const [position, { pageContent }] of rankedDocs.entries()) {
                const rank = position + 1;
                const previous = rrfScores.get(pageContent) ?? 0;
                rrfScores.set(pageContent, previous + weight / (rank + this.c));
            }
        });
        // `_uniqueUnion` returns a fresh array, so sorting in place is safe.
        const uniqueDocs = this._uniqueUnion(docList.flat());
        return uniqueDocs.sort((a, b) => rrfScores.get(b.pageContent) - rrfScores.get(a.pageContent));
    }
    /**
     * Returns the documents with duplicate `pageContent` removed, keeping
     * the first occurrence of each and preserving input order.
     */
    _uniqueUnion(documents) {
        const seenContents = new Set();
        const result = [];
        for (const doc of documents) {
            const key = doc.pageContent;
            if (!seenContents.has(key)) {
                seenContents.add(key);
                result.push(doc);
            }
        }
        return result;
    }
}
91
+ exports.EnsembleRetriever = EnsembleRetriever;
@@ -0,0 +1,34 @@
1
+ import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers";
2
+ import { Document, DocumentInterface } from "@langchain/core/documents";
3
+ import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
4
/**
 * Constructor options accepted by the ensemble retriever.
 */
export interface EnsembleRetrieverInput extends BaseRetrieverInput {
    /** Retrievers whose results are combined into a single ranking. */
    retrievers: Array<BaseRetriever>;
    /**
     * Weights matched to `retrievers` by position. When omitted, every
     * retriever is weighted equally.
     */
    weights?: Array<number>;
    /**
     * Constant added to each rank before taking the reciprocal; it balances
     * the importance of high-ranked items against the consideration given
     * to lower-ranked ones. Default is 60.
     */
    c?: number;
}
19
/**
 * Retriever that merges the ranked results of several retrievers into one
 * ordering using weighted Reciprocal Rank Fusion.
 */
export declare class EnsembleRetriever extends BaseRetriever {
    /** Name reported for this class. */
    static lc_name(): string;
    /** Namespace path of this retriever. */
    lc_namespace: Array<string>;
    /** Retrievers whose results are fused. */
    retrievers: Array<BaseRetriever>;
    /** One weight per entry in `retrievers`. */
    weights: Array<number>;
    /** Constant added to each rank when scoring. */
    c: number;
    constructor(args: EnsembleRetrieverInput);
    /** Retrieves documents for `query`. */
    _getRelevantDocuments(query: string, runManager?: CallbackManagerForRetrieverRun): Promise<Array<Document<Record<string, any>>>>;
    /** Queries every retriever and fuses their results. */
    _rankFusion(query: string, runManager?: CallbackManagerForRetrieverRun): Promise<Array<Document<Record<string, any>>>>;
    /** Scores and orders the given per-retriever document lists. */
    _weightedReciprocalRank(docList: Array<Array<DocumentInterface>>): Promise<Array<Document<Record<string, any>>>>;
    private _uniqueUnion;
}
@@ -0,0 +1,87 @@
1
+ import { BaseRetriever } from "@langchain/core/retrievers";
2
/**
 * Ensemble retriever that aggregates and orders the results of
 * multiple retrievers by using weighted Reciprocal Rank Fusion.
 *
 * Documents are identified by their `pageContent`: documents returned by
 * several retrievers with identical content are merged and their
 * per-retriever scores are summed.
 */
export class EnsembleRetriever extends BaseRetriever {
    static lc_name() {
        return "EnsembleRetriever";
    }
    /**
     * @param args Retriever options.
     * @param args.retrievers Retrievers whose results are fused.
     * @param [args.weights] One weight per retriever. Defaults to equal
     *   weighting (`1 / retrievers.length` each).
     * @param [args.c] Constant added to each rank. Defaults to 60; an
     *   explicit `0` is honoured.
     */
    constructor(args) {
        super(args);
        // Mirrors the compiled class-field semantics: own, enumerable,
        // configurable and writable data properties.
        const defineField = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        defineField("lc_namespace", ["langchain", "retrievers", "ensemble_retriever"]);
        defineField("retrievers", args.retrievers);
        defineField("weights", args.weights ??
            args.retrievers.map(() => 1 / args.retrievers.length));
        // `??` rather than `||` so that a caller-supplied `c: 0` is kept.
        defineField("c", args.c ?? 60);
    }
    /**
     * Entry point used by the base retriever; delegates to `_rankFusion`.
     */
    async _getRelevantDocuments(query, runManager) {
        return this._rankFusion(query, runManager);
    }
    /**
     * Invokes every retriever concurrently with `query` and fuses the
     * resulting document lists.
     *
     * @param query Query passed to each retriever.
     * @param [runManager] When present, each retriever receives a child
     *   callback manager tagged `retriever_<1-based index>`.
     * @returns The fused, de-duplicated documents, best score first.
     */
    async _rankFusion(query, runManager) {
        const retrieverDocs = await Promise.all(this.retrievers.map((retriever, i) => retriever.invoke(query, {
            callbacks: runManager?.getChild(`retriever_${i + 1}`),
        })));
        return this._weightedReciprocalRank(retrieverDocs);
    }
    /**
     * Scores every document with weighted Reciprocal Rank Fusion, i.e. the
     * sum over retrievers of `weight / (rank + c)` with 1-based ranks, and
     * returns the unique documents ordered by descending score.
     *
     * @param docList One ranked document list per retriever.
     * @returns Unique documents (by `pageContent`), highest score first.
     * @throws {Error} If the number of lists differs from the number of weights.
     */
    async _weightedReciprocalRank(docList) {
        if (docList.length !== this.weights.length) {
            throw new Error("Number of retrieved document lists must be equal to the number of weights.");
        }
        // A Map (not a plain object) so that page contents such as
        // "constructor" or "__proto__" cannot collide with inherited keys.
        const rrfScores = new Map();
        docList.forEach((rankedDocs, idx) => {
            const weight = this.weights[idx];
            for (const [position, { pageContent }] of rankedDocs.entries()) {
                const rank = position + 1;
                const previous = rrfScores.get(pageContent) ?? 0;
                rrfScores.set(pageContent, previous + weight / (rank + this.c));
            }
        });
        // `_uniqueUnion` returns a fresh array, so sorting in place is safe.
        const uniqueDocs = this._uniqueUnion(docList.flat());
        return uniqueDocs.sort((a, b) => rrfScores.get(b.pageContent) - rrfScores.get(a.pageContent));
    }
    /**
     * Returns the documents with duplicate `pageContent` removed, keeping
     * the first occurrence of each and preserving input order.
     */
    _uniqueUnion(documents) {
        const seenContents = new Set();
        const result = [];
        for (const doc of documents) {
            const key = doc.pageContent;
            if (!seenContents.has(key)) {
                seenContents.add(key);
                result.push(doc);
            }
        }
        return result;
    }
}
@@ -0,0 +1,74 @@
1
+ import { expect, test } from "@jest/globals";
2
+ import { CohereEmbeddings } from "@langchain/cohere";
3
+ import { MemoryVectorStore } from "../../vectorstores/memory.js";
4
+ import { EnsembleRetriever } from "../ensemble.js";
5
// Corpus shared by every test in this file.
const SAMPLE_TEXTS = [
  "Buildings are made out of brick",
  "Buildings are made out of wood",
  "Buildings are made out of stone",
  "Cars are made out of metal",
  "Cars are made out of plastic",
  "mitochondria is the powerhouse of the cell",
  "mitochondria is made of lipids",
];

/**
 * Builds an in-memory vector store over SAMPLE_TEXTS, supplying exactly one
 * metadata entry per text with consecutive ids starting at `firstId`.
 */
const buildVectorStore = (firstId) =>
  MemoryVectorStore.fromTexts(
    SAMPLE_TEXTS,
    SAMPLE_TEXTS.map((_, offset) => ({ id: firstId + offset })),
    new CohereEmbeddings()
  );

test("Should work with a question input", async () => {
  const vectorstore = await buildVectorStore(1);
  const retriever = new EnsembleRetriever({
    retrievers: [vectorstore.asRetriever()],
  });
  const query = "What are mitochondria made of?";
  const retrievedDocs = await retriever.invoke(query);
  expect(retrievedDocs[0].pageContent).toContain("mitochondria");
});

test("Should work with multiple retriever", async () => {
  const vectorstore = await buildVectorStore(1);
  const vectorstore2 = await buildVectorStore(1 + SAMPLE_TEXTS.length);
  const retriever = new EnsembleRetriever({
    retrievers: [vectorstore.asRetriever(), vectorstore2.asRetriever()],
  });
  const query = "cars";
  const retrievedDocs = await retriever.invoke(query);
  expect(
    retrievedDocs.filter((item) => item.pageContent.includes("Cars")).length
  ).toBe(2);
});

test("Should work with weights", async () => {
  const vectorstore = await buildVectorStore(1);
  const vectorstore2 = await buildVectorStore(1 + SAMPLE_TEXTS.length);
  const retriever = new EnsembleRetriever({
    retrievers: [vectorstore.asRetriever(), vectorstore2.asRetriever()],
    weights: [0.5, 0.9],
  });
  // The caller-supplied weights must be used as given.
  expect(retriever.weights).toEqual([0.5, 0.9]);
  const query = "cars";
  const retrievedDocs = await retriever.invoke(query);
  expect(
    retrievedDocs.filter((item) => item.pageContent.includes("Cars")).length
  ).toBe(2);
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langchain",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "Typescript bindings for langchain",
5
5
  "type": "module",
6
6
  "engines": {
@@ -370,6 +370,10 @@
370
370
  "retrievers/document_compressors.js",
371
371
  "retrievers/document_compressors.d.ts",
372
372
  "retrievers/document_compressors.d.cts",
373
+ "retrievers/ensemble.cjs",
374
+ "retrievers/ensemble.js",
375
+ "retrievers/ensemble.d.ts",
376
+ "retrievers/ensemble.d.cts",
373
377
  "retrievers/multi_query.cjs",
374
378
  "retrievers/multi_query.js",
375
379
  "retrievers/multi_query.d.ts",
@@ -1725,6 +1729,15 @@
1725
1729
  "import": "./retrievers/document_compressors.js",
1726
1730
  "require": "./retrievers/document_compressors.cjs"
1727
1731
  },
1732
+ "./retrievers/ensemble": {
1733
+ "types": {
1734
+ "import": "./retrievers/ensemble.d.ts",
1735
+ "require": "./retrievers/ensemble.d.cts",
1736
+ "default": "./retrievers/ensemble.d.ts"
1737
+ },
1738
+ "import": "./retrievers/ensemble.js",
1739
+ "require": "./retrievers/ensemble.cjs"
1740
+ },
1728
1741
  "./retrievers/multi_query": {
1729
1742
  "types": {
1730
1743
  "import": "./retrievers/multi_query.d.ts",
@@ -0,0 +1 @@
1
+ module.exports = require('../dist/retrievers/ensemble.cjs');
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/ensemble.js'