langchain 0.1.23 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,8 +24,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
24
24
  return result;
25
25
  };
26
26
  Object.defineProperty(exports, "__esModule", { value: true });
27
- exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.retrievers__vespa = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.retrievers__remote = exports.output_parsers = exports.schema__query_constructor = exports.schema__prompt_template = exports.chat_models__anthropic = exports.document_transformers__openai_functions = exports.document_loaders__web__sort_xyz_blockchain = exports.document_loaders__web__serpapi = exports.document_loaders__web__searchapi = exports.document_loaders__base = exports.text_splitter = exports.vectorstores__memory = exports.llms__fake = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
- exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = exports.util__document = void 0;
27
+ exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.retrievers__matryoshka_retriever = exports.retrievers__vespa = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.retrievers__remote = exports.output_parsers = exports.schema__query_constructor = exports.schema__prompt_template = exports.chat_models__anthropic = exports.document_transformers__openai_functions = exports.document_loaders__web__sort_xyz_blockchain = exports.document_loaders__web__serpapi = exports.document_loaders__web__searchapi = exports.document_loaders__base = exports.text_splitter = exports.vectorstores__memory = exports.llms__fake = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
28
+ exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = exports.util__document = exports.storage__in_memory = void 0;
29
29
  exports.agents = __importStar(require("../agents/index.cjs"));
30
30
  exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
31
31
  exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
@@ -71,6 +71,7 @@ exports.retrievers__document_compressors__embeddings_filter = __importStar(requi
71
71
  exports.retrievers__hyde = __importStar(require("../retrievers/hyde.cjs"));
72
72
  exports.retrievers__score_threshold = __importStar(require("../retrievers/score_threshold.cjs"));
73
73
  exports.retrievers__vespa = __importStar(require("../retrievers/vespa.cjs"));
74
+ exports.retrievers__matryoshka_retriever = __importStar(require("../retrievers/matryoshka_retriever.cjs"));
74
75
  exports.stores__doc__in_memory = __importStar(require("../stores/doc/in_memory.cjs"));
75
76
  exports.stores__file__in_memory = __importStar(require("../stores/file/in_memory.cjs"));
76
77
  exports.stores__message__in_memory = __importStar(require("../stores/message/in_memory.cjs"));
@@ -43,6 +43,7 @@ export * as retrievers__document_compressors__embeddings_filter from "../retriev
43
43
  export * as retrievers__hyde from "../retrievers/hyde.js";
44
44
  export * as retrievers__score_threshold from "../retrievers/score_threshold.js";
45
45
  export * as retrievers__vespa from "../retrievers/vespa.js";
46
+ export * as retrievers__matryoshka_retriever from "../retrievers/matryoshka_retriever.js";
46
47
  export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
47
48
  export * as stores__file__in_memory from "../stores/file/in_memory.js";
48
49
  export * as stores__message__in_memory from "../stores/message/in_memory.js";
@@ -44,6 +44,7 @@ export * as retrievers__document_compressors__embeddings_filter from "../retriev
44
44
  export * as retrievers__hyde from "../retrievers/hyde.js";
45
45
  export * as retrievers__score_threshold from "../retrievers/score_threshold.js";
46
46
  export * as retrievers__vespa from "../retrievers/vespa.js";
47
+ export * as retrievers__matryoshka_retriever from "../retrievers/matryoshka_retriever.js";
47
48
  export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
48
49
  export * as stores__file__in_memory from "../stores/file/in_memory.js";
49
50
  export * as stores__message__in_memory from "../stores/message/in_memory.js";
@@ -0,0 +1,148 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MatryoshkaRetriever = void 0;
4
+ const math_1 = require("@langchain/core/utils/math");
5
+ const vectorstores_1 = require("@langchain/core/vectorstores");
6
+ /**
7
+ * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
8
+ * off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
9
+ * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
10
+ *
11
+ *
12
+ * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
13
+ * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
14
+ * embedding in two steps:
15
+ *
16
+ * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
17
+ * but less accurate search.
18
+ *
19
+ * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
20
+ * embedding for higher accuracy.
21
+ *
22
+ *
23
+ * This code implements MRL embeddings for efficient vector search by combining faster,
24
+ * lower-dimensional initial search with accurate, high-dimensional re-ranking.
25
+ */
26
+ class MatryoshkaRetriever extends vectorstores_1.VectorStoreRetriever {
27
+ constructor(fields) {
28
+ super(fields);
29
+ Object.defineProperty(this, "smallK", {
30
+ enumerable: true,
31
+ configurable: true,
32
+ writable: true,
33
+ value: 50
34
+ });
35
+ Object.defineProperty(this, "largeK", {
36
+ enumerable: true,
37
+ configurable: true,
38
+ writable: true,
39
+ value: 8
40
+ });
41
+ Object.defineProperty(this, "largeEmbeddingKey", {
42
+ enumerable: true,
43
+ configurable: true,
44
+ writable: true,
45
+ value: "lc_large_embedding"
46
+ });
47
+ Object.defineProperty(this, "largeEmbeddingModel", {
48
+ enumerable: true,
49
+ configurable: true,
50
+ writable: true,
51
+ value: void 0
52
+ });
53
+ Object.defineProperty(this, "searchType", {
54
+ enumerable: true,
55
+ configurable: true,
56
+ writable: true,
57
+ value: "cosine"
58
+ });
59
+ /**
60
+ * Override the default `addDocuments` method to embed the documents twice,
61
+ * once using the larger embeddings model, and then again using the default
62
+ * embedding model linked to the vector store.
63
+ *
64
+ * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
65
+ * @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
66
+ * @returns {Promise<string[] | void>} A promise that resolves to an array of the document IDs that were added to the vector store.
67
+ */
68
+ Object.defineProperty(this, "addDocuments", {
69
+ enumerable: true,
70
+ configurable: true,
71
+ writable: true,
72
+ value: async (documents, options) => {
73
+ // Ensure documents metadata does not contain the large embedding key
74
+ if (documents.some((doc) => this.largeEmbeddingKey in doc.metadata)) {
75
+ throw new Error(`All documents must not contain the large embedding key: ${this.largeEmbeddingKey} in their metadata.`);
76
+ }
77
+ const allDocPageContent = documents.map((doc) => doc.pageContent);
78
+ const allDocLargeEmbeddings = await this.largeEmbeddingModel.embedDocuments(allDocPageContent);
79
+ const newDocuments = documents.map((doc, idx) => ({
80
+ ...doc,
81
+ metadata: {
82
+ ...doc.metadata,
83
+ [this.largeEmbeddingKey]: JSON.stringify(allDocLargeEmbeddings[idx]),
84
+ },
85
+ }));
86
+ return this.vectorStore.addDocuments(newDocuments, options);
87
+ }
88
+ });
89
+ this.smallK = fields.smallK ?? this.smallK;
90
+ this.largeK = fields.largeK ?? this.largeK;
91
+ this.largeEmbeddingKey = fields.largeEmbeddingKey ?? this.largeEmbeddingKey;
92
+ this.largeEmbeddingModel = fields.largeEmbeddingModel;
93
+ this.searchType = fields.searchType ?? this.searchType;
94
+ }
95
+ /**
96
+ * Ranks documents based on their similarity to a query embedding using larger embeddings.
97
+ *
98
+ * This method takes a query embedding and a list of documents (smallResults) as input. Each document
99
+ * in the smallResults array has previously been associated with a large embedding stored in its metadata.
100
+ * Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores
101
+ * between the query embedding and each document's large embedding. It then ranks the documents based on
102
+ * these similarity scores, from the most similar to the least similar.
103
+ *
104
+ * The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK`
105
+ * is a class property defining the number of documents to return. This subset of documents is determined
106
+ * by sorting the entire list of documents based on their similarity scores and then selecting the top
107
+ * `largeK` documents.
108
+ *
109
+ * @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
110
+ * @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
111
+ * @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding.
112
+ */
113
+ _rankByLargeEmbeddings(embeddedQuery, smallResults) {
114
+ const largeEmbeddings = smallResults.map((doc) => JSON.parse(doc.metadata[this.largeEmbeddingKey]));
115
+ let func;
116
+ switch (this.searchType) {
117
+ case "cosine":
118
+ func = () => (0, math_1.cosineSimilarity)([embeddedQuery], largeEmbeddings);
119
+ break;
120
+ case "innerProduct":
121
+ func = () => (0, math_1.innerProduct)([embeddedQuery], largeEmbeddings);
122
+ break;
123
+ case "euclidean":
124
+ func = () => (0, math_1.euclideanDistance)([embeddedQuery], largeEmbeddings);
125
+ break;
126
+ default:
127
+ throw new Error(`Unknown search type: ${this.searchType}`);
128
+ }
129
+ // Calculate the similarity scores between the query embedding and the large embeddings
130
+ const [similarityScores] = func();
131
+ // Create an array of indices from 0 to N-1, where N is the number of documents
132
+ let indices = Array.from({ length: smallResults.length }, (_, index) => index);
133
+ indices = indices
134
+ .map((v, i) => [similarityScores[i], v])
135
+ .sort(([a], [b]) => b - a)
136
+ .slice(0, this.largeK)
137
+ .map(([, i]) => i);
138
+ return indices.map((i) => smallResults[i]);
139
+ }
140
+ async _getRelevantDocuments(query) {
141
+ const [embeddedQuery, smallResults] = await Promise.all([
142
+ this.largeEmbeddingModel.embedQuery(query),
143
+ this.vectorStore.similaritySearch(query, this.smallK, this.filter),
144
+ ]);
145
+ return this._rankByLargeEmbeddings(embeddedQuery, smallResults);
146
+ }
147
+ }
148
+ exports.MatryoshkaRetriever = MatryoshkaRetriever;
@@ -0,0 +1,93 @@
1
+ import { DocumentInterface } from "@langchain/core/documents";
2
+ import { Embeddings } from "@langchain/core/embeddings";
3
+ import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from "@langchain/core/vectorstores";
4
+ /**
5
+ * Type for options when adding a document to the VectorStore.
6
+ */
7
+ type AddDocumentOptions = Record<string, any>;
8
+ export interface MatryoshkaRetrieverFields {
9
+ /**
10
+ * The number of documents to retrieve from the small store.
11
+ * @default 50
12
+ */
13
+ smallK?: number;
14
+ /**
15
+ * The number of documents to retrieve from the large store.
16
+ * @default 8
17
+ */
18
+ largeK?: number;
19
+ /**
20
+ * The metadata key to store the larger embeddings.
21
+ * @default "lc_large_embedding"
22
+ */
23
+ largeEmbeddingKey?: string;
24
+ /**
25
+ * The embedding model to use when generating the large
26
+ * embeddings.
27
+ */
28
+ largeEmbeddingModel: Embeddings;
29
+ /**
30
+ * The type of search to perform using the large embeddings.
31
+ * @default "cosine"
32
+ */
33
+ searchType?: "cosine" | "innerProduct" | "euclidean";
34
+ }
35
+ /**
36
+ * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
37
+ * off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
38
+ * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
39
+ *
40
+ *
41
+ * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
42
+ * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
43
+ * embedding in two steps:
44
+ *
45
+ * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
46
+ * but less accurate search.
47
+ *
48
+ * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
49
+ * embedding for higher accuracy.
50
+ *
51
+ *
52
+ * This code implements MRL embeddings for efficient vector search by combining faster,
53
+ * lower-dimensional initial search with accurate, high-dimensional re-ranking.
54
+ */
55
+ export declare class MatryoshkaRetriever<Store extends VectorStore = VectorStore> extends VectorStoreRetriever<Store> {
56
+ smallK: number;
57
+ largeK: number;
58
+ largeEmbeddingKey: string;
59
+ largeEmbeddingModel: Embeddings;
60
+ searchType: "cosine" | "innerProduct" | "euclidean";
61
+ constructor(fields: MatryoshkaRetrieverFields & VectorStoreRetrieverInput<Store>);
62
+ /**
63
+ * Ranks documents based on their similarity to a query embedding using larger embeddings.
64
+ *
65
+ * This method takes a query embedding and a list of documents (smallResults) as input. Each document
66
+ * in the smallResults array has previously been associated with a large embedding stored in its metadata.
67
+ * Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores
68
+ * between the query embedding and each document's large embedding. It then ranks the documents based on
69
+ * these similarity scores, from the most similar to the least similar.
70
+ *
71
+ * The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK`
72
+ * is a class property defining the number of documents to return. This subset of documents is determined
73
+ * by sorting the entire list of documents based on their similarity scores and then selecting the top
74
+ * `largeK` documents.
75
+ *
76
+ * @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
77
+ * @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
78
+ * @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding.
79
+ */
80
+ private _rankByLargeEmbeddings;
81
+ _getRelevantDocuments(query: string): Promise<DocumentInterface[]>;
82
+ /**
83
+ * Override the default `addDocuments` method to embed the documents twice,
84
+ * once using the larger embeddings model, and then again using the default
85
+ * embedding model linked to the vector store.
86
+ *
87
+ * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
88
+ * @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
89
+ * @returns {Promise<string[] | void>} A promise that resolves to an array of the document IDs that were added to the vector store.
90
+ */
91
+ addDocuments: (documents: DocumentInterface[], options?: AddDocumentOptions) => Promise<string[] | void>;
92
+ }
93
+ export {};
@@ -0,0 +1,144 @@
1
+ import { cosineSimilarity, euclideanDistance, innerProduct, } from "@langchain/core/utils/math";
2
+ import { VectorStoreRetriever, } from "@langchain/core/vectorstores";
3
+ /**
4
+ * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
5
+ * off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
6
+ * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
7
+ *
8
+ *
9
+ * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
10
+ * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
11
+ * embedding in two steps:
12
+ *
13
+ * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
14
+ * but less accurate search.
15
+ *
16
+ * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
17
+ * embedding for higher accuracy.
18
+ *
19
+ *
20
+ * This code implements MRL embeddings for efficient vector search by combining faster,
21
+ * lower-dimensional initial search with accurate, high-dimensional re-ranking.
22
+ */
23
+ export class MatryoshkaRetriever extends VectorStoreRetriever {
24
+ constructor(fields) {
25
+ super(fields);
26
+ Object.defineProperty(this, "smallK", {
27
+ enumerable: true,
28
+ configurable: true,
29
+ writable: true,
30
+ value: 50
31
+ });
32
+ Object.defineProperty(this, "largeK", {
33
+ enumerable: true,
34
+ configurable: true,
35
+ writable: true,
36
+ value: 8
37
+ });
38
+ Object.defineProperty(this, "largeEmbeddingKey", {
39
+ enumerable: true,
40
+ configurable: true,
41
+ writable: true,
42
+ value: "lc_large_embedding"
43
+ });
44
+ Object.defineProperty(this, "largeEmbeddingModel", {
45
+ enumerable: true,
46
+ configurable: true,
47
+ writable: true,
48
+ value: void 0
49
+ });
50
+ Object.defineProperty(this, "searchType", {
51
+ enumerable: true,
52
+ configurable: true,
53
+ writable: true,
54
+ value: "cosine"
55
+ });
56
+ /**
57
+ * Override the default `addDocuments` method to embed the documents twice,
58
+ * once using the larger embeddings model, and then again using the default
59
+ * embedding model linked to the vector store.
60
+ *
61
+ * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
62
+ * @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
63
+ * @returns {Promise<string[] | void>} A promise that resolves to an array of the document IDs that were added to the vector store.
64
+ */
65
+ Object.defineProperty(this, "addDocuments", {
66
+ enumerable: true,
67
+ configurable: true,
68
+ writable: true,
69
+ value: async (documents, options) => {
70
+ // Ensure documents metadata does not contain the large embedding key
71
+ if (documents.some((doc) => this.largeEmbeddingKey in doc.metadata)) {
72
+ throw new Error(`All documents must not contain the large embedding key: ${this.largeEmbeddingKey} in their metadata.`);
73
+ }
74
+ const allDocPageContent = documents.map((doc) => doc.pageContent);
75
+ const allDocLargeEmbeddings = await this.largeEmbeddingModel.embedDocuments(allDocPageContent);
76
+ const newDocuments = documents.map((doc, idx) => ({
77
+ ...doc,
78
+ metadata: {
79
+ ...doc.metadata,
80
+ [this.largeEmbeddingKey]: JSON.stringify(allDocLargeEmbeddings[idx]),
81
+ },
82
+ }));
83
+ return this.vectorStore.addDocuments(newDocuments, options);
84
+ }
85
+ });
86
+ this.smallK = fields.smallK ?? this.smallK;
87
+ this.largeK = fields.largeK ?? this.largeK;
88
+ this.largeEmbeddingKey = fields.largeEmbeddingKey ?? this.largeEmbeddingKey;
89
+ this.largeEmbeddingModel = fields.largeEmbeddingModel;
90
+ this.searchType = fields.searchType ?? this.searchType;
91
+ }
92
+ /**
93
+ * Ranks documents based on their similarity to a query embedding using larger embeddings.
94
+ *
95
+ * This method takes a query embedding and a list of documents (smallResults) as input. Each document
96
+ * in the smallResults array has previously been associated with a large embedding stored in its metadata.
97
+ * Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores
98
+ * between the query embedding and each document's large embedding. It then ranks the documents based on
99
+ * these similarity scores, from the most similar to the least similar.
100
+ *
101
+ * The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK`
102
+ * is a class property defining the number of documents to return. This subset of documents is determined
103
+ * by sorting the entire list of documents based on their similarity scores and then selecting the top
104
+ * `largeK` documents.
105
+ *
106
+ * @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
107
+ * @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
108
+ * @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding.
109
+ */
110
+ _rankByLargeEmbeddings(embeddedQuery, smallResults) {
111
+ const largeEmbeddings = smallResults.map((doc) => JSON.parse(doc.metadata[this.largeEmbeddingKey]));
112
+ let func;
113
+ switch (this.searchType) {
114
+ case "cosine":
115
+ func = () => cosineSimilarity([embeddedQuery], largeEmbeddings);
116
+ break;
117
+ case "innerProduct":
118
+ func = () => innerProduct([embeddedQuery], largeEmbeddings);
119
+ break;
120
+ case "euclidean":
121
+ func = () => euclideanDistance([embeddedQuery], largeEmbeddings);
122
+ break;
123
+ default:
124
+ throw new Error(`Unknown search type: ${this.searchType}`);
125
+ }
126
+ // Calculate the similarity scores between the query embedding and the large embeddings
127
+ const [similarityScores] = func();
128
+ // Create an array of indices from 0 to N-1, where N is the number of documents
129
+ let indices = Array.from({ length: smallResults.length }, (_, index) => index);
130
+ indices = indices
131
+ .map((v, i) => [similarityScores[i], v])
132
+ .sort(([a], [b]) => b - a)
133
+ .slice(0, this.largeK)
134
+ .map(([, i]) => i);
135
+ return indices.map((i) => smallResults[i]);
136
+ }
137
+ async _getRelevantDocuments(query) {
138
+ const [embeddedQuery, smallResults] = await Promise.all([
139
+ this.largeEmbeddingModel.embedQuery(query),
140
+ this.vectorStore.similaritySearch(query, this.smallK, this.filter),
141
+ ]);
142
+ return this._rankByLargeEmbeddings(embeddedQuery, smallResults);
143
+ }
144
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langchain",
3
- "version": "0.1.23",
3
+ "version": "0.1.25",
4
4
  "description": "Typescript bindings for langchain",
5
5
  "type": "module",
6
6
  "engines": {
@@ -942,6 +942,10 @@
942
942
  "retrievers/vespa.js",
943
943
  "retrievers/vespa.d.ts",
944
944
  "retrievers/vespa.d.cts",
945
+ "retrievers/matryoshka_retriever.cjs",
946
+ "retrievers/matryoshka_retriever.js",
947
+ "retrievers/matryoshka_retriever.d.ts",
948
+ "retrievers/matryoshka_retriever.d.cts",
945
949
  "cache.cjs",
946
950
  "cache.js",
947
951
  "cache.d.ts",
@@ -1297,7 +1301,7 @@
1297
1301
  "web-auth-library": "^1.0.3",
1298
1302
  "wikipedia": "^2.1.2",
1299
1303
  "youtube-transcript": "^1.0.6",
1300
- "youtubei.js": "^5.8.0"
1304
+ "youtubei.js": "^9.1.0"
1301
1305
  },
1302
1306
  "peerDependencies": {
1303
1307
  "@aws-sdk/client-s3": "^3.310.0",
@@ -1349,7 +1353,7 @@
1349
1353
  "web-auth-library": "^1.0.3",
1350
1354
  "ws": "^8.14.2",
1351
1355
  "youtube-transcript": "^1.0.6",
1352
- "youtubei.js": "^5.8.0"
1356
+ "youtubei.js": "^9.1.0"
1353
1357
  },
1354
1358
  "peerDependenciesMeta": {
1355
1359
  "@aws-sdk/client-s3": {
@@ -3641,6 +3645,15 @@
3641
3645
  "import": "./retrievers/vespa.js",
3642
3646
  "require": "./retrievers/vespa.cjs"
3643
3647
  },
3648
+ "./retrievers/matryoshka_retriever": {
3649
+ "types": {
3650
+ "import": "./retrievers/matryoshka_retriever.d.ts",
3651
+ "require": "./retrievers/matryoshka_retriever.d.cts",
3652
+ "default": "./retrievers/matryoshka_retriever.d.ts"
3653
+ },
3654
+ "import": "./retrievers/matryoshka_retriever.js",
3655
+ "require": "./retrievers/matryoshka_retriever.cjs"
3656
+ },
3644
3657
  "./cache": {
3645
3658
  "types": {
3646
3659
  "import": "./cache.d.ts",
@@ -0,0 +1 @@
1
+ module.exports = require('../dist/retrievers/matryoshka_retriever.cjs');
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/matryoshka_retriever.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/matryoshka_retriever.js'
@@ -0,0 +1 @@
1
+ export * from '../dist/retrievers/matryoshka_retriever.js'