langchain 0.1.33 → 0.1.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/history_aware_retriever.cjs +1 -2
- package/dist/chains/history_aware_retriever.d.ts +1 -2
- package/dist/chains/history_aware_retriever.js +1 -2
- package/dist/document_loaders/web/github.cjs +6 -1
- package/dist/document_loaders/web/github.js +6 -1
- package/dist/experimental/openai_assistant/index.cjs +1 -1
- package/dist/experimental/openai_assistant/index.js +1 -1
- package/dist/memory/vector_store.cjs +11 -1
- package/dist/memory/vector_store.d.ts +8 -0
- package/dist/memory/vector_store.js +11 -1
- package/dist/retrievers/multi_query.cjs +24 -3
- package/dist/retrievers/multi_query.d.ts +6 -0
- package/dist/retrievers/multi_query.js +24 -3
- package/dist/retrievers/parent_document.cjs +20 -1
- package/dist/retrievers/parent_document.d.ts +6 -0
- package/dist/retrievers/parent_document.js +20 -1
- package/dist/retrievers/self_query/index.cjs +1 -1
- package/dist/retrievers/self_query/index.d.ts +1 -1
- package/dist/retrievers/self_query/index.js +1 -1
- package/dist/retrievers/self_query/pinecone.cjs +1 -1
- package/dist/retrievers/self_query/pinecone.d.ts +1 -1
- package/dist/retrievers/self_query/pinecone.js +1 -1
- package/dist/smith/runner_utils.cjs +18 -10
- package/dist/smith/runner_utils.js +18 -10
- package/package.json +1 -1
|
@@ -18,8 +18,7 @@ const output_parsers_1 = require("@langchain/core/output_parsers");
|
|
|
18
18
|
*
|
|
19
19
|
* import { ChatOpenAI } from "@langchain/openai";
|
|
20
20
|
* import { pull } from "langchain/hub";
|
|
21
|
-
* import {
|
|
22
|
-
* import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
|
|
21
|
+
* import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
|
|
23
22
|
*
|
|
24
23
|
* const rephrasePrompt = await pull("langchain-ai/chat-langchain-rephrase");
|
|
25
24
|
* const llm = new ChatOpenAI({});
|
|
@@ -35,8 +35,7 @@ export type CreateHistoryAwareRetrieverParams = {
|
|
|
35
35
|
*
|
|
36
36
|
* import { ChatOpenAI } from "@langchain/openai";
|
|
37
37
|
* import { pull } from "langchain/hub";
|
|
38
|
-
* import {
|
|
39
|
-
* import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
|
|
38
|
+
* import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
|
|
40
39
|
*
|
|
41
40
|
* const rephrasePrompt = await pull("langchain-ai/chat-langchain-rephrase");
|
|
42
41
|
* const llm = new ChatOpenAI({});
|
|
@@ -15,8 +15,7 @@ import { StringOutputParser } from "@langchain/core/output_parsers";
|
|
|
15
15
|
*
|
|
16
16
|
* import { ChatOpenAI } from "@langchain/openai";
|
|
17
17
|
* import { pull } from "langchain/hub";
|
|
18
|
-
* import {
|
|
19
|
-
* import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
|
|
18
|
+
* import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
|
|
20
19
|
*
|
|
21
20
|
* const rephrasePrompt = await pull("langchain-ai/chat-langchain-rephrase");
|
|
22
21
|
* const llm = new ChatOpenAI({});
|
|
@@ -262,12 +262,17 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
262
262
|
* @param gitmodulesContent the content of a .gitmodules file
|
|
263
263
|
*/
|
|
264
264
|
async parseGitmodules(gitmodulesContent) {
|
|
265
|
+
let validGitmodulesContent = gitmodulesContent;
|
|
266
|
+
// in case the .gitmodules file does not end with a newline, we add one to make the regex work
|
|
267
|
+
if (!validGitmodulesContent.endsWith("\n")) {
|
|
268
|
+
validGitmodulesContent += "\n";
|
|
269
|
+
}
|
|
265
270
|
// catches the initial line of submodule entries
|
|
266
271
|
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
|
|
267
272
|
// catches the properties of a submodule
|
|
268
273
|
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
|
|
269
274
|
const submoduleInfos = [];
|
|
270
|
-
for (const [, name, propertyLines] of
|
|
275
|
+
for (const [, name, propertyLines] of validGitmodulesContent.matchAll(submodulePattern)) {
|
|
271
276
|
if (!name || !propertyLines) {
|
|
272
277
|
throw new Error("Could not parse submodule entry");
|
|
273
278
|
}
|
|
@@ -256,12 +256,17 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
256
256
|
* @param gitmodulesContent the content of a .gitmodules file
|
|
257
257
|
*/
|
|
258
258
|
async parseGitmodules(gitmodulesContent) {
|
|
259
|
+
let validGitmodulesContent = gitmodulesContent;
|
|
260
|
+
// in case the .gitmodules file does not end with a newline, we add one to make the regex work
|
|
261
|
+
if (!validGitmodulesContent.endsWith("\n")) {
|
|
262
|
+
validGitmodulesContent += "\n";
|
|
263
|
+
}
|
|
259
264
|
// catches the initial line of submodule entries
|
|
260
265
|
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
|
|
261
266
|
// catches the properties of a submodule
|
|
262
267
|
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
|
|
263
268
|
const submoduleInfos = [];
|
|
264
|
-
for (const [, name, propertyLines] of
|
|
269
|
+
for (const [, name, propertyLines] of validGitmodulesContent.matchAll(submodulePattern)) {
|
|
265
270
|
if (!name || !propertyLines) {
|
|
266
271
|
throw new Error("Could not parse submodule entry");
|
|
267
272
|
}
|
|
@@ -210,7 +210,7 @@ class OpenAIAssistantRunnable extends runnables_1.Runnable {
|
|
|
210
210
|
const run = await this._waitForRun(runId, threadId);
|
|
211
211
|
if (run.status === "completed") {
|
|
212
212
|
const messages = await this.client.beta.threads.messages.list(threadId, {
|
|
213
|
-
order: "
|
|
213
|
+
order: "desc",
|
|
214
214
|
});
|
|
215
215
|
const newMessages = messages.data.filter((msg) => msg.run_id === runId);
|
|
216
216
|
if (!this.asAgent) {
|
|
@@ -207,7 +207,7 @@ export class OpenAIAssistantRunnable extends Runnable {
|
|
|
207
207
|
const run = await this._waitForRun(runId, threadId);
|
|
208
208
|
if (run.status === "completed") {
|
|
209
209
|
const messages = await this.client.beta.threads.messages.list(threadId, {
|
|
210
|
-
order: "
|
|
210
|
+
order: "desc",
|
|
211
211
|
});
|
|
212
212
|
const newMessages = messages.data.filter((msg) => msg.run_id === runId);
|
|
213
213
|
if (!this.asAgent) {
|
|
@@ -62,10 +62,17 @@ class VectorStoreRetrieverMemory extends memory_1.BaseMemory {
|
|
|
62
62
|
writable: true,
|
|
63
63
|
value: void 0
|
|
64
64
|
});
|
|
65
|
+
Object.defineProperty(this, "metadata", {
|
|
66
|
+
enumerable: true,
|
|
67
|
+
configurable: true,
|
|
68
|
+
writable: true,
|
|
69
|
+
value: void 0
|
|
70
|
+
});
|
|
65
71
|
this.vectorStoreRetriever = fields.vectorStoreRetriever;
|
|
66
72
|
this.inputKey = fields.inputKey;
|
|
67
73
|
this.memoryKey = fields.memoryKey ?? "memory";
|
|
68
74
|
this.returnDocs = fields.returnDocs ?? false;
|
|
75
|
+
this.metadata = fields.metadata;
|
|
69
76
|
}
|
|
70
77
|
get memoryKeys() {
|
|
71
78
|
return [this.memoryKey];
|
|
@@ -95,13 +102,16 @@ class VectorStoreRetrieverMemory extends memory_1.BaseMemory {
|
|
|
95
102
|
* @returns A Promise that resolves to void.
|
|
96
103
|
*/
|
|
97
104
|
async saveContext(inputValues, outputValues) {
|
|
105
|
+
const metadata = typeof this.metadata === "function"
|
|
106
|
+
? this.metadata(inputValues, outputValues)
|
|
107
|
+
: this.metadata;
|
|
98
108
|
const text = Object.entries(inputValues)
|
|
99
109
|
.filter(([k]) => k !== this.memoryKey)
|
|
100
110
|
.concat(Object.entries(outputValues))
|
|
101
111
|
.map(([k, v]) => `${k}: ${v}`)
|
|
102
112
|
.join("\n");
|
|
103
113
|
await this.vectorStoreRetriever.addDocuments([
|
|
104
|
-
new documents_1.Document({ pageContent: text }),
|
|
114
|
+
new documents_1.Document({ pageContent: text, metadata }),
|
|
105
115
|
]);
|
|
106
116
|
}
|
|
107
117
|
}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
|
|
2
2
|
import { BaseMemory, InputValues, MemoryVariables, OutputValues } from "@langchain/core/memory";
|
|
3
|
+
type Metadata = Record<string, unknown>;
|
|
4
|
+
type MetadataFunction = (inputValues?: InputValues, outputValues?: OutputValues) => Metadata;
|
|
3
5
|
/**
|
|
4
6
|
* Interface for the parameters required to initialize a
|
|
5
7
|
* VectorStoreRetrieverMemory instance.
|
|
@@ -10,6 +12,10 @@ export interface VectorStoreRetrieverMemoryParams {
|
|
|
10
12
|
outputKey?: string;
|
|
11
13
|
memoryKey?: string;
|
|
12
14
|
returnDocs?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Metadata to be added to the document when saving context.
|
|
17
|
+
*/
|
|
18
|
+
metadata?: Metadata | MetadataFunction;
|
|
13
19
|
}
|
|
14
20
|
/**
|
|
15
21
|
* Class for managing long-term memory in Large Language Model (LLM)
|
|
@@ -47,6 +53,7 @@ export declare class VectorStoreRetrieverMemory extends BaseMemory implements Ve
|
|
|
47
53
|
inputKey?: string;
|
|
48
54
|
memoryKey: string;
|
|
49
55
|
returnDocs: boolean;
|
|
56
|
+
metadata?: Metadata | MetadataFunction;
|
|
50
57
|
constructor(fields: VectorStoreRetrieverMemoryParams);
|
|
51
58
|
get memoryKeys(): string[];
|
|
52
59
|
/**
|
|
@@ -67,3 +74,4 @@ export declare class VectorStoreRetrieverMemory extends BaseMemory implements Ve
|
|
|
67
74
|
*/
|
|
68
75
|
saveContext(inputValues: InputValues, outputValues: OutputValues): Promise<void>;
|
|
69
76
|
}
|
|
77
|
+
export {};
|
|
@@ -59,10 +59,17 @@ export class VectorStoreRetrieverMemory extends BaseMemory {
|
|
|
59
59
|
writable: true,
|
|
60
60
|
value: void 0
|
|
61
61
|
});
|
|
62
|
+
Object.defineProperty(this, "metadata", {
|
|
63
|
+
enumerable: true,
|
|
64
|
+
configurable: true,
|
|
65
|
+
writable: true,
|
|
66
|
+
value: void 0
|
|
67
|
+
});
|
|
62
68
|
this.vectorStoreRetriever = fields.vectorStoreRetriever;
|
|
63
69
|
this.inputKey = fields.inputKey;
|
|
64
70
|
this.memoryKey = fields.memoryKey ?? "memory";
|
|
65
71
|
this.returnDocs = fields.returnDocs ?? false;
|
|
72
|
+
this.metadata = fields.metadata;
|
|
66
73
|
}
|
|
67
74
|
get memoryKeys() {
|
|
68
75
|
return [this.memoryKey];
|
|
@@ -92,13 +99,16 @@ export class VectorStoreRetrieverMemory extends BaseMemory {
|
|
|
92
99
|
* @returns A Promise that resolves to void.
|
|
93
100
|
*/
|
|
94
101
|
async saveContext(inputValues, outputValues) {
|
|
102
|
+
const metadata = typeof this.metadata === "function"
|
|
103
|
+
? this.metadata(inputValues, outputValues)
|
|
104
|
+
: this.metadata;
|
|
95
105
|
const text = Object.entries(inputValues)
|
|
96
106
|
.filter(([k]) => k !== this.memoryKey)
|
|
97
107
|
.concat(Object.entries(outputValues))
|
|
98
108
|
.map(([k, v]) => `${k}: ${v}`)
|
|
99
109
|
.join("\n");
|
|
100
110
|
await this.vectorStoreRetriever.addDocuments([
|
|
101
|
-
new Document({ pageContent: text }),
|
|
111
|
+
new Document({ pageContent: text, metadata }),
|
|
102
112
|
]);
|
|
103
113
|
}
|
|
104
114
|
}
|
|
@@ -103,10 +103,24 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
103
103
|
writable: true,
|
|
104
104
|
value: "lines"
|
|
105
105
|
});
|
|
106
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
107
|
+
enumerable: true,
|
|
108
|
+
configurable: true,
|
|
109
|
+
writable: true,
|
|
110
|
+
value: void 0
|
|
111
|
+
});
|
|
112
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
113
|
+
enumerable: true,
|
|
114
|
+
configurable: true,
|
|
115
|
+
writable: true,
|
|
116
|
+
value: void 0
|
|
117
|
+
});
|
|
106
118
|
this.retriever = fields.retriever;
|
|
107
119
|
this.llmChain = fields.llmChain;
|
|
108
120
|
this.queryCount = fields.queryCount ?? this.queryCount;
|
|
109
121
|
this.parserKey = fields.parserKey ?? this.parserKey;
|
|
122
|
+
this.documentCompressor = fields.documentCompressor;
|
|
123
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
110
124
|
}
|
|
111
125
|
static fromLLM(fields) {
|
|
112
126
|
const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
|
|
@@ -126,10 +140,10 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
126
140
|
// Retrieve documents using the original retriever
|
|
127
141
|
async _retrieveDocuments(queries, runManager) {
|
|
128
142
|
const documents = [];
|
|
129
|
-
|
|
143
|
+
await Promise.all(queries.map(async (query) => {
|
|
130
144
|
const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
|
|
131
145
|
documents.push(...docs);
|
|
132
|
-
}
|
|
146
|
+
}));
|
|
133
147
|
return documents;
|
|
134
148
|
}
|
|
135
149
|
// Deduplicate the documents that were returned in multiple retrievals
|
|
@@ -146,7 +160,14 @@ class MultiQueryRetriever extends retrievers_1.BaseRetriever {
|
|
|
146
160
|
const queries = await this._generateQueries(question, runManager);
|
|
147
161
|
const documents = await this._retrieveDocuments(queries, runManager);
|
|
148
162
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
149
|
-
|
|
163
|
+
let outputDocs = uniqueDocuments;
|
|
164
|
+
if (this.documentCompressor && uniqueDocuments.length) {
|
|
165
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
166
|
+
if (this.documentCompressorFilteringFn) {
|
|
167
|
+
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return outputDocs;
|
|
150
171
|
}
|
|
151
172
|
}
|
|
152
173
|
exports.MultiQueryRetriever = MultiQueryRetriever;
|
|
@@ -4,14 +4,18 @@ import { Document } from "@langchain/core/documents";
|
|
|
4
4
|
import { BasePromptTemplate } from "@langchain/core/prompts";
|
|
5
5
|
import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
|
|
6
6
|
import { LLMChain } from "../chains/llm_chain.js";
|
|
7
|
+
import type { BaseDocumentCompressor } from "./document_compressors/index.js";
|
|
7
8
|
interface LineList {
|
|
8
9
|
lines: string[];
|
|
9
10
|
}
|
|
11
|
+
export type MultiDocs = Document<Record<string, any>>[];
|
|
10
12
|
export interface MultiQueryRetrieverInput extends BaseRetrieverInput {
|
|
11
13
|
retriever: BaseRetrieverInterface;
|
|
12
14
|
llmChain: LLMChain<LineList>;
|
|
13
15
|
queryCount?: number;
|
|
14
16
|
parserKey?: string;
|
|
17
|
+
documentCompressor?: BaseDocumentCompressor | undefined;
|
|
18
|
+
documentCompressorFilteringFn?: (docs: MultiDocs) => MultiDocs;
|
|
15
19
|
}
|
|
16
20
|
/**
|
|
17
21
|
* @example
|
|
@@ -33,6 +37,8 @@ export declare class MultiQueryRetriever extends BaseRetriever {
|
|
|
33
37
|
private llmChain;
|
|
34
38
|
private queryCount;
|
|
35
39
|
private parserKey;
|
|
40
|
+
documentCompressor: BaseDocumentCompressor | undefined;
|
|
41
|
+
documentCompressorFilteringFn?: MultiQueryRetrieverInput["documentCompressorFilteringFn"];
|
|
36
42
|
constructor(fields: MultiQueryRetrieverInput);
|
|
37
43
|
static fromLLM(fields: Omit<MultiQueryRetrieverInput, "llmChain"> & {
|
|
38
44
|
llm: BaseLanguageModelInterface;
|
|
@@ -100,10 +100,24 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
100
100
|
writable: true,
|
|
101
101
|
value: "lines"
|
|
102
102
|
});
|
|
103
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
104
|
+
enumerable: true,
|
|
105
|
+
configurable: true,
|
|
106
|
+
writable: true,
|
|
107
|
+
value: void 0
|
|
108
|
+
});
|
|
109
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
110
|
+
enumerable: true,
|
|
111
|
+
configurable: true,
|
|
112
|
+
writable: true,
|
|
113
|
+
value: void 0
|
|
114
|
+
});
|
|
103
115
|
this.retriever = fields.retriever;
|
|
104
116
|
this.llmChain = fields.llmChain;
|
|
105
117
|
this.queryCount = fields.queryCount ?? this.queryCount;
|
|
106
118
|
this.parserKey = fields.parserKey ?? this.parserKey;
|
|
119
|
+
this.documentCompressor = fields.documentCompressor;
|
|
120
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
107
121
|
}
|
|
108
122
|
static fromLLM(fields) {
|
|
109
123
|
const { retriever, llm, prompt = DEFAULT_QUERY_PROMPT, queryCount, parserKey, ...rest } = fields;
|
|
@@ -123,10 +137,10 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
123
137
|
// Retrieve documents using the original retriever
|
|
124
138
|
async _retrieveDocuments(queries, runManager) {
|
|
125
139
|
const documents = [];
|
|
126
|
-
|
|
140
|
+
await Promise.all(queries.map(async (query) => {
|
|
127
141
|
const docs = await this.retriever.getRelevantDocuments(query, runManager?.getChild());
|
|
128
142
|
documents.push(...docs);
|
|
129
|
-
}
|
|
143
|
+
}));
|
|
130
144
|
return documents;
|
|
131
145
|
}
|
|
132
146
|
// Deduplicate the documents that were returned in multiple retrievals
|
|
@@ -143,6 +157,13 @@ export class MultiQueryRetriever extends BaseRetriever {
|
|
|
143
157
|
const queries = await this._generateQueries(question, runManager);
|
|
144
158
|
const documents = await this._retrieveDocuments(queries, runManager);
|
|
145
159
|
const uniqueDocuments = this._uniqueUnion(documents);
|
|
146
|
-
|
|
160
|
+
let outputDocs = uniqueDocuments;
|
|
161
|
+
if (this.documentCompressor && uniqueDocuments.length) {
|
|
162
|
+
outputDocs = await this.documentCompressor.compressDocuments(uniqueDocuments, question);
|
|
163
|
+
if (this.documentCompressorFilteringFn) {
|
|
164
|
+
outputDocs = this.documentCompressorFilteringFn(outputDocs);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
return outputDocs;
|
|
147
168
|
}
|
|
148
169
|
}
|
|
@@ -111,6 +111,18 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
111
111
|
writable: true,
|
|
112
112
|
value: void 0
|
|
113
113
|
});
|
|
114
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
115
|
+
enumerable: true,
|
|
116
|
+
configurable: true,
|
|
117
|
+
writable: true,
|
|
118
|
+
value: void 0
|
|
119
|
+
});
|
|
120
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
121
|
+
enumerable: true,
|
|
122
|
+
configurable: true,
|
|
123
|
+
writable: true,
|
|
124
|
+
value: void 0
|
|
125
|
+
});
|
|
114
126
|
this.vectorstore = fields.vectorstore;
|
|
115
127
|
this.childSplitter = fields.childSplitter;
|
|
116
128
|
this.parentSplitter = fields.parentSplitter;
|
|
@@ -118,9 +130,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
118
130
|
this.childK = fields.childK;
|
|
119
131
|
this.parentK = fields.parentK;
|
|
120
132
|
this.childDocumentRetriever = fields.childDocumentRetriever;
|
|
133
|
+
this.documentCompressor = fields.documentCompressor;
|
|
134
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
121
135
|
}
|
|
122
136
|
async _getRelevantDocuments(query) {
|
|
123
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
124
137
|
let subDocs = [];
|
|
125
138
|
if (this.childDocumentRetriever) {
|
|
126
139
|
subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
|
|
@@ -128,6 +141,12 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
128
141
|
else {
|
|
129
142
|
subDocs = await this.vectorstore.similaritySearch(query, this.childK);
|
|
130
143
|
}
|
|
144
|
+
if (this.documentCompressor && subDocs.length) {
|
|
145
|
+
subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
|
|
146
|
+
if (this.documentCompressorFilteringFn) {
|
|
147
|
+
subDocs = this.documentCompressorFilteringFn(subDocs);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
131
150
|
// Maintain order
|
|
132
151
|
const parentDocIds = [];
|
|
133
152
|
for (const doc of subDocs) {
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { type VectorStoreInterface, type VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
|
|
2
2
|
import { Document } from "@langchain/core/documents";
|
|
3
|
+
import type { BaseDocumentCompressor } from "./document_compressors/index.js";
|
|
3
4
|
import { TextSplitter, TextSplitterChunkHeaderOptions } from "../text_splitter.js";
|
|
4
5
|
import { MultiVectorRetriever, type MultiVectorRetrieverInput } from "./multi_vector.js";
|
|
6
|
+
export type SubDocs = Document<Record<string, any>>[];
|
|
5
7
|
/**
|
|
6
8
|
* Interface for the fields required to initialize a
|
|
7
9
|
* ParentDocumentRetriever instance.
|
|
@@ -14,6 +16,8 @@ export type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & {
|
|
|
14
16
|
* the `.similaritySearch` method of the vectorstore.
|
|
15
17
|
*/
|
|
16
18
|
childDocumentRetriever?: VectorStoreRetrieverInterface<VectorStoreInterface>;
|
|
19
|
+
documentCompressor?: BaseDocumentCompressor | undefined;
|
|
20
|
+
documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs;
|
|
17
21
|
};
|
|
18
22
|
/**
|
|
19
23
|
* A type of document retriever that splits input documents into smaller chunks
|
|
@@ -55,6 +59,8 @@ export declare class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
55
59
|
protected childK?: number;
|
|
56
60
|
protected parentK?: number;
|
|
57
61
|
childDocumentRetriever: VectorStoreRetrieverInterface<VectorStoreInterface> | undefined;
|
|
62
|
+
documentCompressor: BaseDocumentCompressor | undefined;
|
|
63
|
+
documentCompressorFilteringFn?: ParentDocumentRetrieverFields["documentCompressorFilteringFn"];
|
|
58
64
|
constructor(fields: ParentDocumentRetrieverFields);
|
|
59
65
|
_getRelevantDocuments(query: string): Promise<Document[]>;
|
|
60
66
|
/**
|
|
@@ -85,6 +85,18 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
85
85
|
writable: true,
|
|
86
86
|
value: void 0
|
|
87
87
|
});
|
|
88
|
+
Object.defineProperty(this, "documentCompressor", {
|
|
89
|
+
enumerable: true,
|
|
90
|
+
configurable: true,
|
|
91
|
+
writable: true,
|
|
92
|
+
value: void 0
|
|
93
|
+
});
|
|
94
|
+
Object.defineProperty(this, "documentCompressorFilteringFn", {
|
|
95
|
+
enumerable: true,
|
|
96
|
+
configurable: true,
|
|
97
|
+
writable: true,
|
|
98
|
+
value: void 0
|
|
99
|
+
});
|
|
88
100
|
this.vectorstore = fields.vectorstore;
|
|
89
101
|
this.childSplitter = fields.childSplitter;
|
|
90
102
|
this.parentSplitter = fields.parentSplitter;
|
|
@@ -92,9 +104,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
92
104
|
this.childK = fields.childK;
|
|
93
105
|
this.parentK = fields.parentK;
|
|
94
106
|
this.childDocumentRetriever = fields.childDocumentRetriever;
|
|
107
|
+
this.documentCompressor = fields.documentCompressor;
|
|
108
|
+
this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
|
|
95
109
|
}
|
|
96
110
|
async _getRelevantDocuments(query) {
|
|
97
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
98
111
|
let subDocs = [];
|
|
99
112
|
if (this.childDocumentRetriever) {
|
|
100
113
|
subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
|
|
@@ -102,6 +115,12 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
|
|
|
102
115
|
else {
|
|
103
116
|
subDocs = await this.vectorstore.similaritySearch(query, this.childK);
|
|
104
117
|
}
|
|
118
|
+
if (this.documentCompressor && subDocs.length) {
|
|
119
|
+
subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
|
|
120
|
+
if (this.documentCompressorFilteringFn) {
|
|
121
|
+
subDocs = this.documentCompressorFilteringFn(subDocs);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
105
124
|
// Maintain order
|
|
106
125
|
const parentDocIds = [];
|
|
107
126
|
for (const doc of subDocs) {
|
|
@@ -14,7 +14,7 @@ Object.defineProperty(exports, "BasicTranslator", { enumerable: true, get: funct
|
|
|
14
14
|
* implements the SelfQueryRetrieverArgs interface.
|
|
15
15
|
* @example
|
|
16
16
|
* ```typescript
|
|
17
|
-
* const selfQueryRetriever =
|
|
17
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
18
18
|
* llm: new ChatOpenAI(),
|
|
19
19
|
* vectorStore: await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()),
|
|
20
20
|
* documentContents: "Brief summary of a movie",
|
|
@@ -30,7 +30,7 @@ export interface SelfQueryRetrieverArgs<T extends VectorStore> extends BaseRetri
|
|
|
30
30
|
* implements the SelfQueryRetrieverArgs interface.
|
|
31
31
|
* @example
|
|
32
32
|
* ```typescript
|
|
33
|
-
* const selfQueryRetriever =
|
|
33
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
34
34
|
* llm: new ChatOpenAI(),
|
|
35
35
|
* vectorStore: await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()),
|
|
36
36
|
* documentContents: "Brief summary of a movie",
|
|
@@ -9,7 +9,7 @@ export { BaseTranslator, BasicTranslator, FunctionalTranslator };
|
|
|
9
9
|
* implements the SelfQueryRetrieverArgs interface.
|
|
10
10
|
* @example
|
|
11
11
|
* ```typescript
|
|
12
|
-
* const selfQueryRetriever =
|
|
12
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
13
13
|
* llm: new ChatOpenAI(),
|
|
14
14
|
* vectorStore: await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()),
|
|
15
15
|
* documentContents: "Brief summary of a movie",
|
|
@@ -11,7 +11,7 @@ const base_js_1 = require("./base.cjs");
|
|
|
11
11
|
* queries and compare results.
|
|
12
12
|
* @example
|
|
13
13
|
* ```typescript
|
|
14
|
-
* const selfQueryRetriever =
|
|
14
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
15
15
|
* llm: new ChatOpenAI(),
|
|
16
16
|
* vectorStore: new PineconeStore(),
|
|
17
17
|
* documentContents: "Brief summary of a movie",
|
|
@@ -8,7 +8,7 @@ import { BasicTranslator } from "./base.js";
|
|
|
8
8
|
* queries and compare results.
|
|
9
9
|
* @example
|
|
10
10
|
* ```typescript
|
|
11
|
-
* const selfQueryRetriever =
|
|
11
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
12
12
|
* llm: new ChatOpenAI(),
|
|
13
13
|
* vectorStore: new PineconeStore(),
|
|
14
14
|
* documentContents: "Brief summary of a movie",
|
|
@@ -8,7 +8,7 @@ import { BasicTranslator } from "./base.js";
|
|
|
8
8
|
* queries and compare results.
|
|
9
9
|
* @example
|
|
10
10
|
* ```typescript
|
|
11
|
-
* const selfQueryRetriever =
|
|
11
|
+
* const selfQueryRetriever = SelfQueryRetriever.fromLLM({
|
|
12
12
|
* llm: new ChatOpenAI(),
|
|
13
13
|
* vectorStore: new PineconeStore(),
|
|
14
14
|
* documentContents: "Brief summary of a movie",
|
|
@@ -5,6 +5,7 @@ const messages_1 = require("@langchain/core/messages");
|
|
|
5
5
|
const runnables_1 = require("@langchain/core/runnables");
|
|
6
6
|
const tracer_langchain_1 = require("@langchain/core/tracers/tracer_langchain");
|
|
7
7
|
const base_1 = require("@langchain/core/tracers/base");
|
|
8
|
+
const async_caller_1 = require("@langchain/core/utils/async_caller");
|
|
8
9
|
const langsmith_1 = require("langsmith");
|
|
9
10
|
const loader_js_1 = require("../evaluation/loader.cjs");
|
|
10
11
|
const config_js_1 = require("./config.cjs");
|
|
@@ -403,31 +404,37 @@ const loadExamples = async ({ datasetName, client, projectName, }) => {
|
|
|
403
404
|
runExtractors,
|
|
404
405
|
};
|
|
405
406
|
};
|
|
406
|
-
const applyEvaluators = async ({ evaluation, runs, examples, client, }) => {
|
|
407
|
+
const applyEvaluators = async ({ evaluation, runs, examples, client, maxConcurrency, }) => {
|
|
407
408
|
// TODO: Parallelize and/or put in callbacks to speed up evals.
|
|
408
409
|
const { evaluators } = evaluation;
|
|
409
410
|
const progress = new progress_js_1.ProgressBar({
|
|
410
411
|
total: examples.length,
|
|
411
412
|
format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n",
|
|
412
413
|
});
|
|
413
|
-
const
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
414
|
+
const caller = new async_caller_1.AsyncCaller({
|
|
415
|
+
maxConcurrency,
|
|
416
|
+
});
|
|
417
|
+
const requests = runs.map(async (run, i) => caller.call(async () => {
|
|
417
418
|
const evaluatorResults = await Promise.allSettled(evaluators.map((evaluator) => client.evaluateRun(run, evaluator, {
|
|
418
|
-
referenceExample:
|
|
419
|
+
referenceExample: examples[i],
|
|
419
420
|
loadChildRuns: false,
|
|
420
421
|
})));
|
|
421
422
|
progress.increment();
|
|
422
|
-
|
|
423
|
+
return {
|
|
423
424
|
execution_time: run?.end_time && run.start_time
|
|
424
425
|
? run.end_time - run.start_time
|
|
425
426
|
: undefined,
|
|
426
|
-
feedback: evaluatorResults.map((evalResult) => evalResult.status === "fulfilled"
|
|
427
|
+
feedback: evaluatorResults.map((evalResult) => evalResult.status === "fulfilled"
|
|
428
|
+
? evalResult.value
|
|
429
|
+
: evalResult.reason),
|
|
427
430
|
run_id: run.id,
|
|
428
431
|
};
|
|
429
|
-
}
|
|
430
|
-
|
|
432
|
+
}));
|
|
433
|
+
const results = await Promise.all(requests);
|
|
434
|
+
return results.reduce((acc, result, i) => ({
|
|
435
|
+
...acc,
|
|
436
|
+
[examples[i].id]: result,
|
|
437
|
+
}), {});
|
|
431
438
|
};
|
|
432
439
|
const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
433
440
|
if (dataType === "chat") {
|
|
@@ -553,6 +560,7 @@ async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
|
553
560
|
runs,
|
|
554
561
|
examples,
|
|
555
562
|
client: testClient,
|
|
563
|
+
maxConcurrency: testConcurrency,
|
|
556
564
|
});
|
|
557
565
|
}
|
|
558
566
|
const results = {
|
|
@@ -2,6 +2,7 @@ import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
|
|
|
2
2
|
import { Runnable, RunnableLambda, getCallbackManagerForConfig, } from "@langchain/core/runnables";
|
|
3
3
|
import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
|
|
4
4
|
import { BaseTracer } from "@langchain/core/tracers/base";
|
|
5
|
+
import { AsyncCaller } from "@langchain/core/utils/async_caller";
|
|
5
6
|
import { Client, RunTree, } from "langsmith";
|
|
6
7
|
import { loadEvaluator } from "../evaluation/loader.js";
|
|
7
8
|
import { isOffTheShelfEvaluator, isCustomEvaluator, } from "./config.js";
|
|
@@ -400,31 +401,37 @@ const loadExamples = async ({ datasetName, client, projectName, }) => {
|
|
|
400
401
|
runExtractors,
|
|
401
402
|
};
|
|
402
403
|
};
|
|
403
|
-
const applyEvaluators = async ({ evaluation, runs, examples, client, }) => {
|
|
404
|
+
const applyEvaluators = async ({ evaluation, runs, examples, client, maxConcurrency, }) => {
|
|
404
405
|
// TODO: Parallelize and/or put in callbacks to speed up evals.
|
|
405
406
|
const { evaluators } = evaluation;
|
|
406
407
|
const progress = new ProgressBar({
|
|
407
408
|
total: examples.length,
|
|
408
409
|
format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n",
|
|
409
410
|
});
|
|
410
|
-
const
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
411
|
+
const caller = new AsyncCaller({
|
|
412
|
+
maxConcurrency,
|
|
413
|
+
});
|
|
414
|
+
const requests = runs.map(async (run, i) => caller.call(async () => {
|
|
414
415
|
const evaluatorResults = await Promise.allSettled(evaluators.map((evaluator) => client.evaluateRun(run, evaluator, {
|
|
415
|
-
referenceExample:
|
|
416
|
+
referenceExample: examples[i],
|
|
416
417
|
loadChildRuns: false,
|
|
417
418
|
})));
|
|
418
419
|
progress.increment();
|
|
419
|
-
|
|
420
|
+
return {
|
|
420
421
|
execution_time: run?.end_time && run.start_time
|
|
421
422
|
? run.end_time - run.start_time
|
|
422
423
|
: undefined,
|
|
423
|
-
feedback: evaluatorResults.map((evalResult) => evalResult.status === "fulfilled"
|
|
424
|
+
feedback: evaluatorResults.map((evalResult) => evalResult.status === "fulfilled"
|
|
425
|
+
? evalResult.value
|
|
426
|
+
: evalResult.reason),
|
|
424
427
|
run_id: run.id,
|
|
425
428
|
};
|
|
426
|
-
}
|
|
427
|
-
|
|
429
|
+
}));
|
|
430
|
+
const results = await Promise.all(requests);
|
|
431
|
+
return results.reduce((acc, result, i) => ({
|
|
432
|
+
...acc,
|
|
433
|
+
[examples[i].id]: result,
|
|
434
|
+
}), {});
|
|
428
435
|
};
|
|
429
436
|
const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
430
437
|
if (dataType === "chat") {
|
|
@@ -550,6 +557,7 @@ export async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
|
550
557
|
runs,
|
|
551
558
|
examples,
|
|
552
559
|
client: testClient,
|
|
560
|
+
maxConcurrency: testConcurrency,
|
|
553
561
|
});
|
|
554
562
|
}
|
|
555
563
|
const results = {
|