langchain 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/toolkits/conversational_retrieval/openai_functions.cjs +3 -1
- package/dist/agents/toolkits/conversational_retrieval/openai_functions.d.ts +1 -0
- package/dist/agents/toolkits/conversational_retrieval/openai_functions.js +3 -1
- package/dist/indexes/index.cjs +5 -0
- package/dist/indexes/index.d.ts +1 -0
- package/dist/indexes/index.js +1 -0
- package/dist/indexes/indexing.cjs +265 -0
- package/dist/indexes/indexing.d.ts +75 -0
- package/dist/indexes/indexing.js +261 -0
- package/dist/load/import_map.cjs +2 -1
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/smith/config.d.ts +72 -27
- package/dist/smith/index.cjs +15 -0
- package/dist/smith/index.d.ts +3 -3
- package/dist/smith/index.js +2 -1
- package/dist/smith/runner_utils.cjs +11 -4
- package/dist/smith/runner_utils.d.ts +4 -4
- package/dist/smith/runner_utils.js +12 -5
- package/indexes.cjs +1 -0
- package/indexes.d.ts +1 -0
- package/indexes.js +1 -0
- package/package.json +10 -2
package/dist/agents/toolkits/conversational_retrieval/openai_functions.cjs
CHANGED
@@ -15,13 +15,14 @@ const token_buffer_memory_js_1 = require("./token_buffer_memory.cjs");
  * @returns A Promise that resolves to an initialized AgentExecutor.
  */
 async function createConversationalRetrievalAgent(llm, tools, options) {
-    const { rememberIntermediateSteps = true, memoryKey = "chat_history", outputKey = "output", prefix, verbose, } = options ?? {};
+    const { rememberIntermediateSteps = true, memoryKey = "chat_history", outputKey = "output", inputKey = "input", prefix, verbose, } = options ?? {};
     let memory;
     if (rememberIntermediateSteps) {
         memory = new token_buffer_memory_js_1.OpenAIAgentTokenBufferMemory({
             memoryKey,
             llm,
             outputKey,
+            inputKey,
         });
     }
     else {
@@ -31,6 +32,7 @@ async function createConversationalRetrievalAgent(llm, tools, options) {
             maxTokenLimit: 12000,
             returnMessages: true,
             outputKey,
+            inputKey,
         });
     }
     const executor = await (0, initialize_js_1.initializeAgentExecutorWithOptions)(tools, llm, {
package/dist/agents/toolkits/conversational_retrieval/openai_functions.js
CHANGED
@@ -12,13 +12,14 @@ import { OpenAIAgentTokenBufferMemory } from "./token_buffer_memory.js";
  * @returns A Promise that resolves to an initialized AgentExecutor.
  */
 export async function createConversationalRetrievalAgent(llm, tools, options) {
-    const { rememberIntermediateSteps = true, memoryKey = "chat_history", outputKey = "output", prefix, verbose, } = options ?? {};
+    const { rememberIntermediateSteps = true, memoryKey = "chat_history", outputKey = "output", inputKey = "input", prefix, verbose, } = options ?? {};
     let memory;
     if (rememberIntermediateSteps) {
         memory = new OpenAIAgentTokenBufferMemory({
            memoryKey,
             llm,
             outputKey,
+            inputKey,
         });
     }
     else {
@@ -28,6 +29,7 @@ export async function createConversationalRetrievalAgent(llm, tools, options) {
             maxTokenLimit: 12000,
             returnMessages: true,
             outputKey,
+            inputKey,
         });
     }
     const executor = await initializeAgentExecutorWithOptions(tools, llm, {
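The change above threads a new `inputKey` option (defaulting to `"input"`) through to the agent's memory in both module formats. A minimal sketch of how a consumer might pass it; the model choice, the empty tools array, and the key name `"question"` are illustrative assumptions, not part of this diff:

    import { ChatOpenAI } from "@langchain/openai";
    import { createConversationalRetrievalAgent } from "langchain/agents/toolkits";

    const llm = new ChatOpenAI({ temperature: 0 });
    // Tools (e.g. a retriever tool) omitted for brevity.
    const executor = await createConversationalRetrievalAgent(llm, [], {
      // New in 0.1.7: forwarded to OpenAIAgentTokenBufferMemory / BufferMemory,
      // useful when the chain's input key is not the default "input".
      inputKey: "question",
    });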
package/dist/indexes/index.d.ts
ADDED
@@ -0,0 +1 @@
+export { type CleanupMode, type IndexOptions, index } from "./indexing.js";
package/dist/indexes/index.js
ADDED
@@ -0,0 +1 @@
+export { index } from "./indexing.js";
package/dist/indexes/indexing.cjs
ADDED
@@ -0,0 +1,265 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.index = void 0;
+const uuid_1 = require("uuid");
+const base_1 = require("@langchain/community/indexes/base");
+const hash_1 = require("@langchain/core/utils/hash");
+const document_js_1 = require("../document.cjs");
+/**
+ * HashedDocument is a Document with hashes calculated.
+ * Hashes are calculated based on page content and metadata.
+ * It is used for indexing.
+ */
+class HashedDocument {
+    constructor(fields) {
+        Object.defineProperty(this, "uid", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "hash_", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contentHash", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "metadataHash", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "pageContent", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "metadata", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.uid = fields.uid;
+        this.pageContent = fields.pageContent;
+        this.metadata = fields.metadata;
+    }
+    calculateHashes() {
+        const forbiddenKeys = ["hash_", "content_hash", "metadata_hash"];
+        for (const key of forbiddenKeys) {
+            if (key in this.metadata) {
+                throw new Error(`Metadata cannot contain key ${key} as it is reserved for internal use. Restricted keys: [${forbiddenKeys.join(", ")}]`);
+            }
+        }
+        const contentHash = this._hashStringToUUID(this.pageContent);
+        try {
+            const metadataHash = this._hashNestedDictToUUID(this.metadata);
+            this.contentHash = contentHash;
+            this.metadataHash = metadataHash;
+        }
+        catch (e) {
+            throw new Error(`Failed to hash metadata: ${e}. Please use a dict that can be serialized using json.`);
+        }
+        this.hash_ = this._hashStringToUUID(this.contentHash + this.metadataHash);
+        if (!this.uid) {
+            this.uid = this.hash_;
+        }
+    }
+    toDocument() {
+        return new document_js_1.Document({
+            pageContent: this.pageContent,
+            metadata: this.metadata,
+        });
+    }
+    static fromDocument(document, uid) {
+        const doc = new this({
+            pageContent: document.pageContent,
+            metadata: document.metadata,
+            uid: uid || document.uid,
+        });
+        doc.calculateHashes();
+        return doc;
+    }
+    _hashStringToUUID(inputString) {
+        const hash_value = (0, hash_1.insecureHash)(inputString);
+        return (0, uuid_1.v5)(hash_value, base_1.UUIDV5_NAMESPACE);
+    }
+    _hashNestedDictToUUID(data) {
+        const serialized_data = JSON.stringify(data, Object.keys(data).sort());
+        const hash_value = (0, hash_1.insecureHash)(serialized_data);
+        return (0, uuid_1.v5)(hash_value, base_1.UUIDV5_NAMESPACE);
+    }
+}
+function batch(size, iterable) {
+    const batches = [];
+    let currentBatch = [];
+    iterable.forEach((item) => {
+        currentBatch.push(item);
+        if (currentBatch.length >= size) {
+            batches.push(currentBatch);
+            currentBatch = [];
+        }
+    });
+    if (currentBatch.length > 0) {
+        batches.push(currentBatch);
+    }
+    return batches;
+}
+function deduplicateInOrder(hashedDocuments) {
+    const seen = new Set();
+    const deduplicated = [];
+    for (const hashedDoc of hashedDocuments) {
+        if (!hashedDoc.hash_) {
+            throw new Error("Hashed document does not have a hash");
+        }
+        if (!seen.has(hashedDoc.hash_)) {
+            seen.add(hashedDoc.hash_);
+            deduplicated.push(hashedDoc);
+        }
+    }
+    return deduplicated;
+}
+function getSourceIdAssigner(sourceIdKey) {
+    if (sourceIdKey === null) {
+        return (_doc) => null;
+    }
+    else if (typeof sourceIdKey === "string") {
+        return (doc) => doc.metadata[sourceIdKey];
+    }
+    else if (typeof sourceIdKey === "function") {
+        return sourceIdKey;
+    }
+    else {
+        throw new Error(`sourceIdKey should be null, a string or a function, got ${typeof sourceIdKey}`);
+    }
+}
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const _isBaseDocumentLoader = (arg) => {
+    if ("load" in arg &&
+        typeof arg.load === "function" &&
+        "loadAndSplit" in arg &&
+        typeof arg.loadAndSplit === "function") {
+        return true;
+    }
+    return false;
+};
+/**
+ * Index data from the doc source into the vector store.
+ *
+ * Indexing functionality uses a manager to keep track of which documents
+ * are in the vector store.
+ *
+ * This allows us to keep track of which documents were updated, which
+ * documents were deleted, and which documents should be skipped.
+ *
+ * For the time being, documents are indexed using their hashes, and users
+ * are not able to specify the uid of the document.
+ *
+ * @param {IndexArgs} args
+ * @param {BaseDocumentLoader | DocumentInterface[]} args.docsSource The source of documents to index. Can be a DocumentLoader or a list of Documents.
+ * @param {RecordManagerInterface} args.recordManager The record manager to use for keeping track of indexed documents.
+ * @param {VectorStore} args.vectorStore The vector store to use for storing the documents.
+ * @param {IndexOptions | undefined} args.options Options for indexing.
+ * @returns {Promise<IndexingResult>}
+ */
+async function index(args) {
+    const { docsSource, recordManager, vectorStore, options } = args;
+    const { batchSize = 100, cleanup, sourceIdKey, cleanupBatchSize = 1000, forceUpdate = false, } = options ?? {};
+    if (cleanup === "incremental" && !sourceIdKey) {
+        throw new Error("sourceIdKey is required when cleanup mode is incremental. Please provide through 'options.sourceIdKey'.");
+    }
+    const docs = _isBaseDocumentLoader(docsSource)
+        ? await docsSource.load()
+        : docsSource;
+    const sourceIdAssigner = getSourceIdAssigner(sourceIdKey ?? null);
+    const indexStartDt = await recordManager.getTime();
+    let numAdded = 0;
+    let numDeleted = 0;
+    let numUpdated = 0;
+    let numSkipped = 0;
+    const batches = batch(batchSize ?? 100, docs);
+    for (const batch of batches) {
+        const hashedDocs = deduplicateInOrder(batch.map((doc) => HashedDocument.fromDocument(doc)));
+        const sourceIds = hashedDocs.map((doc) => sourceIdAssigner(doc));
+        if (cleanup === "incremental") {
+            hashedDocs.forEach((_hashedDoc, index) => {
+                const source = sourceIds[index];
+                if (source === null) {
+                    throw new Error("sourceIdKey must be provided when cleanup is incremental");
+                }
+            });
+        }
+        const batchExists = await recordManager.exists(hashedDocs.map((doc) => doc.uid));
+        const uids = [];
+        const docsToIndex = [];
+        const docsToUpdate = [];
+        const seenDocs = new Set();
+        hashedDocs.forEach((hashedDoc, i) => {
+            const docExists = batchExists[i];
+            if (docExists) {
+                if (forceUpdate) {
+                    seenDocs.add(hashedDoc.uid);
+                }
+                else {
+                    docsToUpdate.push(hashedDoc.uid);
+                    return;
+                }
+            }
+            uids.push(hashedDoc.uid);
+            docsToIndex.push(hashedDoc.toDocument());
+        });
+        if (docsToUpdate.length > 0) {
+            await recordManager.update(docsToUpdate, { timeAtLeast: indexStartDt });
+            numSkipped += docsToUpdate.length;
+        }
+        if (docsToIndex.length > 0) {
+            await vectorStore.addDocuments(docsToIndex, { ids: uids });
+            numAdded += docsToIndex.length - seenDocs.size;
+            numUpdated += seenDocs.size;
+        }
+        await recordManager.update(hashedDocs.map((doc) => doc.uid), { timeAtLeast: indexStartDt, groupIds: sourceIds });
+        if (cleanup === "incremental") {
+            sourceIds.forEach((sourceId) => {
+                if (!sourceId)
+                    throw new Error("Source id cannot be null");
+            });
+            const uidsToDelete = await recordManager.listKeys({
+                before: indexStartDt,
+                groupIds: sourceIds,
+            });
+            await vectorStore.delete({ ids: uidsToDelete });
+            await recordManager.deleteKeys(uidsToDelete);
+            numDeleted += uidsToDelete.length;
+        }
+    }
+    if (cleanup === "full") {
+        let uidsToDelete = await recordManager.listKeys({
+            before: indexStartDt,
+            limit: cleanupBatchSize,
+        });
+        while (uidsToDelete.length > 0) {
+            await vectorStore.delete({ ids: uidsToDelete });
+            await recordManager.deleteKeys(uidsToDelete);
+            numDeleted += uidsToDelete.length;
+            uidsToDelete = await recordManager.listKeys({
+                before: indexStartDt,
+                limit: cleanupBatchSize,
+            });
+        }
+    }
+    return {
+        numAdded,
+        numDeleted,
+        numUpdated,
+        numSkipped,
+    };
+}
+exports.index = index;
package/dist/indexes/indexing.d.ts
ADDED
@@ -0,0 +1,75 @@
+import { VectorStore } from "@langchain/core/vectorstores";
+import { RecordManagerInterface } from "@langchain/community/indexes/base";
+import { DocumentInterface } from "@langchain/core/documents";
+import { BaseDocumentLoader } from "../document_loaders/base.js";
+type IndexingResult = {
+    numAdded: number;
+    numDeleted: number;
+    numUpdated: number;
+    numSkipped: number;
+};
+type StringOrDocFunc = string | ((doc: DocumentInterface) => string);
+export type CleanupMode = "full" | "incremental";
+export type IndexOptions = {
+    /**
+     * The number of documents to index in one batch.
+     */
+    batchSize?: number;
+    /**
+     * The cleanup mode to use. Can be "full", "incremental" or undefined.
+     * - **Incremental**: Cleans up all documents that haven't been updated AND
+     *   that are associated with source ids that were seen
+     *   during indexing.
+     *   Clean up is done continuously during indexing helping
+     *   to minimize the probability of users seeing duplicated
+     *   content.
+     * - **Full**: Delete all documents that haven't been returned by the loader.
+     *   Clean up runs after all documents have been indexed.
+     *   This means that users may see duplicated content during indexing.
+     * - **undefined**: Do not delete any documents.
+     */
+    cleanup?: CleanupMode;
+    /**
+     * Optional key that helps identify the original source of the document.
+     * Must either be a string representing the key of the source in the metadata
+     * or a function that takes a document and returns a string representing the source.
+     * **Required when cleanup is incremental**.
+     */
+    sourceIdKey?: StringOrDocFunc;
+    /**
+     * Batch size to use when cleaning up documents.
+     */
+    cleanupBatchSize?: number;
+    /**
+     * Force update documents even if they are present in the
+     * record manager. Useful if you are re-indexing with updated embeddings.
+     */
+    forceUpdate?: boolean;
+};
+interface IndexArgs {
+    docsSource: BaseDocumentLoader | DocumentInterface[];
+    recordManager: RecordManagerInterface;
+    vectorStore: VectorStore;
+    options?: IndexOptions;
+}
+/**
+ * Index data from the doc source into the vector store.
+ *
+ * Indexing functionality uses a manager to keep track of which documents
+ * are in the vector store.
+ *
+ * This allows us to keep track of which documents were updated, which
+ * documents were deleted, and which documents should be skipped.
+ *
+ * For the time being, documents are indexed using their hashes, and users
+ * are not able to specify the uid of the document.
+ *
+ * @param {IndexArgs} args
+ * @param {BaseDocumentLoader | DocumentInterface[]} args.docsSource The source of documents to index. Can be a DocumentLoader or a list of Documents.
+ * @param {RecordManagerInterface} args.recordManager The record manager to use for keeping track of indexed documents.
+ * @param {VectorStore} args.vectorStore The vector store to use for storing the documents.
+ * @param {IndexOptions | undefined} args.options Options for indexing.
+ * @returns {Promise<IndexingResult>}
+ */
+export declare function index(args: IndexArgs): Promise<IndexingResult>;
+export {};
package/dist/indexes/indexing.js
ADDED
@@ -0,0 +1,261 @@
+import { v5 as uuidv5 } from "uuid";
+import { UUIDV5_NAMESPACE, } from "@langchain/community/indexes/base";
+import { insecureHash } from "@langchain/core/utils/hash";
+import { Document } from "../document.js";
+/**
+ * HashedDocument is a Document with hashes calculated.
+ * Hashes are calculated based on page content and metadata.
+ * It is used for indexing.
+ */
+class HashedDocument {
+    constructor(fields) {
+        Object.defineProperty(this, "uid", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "hash_", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contentHash", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "metadataHash", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "pageContent", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "metadata", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.uid = fields.uid;
+        this.pageContent = fields.pageContent;
+        this.metadata = fields.metadata;
+    }
+    calculateHashes() {
+        const forbiddenKeys = ["hash_", "content_hash", "metadata_hash"];
+        for (const key of forbiddenKeys) {
+            if (key in this.metadata) {
+                throw new Error(`Metadata cannot contain key ${key} as it is reserved for internal use. Restricted keys: [${forbiddenKeys.join(", ")}]`);
+            }
+        }
+        const contentHash = this._hashStringToUUID(this.pageContent);
+        try {
+            const metadataHash = this._hashNestedDictToUUID(this.metadata);
+            this.contentHash = contentHash;
+            this.metadataHash = metadataHash;
+        }
+        catch (e) {
+            throw new Error(`Failed to hash metadata: ${e}. Please use a dict that can be serialized using json.`);
+        }
+        this.hash_ = this._hashStringToUUID(this.contentHash + this.metadataHash);
+        if (!this.uid) {
+            this.uid = this.hash_;
+        }
+    }
+    toDocument() {
+        return new Document({
+            pageContent: this.pageContent,
+            metadata: this.metadata,
+        });
+    }
+    static fromDocument(document, uid) {
+        const doc = new this({
+            pageContent: document.pageContent,
+            metadata: document.metadata,
+            uid: uid || document.uid,
+        });
+        doc.calculateHashes();
+        return doc;
+    }
+    _hashStringToUUID(inputString) {
+        const hash_value = insecureHash(inputString);
+        return uuidv5(hash_value, UUIDV5_NAMESPACE);
+    }
+    _hashNestedDictToUUID(data) {
+        const serialized_data = JSON.stringify(data, Object.keys(data).sort());
+        const hash_value = insecureHash(serialized_data);
+        return uuidv5(hash_value, UUIDV5_NAMESPACE);
+    }
+}
+function batch(size, iterable) {
+    const batches = [];
+    let currentBatch = [];
+    iterable.forEach((item) => {
+        currentBatch.push(item);
+        if (currentBatch.length >= size) {
+            batches.push(currentBatch);
+            currentBatch = [];
+        }
+    });
+    if (currentBatch.length > 0) {
+        batches.push(currentBatch);
+    }
+    return batches;
+}
+function deduplicateInOrder(hashedDocuments) {
+    const seen = new Set();
+    const deduplicated = [];
+    for (const hashedDoc of hashedDocuments) {
+        if (!hashedDoc.hash_) {
+            throw new Error("Hashed document does not have a hash");
+        }
+        if (!seen.has(hashedDoc.hash_)) {
+            seen.add(hashedDoc.hash_);
+            deduplicated.push(hashedDoc);
+        }
+    }
+    return deduplicated;
+}
+function getSourceIdAssigner(sourceIdKey) {
+    if (sourceIdKey === null) {
+        return (_doc) => null;
+    }
+    else if (typeof sourceIdKey === "string") {
+        return (doc) => doc.metadata[sourceIdKey];
+    }
+    else if (typeof sourceIdKey === "function") {
+        return sourceIdKey;
+    }
+    else {
+        throw new Error(`sourceIdKey should be null, a string or a function, got ${typeof sourceIdKey}`);
+    }
+}
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const _isBaseDocumentLoader = (arg) => {
+    if ("load" in arg &&
+        typeof arg.load === "function" &&
+        "loadAndSplit" in arg &&
+        typeof arg.loadAndSplit === "function") {
+        return true;
+    }
+    return false;
+};
+/**
+ * Index data from the doc source into the vector store.
+ *
+ * Indexing functionality uses a manager to keep track of which documents
+ * are in the vector store.
+ *
+ * This allows us to keep track of which documents were updated, which
+ * documents were deleted, and which documents should be skipped.
+ *
+ * For the time being, documents are indexed using their hashes, and users
+ * are not able to specify the uid of the document.
+ *
+ * @param {IndexArgs} args
+ * @param {BaseDocumentLoader | DocumentInterface[]} args.docsSource The source of documents to index. Can be a DocumentLoader or a list of Documents.
+ * @param {RecordManagerInterface} args.recordManager The record manager to use for keeping track of indexed documents.
+ * @param {VectorStore} args.vectorStore The vector store to use for storing the documents.
+ * @param {IndexOptions | undefined} args.options Options for indexing.
+ * @returns {Promise<IndexingResult>}
+ */
+export async function index(args) {
+    const { docsSource, recordManager, vectorStore, options } = args;
+    const { batchSize = 100, cleanup, sourceIdKey, cleanupBatchSize = 1000, forceUpdate = false, } = options ?? {};
+    if (cleanup === "incremental" && !sourceIdKey) {
+        throw new Error("sourceIdKey is required when cleanup mode is incremental. Please provide through 'options.sourceIdKey'.");
+    }
+    const docs = _isBaseDocumentLoader(docsSource)
+        ? await docsSource.load()
+        : docsSource;
+    const sourceIdAssigner = getSourceIdAssigner(sourceIdKey ?? null);
+    const indexStartDt = await recordManager.getTime();
+    let numAdded = 0;
+    let numDeleted = 0;
+    let numUpdated = 0;
+    let numSkipped = 0;
+    const batches = batch(batchSize ?? 100, docs);
+    for (const batch of batches) {
+        const hashedDocs = deduplicateInOrder(batch.map((doc) => HashedDocument.fromDocument(doc)));
+        const sourceIds = hashedDocs.map((doc) => sourceIdAssigner(doc));
+        if (cleanup === "incremental") {
+            hashedDocs.forEach((_hashedDoc, index) => {
+                const source = sourceIds[index];
+                if (source === null) {
+                    throw new Error("sourceIdKey must be provided when cleanup is incremental");
+                }
+            });
+        }
+        const batchExists = await recordManager.exists(hashedDocs.map((doc) => doc.uid));
+        const uids = [];
+        const docsToIndex = [];
+        const docsToUpdate = [];
+        const seenDocs = new Set();
+        hashedDocs.forEach((hashedDoc, i) => {
+            const docExists = batchExists[i];
+            if (docExists) {
+                if (forceUpdate) {
+                    seenDocs.add(hashedDoc.uid);
+                }
+                else {
+                    docsToUpdate.push(hashedDoc.uid);
+                    return;
+                }
+            }
+            uids.push(hashedDoc.uid);
+            docsToIndex.push(hashedDoc.toDocument());
+        });
+        if (docsToUpdate.length > 0) {
+            await recordManager.update(docsToUpdate, { timeAtLeast: indexStartDt });
+            numSkipped += docsToUpdate.length;
+        }
+        if (docsToIndex.length > 0) {
+            await vectorStore.addDocuments(docsToIndex, { ids: uids });
+            numAdded += docsToIndex.length - seenDocs.size;
+            numUpdated += seenDocs.size;
+        }
+        await recordManager.update(hashedDocs.map((doc) => doc.uid), { timeAtLeast: indexStartDt, groupIds: sourceIds });
+        if (cleanup === "incremental") {
+            sourceIds.forEach((sourceId) => {
+                if (!sourceId)
+                    throw new Error("Source id cannot be null");
+            });
+            const uidsToDelete = await recordManager.listKeys({
+                before: indexStartDt,
+                groupIds: sourceIds,
+            });
+            await vectorStore.delete({ ids: uidsToDelete });
+            await recordManager.deleteKeys(uidsToDelete);
+            numDeleted += uidsToDelete.length;
+        }
+    }
+    if (cleanup === "full") {
+        let uidsToDelete = await recordManager.listKeys({
+            before: indexStartDt,
+            limit: cleanupBatchSize,
+        });
+        while (uidsToDelete.length > 0) {
+            await vectorStore.delete({ ids: uidsToDelete });
+            await recordManager.deleteKeys(uidsToDelete);
+            numDeleted += uidsToDelete.length;
+            uidsToDelete = await recordManager.listKeys({
+                before: indexStartDt,
+                limit: cleanupBatchSize,
+            });
+        }
+    }
+    return {
+        numAdded,
+        numDeleted,
+        numUpdated,
+        numSkipped,
+    };
+}
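Taken together, the three files above implement the new document-indexing API: documents are hashed, deduplicated, batched, written to the vector store, and tracked in a record manager so stale entries can be cleaned up. A sketch of the intended call pattern; it assumes an existing `RecordManagerInterface` implementation (construction omitted since it depends on the backing store you choose), and the documents and embeddings are illustrative:

    import { index } from "langchain/indexes";
    import { MemoryVectorStore } from "langchain/vectorstores/memory";
    import { OpenAIEmbeddings } from "@langchain/openai";
    import { Document } from "@langchain/core/documents";
    import type { RecordManagerInterface } from "@langchain/community/indexes/base";

    // Any RecordManagerInterface implementation works here.
    declare const recordManager: RecordManagerInterface;

    const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings());
    const docs = [
      new Document({ pageContent: "hello", metadata: { source: "a.txt" } }),
      new Document({ pageContent: "world", metadata: { source: "b.txt" } }),
    ];

    const result = await index({
      docsSource: docs,
      recordManager,
      vectorStore,
      options: {
        cleanup: "incremental", // continuously delete stale docs for sources seen this run
        sourceIdKey: "source",  // required whenever cleanup is "incremental"
      },
    });
    // result is an IndexingResult: { numAdded, numDeleted, numUpdated, numSkipped }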
package/dist/load/import_map.cjs
CHANGED
@@ -25,7 +25,7 @@ var __importStar = (this && this.__importStar) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.retrievers__vespa = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.retrievers__remote = exports.output_parsers = exports.schema__query_constructor = exports.schema__prompt_template = exports.chat_models__anthropic = exports.document_transformers__openai_functions = exports.document_loaders__web__sort_xyz_blockchain = exports.document_loaders__web__serpapi = exports.document_loaders__web__searchapi = exports.document_loaders__base = exports.text_splitter = exports.vectorstores__memory = exports.llms__fake = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
-exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__chat = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
+exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__chat = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
 exports.agents = __importStar(require("../agents/index.cjs"));
 exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
 exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
@@ -91,6 +91,7 @@ exports.experimental__prompts__custom_format = __importStar(require("../experime
 exports.evaluation = __importStar(require("../evaluation/index.cjs"));
 exports.smith = __importStar(require("../smith/index.cjs"));
 exports.runnables__remote = __importStar(require("../runnables/remote.cjs"));
+exports.indexes = __importStar(require("../indexes/index.cjs"));
 const openai_1 = require("@langchain/openai");
 const prompts_1 = require("@langchain/core/prompts");
 const messages_1 = require("@langchain/core/messages");
package/dist/load/import_map.d.ts
CHANGED
@@ -63,6 +63,7 @@ export * as experimental__prompts__custom_format from "../experimental/prompts/c
 export * as evaluation from "../evaluation/index.js";
 export * as smith from "../smith/index.js";
 export * as runnables__remote from "../runnables/remote.js";
+export * as indexes from "../indexes/index.js";
 import { ChatOpenAI, OpenAI, OpenAIEmbeddings } from "@langchain/openai";
 import { PromptTemplate, AIMessagePromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, PipelinePromptTemplate } from "@langchain/core/prompts";
 import { AIMessage, AIMessageChunk, BaseMessage, BaseMessageChunk, ChatMessage, ChatMessageChunk, FunctionMessage, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, ToolMessage, ToolMessageChunk } from "@langchain/core/messages";
package/dist/load/import_map.js
CHANGED
@@ -64,6 +64,7 @@ export * as experimental__prompts__custom_format from "../experimental/prompts/c
 export * as evaluation from "../evaluation/index.js";
 export * as smith from "../smith/index.js";
 export * as runnables__remote from "../runnables/remote.js";
+export * as indexes from "../indexes/index.js";
 import { ChatOpenAI, OpenAI, OpenAIEmbeddings } from "@langchain/openai";
 import { PromptTemplate, AIMessagePromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, PipelinePromptTemplate } from "@langchain/core/prompts";
 import { AIMessage, AIMessageChunk, BaseMessage, BaseMessageChunk, ChatMessage, ChatMessageChunk, FunctionMessage, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, ToolMessage, ToolMessageChunk } from "@langchain/core/messages";
package/dist/smith/config.d.ts
CHANGED
@@ -15,18 +15,19 @@ export type EvaluatorInputFormatter = ({ rawInput, rawPrediction, rawReferenceOut
     rawReferenceOutput?: any;
     run: Run;
 }) => EvaluatorInputs;
+export type DynamicRunEvaluatorParams = {
+    input: Record<string, unknown>;
+    prediction?: Record<string, unknown>;
+    reference?: Record<string, unknown>;
+    run: Run;
+    example?: Example;
+};
 /**
  * Type of a function that can be coerced into a RunEvaluator function.
  * While we have the class-based RunEvaluator, it's often more convenient to directly
  * pass a function to the runner. This type allows us to do that.
  */
-export type RunEvaluatorLike = ((
-    run: Run;
-    example?: Example;
-}) => Promise<EvaluationResult>) | (({ run, example }: {
-    run: Run;
-    example?: Example;
-}) => EvaluationResult);
+export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams) => Promise<EvaluationResult>) | ((props: DynamicRunEvaluatorParams) => EvaluationResult);
 /**
  * Configuration class for running evaluations on datasets.
  *
@@ -51,10 +52,26 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
      */
     evaluators?: (T | EvalConfig)[];
     /**
-     * Convert the evaluation data into
-     *
-     * and
-     *
+     * Convert the evaluation data into formats that can be used by the evaluator.
+     * This should most commonly be a string.
+     * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.
+     * @example
+     * ```ts
+     * // Chain input: { input: "some string" }
+     * // Chain output: { output: "some output" }
+     * // Reference example output format: { output: "some reference output" }
+     * const formatEvaluatorInputs = ({
+     *   rawInput,
+     *   rawPrediction,
+     *   rawReferenceOutput,
+     * }) => {
+     *   return {
+     *     input: rawInput.input,
+     *     prediction: rawPrediction.output,
+     *     reference: rawReferenceOutput.output,
+     *   };
+     * };
+     * ```
      * @returns The prepared data.
      */
     formatEvaluatorInputs?: EvaluatorInputFormatter;
@@ -76,8 +93,26 @@ export interface EvalConfig extends LoadEvaluatorOptions {
      */
     feedbackKey?: string;
     /**
-     * Convert the evaluation data into
-     *
+     * Convert the evaluation data into formats that can be used by the evaluator.
+     * This should most commonly be a string.
+     * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.
+     * @example
+     * ```ts
+     * // Chain input: { input: "some string" }
+     * // Chain output: { output: "some output" }
+     * // Reference example output format: { output: "some reference output" }
+     * const formatEvaluatorInputs = ({
+     *   rawInput,
+     *   rawPrediction,
+     *   rawReferenceOutput,
+     * }) => {
+     *   return {
+     *     input: rawInput.input,
+     *     prediction: rawPrediction.output,
+     *     reference: rawReferenceOutput.output,
+     *   };
+     * };
+     * ```
      * @returns The prepared data.
      */
     formatEvaluatorInputs: EvaluatorInputFormatter;
@@ -91,16 +126,21 @@ export interface EvalConfig extends LoadEvaluatorOptions {
  * @returns The configuration for the evaluator.
  * @example
  * ```ts
- * const evalConfig =
- *
- *
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "criteria",
+ *     criteria: "helpfulness"
+ *   }]
+ * };
 * ```
 * @example
 * ```ts
- * const evalConfig =
- * [
- *
- *
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "criteria",
+ *     criteria: { "isCompliant": "Does the submission comply with the requirements of XYZ" }
+ *   }]
+ * };
 */
 export type CriteriaEvalChainConfig = EvalConfig & {
     evaluatorType: "criteria";
@@ -133,16 +173,21 @@ export type CriteriaEvalChainConfig = EvalConfig & {
  * @returns The configuration for the evaluator.
 * @example
 * ```ts
- * const evalConfig =
- *
- *
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "labeled_criteria",
+ *     criteria: "correctness"
+ *   }],
+ * };
 * ```
 * @example
 * ```ts
- * const evalConfig =
- * [
- *
- *
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "labeled_criteria",
+ *     criteria: { "mentionsAllFacts": "Does the submission include all facts provided in the reference?" }
+ *   }],
+ * };
 */
 export type LabeledCriteria = EvalConfig & {
     evaluatorType: "labeled_criteria";
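The `RunEvaluatorLike` change above means a custom evaluator now receives one `DynamicRunEvaluatorParams` object with pre-extracted `input`, `prediction`, and `reference` fields alongside the raw `run` and `example` (the wiring lives in `DynamicRunEvaluator.evaluateRun`, shown in the runner_utils diffs below). A hedged sketch; the feedback key and scoring rule here are invented for illustration:

    import type { DynamicRunEvaluatorParams } from "langchain/smith";

    // Reads the extracted fields directly instead of digging through
    // run.inputs / run.outputs by hand.
    const exactMatch = ({ prediction, reference }: DynamicRunEvaluatorParams) => ({
      key: "exact_match",
      score: prediction?.output === reference?.output ? 1 : 0,
    });

    // Wired up via customEvaluators when calling runOnDataset, e.g.:
    // await runOnDataset(chain, datasetName, {
    //   evaluationConfig: { customEvaluators: [exactMatch] },
    // });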
package/dist/smith/index.cjs
CHANGED
@@ -1,5 +1,20 @@
 "use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.runOnDataset = void 0;
 const runner_utils_js_1 = require("./runner_utils.cjs");
 Object.defineProperty(exports, "runOnDataset", { enumerable: true, get: function () { return runner_utils_js_1.runOnDataset; } });
+__exportStar(require("./config.cjs"), exports);
package/dist/smith/index.d.ts
CHANGED
@@ -1,3 +1,3 @@
-import type
-
-export
+import { type EvalResults, type RunOnDatasetParams, runOnDataset } from "./runner_utils.js";
+export { type EvalResults, type RunOnDatasetParams, runOnDataset };
+export * from "./config.js";
package/dist/smith/runner_utils.cjs
CHANGED
@@ -29,9 +29,16 @@ class DynamicRunEvaluator
      * @returns A promise that resolves to the evaluation result.
      */
     async evaluateRun(run, example) {
-        return await this.evaluator.invoke({
+        return await this.evaluator.invoke({
+            run,
+            example,
+            input: run.inputs,
+            prediction: run.outputs,
+            reference: example?.outputs,
+        });
     }
 }
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
 function isLLMStringEvaluator(evaluator) {
     return evaluator && typeof evaluator.evaluateStrings === "function";
 }
@@ -254,7 +261,7 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * for evaluation.
  *
  * @param options - (Optional) Additional parameters for the evaluation process:
- *   - `
+ *   - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
 *     standard and custom evaluators.
 *   - `projectName` (string): Name of the project for logging and tracking.
 *   - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
@@ -273,10 +280,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * const datasetName = 'example-dataset';
 * const client = new Client(/* ...config... *\/);
 *
- * const evaluationConfig =
+ * const evaluationConfig = {
 *   evaluators: [/* ...evaluators... *\/],
 *   customEvaluators: [/* ...custom evaluators... *\/],
- * }
+ * };
 *
 * const results = await runOnDataset(chain, datasetName, {
 *   evaluationConfig,
package/dist/smith/runner_utils.d.ts
CHANGED
@@ -1,6 +1,6 @@
 import { Runnable } from "@langchain/core/runnables";
 import { Client, Feedback } from "langsmith";
-import { RunEvalConfig } from "./config.js";
+import type { RunEvalConfig } from "./config.js";
 export type ChainOrFactory = Runnable | (() => Runnable) | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
 export type RunOnDatasetParams = {
     evaluationConfig?: RunEvalConfig;
@@ -35,7 +35,7 @@ export type EvalResults = {
  * for evaluation.
 *
 * @param options - (Optional) Additional parameters for the evaluation process:
- *   - `
+ *   - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
 *     standard and custom evaluators.
 *   - `projectName` (string): Name of the project for logging and tracking.
 *   - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
@@ -54,10 +54,10 @@ export type EvalResults = {
  * const datasetName = 'example-dataset';
 * const client = new Client(/* ...config... *\/);
 *
- * const evaluationConfig =
+ * const evaluationConfig = {
 *   evaluators: [/* ...evaluators... *\/],
 *   customEvaluators: [/* ...custom evaluators... *\/],
- * }
+ * };
 *
 * const results = await runOnDataset(chain, datasetName, {
 *   evaluationConfig,
package/dist/smith/runner_utils.js
CHANGED
@@ -1,5 +1,5 @@
 import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
-import { Runnable, RunnableLambda } from "@langchain/core/runnables";
+import { Runnable, RunnableLambda, } from "@langchain/core/runnables";
 import { RunCollectorCallbackHandler } from "@langchain/core/tracers/run_collector";
 import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
 import { Client } from "langsmith";
@@ -26,9 +26,16 @@ class DynamicRunEvaluator
      * @returns A promise that resolves to the evaluation result.
      */
     async evaluateRun(run, example) {
-        return await this.evaluator.invoke({
+        return await this.evaluator.invoke({
+            run,
+            example,
+            input: run.inputs,
+            prediction: run.outputs,
+            reference: example?.outputs,
+        });
     }
 }
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
 function isLLMStringEvaluator(evaluator) {
     return evaluator && typeof evaluator.evaluateStrings === "function";
 }
@@ -251,7 +258,7 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * for evaluation.
 *
 * @param options - (Optional) Additional parameters for the evaluation process:
- *   - `
+ *   - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
 *     standard and custom evaluators.
 *   - `projectName` (string): Name of the project for logging and tracking.
 *   - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
@@ -270,10 +277,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * const datasetName = 'example-dataset';
 * const client = new Client(/* ...config... *\/);
 *
- * const evaluationConfig =
+ * const evaluationConfig = {
 *   evaluators: [/* ...evaluators... *\/],
 *   customEvaluators: [/* ...custom evaluators... *\/],
- * }
+ * };
 *
 * const results = await runOnDataset(chain, datasetName, {
 *   evaluationConfig,
package/indexes.cjs
ADDED
@@ -0,0 +1 @@
+module.exports = require('./dist/indexes/index.cjs');
package/indexes.d.ts
ADDED
@@ -0,0 +1 @@
+export * from './dist/indexes/index.js'
package/indexes.js
ADDED
@@ -0,0 +1 @@
+export * from './dist/indexes/index.js'
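These three one-line stubs back the new subpath export declared in package.json below, so after this release a consumer should be able to write, for example:

    import { index, type IndexOptions } from "langchain/indexes";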
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {
@@ -874,6 +874,9 @@
     "runnables/remote.cjs",
     "runnables/remote.js",
     "runnables/remote.d.ts",
+    "indexes.cjs",
+    "indexes.js",
+    "indexes.d.ts",
     "index.cjs",
     "index.js",
     "index.d.ts"
@@ -1205,7 +1208,7 @@
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.9.1",
-    "@langchain/community": "~0.0.
+    "@langchain/community": "~0.0.20",
     "@langchain/core": "~0.1.16",
     "@langchain/openai": "~0.0.12",
     "binary-extensions": "^2.2.0",
@@ -2681,6 +2684,11 @@
       "import": "./runnables/remote.js",
       "require": "./runnables/remote.cjs"
     },
+    "./indexes": {
+      "types": "./indexes.d.ts",
+      "import": "./indexes.js",
+      "require": "./indexes.cjs"
+    },
     "./package.json": "./package.json"
   }
 }