langchain 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/agents/react/prompt.cjs +1 -1
  2. package/dist/agents/react/prompt.js +1 -1
  3. package/dist/agents/toolkits/conversational_retrieval/openai_functions.cjs +3 -1
  4. package/dist/agents/toolkits/conversational_retrieval/openai_functions.d.ts +1 -0
  5. package/dist/agents/toolkits/conversational_retrieval/openai_functions.js +3 -1
  6. package/dist/chains/constitutional_ai/constitutional_principle.js +1 -1
  7. package/dist/document_loaders/web/github.cjs +1 -1
  8. package/dist/document_loaders/web/github.js +1 -1
  9. package/dist/evaluation/comparison/pairwise.cjs +1 -1
  10. package/dist/evaluation/comparison/pairwise.js +1 -1
  11. package/dist/evaluation/criteria/criteria.cjs +1 -1
  12. package/dist/evaluation/criteria/criteria.js +1 -1
  13. package/dist/indexes/index.cjs +5 -0
  14. package/dist/indexes/index.d.ts +1 -0
  15. package/dist/indexes/index.js +1 -0
  16. package/dist/indexes/indexing.cjs +265 -0
  17. package/dist/indexes/indexing.d.ts +75 -0
  18. package/dist/indexes/indexing.js +261 -0
  19. package/dist/load/import_map.cjs +2 -1
  20. package/dist/load/import_map.d.ts +1 -0
  21. package/dist/load/import_map.js +1 -0
  22. package/dist/smith/config.d.ts +76 -26
  23. package/dist/smith/index.cjs +15 -0
  24. package/dist/smith/index.d.ts +3 -3
  25. package/dist/smith/index.js +2 -1
  26. package/dist/smith/runner_utils.cjs +56 -6
  27. package/dist/smith/runner_utils.d.ts +4 -4
  28. package/dist/smith/runner_utils.js +57 -7
  29. package/indexes.cjs +1 -0
  30. package/indexes.d.ts +1 -0
  31. package/indexes.js +1 -0
  32. package/package.json +28 -11
  33. package/tools/chain.cjs +1 -0
  34. package/tools/chain.d.ts +1 -0
  35. package/tools/chain.js +1 -0
  36. package/index.cjs +0 -1
  37. package/index.d.ts +0 -1
  38. package/index.js +0 -1
@@ -0,0 +1,261 @@
+ import { v5 as uuidv5 } from "uuid";
+ import { UUIDV5_NAMESPACE, } from "@langchain/community/indexes/base";
+ import { insecureHash } from "@langchain/core/utils/hash";
+ import { Document } from "../document.js";
+ /**
+  * HashedDocument is a Document with hashes calculated.
+  * Hashes are calculated based on page content and metadata.
+  * It is used for indexing.
+  */
+ class HashedDocument {
+     constructor(fields) {
+         Object.defineProperty(this, "uid", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "hash_", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "contentHash", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "metadataHash", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "pageContent", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "metadata", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         this.uid = fields.uid;
+         this.pageContent = fields.pageContent;
+         this.metadata = fields.metadata;
+     }
+     calculateHashes() {
+         const forbiddenKeys = ["hash_", "content_hash", "metadata_hash"];
+         for (const key of forbiddenKeys) {
+             if (key in this.metadata) {
+                 throw new Error(`Metadata cannot contain key ${key} as it is reserved for internal use. Restricted keys: [${forbiddenKeys.join(", ")}]`);
+             }
+         }
+         const contentHash = this._hashStringToUUID(this.pageContent);
+         try {
+             const metadataHash = this._hashNestedDictToUUID(this.metadata);
+             this.contentHash = contentHash;
+             this.metadataHash = metadataHash;
+         }
+         catch (e) {
+             throw new Error(`Failed to hash metadata: ${e}. Please use a dict that can be serialized using json.`);
+         }
+         this.hash_ = this._hashStringToUUID(this.contentHash + this.metadataHash);
+         if (!this.uid) {
+             this.uid = this.hash_;
+         }
+     }
+     toDocument() {
+         return new Document({
+             pageContent: this.pageContent,
+             metadata: this.metadata,
+         });
+     }
+     static fromDocument(document, uid) {
+         const doc = new this({
+             pageContent: document.pageContent,
+             metadata: document.metadata,
+             uid: uid || document.uid,
+         });
+         doc.calculateHashes();
+         return doc;
+     }
+     _hashStringToUUID(inputString) {
+         const hash_value = insecureHash(inputString);
+         return uuidv5(hash_value, UUIDV5_NAMESPACE);
+     }
+     _hashNestedDictToUUID(data) {
+         const serialized_data = JSON.stringify(data, Object.keys(data).sort());
+         const hash_value = insecureHash(serialized_data);
+         return uuidv5(hash_value, UUIDV5_NAMESPACE);
+     }
+ }
+ function batch(size, iterable) {
+     const batches = [];
+     let currentBatch = [];
+     iterable.forEach((item) => {
+         currentBatch.push(item);
+         if (currentBatch.length >= size) {
+             batches.push(currentBatch);
+             currentBatch = [];
+         }
+     });
+     if (currentBatch.length > 0) {
+         batches.push(currentBatch);
+     }
+     return batches;
+ }
+ function deduplicateInOrder(hashedDocuments) {
+     const seen = new Set();
+     const deduplicated = [];
+     for (const hashedDoc of hashedDocuments) {
+         if (!hashedDoc.hash_) {
+             throw new Error("Hashed document does not have a hash");
+         }
+         if (!seen.has(hashedDoc.hash_)) {
+             seen.add(hashedDoc.hash_);
+             deduplicated.push(hashedDoc);
+         }
+     }
+     return deduplicated;
+ }
+ function getSourceIdAssigner(sourceIdKey) {
+     if (sourceIdKey === null) {
+         return (_doc) => null;
+     }
+     else if (typeof sourceIdKey === "string") {
+         return (doc) => doc.metadata[sourceIdKey];
+     }
+     else if (typeof sourceIdKey === "function") {
+         return sourceIdKey;
+     }
+     else {
+         throw new Error(`sourceIdKey should be null, a string or a function, got ${typeof sourceIdKey}`);
+     }
+ }
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ const _isBaseDocumentLoader = (arg) => {
+     if ("load" in arg &&
+         typeof arg.load === "function" &&
+         "loadAndSplit" in arg &&
+         typeof arg.loadAndSplit === "function") {
+         return true;
+     }
+     return false;
+ };
+ /**
+  * Index data from the doc source into the vector store.
+  *
+  * Indexing functionality uses a manager to keep track of which documents
+  * are in the vector store.
+  *
+  * This allows us to keep track of which documents were updated, and which
+  * documents were deleted, which documents should be skipped.
+  *
+  * For the time being, documents are indexed using their hashes, and users
+  * are not able to specify the uid of the document.
+  *
+  * @param {IndexArgs} args
+  * @param {BaseDocumentLoader | DocumentInterface[]} args.docsSource The source of documents to index. Can be a DocumentLoader or a list of Documents.
+  * @param {RecordManagerInterface} args.recordManager The record manager to use for keeping track of indexed documents.
+  * @param {VectorStore} args.vectorStore The vector store to use for storing the documents.
+  * @param {IndexOptions | undefined} args.options Options for indexing.
+  * @returns {Promise<IndexingResult>}
+  */
+ export async function index(args) {
+     const { docsSource, recordManager, vectorStore, options } = args;
+     const { batchSize = 100, cleanup, sourceIdKey, cleanupBatchSize = 1000, forceUpdate = false, } = options ?? {};
+     if (cleanup === "incremental" && !sourceIdKey) {
+         throw new Error("sourceIdKey is required when cleanup mode is incremental. Please provide through 'options.sourceIdKey'.");
+     }
+     const docs = _isBaseDocumentLoader(docsSource)
+         ? await docsSource.load()
+         : docsSource;
+     const sourceIdAssigner = getSourceIdAssigner(sourceIdKey ?? null);
+     const indexStartDt = await recordManager.getTime();
+     let numAdded = 0;
+     let numDeleted = 0;
+     let numUpdated = 0;
+     let numSkipped = 0;
+     const batches = batch(batchSize ?? 100, docs);
+     for (const batch of batches) {
+         const hashedDocs = deduplicateInOrder(batch.map((doc) => HashedDocument.fromDocument(doc)));
+         const sourceIds = hashedDocs.map((doc) => sourceIdAssigner(doc));
+         if (cleanup === "incremental") {
+             hashedDocs.forEach((_hashedDoc, index) => {
+                 const source = sourceIds[index];
+                 if (source === null) {
+                     throw new Error("sourceIdKey must be provided when cleanup is incremental");
+                 }
+             });
+         }
+         const batchExists = await recordManager.exists(hashedDocs.map((doc) => doc.uid));
+         const uids = [];
+         const docsToIndex = [];
+         const docsToUpdate = [];
+         const seenDocs = new Set();
+         hashedDocs.forEach((hashedDoc, i) => {
+             const docExists = batchExists[i];
+             if (docExists) {
+                 if (forceUpdate) {
+                     seenDocs.add(hashedDoc.uid);
+                 }
+                 else {
+                     docsToUpdate.push(hashedDoc.uid);
+                     return;
+                 }
+             }
+             uids.push(hashedDoc.uid);
+             docsToIndex.push(hashedDoc.toDocument());
+         });
+         if (docsToUpdate.length > 0) {
+             await recordManager.update(docsToUpdate, { timeAtLeast: indexStartDt });
+             numSkipped += docsToUpdate.length;
+         }
+         if (docsToIndex.length > 0) {
+             await vectorStore.addDocuments(docsToIndex, { ids: uids });
+             numAdded += docsToIndex.length - seenDocs.size;
+             numUpdated += seenDocs.size;
+         }
+         await recordManager.update(hashedDocs.map((doc) => doc.uid), { timeAtLeast: indexStartDt, groupIds: sourceIds });
+         if (cleanup === "incremental") {
+             sourceIds.forEach((sourceId) => {
+                 if (!sourceId)
+                     throw new Error("Source id cannot be null");
+             });
+             const uidsToDelete = await recordManager.listKeys({
+                 before: indexStartDt,
+                 groupIds: sourceIds,
+             });
+             await vectorStore.delete({ ids: uidsToDelete });
+             await recordManager.deleteKeys(uidsToDelete);
+             numDeleted += uidsToDelete.length;
+         }
+     }
+     if (cleanup === "full") {
+         let uidsToDelete = await recordManager.listKeys({
+             before: indexStartDt,
+             limit: cleanupBatchSize,
+         });
+         while (uidsToDelete.length > 0) {
+             await vectorStore.delete({ ids: uidsToDelete });
+             await recordManager.deleteKeys(uidsToDelete);
+             numDeleted += uidsToDelete.length;
+             uidsToDelete = await recordManager.listKeys({
+                 before: indexStartDt,
+                 limit: cleanupBatchSize,
+             });
+         }
+     }
+     return {
+         numAdded,
+         numDeleted,
+         numUpdated,
+         numSkipped,
+     };
+ }
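The new `index()` function pairs a record manager with a vector store so that repeated ingestion only writes documents whose content or metadata hash has changed. A minimal usage sketch follows; the `langchain/indexes` entrypoint is assumed from the file list above, and `recordManager` stands in for any implementation of the `RecordManagerInterface` referenced in the JSDoc (the instance shown here is hypothetical):

```ts
// Sketch only: assumes the new "langchain/indexes" entrypoint added in this release.
import { index } from "langchain/indexes";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { Document } from "langchain/document";
import { OpenAIEmbeddings } from "@langchain/openai";

// Hypothetical record manager instance; substitute any RecordManagerInterface
// implementation your application uses.
declare const recordManager: any;

const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings());

const docs = [
  new Document({ pageContent: "hello", metadata: { source: "a.txt" } }),
  new Document({ pageContent: "world", metadata: { source: "b.txt" } }),
];

// "incremental" cleanup requires sourceIdKey so stale documents from the same
// source can be deleted; re-running with unchanged docs reports them as skipped.
const result = await index({
  docsSource: docs,
  recordManager,
  vectorStore,
  options: { cleanup: "incremental", sourceIdKey: "source" },
});
// result has the shape { numAdded, numUpdated, numDeleted, numSkipped }
```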
@@ -25,7 +25,7 @@ var __importStar = (this && this.__importStar) || function (mod) {
  };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.util__document = exports.storage__in_memory = exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.retrievers__vespa = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.retrievers__remote = exports.output_parsers = exports.schema__query_constructor = exports.schema__prompt_template = exports.chat_models__anthropic = exports.document_transformers__openai_functions = exports.document_loaders__web__sort_xyz_blockchain = exports.document_loaders__web__serpapi = exports.document_loaders__web__searchapi = exports.document_loaders__base = exports.text_splitter = exports.vectorstores__memory = exports.llms__fake = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
- exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__chat = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
+ exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__chat = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = void 0;
  exports.agents = __importStar(require("../agents/index.cjs"));
  exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
  exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
@@ -91,6 +91,7 @@ exports.experimental__prompts__custom_format = __importStar(require("../experime
  exports.evaluation = __importStar(require("../evaluation/index.cjs"));
  exports.smith = __importStar(require("../smith/index.cjs"));
  exports.runnables__remote = __importStar(require("../runnables/remote.cjs"));
+ exports.indexes = __importStar(require("../indexes/index.cjs"));
  const openai_1 = require("@langchain/openai");
  const prompts_1 = require("@langchain/core/prompts");
  const messages_1 = require("@langchain/core/messages");
@@ -63,6 +63,7 @@ export * as experimental__prompts__custom_format from "../experimental/prompts/c
  export * as evaluation from "../evaluation/index.js";
  export * as smith from "../smith/index.js";
  export * as runnables__remote from "../runnables/remote.js";
+ export * as indexes from "../indexes/index.js";
  import { ChatOpenAI, OpenAI, OpenAIEmbeddings } from "@langchain/openai";
  import { PromptTemplate, AIMessagePromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, PipelinePromptTemplate } from "@langchain/core/prompts";
  import { AIMessage, AIMessageChunk, BaseMessage, BaseMessageChunk, ChatMessage, ChatMessageChunk, FunctionMessage, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, ToolMessage, ToolMessageChunk } from "@langchain/core/messages";
@@ -64,6 +64,7 @@ export * as experimental__prompts__custom_format from "../experimental/prompts/c
  export * as evaluation from "../evaluation/index.js";
  export * as smith from "../smith/index.js";
  export * as runnables__remote from "../runnables/remote.js";
+ export * as indexes from "../indexes/index.js";
  import { ChatOpenAI, OpenAI, OpenAIEmbeddings } from "@langchain/openai";
  import { PromptTemplate, AIMessagePromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, PipelinePromptTemplate } from "@langchain/core/prompts";
  import { AIMessage, AIMessageChunk, BaseMessage, BaseMessageChunk, ChatMessage, ChatMessageChunk, FunctionMessage, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, ToolMessage, ToolMessageChunk } from "@langchain/core/messages";
@@ -1,4 +1,5 @@
  import { BaseLanguageModel } from "@langchain/core/language_models/base";
+ import { RunnableConfig } from "@langchain/core/runnables";
  import { Example, Run } from "langsmith";
  import { EvaluationResult, RunEvaluator } from "langsmith/evaluation";
  import { Criteria } from "../evaluation/index.js";
@@ -15,17 +16,22 @@ export type EvaluatorInputFormatter = ({ rawInput, rawPrediction, rawReferenceOu
  rawReferenceOutput?: any;
  run: Run;
  }) => EvaluatorInputs;
+ export type DynamicRunEvaluatorParams = {
+     input: Record<string, unknown>;
+     prediction?: Record<string, unknown>;
+     reference?: Record<string, unknown>;
+     run: Run;
+     example?: Example;
+ };
  /**
  * Type of a function that can be coerced into a RunEvaluator function.
  * While we have the class-based RunEvaluator, it's often more convenient to directly
  * pass a function to the runner. This type allows us to do that.
  */
- export type RunEvaluatorLike = (({ run, example, }: {
-     run: Run;
-     example?: Example;
- }) => Promise<EvaluationResult>) | (({ run, example }: {
-     run: Run;
-     example?: Example;
+ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
+     config?: RunnableConfig;
+ }) => Promise<EvaluationResult>) | ((props: DynamicRunEvaluatorParams, options?: {
+     config?: RunnableConfig;
  }) => EvaluationResult);
  /**
  * Configuration class for running evaluations on datasets.
@@ -51,10 +57,26 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
  */
  evaluators?: (T | EvalConfig)[];
  /**
- * Convert the evaluation data into a format that can be used by the evaluator.
- * By default, we pass the first value of the run.inputs, run.outputs (predictions),
- * and references (example.outputs)
- *
+ * Convert the evaluation data into formats that can be used by the evaluator.
+ * This should most commonly be a string.
+ * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.
+ * @example
+ * ```ts
+ * // Chain input: { input: "some string" }
+ * // Chain output: { output: "some output" }
+ * // Reference example output format: { output: "some reference output" }
+ * const formatEvaluatorInputs = ({
+ *   rawInput,
+ *   rawPrediction,
+ *   rawReferenceOutput,
+ * }) => {
+ *   return {
+ *     input: rawInput.input,
+ *     prediction: rawPrediction.output,
+ *     reference: rawReferenceOutput.output,
+ *   };
+ * };
+ * ```
  * @returns The prepared data.
  */
  formatEvaluatorInputs?: EvaluatorInputFormatter;
@@ -76,8 +98,26 @@ export interface EvalConfig extends LoadEvaluatorOptions {
  */
  feedbackKey?: string;
  /**
- * Convert the evaluation data into a format that can be used by the evaluator.
- * @param data The data to prepare.
+ * Convert the evaluation data into formats that can be used by the evaluator.
+ * This should most commonly be a string.
+ * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.
+ * @example
+ * ```ts
+ * // Chain input: { input: "some string" }
+ * // Chain output: { output: "some output" }
+ * // Reference example output format: { output: "some reference output" }
+ * const formatEvaluatorInputs = ({
+ *   rawInput,
+ *   rawPrediction,
+ *   rawReferenceOutput,
+ * }) => {
+ *   return {
+ *     input: rawInput.input,
+ *     prediction: rawPrediction.output,
+ *     reference: rawReferenceOutput.output,
+ *   };
+ * };
+ * ```
  * @returns The prepared data.
  */
  formatEvaluatorInputs: EvaluatorInputFormatter;
@@ -91,16 +131,21 @@ export interface EvalConfig extends LoadEvaluatorOptions {
  * @returns The configuration for the evaluator.
  * @example
  * ```ts
- * const evalConfig = new RunEvalConfig(
- *   [new RunEvalConfig.Criteria("helpfulness")],
- * );
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "criteria",
+ *     criteria: "helpfulness"
+ *   }]
+ * };
  * ```
  * @example
  * ```ts
- * const evalConfig = new RunEvalConfig(
- *   [new RunEvalConfig.Criteria(
- *     { "isCompliant": "Does the submission comply with the requirements of XYZ"
- *   })],
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "criteria",
+ *     criteria: { "isCompliant": "Does the submission comply with the requirements of XYZ"
+ *   }]
+ * };
  */
  export type CriteriaEvalChainConfig = EvalConfig & {
      evaluatorType: "criteria";
@@ -133,16 +178,21 @@ export type CriteriaEvalChainConfig = EvalConfig & {
  * @returns The configuration for the evaluator.
  * @example
  * ```ts
- * const evalConfig = new RunEvalConfig(
- *   [new RunEvalConfig.LabeledCriteria("correctness")],
- * );
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "labeled_criteria",
+ *     criteria: "correctness"
+ *   }],
+ * };
  * ```
  * @example
  * ```ts
- * const evalConfig = new RunEvalConfig(
- *   [new RunEvalConfig.Criteria(
- *     { "mentionsAllFacts": "Does the include all facts provided in the reference?"
- *   })],
+ * const evalConfig = {
+ *   evaluators: [{
+ *     evaluatorType: "labeled_criteria",
+ *     criteria: { "mentionsAllFacts": "Does the include all facts provided in the reference?" }
+ *   }],
+ * };
  */
  export type LabeledCriteria = EvalConfig & {
      evaluatorType: "labeled_criteria";
@@ -1,5 +1,20 @@
  "use strict";
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     var desc = Object.getOwnPropertyDescriptor(m, k);
+     if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+       desc = { enumerable: true, get: function() { return m[k]; } };
+     }
+     Object.defineProperty(o, k2, desc);
+ }) : (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     o[k2] = m[k];
+ }));
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
+     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+ };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.runOnDataset = void 0;
  const runner_utils_js_1 = require("./runner_utils.cjs");
  Object.defineProperty(exports, "runOnDataset", { enumerable: true, get: function () { return runner_utils_js_1.runOnDataset; } });
+ __exportStar(require("./config.cjs"), exports);
@@ -1,3 +1,3 @@
- import type { RunEvalConfig } from "./config.js";
- import { runOnDataset } from "./runner_utils.js";
- export { type RunEvalConfig, runOnDataset };
+ import { type EvalResults, type RunOnDatasetParams, runOnDataset } from "./runner_utils.js";
+ export { type EvalResults, type RunOnDatasetParams, runOnDataset };
+ export * from "./config.js";
@@ -1,2 +1,3 @@
- import { runOnDataset } from "./runner_utils.js";
+ import { runOnDataset, } from "./runner_utils.js";
  export { runOnDataset };
+ export * from "./config.js";
@@ -9,6 +9,36 @@ const langsmith_1 = require("langsmith");
  const loader_js_1 = require("../evaluation/loader.cjs");
  const name_generation_js_1 = require("./name_generation.cjs");
  const progress_js_1 = require("./progress.cjs");
+ class RunIdExtractor {
+     constructor() {
+         Object.defineProperty(this, "runIdPromiseResolver", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "runIdPromise", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         Object.defineProperty(this, "handleChainStart", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: (_chain, _inputs, runId) => {
+                 this.runIdPromiseResolver(runId);
+             }
+         });
+         this.runIdPromise = new Promise((extract) => {
+             this.runIdPromiseResolver = extract;
+         });
+     }
+     async extract() {
+         return this.runIdPromise;
+     }
+ }
  /**
  * Wraps an evaluator function + implements the RunEvaluator interface.
  */
@@ -26,12 +56,27 @@ class DynamicRunEvaluator {
  * Evaluates a run with an optional example and returns the evaluation result.
  * @param run The run to evaluate.
  * @param example The optional example to use for evaluation.
- * @returns A promise that resolves to the evaluation result.
+ * @returns A promise that extracts to the evaluation result.
  */
  async evaluateRun(run, example) {
-     return await this.evaluator.invoke({ run, example });
+     const extractor = new RunIdExtractor();
+     const result = await this.evaluator.invoke({
+         run,
+         example,
+         input: run.inputs,
+         prediction: run.outputs,
+         reference: example?.outputs,
+     }, {
+         callbacks: [extractor],
+     });
+     const runId = await extractor.extract();
+     return {
+         sourceRunId: runId,
+         ...result,
+     };
  }
  }
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
  function isLLMStringEvaluator(evaluator) {
      return evaluator && typeof evaluator.evaluateStrings === "function";
  }
@@ -91,7 +136,7 @@ class PreparedRunEvaluator {
  * Evaluates a run with an optional example and returns the evaluation result.
  * @param run The run to evaluate.
  * @param example The optional example to use for evaluation.
- * @returns A promise that resolves to the evaluation result.
+ * @returns A promise that extracts to the evaluation result.
  */
  async evaluateRun(run, example) {
      const { prediction, input, reference } = this.formatEvaluatorInputs({
@@ -100,15 +145,20 @@ class PreparedRunEvaluator {
          rawReferenceOutput: example?.outputs,
          run,
      });
+     const extractor = new RunIdExtractor();
      if (this.isStringEvaluator) {
          const evalResult = await this.evaluator.evaluateStrings({
              prediction: prediction,
              reference: reference,
              input: input,
+         }, {
+             callbacks: [extractor],
          });
+         const runId = await extractor.extract();
          return {
              key: this.evaluationName,
              comment: evalResult?.reasoning,
+             sourceRunId: runId,
              ...evalResult,
          };
      }
@@ -254,7 +304,7 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * for evaluation.
  *
  * @param options - (Optional) Additional parameters for the evaluation process:
- * - `evaluation` (RunEvalConfig): Configuration for the evaluation, including
+ * - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
  * standard and custom evaluators.
  * - `projectName` (string): Name of the project for logging and tracking.
  * - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
@@ -273,10 +323,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
  * const datasetName = 'example-dataset';
  * const client = new Client(/* ...config... *\//);
  *
- * const evaluationConfig = new RunEvalConfig({
+ * const evaluationConfig = {
  *   evaluators: [/* ...evaluators... *\//],
  *   customEvaluators: [/* ...custom evaluators... *\//],
- * });
+ * };
  *
  * const results = await runOnDataset(chain, datasetName, {
  *   evaluationConfig,
@@ -1,6 +1,6 @@
  import { Runnable } from "@langchain/core/runnables";
  import { Client, Feedback } from "langsmith";
- import { RunEvalConfig } from "./config.js";
+ import type { RunEvalConfig } from "./config.js";
  export type ChainOrFactory = Runnable | (() => Runnable) | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
  export type RunOnDatasetParams = {
      evaluationConfig?: RunEvalConfig;
@@ -35,7 +35,7 @@ export type EvalResults = {
  * for evaluation.
  *
  * @param options - (Optional) Additional parameters for the evaluation process:
- * - `evaluation` (RunEvalConfig): Configuration for the evaluation, including
+ * - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
  * standard and custom evaluators.
  * - `projectName` (string): Name of the project for logging and tracking.
  * - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
@@ -54,10 +54,10 @@ export type EvalResults = {
  * const datasetName = 'example-dataset';
  * const client = new Client(/* ...config... *\//);
  *
- * const evaluationConfig = new RunEvalConfig({
+ * const evaluationConfig = {
  *   evaluators: [/* ...evaluators... *\//],
  *   customEvaluators: [/* ...custom evaluators... *\//],
- * });
+ * };
  *
  * const results = await runOnDataset(chain, datasetName, {
  *   evaluationConfig,
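Putting the pieces together, the updated JSDoc above drops the `new RunEvalConfig(...)` constructor in favor of a plain configuration object. A sketch of how a call might look with this release; the dataset name is a placeholder, `chain` stands in for whatever Runnable or function is being evaluated, and `exactMatch` refers to the custom evaluator sketched after the config.d.ts hunk:

```ts
import { runOnDataset } from "langchain/smith";

// Placeholders for the target under evaluation and the custom evaluator.
declare const chain: (input: Record<string, unknown>) => Promise<Record<string, unknown>>;
declare const exactMatch: (...args: any[]) => Promise<{ key: string; score: number }>;

const results = await runOnDataset(chain, "example-dataset", {
  evaluationConfig: {
    // Built-in evaluator configured as a plain object, per the updated JSDoc.
    evaluators: [{ evaluatorType: "criteria", criteria: "helpfulness" }],
    // Custom function evaluators now receive input/prediction/reference directly.
    customEvaluators: [exactMatch],
  },
});
console.log(results);
```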