@arizeai/phoenix-client 2.3.2 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/esm/experiments/instrumention.d.ts.map +1 -1
  2. package/dist/esm/experiments/instrumention.js +11 -7
  3. package/dist/esm/experiments/instrumention.js.map +1 -1
  4. package/dist/esm/experiments/runExperiment.d.ts +14 -2
  5. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  6. package/dist/esm/experiments/runExperiment.js +11 -3
  7. package/dist/esm/experiments/runExperiment.js.map +1 -1
  8. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  9. package/dist/src/datasets/appendDatasetExamples.js +34 -45
  10. package/dist/src/datasets/appendDatasetExamples.js.map +1 -1
  11. package/dist/src/datasets/createDataset.js +25 -36
  12. package/dist/src/datasets/createDataset.js.map +1 -1
  13. package/dist/src/datasets/getDataset.js +7 -18
  14. package/dist/src/datasets/getDataset.js.map +1 -1
  15. package/dist/src/datasets/getDatasetExamples.js +25 -36
  16. package/dist/src/datasets/getDatasetExamples.js.map +1 -1
  17. package/dist/src/datasets/getDatasetInfo.js +22 -33
  18. package/dist/src/datasets/getDatasetInfo.js.map +1 -1
  19. package/dist/src/datasets/getDatasetInfoByName.js +21 -32
  20. package/dist/src/datasets/getDatasetInfoByName.js.map +1 -1
  21. package/dist/src/datasets/listDatasets.js +6 -17
  22. package/dist/src/datasets/listDatasets.js.map +1 -1
  23. package/dist/src/experiments/getExperiment.js +13 -24
  24. package/dist/src/experiments/getExperiment.js.map +1 -1
  25. package/dist/src/experiments/getExperimentInfo.js +15 -26
  26. package/dist/src/experiments/getExperimentInfo.js.map +1 -1
  27. package/dist/src/experiments/getExperimentRuns.js +24 -35
  28. package/dist/src/experiments/getExperimentRuns.js.map +1 -1
  29. package/dist/src/experiments/instrumention.d.ts.map +1 -1
  30. package/dist/src/experiments/instrumention.js +11 -7
  31. package/dist/src/experiments/instrumention.js.map +1 -1
  32. package/dist/src/experiments/runExperiment.d.ts +14 -2
  33. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  34. package/dist/src/experiments/runExperiment.js +286 -293
  35. package/dist/src/experiments/runExperiment.js.map +1 -1
  36. package/dist/src/prompts/createPrompt.js +14 -25
  37. package/dist/src/prompts/createPrompt.js.map +1 -1
  38. package/dist/src/prompts/getPrompt.js +4 -15
  39. package/dist/src/prompts/getPrompt.js.map +1 -1
  40. package/dist/src/spans/addSpanAnnotation.js +14 -25
  41. package/dist/src/spans/addSpanAnnotation.js.map +1 -1
  42. package/dist/src/spans/getSpanAnnotations.js +29 -40
  43. package/dist/src/spans/getSpanAnnotations.js.map +1 -1
  44. package/dist/src/spans/getSpans.js +29 -40
  45. package/dist/src/spans/getSpans.js.map +1 -1
  46. package/dist/src/spans/logSpanAnnotations.js +14 -25
  47. package/dist/src/spans/logSpanAnnotations.js.map +1 -1
  48. package/dist/src/utils/getPromptBySelector.js +37 -48
  49. package/dist/src/utils/getPromptBySelector.js.map +1 -1
  50. package/dist/tsconfig.tsbuildinfo +1 -1
  51. package/package.json +3 -2
  52. package/src/experiments/instrumention.ts +7 -5
  53. package/src/experiments/runExperiment.ts +23 -1
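
The hunks below are from package/dist/src/experiments/runExperiment.js (item 34 above); most of the other dist files appear to carry the same cleanup. Three changes stand out: the compiled output drops the `__awaiter`/generator downleveling in favor of native async/await, `runExperiment` and `evaluateExperiment` gain a `setGlobalTracerProvider` option (defaulting to `true`), and the dry-run cap in `evaluateExperiment` switches from `Math.max` to `Math.min`. A minimal sketch of calling the changed API — assuming the `@arizeai/phoenix-client/experiments` subpath export; the dataset id and task are illustrative:

```ts
import { runExperiment } from "@arizeai/phoenix-client/experiments";

// Dry-run sketch: per the nExamples logic in the diff, dryRun: 2 runs the
// task on at most two examples and skips logging runs to the server.
const ranExperiment = await runExperiment({
  experimentName: "sketch",                  // optional; logs as <unnamed> if omitted
  dataset: { datasetId: "your-dataset-id" }, // selector shape used in the diff
  task: async (example) => example.input,    // toy task: echo the example input
  dryRun: 2,
});
```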
@@ -1,13 +1,4 @@
  "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
- return new (P || (P = Promise))(function (resolve, reject) {
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
- step((generator = generator.apply(thisArg, _arguments || [])).next());
- });
- };
  var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
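
The deleted `__awaiter` shim is the helper tsc emits when `async` functions are downleveled to ES5/ES2015 generators. Its removal here (and across the other dist files) suggests the compile target was raised to ES2017 or newer, where source like the following illustrative function passes through with native async/await instead of the `__awaiter`/`function*` pattern above:

```ts
// Under target ES5/ES2015, tsc wraps this body in __awaiter plus a generator;
// under target >= ES2017 the async/await syntax is emitted unchanged.
async function fetchText(url: string): Promise<string> {
  const res = await fetch(url);
  return res.text();
}
```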
@@ -59,138 +50,141 @@ const urlUtils_1 = require("../utils/urlUtils");
  * });
  * ```
  */
- function runExperiment(_a) {
- return __awaiter(this, arguments, void 0, function* ({ experimentName, experimentDescription, experimentMetadata = {}, client: _client, dataset: DatasetSelector, task, evaluators, logger = console, record = true, concurrency = 5, dryRun = false, }) {
- var _b, _c, _d, _e, _f;
- let provider;
- const isDryRun = typeof dryRun === "number" || dryRun === true;
- const client = _client !== null && _client !== void 0 ? _client : (0, client_1.createClient)();
- const dataset = yield (0, getDataset_1.getDataset)({ dataset: DatasetSelector, client });
- (0, tiny_invariant_1.default)(dataset, `Dataset not found`);
- (0, tiny_invariant_1.default)(dataset.examples.length > 0, `Dataset has no examples`);
- const nExamples = typeof dryRun === "number"
- ? Math.min(dryRun, dataset.examples.length)
- : dataset.examples.length;
- let projectName = `${dataset.name}-exp-${new Date().toISOString()}`;
- // initialize the tracer into scope
- let taskTracer;
- let experiment;
- if (isDryRun) {
- experiment = {
- id: localId(),
- datasetId: dataset.id,
- datasetVersionId: dataset.versionId,
- projectName,
- metadata: experimentMetadata,
- };
- taskTracer = (0, instrumention_1.createNoOpProvider)().getTracer("no-op");
- }
- else {
- const experimentResponse = yield client
- .POST("/v1/datasets/{dataset_id}/experiments", {
- params: {
- path: {
- dataset_id: dataset.id,
- },
- },
- body: {
- name: experimentName,
- description: experimentDescription,
- metadata: experimentMetadata,
- project_name: projectName,
+ async function runExperiment({ experimentName, experimentDescription, experimentMetadata = {}, client: _client, dataset: DatasetSelector, task, evaluators, logger = console, record = true, concurrency = 5, dryRun = false, setGlobalTracerProvider = true, }) {
+ var _a, _b, _c, _d, _e;
+ let provider;
+ const isDryRun = typeof dryRun === "number" || dryRun === true;
+ const client = _client !== null && _client !== void 0 ? _client : (0, client_1.createClient)();
+ const dataset = await (0, getDataset_1.getDataset)({ dataset: DatasetSelector, client });
+ (0, tiny_invariant_1.default)(dataset, `Dataset not found`);
+ (0, tiny_invariant_1.default)(dataset.examples.length > 0, `Dataset has no examples`);
+ const nExamples = typeof dryRun === "number"
+ ? Math.min(dryRun, dataset.examples.length)
+ : dataset.examples.length;
+ let projectName = `${dataset.name}-exp-${new Date().toISOString()}`;
+ // initialize the tracer into scope
+ let taskTracer;
+ let experiment;
+ if (isDryRun) {
+ experiment = {
+ id: localId(),
+ datasetId: dataset.id,
+ datasetVersionId: dataset.versionId,
+ projectName,
+ metadata: experimentMetadata,
+ };
+ taskTracer = (0, instrumention_1.createNoOpProvider)().getTracer("no-op");
+ }
+ else {
+ const experimentResponse = await client
+ .POST("/v1/datasets/{dataset_id}/experiments", {
+ params: {
+ path: {
+ dataset_id: dataset.id,
  },
- })
- .then((res) => { var _a; return (_a = res.data) === null || _a === void 0 ? void 0 : _a.data; });
- (0, tiny_invariant_1.default)(experimentResponse, `Failed to create experiment`);
- projectName = (_b = experimentResponse.project_name) !== null && _b !== void 0 ? _b : projectName;
- experiment = {
- id: experimentResponse.id,
- datasetId: experimentResponse.dataset_id,
- datasetVersionId: experimentResponse.dataset_version_id,
- projectName,
- metadata: experimentResponse.metadata,
- };
- // Initialize the tracer, now that we have a project name
- const baseUrl = client.config.baseUrl;
- (0, tiny_invariant_1.default)(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
- provider = (0, instrumention_1.createProvider)({
- projectName,
- baseUrl,
- headers: (_c = client.config.headers) !== null && _c !== void 0 ? _c : {},
- });
- taskTracer = provider.getTracer(projectName);
- }
- if (!record) {
- logger.info(`🔧 Running experiment in readonly mode. Results will not be recorded.`);
- }
- if (!isDryRun && client.config.baseUrl) {
- const datasetUrl = (0, urlUtils_1.getDatasetUrl)({
- baseUrl: client.config.baseUrl,
- datasetId: dataset.id,
- });
- const datasetExperimentsUrl = (0, urlUtils_1.getDatasetExperimentsUrl)({
- baseUrl: client.config.baseUrl,
- datasetId: dataset.id,
- });
- const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
- baseUrl: client.config.baseUrl,
- datasetId: dataset.id,
- experimentId: experiment.id,
- });
- logger.info(`📊 View dataset: ${datasetUrl}`);
- logger.info(`📺 View dataset experiments: ${datasetExperimentsUrl}`);
- logger.info(`🔗 View this experiment: ${experimentUrl}`);
- }
- logger.info(`🧪 Starting experiment "${experimentName || `<unnamed>`}" on dataset "${dataset.id}" with task "${task.name}" and ${(_d = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _d !== void 0 ? _d : 0} ${(0, pluralize_1.pluralize)("evaluator", (_e = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _e !== void 0 ? _e : 0)} and ${concurrency} concurrent runs`);
- const runs = {};
- yield runTaskWithExamples({
- client,
- experimentId: experiment.id,
- task,
- dataset,
- logger,
- onComplete: (run) => {
- runs[run.id] = run;
  },
- concurrency,
- isDryRun,
- nExamples,
- tracer: taskTracer,
+ body: {
+ name: experimentName,
+ description: experimentDescription,
+ metadata: experimentMetadata,
+ project_name: projectName,
+ },
+ })
+ .then((res) => { var _a; return (_a = res.data) === null || _a === void 0 ? void 0 : _a.data; });
+ (0, tiny_invariant_1.default)(experimentResponse, `Failed to create experiment`);
+ projectName = (_a = experimentResponse.project_name) !== null && _a !== void 0 ? _a : projectName;
+ experiment = {
+ id: experimentResponse.id,
+ datasetId: experimentResponse.dataset_id,
+ datasetVersionId: experimentResponse.dataset_version_id,
+ projectName,
+ metadata: experimentResponse.metadata,
+ };
+ // Initialize the tracer, now that we have a project name
+ const baseUrl = client.config.baseUrl;
+ (0, tiny_invariant_1.default)(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
+ provider = (0, instrumention_1.createProvider)({
+ projectName,
+ baseUrl,
+ headers: (_b = client.config.headers) !== null && _b !== void 0 ? _b : {},
  });
- logger.info(`✅ Task runs completed`);
- const ranExperiment = Object.assign(Object.assign({}, experiment), { runs });
- // Shut down the provider so that the experiments run
- if (provider) {
- yield ((_f = provider.shutdown) === null || _f === void 0 ? void 0 : _f.call(provider));
+ // Register the provider
+ if (setGlobalTracerProvider) {
+ provider.register();
  }
- const { evaluationRuns } = yield evaluateExperiment({
- experiment: ranExperiment,
- evaluators: evaluators !== null && evaluators !== void 0 ? evaluators : [],
- client,
- logger,
- concurrency,
- dryRun,
+ taskTracer = provider.getTracer(projectName);
+ }
+ if (!record) {
+ logger.info(`🔧 Running experiment in readonly mode. Results will not be recorded.`);
+ }
+ if (!isDryRun && client.config.baseUrl) {
+ const datasetUrl = (0, urlUtils_1.getDatasetUrl)({
+ baseUrl: client.config.baseUrl,
+ datasetId: dataset.id,
  });
- ranExperiment.evaluationRuns = evaluationRuns;
- logger.info(`✅ Experiment ${experiment.id} completed`);
- if (!isDryRun && client.config.baseUrl) {
- const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
- baseUrl: client.config.baseUrl,
- datasetId: dataset.id,
- experimentId: experiment.id,
- });
- logger.info(`🔍 View results: ${experimentUrl}`);
- }
- return ranExperiment;
+ const datasetExperimentsUrl = (0, urlUtils_1.getDatasetExperimentsUrl)({
+ baseUrl: client.config.baseUrl,
+ datasetId: dataset.id,
+ });
+ const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
+ baseUrl: client.config.baseUrl,
+ datasetId: dataset.id,
+ experimentId: experiment.id,
+ });
+ logger.info(`📊 View dataset: ${datasetUrl}`);
+ logger.info(`📺 View dataset experiments: ${datasetExperimentsUrl}`);
+ logger.info(`🔗 View this experiment: ${experimentUrl}`);
+ }
+ logger.info(`🧪 Starting experiment "${experimentName || `<unnamed>`}" on dataset "${dataset.id}" with task "${task.name}" and ${(_c = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _c !== void 0 ? _c : 0} ${(0, pluralize_1.pluralize)("evaluator", (_d = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _d !== void 0 ? _d : 0)} and ${concurrency} concurrent runs`);
+ const runs = {};
+ await runTaskWithExamples({
+ client,
+ experimentId: experiment.id,
+ task,
+ dataset,
+ logger,
+ onComplete: (run) => {
+ runs[run.id] = run;
+ },
+ concurrency,
+ isDryRun,
+ nExamples,
+ tracer: taskTracer,
  });
+ logger.info(`✅ Task runs completed`);
+ const ranExperiment = Object.assign(Object.assign({}, experiment), { runs });
+ // Shut down the provider so that the experiments run
+ if (provider) {
+ await ((_e = provider.shutdown) === null || _e === void 0 ? void 0 : _e.call(provider));
+ }
+ const { evaluationRuns } = await evaluateExperiment({
+ experiment: ranExperiment,
+ evaluators: evaluators !== null && evaluators !== void 0 ? evaluators : [],
+ client,
+ logger,
+ concurrency,
+ dryRun,
+ setGlobalTracerProvider,
+ });
+ ranExperiment.evaluationRuns = evaluationRuns;
+ logger.info(`✅ Experiment ${experiment.id} completed`);
+ if (!isDryRun && client.config.baseUrl) {
+ const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
+ baseUrl: client.config.baseUrl,
+ datasetId: dataset.id,
+ experimentId: experiment.id,
+ });
+ logger.info(`🔍 View results: ${experimentUrl}`);
+ }
+ return ranExperiment;
  }
  /**
  * Run a task against n examples in a dataset.
  */
  function runTaskWithExamples({ client, experimentId, task, dataset, onComplete, logger, concurrency = 5, isDryRun, nExamples, tracer, }) {
  logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
- const run = (example) => __awaiter(this, void 0, void 0, function* () {
- return tracer.startActiveSpan(`Task: ${task.name}`, (span) => __awaiter(this, void 0, void 0, function* () {
+ const run = async (example) => {
+ return tracer.startActiveSpan(`Task: ${task.name}`, async (span) => {
  var _a, _b;
  logger.info(`🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`);
  const traceId = span.spanContext().traceId;
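
The `setGlobalTracerProvider` flag added above gates `provider.register()`. In OpenTelemetry JS, `register()` installs the provider as the process-global tracer provider, which would clobber one the host application already configured; opting out does not affect the experiment itself, since its spans come from the `provider.getTracer(projectName)` handle it holds directly. A hedged sketch of the opt-out (the option name and default are from the diff; the rest is illustrative):

```ts
// If your app already called its own tracerProvider.register(), keep it:
const ranExperiment = await runExperiment({
  dataset: { datasetId: "your-dataset-id" },
  task,
  evaluators,
  setGlobalTracerProvider: false, // skip provider.register() in the client
});
```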
@@ -205,7 +199,7 @@ function runTaskWithExamples({ client, experimentId, task, dataset, onComplete,
  error: null,
  };
  try {
- const taskOutput = yield (0, promisifyResult_1.promisifyResult)(task(example));
+ const taskOutput = await (0, promisifyResult_1.promisifyResult)(task(example));
  thisRun.output = taskOutput;
  }
  catch (error) {
@@ -216,7 +210,7 @@ function runTaskWithExamples({ client, experimentId, task, dataset, onComplete,
  thisRun.endTime = new Date();
  if (!isDryRun) {
  // Log the run to the server
- const res = yield client.POST("/v1/experiments/{experiment_id}/runs", {
+ const res = await client.POST("/v1/experiments/{experiment_id}/runs", {
  params: {
  path: {
  experiment_id: experimentId,
@@ -248,8 +242,8 @@ function runTaskWithExamples({ client, experimentId, task, dataset, onComplete,
  span === null || span === void 0 ? void 0 : span.end();
  onComplete(thisRun);
  return thisRun;
- }));
- });
+ });
+ };
  const q = (0, async_1.queue)(run, concurrency);
  const examplesToUse = dataset.examples.slice(0, nExamples);
  examplesToUse.forEach((example) => q.push(example, (err) => {
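
Both the task loop and the evaluator loop throttle work through `queue` from the `async` package, as the context lines above show: a worker plus a concurrency cap, items pushed with per-item error callbacks, then `drain()` awaited. A standalone sketch of that pattern (values illustrative; assumes async v3, where `drain()` without a callback returns a promise):

```ts
import { queue } from "async";

// Worker runs on each pushed item, at most 5 at a time.
const q = queue(async (n: number) => n * 2, 5);
[1, 2, 3].forEach((n) =>
  q.push(n, (err) => {
    if (err) console.error(`item ${n} failed: ${err}`);
  })
);
await q.drain(); // resolves once every queued item has been processed
```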
@@ -264,179 +258,178 @@ function runTaskWithExamples({ client, experimentId, task, dataset, onComplete,
  *
  * @experimental This feature is not complete, and will change in the future.
  */
- function evaluateExperiment(_a) {
- return __awaiter(this, arguments, void 0, function* ({ experiment, evaluators, client: _client, logger = console, concurrency = 5, dryRun = false, }) {
- var _b, _c, _d, _e;
- const isDryRun = typeof dryRun === "number" || dryRun === true;
- const client = _client !== null && _client !== void 0 ? _client : (0, client_1.createClient)();
- const baseUrl = client.config.baseUrl;
- (0, tiny_invariant_1.default)(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
- let provider;
- if (!isDryRun) {
- provider = (0, instrumention_1.createProvider)({
- projectName: "evaluators",
- baseUrl,
- headers: (_b = client.config.headers) !== null && _b !== void 0 ? _b : {},
- });
- }
- else {
- provider = (0, instrumention_1.createNoOpProvider)();
- }
- const tracer = isDryRun
- ? provider.getTracer("no-op")
- : provider.getTracer("evaluators");
- const nRuns = typeof dryRun === "number"
- ? Math.max(dryRun, Object.keys(experiment.runs).length)
- : Object.keys(experiment.runs).length;
- const dataset = yield (0, getDataset_1.getDataset)({
- dataset: { datasetId: experiment.datasetId },
- client,
+ async function evaluateExperiment({ experiment, evaluators, client: _client, logger = console, concurrency = 5, dryRun = false, setGlobalTracerProvider = true, }) {
+ var _a, _b, _c, _d;
+ const isDryRun = typeof dryRun === "number" || dryRun === true;
+ const client = _client !== null && _client !== void 0 ? _client : (0, client_1.createClient)();
+ const baseUrl = client.config.baseUrl;
+ (0, tiny_invariant_1.default)(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
+ let provider;
+ if (!isDryRun) {
+ provider = (0, instrumention_1.createProvider)({
+ projectName: "evaluators",
+ baseUrl,
+ headers: (_a = client.config.headers) !== null && _a !== void 0 ? _a : {},
  });
- (0, tiny_invariant_1.default)(dataset, `Dataset "${experiment.datasetId}" not found`);
- (0, tiny_invariant_1.default)(dataset.examples.length > 0, `Dataset "${experiment.datasetId}" has no examples`);
- (0, tiny_invariant_1.default)(experiment.runs, `Experiment "${experiment.id}" has no runs`);
- const runsToEvaluate = Object.values(experiment.runs).slice(0, nRuns);
- if ((evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) === 0) {
- return Object.assign(Object.assign({}, experiment), { evaluationRuns: [] });
+ if (setGlobalTracerProvider) {
+ provider.register();
  }
- logger.info(`🧠 Evaluating experiment "${experiment.id}" with ${(_c = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _c !== void 0 ? _c : 0} ${(0, pluralize_1.pluralize)("evaluator", (_d = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _d !== void 0 ? _d : 0)}`);
- if (!isDryRun && client.config.baseUrl) {
- const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
- baseUrl: client.config.baseUrl,
- datasetId: experiment.datasetId,
- experimentId: experiment.id,
+ }
+ else {
+ provider = (0, instrumention_1.createNoOpProvider)();
+ }
+ const tracer = isDryRun
+ ? provider.getTracer("no-op")
+ : provider.getTracer("evaluators");
+ const nRuns = typeof dryRun === "number"
+ ? Math.min(dryRun, Object.keys(experiment.runs).length)
+ : Object.keys(experiment.runs).length;
+ const dataset = await (0, getDataset_1.getDataset)({
+ dataset: { datasetId: experiment.datasetId },
+ client,
+ });
+ (0, tiny_invariant_1.default)(dataset, `Dataset "${experiment.datasetId}" not found`);
+ (0, tiny_invariant_1.default)(dataset.examples.length > 0, `Dataset "${experiment.datasetId}" has no examples`);
+ (0, tiny_invariant_1.default)(experiment.runs, `Experiment "${experiment.id}" has no runs`);
+ const runsToEvaluate = Object.values(experiment.runs).slice(0, nRuns);
+ if ((evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) === 0) {
+ return Object.assign(Object.assign({}, experiment), { evaluationRuns: [] });
+ }
+ logger.info(`🧠 Evaluating experiment "${experiment.id}" with ${(_b = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _b !== void 0 ? _b : 0} ${(0, pluralize_1.pluralize)("evaluator", (_c = evaluators === null || evaluators === void 0 ? void 0 : evaluators.length) !== null && _c !== void 0 ? _c : 0)}`);
+ if (!isDryRun && client.config.baseUrl) {
+ const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
+ baseUrl: client.config.baseUrl,
+ datasetId: experiment.datasetId,
+ experimentId: experiment.id,
+ });
+ logger.info(`🔗 View experiment evaluation: ${experimentUrl}`);
+ }
+ const evaluationRuns = {};
+ const examplesById = {};
+ for (const example of dataset.examples) {
+ examplesById[example.id] = example;
+ }
+ const onEvaluationComplete = (run) => {
+ evaluationRuns[run.id] = run;
+ };
+ // Run evaluators against all runs
+ // Flat list of evaluator + run tuples
+ const evaluatorsAndRuns = evaluators.flatMap((evaluator) => runsToEvaluate.map((run) => ({
+ evaluator,
+ run,
+ })));
+ const evaluatorsQueue = (0, async_1.queue)(async (evaluatorAndRun) => {
+ return tracer.startActiveSpan(`Evaluation: ${evaluatorAndRun.evaluator.name}`, async (span) => {
+ var _a, _b, _c;
+ const evalResult = await runEvaluator({
+ evaluator: evaluatorAndRun.evaluator,
+ run: evaluatorAndRun.run,
+ exampleCache: examplesById,
+ onComplete: onEvaluationComplete,
+ logger,
  });
- logger.info(`🔗 View experiment evaluation: ${experimentUrl}`);
- }
- const evaluationRuns = {};
- const examplesById = {};
- for (const example of dataset.examples) {
- examplesById[example.id] = example;
- }
- const onEvaluationComplete = (run) => {
- evaluationRuns[run.id] = run;
- };
- // Run evaluators against all runs
- // Flat list of evaluator + run tuples
- const evaluatorsAndRuns = evaluators.flatMap((evaluator) => runsToEvaluate.map((run) => ({
- evaluator,
- run,
- })));
- const evaluatorsQueue = (0, async_1.queue)((evaluatorAndRun) => __awaiter(this, void 0, void 0, function* () {
- return tracer.startActiveSpan(`Evaluation: ${evaluatorAndRun.evaluator.name}`, (span) => __awaiter(this, void 0, void 0, function* () {
- var _a, _b, _c;
- const evalResult = yield runEvaluator({
- evaluator: evaluatorAndRun.evaluator,
- run: evaluatorAndRun.run,
- exampleCache: examplesById,
- onComplete: onEvaluationComplete,
- logger,
+ span.setAttributes({
+ [openinference_semantic_conventions_1.SemanticConventions.OPENINFERENCE_SPAN_KIND]: openinference_semantic_conventions_1.OpenInferenceSpanKind.EVALUATOR,
+ [openinference_semantic_conventions_1.SemanticConventions.INPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON,
+ [openinference_semantic_conventions_1.SemanticConventions.INPUT_VALUE]: JSON.stringify({
+ input: (_a = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _a === void 0 ? void 0 : _a.input,
+ output: evaluatorAndRun.run.output,
+ expected: (_b = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _b === void 0 ? void 0 : _b.output,
+ metadata: (_c = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _c === void 0 ? void 0 : _c.metadata,
+ }),
+ [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON,
+ [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_VALUE]: (0, ensureString_1.ensureString)(evalResult.result),
+ });
+ if (evalResult.error) {
+ span.setStatus({
+ code: api_1.SpanStatusCode.ERROR,
+ message: evalResult.error,
  });
- span.setAttributes({
- [openinference_semantic_conventions_1.SemanticConventions.OPENINFERENCE_SPAN_KIND]: openinference_semantic_conventions_1.OpenInferenceSpanKind.EVALUATOR,
- [openinference_semantic_conventions_1.SemanticConventions.INPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON,
- [openinference_semantic_conventions_1.SemanticConventions.INPUT_VALUE]: JSON.stringify({
- input: (_a = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _a === void 0 ? void 0 : _a.input,
- output: evaluatorAndRun.run.output,
- expected: (_b = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _b === void 0 ? void 0 : _b.output,
- metadata: (_c = examplesById[evaluatorAndRun.run.datasetExampleId]) === null || _c === void 0 ? void 0 : _c.metadata,
- }),
- [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON,
- [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_VALUE]: (0, ensureString_1.ensureString)(evalResult.result),
+ }
+ else {
+ span.setStatus({ code: api_1.SpanStatusCode.OK });
+ }
+ if (evalResult.result) {
+ span.setAttributes((0, objectAsAttributes_1.objectAsAttributes)(evalResult.result));
+ }
+ evalResult.traceId = span.spanContext().traceId;
+ if (!isDryRun) {
+ // Log the evaluation to the server
+ // We log this without awaiting (e.g. best effort)
+ client.POST("/v1/experiment_evaluations", {
+ body: {
+ experiment_run_id: evaluatorAndRun.run.id,
+ name: evaluatorAndRun.evaluator.name,
+ annotator_kind: evaluatorAndRun.evaluator.kind,
+ start_time: evalResult.startTime.toISOString(),
+ end_time: evalResult.endTime.toISOString(),
+ result: Object.assign({}, evalResult.result),
+ error: evalResult.error,
+ trace_id: evalResult.traceId,
+ },
  });
- if (evalResult.error) {
- span.setStatus({
- code: api_1.SpanStatusCode.ERROR,
- message: evalResult.error,
- });
- }
- else {
- span.setStatus({ code: api_1.SpanStatusCode.OK });
- }
- if (evalResult.result) {
- span.setAttributes((0, objectAsAttributes_1.objectAsAttributes)(evalResult.result));
- }
- evalResult.traceId = span.spanContext().traceId;
- if (!isDryRun) {
- // Log the evaluation to the server
- // We log this without awaiting (e.g. best effort)
- client.POST("/v1/experiment_evaluations", {
- body: {
- experiment_run_id: evaluatorAndRun.run.id,
- name: evaluatorAndRun.evaluator.name,
- annotator_kind: evaluatorAndRun.evaluator.kind,
- start_time: evalResult.startTime.toISOString(),
- end_time: evalResult.endTime.toISOString(),
- result: Object.assign({}, evalResult.result),
- error: evalResult.error,
- trace_id: evalResult.traceId,
- },
- });
- }
- span.end();
- return evalResult;
- }));
- }), concurrency);
- if (!evaluatorsAndRuns.length) {
- logger.info(`⛔ No evaluators to run`);
- return Object.assign(Object.assign({}, experiment), { evaluationRuns: [] });
- }
- evaluatorsAndRuns.forEach((evaluatorAndRun) => evaluatorsQueue.push(evaluatorAndRun, (err) => {
- if (err) {
- logger.error(`❌ Error running evaluator "${evaluatorAndRun.evaluator.name}" on run "${evaluatorAndRun.run.id}": ${err}`);
  }
- }));
- yield evaluatorsQueue.drain();
- logger.info(`✅ Evaluation runs completed`);
- if (provider) {
- yield ((_e = provider.shutdown) === null || _e === void 0 ? void 0 : _e.call(provider));
+ span.end();
+ return evalResult;
+ });
+ }, concurrency);
+ if (!evaluatorsAndRuns.length) {
+ logger.info(`⛔ No evaluators to run`);
+ return Object.assign(Object.assign({}, experiment), { evaluationRuns: [] });
+ }
+ evaluatorsAndRuns.forEach((evaluatorAndRun) => evaluatorsQueue.push(evaluatorAndRun, (err) => {
+ if (err) {
+ logger.error(`❌ Error running evaluator "${evaluatorAndRun.evaluator.name}" on run "${evaluatorAndRun.run.id}": ${err}`);
  }
- return Object.assign(Object.assign({}, experiment), { evaluationRuns: Object.values(evaluationRuns) });
- });
+ }));
+ await evaluatorsQueue.drain();
+ logger.info(`✅ Evaluation runs completed`);
+ if (provider) {
+ await ((_d = provider.shutdown) === null || _d === void 0 ? void 0 : _d.call(provider));
+ }
+ return Object.assign(Object.assign({}, experiment), { evaluationRuns: Object.values(evaluationRuns) });
  }
  /**
  * Run an evaluator against a run.
  *
  * @experimental This feature is not complete, and will change in the future.
  */
- function runEvaluator(_a) {
- return __awaiter(this, arguments, void 0, function* ({ evaluator, run, exampleCache, onComplete, logger, }) {
- const example = exampleCache[run.datasetExampleId];
- (0, tiny_invariant_1.default)(example, `Example "${run.datasetExampleId}" not found`);
- const evaluate = () => __awaiter(this, void 0, void 0, function* () {
- var _a;
- logger.info(`🧠 Evaluating run "${run.id}" with evaluator "${evaluator.name}"`);
- const thisEval = {
- id: localId(),
- traceId: null,
- experimentRunId: run.id,
- startTime: new Date(),
- endTime: new Date(), // will get replaced with actual end time
- name: evaluator.name,
- result: null,
- error: null,
- annotatorKind: evaluator.kind,
- };
- try {
- const result = yield evaluator.evaluate({
- input: example.input,
- output: (_a = run.output) !== null && _a !== void 0 ? _a : null,
- expected: example.output,
- metadata: example.metadata,
- });
- thisEval.result = result;
- logger.info(`✅ Evaluator "${evaluator.name}" on run "${run.id}" completed`);
- }
- catch (error) {
- thisEval.error = error instanceof Error ? error.message : "Unknown error";
- logger.error(`❌ Evaluator "${evaluator.name}" on run "${run.id}" failed: ${thisEval.error}`);
- }
- thisEval.endTime = new Date();
- onComplete(thisEval);
- return thisEval;
- });
- return evaluate();
- });
+ async function runEvaluator({ evaluator, run, exampleCache, onComplete, logger, }) {
+ const example = exampleCache[run.datasetExampleId];
+ (0, tiny_invariant_1.default)(example, `Example "${run.datasetExampleId}" not found`);
+ const evaluate = async () => {
+ var _a;
+ logger.info(`🧠 Evaluating run "${run.id}" with evaluator "${evaluator.name}"`);
+ const thisEval = {
+ id: localId(),
+ traceId: null,
+ experimentRunId: run.id,
+ startTime: new Date(),
+ endTime: new Date(), // will get replaced with actual end time
+ name: evaluator.name,
+ result: null,
+ error: null,
+ annotatorKind: evaluator.kind,
+ };
+ try {
+ const result = await evaluator.evaluate({
+ input: example.input,
+ output: (_a = run.output) !== null && _a !== void 0 ? _a : null,
+ expected: example.output,
+ metadata: example.metadata,
+ });
+ thisEval.result = result;
+ logger.info(`✅ Evaluator "${evaluator.name}" on run "${run.id}" completed`);
+ }
+ catch (error) {
+ thisEval.error = error instanceof Error ? error.message : "Unknown error";
+ logger.error(`❌ Evaluator "${evaluator.name}" on run "${run.id}" failed: ${thisEval.error}`);
+ }
+ thisEval.endTime = new Date();
+ onComplete(thisEval);
+ return thisEval;
+ };
+ return evaluate();
  }
  /**
  * Wrap an evaluator function in an object with a name property.
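
Beyond the async/await cleanup, this hunk carries two behavioral changes: `evaluateExperiment` now accepts the same `setGlobalTracerProvider` flag (forwarded from `runExperiment`), and the dry-run cap for evaluation flips from `Math.max` to `Math.min`. The old expression made a numeric `dryRun` evaluate every run instead of a sample; a worked example with illustrative values:

```ts
// nRuns for dryRun = 2 over an experiment with 10 runs:
const dryRun = 2;
const runCount = 10;
const before = Math.max(dryRun, runCount); // 10 — the "dry run" evaluated everything
const after = Math.min(dryRun, runCount);  //  2 — now capped at the requested sample
```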