@arizeai/phoenix-client 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/dist/esm/__generated__/api/v1.d.ts +321 -9
  2. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  3. package/dist/esm/experiments/createExperiment.d.ts +39 -0
  4. package/dist/esm/experiments/createExperiment.d.ts.map +1 -0
  5. package/dist/esm/experiments/createExperiment.js +43 -0
  6. package/dist/esm/experiments/createExperiment.js.map +1 -0
  7. package/dist/esm/experiments/deleteExperiment.d.ts +36 -0
  8. package/dist/esm/experiments/deleteExperiment.d.ts.map +1 -0
  9. package/dist/esm/experiments/deleteExperiment.js +49 -0
  10. package/dist/esm/experiments/deleteExperiment.js.map +1 -0
  11. package/dist/esm/experiments/getExperimentInfo.d.ts.map +1 -1
  12. package/dist/esm/experiments/getExperimentInfo.js +9 -2
  13. package/dist/esm/experiments/getExperimentInfo.js.map +1 -1
  14. package/dist/esm/experiments/index.d.ts +5 -0
  15. package/dist/esm/experiments/index.d.ts.map +1 -1
  16. package/dist/esm/experiments/index.js +5 -0
  17. package/dist/esm/experiments/index.js.map +1 -1
  18. package/dist/esm/experiments/listExperiments.d.ts +29 -0
  19. package/dist/esm/experiments/listExperiments.d.ts.map +1 -0
  20. package/dist/esm/experiments/listExperiments.js +59 -0
  21. package/dist/esm/experiments/listExperiments.js.map +1 -0
  22. package/dist/esm/experiments/resumeEvaluation.d.ts +105 -0
  23. package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -0
  24. package/dist/esm/experiments/resumeEvaluation.js +558 -0
  25. package/dist/esm/experiments/resumeEvaluation.js.map +1 -0
  26. package/dist/esm/experiments/resumeExperiment.d.ts +102 -0
  27. package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -0
  28. package/dist/esm/experiments/resumeExperiment.js +517 -0
  29. package/dist/esm/experiments/resumeExperiment.js.map +1 -0
  30. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  31. package/dist/esm/experiments/runExperiment.js +28 -2
  32. package/dist/esm/experiments/runExperiment.js.map +1 -1
  33. package/dist/esm/prompts/createPrompt.d.ts +19 -1
  34. package/dist/esm/prompts/createPrompt.d.ts.map +1 -1
  35. package/dist/esm/prompts/createPrompt.js +14 -1
  36. package/dist/esm/prompts/createPrompt.js.map +1 -1
  37. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  38. package/dist/esm/types/experiments.d.ts +60 -3
  39. package/dist/esm/types/experiments.d.ts.map +1 -1
  40. package/dist/esm/utils/channel.d.ts +229 -0
  41. package/dist/esm/utils/channel.d.ts.map +1 -0
  42. package/dist/esm/utils/channel.js +352 -0
  43. package/dist/esm/utils/channel.js.map +1 -0
  44. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  45. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  46. package/dist/esm/utils/isHttpError.d.ts +21 -0
  47. package/dist/esm/utils/isHttpError.d.ts.map +1 -0
  48. package/dist/esm/utils/isHttpError.js +33 -0
  49. package/dist/esm/utils/isHttpError.js.map +1 -0
  50. package/dist/src/__generated__/api/v1.d.ts +321 -9
  51. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  52. package/dist/src/experiments/createExperiment.d.ts +39 -0
  53. package/dist/src/experiments/createExperiment.d.ts.map +1 -0
  54. package/dist/src/experiments/createExperiment.js +43 -0
  55. package/dist/src/experiments/createExperiment.js.map +1 -0
  56. package/dist/src/experiments/deleteExperiment.d.ts +36 -0
  57. package/dist/src/experiments/deleteExperiment.d.ts.map +1 -0
  58. package/dist/src/experiments/deleteExperiment.js +52 -0
  59. package/dist/src/experiments/deleteExperiment.js.map +1 -0
  60. package/dist/src/experiments/getExperimentInfo.d.ts.map +1 -1
  61. package/dist/src/experiments/getExperimentInfo.js +9 -2
  62. package/dist/src/experiments/getExperimentInfo.js.map +1 -1
  63. package/dist/src/experiments/index.d.ts +5 -0
  64. package/dist/src/experiments/index.d.ts.map +1 -1
  65. package/dist/src/experiments/index.js +5 -0
  66. package/dist/src/experiments/index.js.map +1 -1
  67. package/dist/src/experiments/listExperiments.d.ts +29 -0
  68. package/dist/src/experiments/listExperiments.d.ts.map +1 -0
  69. package/dist/src/experiments/listExperiments.js +66 -0
  70. package/dist/src/experiments/listExperiments.js.map +1 -0
  71. package/dist/src/experiments/resumeEvaluation.d.ts +105 -0
  72. package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -0
  73. package/dist/src/experiments/resumeEvaluation.js +584 -0
  74. package/dist/src/experiments/resumeEvaluation.js.map +1 -0
  75. package/dist/src/experiments/resumeExperiment.d.ts +102 -0
  76. package/dist/src/experiments/resumeExperiment.d.ts.map +1 -0
  77. package/dist/src/experiments/resumeExperiment.js +540 -0
  78. package/dist/src/experiments/resumeExperiment.js.map +1 -0
  79. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  80. package/dist/src/experiments/runExperiment.js +28 -2
  81. package/dist/src/experiments/runExperiment.js.map +1 -1
  82. package/dist/src/prompts/createPrompt.d.ts +19 -1
  83. package/dist/src/prompts/createPrompt.d.ts.map +1 -1
  84. package/dist/src/prompts/createPrompt.js +14 -1
  85. package/dist/src/prompts/createPrompt.js.map +1 -1
  86. package/dist/src/types/experiments.d.ts +60 -3
  87. package/dist/src/types/experiments.d.ts.map +1 -1
  88. package/dist/src/utils/channel.d.ts +229 -0
  89. package/dist/src/utils/channel.d.ts.map +1 -0
  90. package/dist/src/utils/channel.js +385 -0
  91. package/dist/src/utils/channel.js.map +1 -0
  92. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  93. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  94. package/dist/src/utils/isHttpError.d.ts +21 -0
  95. package/dist/src/utils/isHttpError.d.ts.map +1 -0
  96. package/dist/src/utils/isHttpError.js +37 -0
  97. package/dist/src/utils/isHttpError.js.map +1 -0
  98. package/dist/tsconfig.tsbuildinfo +1 -1
  99. package/package.json +1 -1
  100. package/src/__generated__/api/v1.ts +321 -9
  101. package/src/experiments/createExperiment.ts +90 -0
  102. package/src/experiments/deleteExperiment.ts +67 -0
  103. package/src/experiments/getExperimentInfo.ts +9 -2
  104. package/src/experiments/index.ts +5 -0
  105. package/src/experiments/listExperiments.ts +83 -0
  106. package/src/experiments/resumeEvaluation.ts +799 -0
  107. package/src/experiments/resumeExperiment.ts +742 -0
  108. package/src/experiments/runExperiment.ts +30 -2
  109. package/src/prompts/createPrompt.ts +19 -1
  110. package/src/types/experiments.ts +62 -3
  111. package/src/utils/channel.ts +397 -0
  112. package/src/utils/isHttpError.ts +45 -0
@@ -0,0 +1,742 @@
1
+ import {
2
+ MimeType,
3
+ OpenInferenceSpanKind,
4
+ SemanticConventions,
5
+ } from "@arizeai/openinference-semantic-conventions";
6
+ import {
7
+ type DiagLogLevel,
8
+ NodeTracerProvider,
9
+ objectAsAttributes,
10
+ register,
11
+ SpanStatusCode,
12
+ Tracer,
13
+ } from "@arizeai/phoenix-otel";
14
+
15
+ import { components } from "../__generated__/api/v1";
16
+ import { createClient, type PhoenixClient } from "../client";
17
+ import { ClientFn } from "../types/core";
18
+ import { ExampleWithId } from "../types/datasets";
19
+ import type { Evaluator, ExperimentTask } from "../types/experiments";
20
+ import { type Logger } from "../types/logger";
21
+ import { Channel, ChannelError } from "../utils/channel";
22
+ import { ensureString } from "../utils/ensureString";
23
+ import { isHttpErrorWithStatus } from "../utils/isHttpError";
24
+ import { toObjectHeaders } from "../utils/toObjectHeaders";
25
+ import { getDatasetExperimentsUrl, getExperimentUrl } from "../utils/urlUtils";
26
+
27
+ import { getExperimentInfo } from "./getExperimentInfo.js";
28
+ import { resumeEvaluation } from "./resumeEvaluation";
29
+
30
+ import invariant from "tiny-invariant";
31
+
32
/**
 * Raised when execution is deliberately halted because a task failed while
 * `stopOnFirstError` was enabled. The distinct name lets callers tell an
 * intentional abort apart from an infrastructure failure.
 * @internal - Not exported to minimize API surface area
 */
class TaskAbortedError extends Error {
  /**
   * @param message - Human-readable description of why execution stopped
   * @param cause - The underlying error that triggered the abort, if any
   */
  constructor(message: string, cause?: Error) {
    super(message);
    // Keep the triggering error reachable for callers that inspect `cause`.
    this.cause = cause;
    // Callers discriminate on `error.name` rather than `instanceof`.
    this.name = "TaskAbortedError";
  }
}
44
+
45
/**
 * Raised when the producer cannot retrieve incomplete runs from the server.
 * Treated as critical: it is surfaced to the caller even when
 * `stopOnFirstError` is `false`.
 * @internal - Not exported to minimize API surface area
 */
class TaskFetchError extends Error {
  /**
   * @param message - Human-readable description of the fetch failure
   * @param cause - The underlying error raised while fetching, if any
   */
  constructor(message: string, cause?: Error) {
    super(message);
    // Keep the underlying failure reachable for callers that inspect `cause`.
    this.cause = cause;
    // Callers discriminate on `error.name` rather than `instanceof`.
    this.name = "TaskFetchError";
  }
}
57
+
58
/**
 * Options accepted by `resumeExperiment`. Every property is read-only.
 */
export type ResumeExperimentParams = ClientFn &
  Readonly<{
    /**
     * ID of the experiment whose incomplete runs should be re-executed.
     */
    experimentId: string;
    /**
     * Task invoked once for every incomplete (example, repetition) pair.
     */
    task: ExperimentTask;
    /**
     * Evaluators to run after the task runs have finished.
     * @default undefined
     */
    evaluators?: readonly Evaluator[];
    /**
     * Destination for progress and error messages.
     * @default console
     */
    logger?: Logger;
    /**
     * How many task executions may be in flight at the same time.
     * @default 5
     */
    concurrency?: number;
    /**
     * Whether the tracer provider created for this run is registered globally.
     * @default true
     */
    setGlobalTracerProvider?: boolean;
    /**
     * Whether spans are exported through a batch span processor.
     * @default true
     */
    useBatchSpanProcessor?: boolean;
    /**
     * Log level for the default DiagConsoleLogger used while tracing.
     */
    diagLogLevel?: DiagLogLevel;
    /**
     * When true, stop processing as soon as any task fails.
     * @default false
     */
    stopOnFirstError?: boolean;
  }>;
102
+
103
/**
 * Page size used when requesting incomplete runs from the server.
 */
const DEFAULT_PAGE_SIZE = 50;

/**
 * Multiplier applied to the page size to obtain the channel capacity.
 * With a value of 2 the producer can fetch batch N+1 while the workers are
 * still processing batch N, so workers are not left idle and memory use stays
 * bounded. A full channel blocks the producer, which provides backpressure.
 */
const CHANNEL_CAPACITY_MULTIPLIER = 2;

/**
 * A single unit of work passed from the producer to the workers:
 * one dataset example paired with the repetition number to run.
 */
type TaskItem = Readonly<{
  example: ExampleWithId;
  repetitionNumber: number;
}>;
119
+
120
/**
 * Converts a dataset example from an incomplete-runs API response into the
 * client-side `ExampleWithId` shape.
 *
 * Only `null`/`undefined` values are replaced by defaults; any other value the
 * server sent for `output` or `metadata` is passed through unchanged.
 *
 * @param apiExample - The `dataset_example` object returned by the server
 * @returns The example with `output` defaulted to `null`, `metadata` defaulted
 *   to `{}`, and `updated_at` parsed into a `Date`
 */
function buildExampleFromApiResponse(
  apiExample: components["schemas"]["DatasetExample"]
): ExampleWithId {
  return {
    id: apiExample.id,
    input: apiExample.input,
    // `??` (not `||`) so that falsy-but-present values are not discarded.
    output: apiExample.output ?? null,
    metadata: apiExample.metadata ?? {},
    updatedAt: new Date(apiExample.updated_at),
  };
}
134
+
135
/**
 * Re-throws a fetch failure, replacing JSON parse failures with a message that
 * tells the user the feature is unavailable on their server.
 *
 * A `SyntaxError` whose message mentions "json" is taken to mean the server
 * answered with a non-JSON body (likely a 404 HTML page from an older server).
 * In that case the server version is looked up on a best-effort basis and
 * appended to the message. Any other error is re-thrown untouched.
 *
 * @param error - The error caught from the failed request
 * @param client - Client whose `config.baseUrl` is used for the version lookup
 * @param featureName - Feature name interpolated into the upgrade message
 * @returns Never resolves; the returned promise always rejects
 * @throws The original `error`, or an `Error` asking the user to upgrade
 */
async function handleFetchError(
  error: unknown,
  client: PhoenixClient,
  featureName: string
): Promise<never> {
  const looksLikeJsonParseFailure =
    error instanceof SyntaxError &&
    error.message.toLowerCase().includes("json");

  // Anything other than a JSON parse failure is passed straight through.
  if (!looksLikeJsonParseFailure) {
    throw error;
  }

  let versionSuffix = "";
  try {
    const response = await fetch(
      `${client.config.baseUrl || ""}/arize_phoenix_version`
    );
    if (response.ok) {
      versionSuffix = ` Your current server version is ${await response.text()}.`;
    }
  } catch {
    // The version is only extra context; a failed lookup is not reported.
  }

  throw new Error(
    `The ${featureName} feature is not available on this Phoenix server. ` +
      "Please upgrade your Phoenix server to use this feature." +
      versionSuffix
  );
}
170
+
171
/**
 * Registers a tracer provider for the experiment's project and obtains a
 * tracer from it.
 *
 * @param options - Tracing configuration; `projectName` selects the project
 *   and `baseUrl` is passed to `register` as the `url`
 * @returns The registered provider together with a tracer named after the
 *   project, or `null` when `projectName` is `null` or empty
 */
function setupTracer(options: {
  projectName: string | null;
  baseUrl: string;
  headers?: Record<string, string>;
  useBatchSpanProcessor: boolean;
  diagLogLevel?: DiagLogLevel;
  setGlobalTracerProvider: boolean;
}): { provider: NodeTracerProvider; tracer: Tracer } | null {
  const { projectName } = options;

  // Without a project name nothing is registered.
  if (!projectName) {
    return null;
  }

  const provider = register({
    projectName,
    url: options.baseUrl,
    headers: options.headers,
    batch: options.useBatchSpanProcessor,
    diagLogLevel: options.diagLogLevel,
    global: options.setGlobalTracerProvider,
  });

  return { provider, tracer: provider.getTracer(projectName) };
}
205
+
206
/**
 * Writes the resume summary to the logger at info level.
 *
 * @param logger - Logger that receives the summary lines
 * @param experimentId - ID printed in the summary
 * @param totalProcessed - Count reported as "Incomplete runs processed"
 * @param totalCompleted - Count reported as "Successfully completed"
 */
function printExperimentSummary({
  logger,
  experimentId,
  totalProcessed,
  totalCompleted,
}: {
  logger: Logger;
  experimentId: string;
  totalProcessed: number;
  totalCompleted: number;
}): void {
  const divider = "=".repeat(70);
  const summaryLines = [
    "\n" + divider,
    "📊 Experiment Resume Summary",
    divider,
    `Experiment ID: ${experimentId}`,
    `Incomplete runs processed: ${totalProcessed}`,
    `Successfully completed: ${totalCompleted}`,
    divider,
  ];

  // One logger call per line, in order.
  for (const line of summaryLines) {
    logger.info(line);
  }
}
228
+
229
/**
 * Resume an incomplete experiment by running only the missing or failed runs.
 *
 * The server is asked which (example, repetition) pairs are incomplete, and the
 * task is re-run only for those pairs. Incomplete runs are fetched page by page
 * by a single producer and handed to a pool of workers through a bounded
 * channel, so only a limited number of pending items is held in memory.
 * Optionally, evaluators are run after task execution has finished.
 *
 * `concurrency` is normalized to a whole number of at least 1; values that are
 * zero, negative, or not finite would otherwise leave the channel without any
 * consumer.
 *
 * @throws {Error} Throws different error types based on failure:
 * - "TaskFetchError": Unable to fetch incomplete runs from the server.
 *   Always thrown regardless of stopOnFirstError, as it indicates critical infrastructure failure.
 * - "TaskAbortedError": stopOnFirstError=true and a task failed.
 *   Original error preserved in `cause` property.
 * - Generic Error: Other task execution errors or unexpected failures.
 *
 * @example
 * ```ts
 * import { resumeExperiment } from "@arizeai/phoenix-client/experiments";
 *
 * // Resume an interrupted experiment
 * try {
 *   await resumeExperiment({
 *     experimentId: "exp_123",
 *     task: myTask,
 *   });
 * } catch (error) {
 *   // Handle by error name (no instanceof needed)
 *   if (error.name === "TaskFetchError") {
 *     console.error("Failed to connect to server:", error.cause);
 *   } else if (error.name === "TaskAbortedError") {
 *     console.error("Task stopped due to error:", error.cause);
 *   } else {
 *     console.error("Unexpected error:", error);
 *   }
 * }
 *
 * // Resume with evaluators
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   evaluators: [correctnessEvaluator, relevanceEvaluator],
 * });
 *
 * // Stop on first error (useful for debugging)
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   stopOnFirstError: true, // Exit immediately on first task failure
 * });
 * ```
 */
export async function resumeExperiment({
  client: _client,
  experimentId,
  task,
  evaluators,
  logger = console,
  concurrency = 5,
  setGlobalTracerProvider = true,
  useBatchSpanProcessor = true,
  diagLogLevel,
  stopOnFirstError = false,
}: ResumeExperimentParams): Promise<void> {
  const client = _client ?? createClient();
  const pageSize = DEFAULT_PAGE_SIZE;
  // With zero workers nothing would drain the channel: no task would run and
  // the producer could block forever once the buffer is full.
  const workerCount = Number.isFinite(concurrency)
    ? Math.max(1, Math.floor(concurrency))
    : 1;

  // Get experiment info
  logger.info(`🔍 Fetching experiment info...`);
  const experiment = await getExperimentInfo({ client, experimentId });

  // Check if there are incomplete runs
  const totalExpected = experiment.exampleCount * experiment.repetitions;
  const incompleteCount = totalExpected - experiment.successfulRunCount;

  // `<=` so a successful-run count above the expected total also counts as done.
  if (incompleteCount <= 0) {
    logger.info("✅ No incomplete runs found. Experiment is already complete.");
    return;
  }

  logger.info(
    `🧪 Resuming experiment with ${incompleteCount} incomplete runs...`
  );

  // Get base URL for tracing and URL generation
  const baseUrl = client.config.baseUrl;
  invariant(
    baseUrl,
    "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client."
  );

  // Initialize tracer (only if experiment has a project_name)
  const tracerSetup = setupTracer({
    projectName: experiment.projectName,
    baseUrl,
    headers: client.config.headers
      ? toObjectHeaders(client.config.headers)
      : undefined,
    useBatchSpanProcessor,
    diagLogLevel,
    setGlobalTracerProvider,
  });

  const provider = tracerSetup?.provider ?? null;
  const taskTracer = tracerSetup?.tracer ?? null;

  // Display URLs
  const datasetExperimentsUrl = getDatasetExperimentsUrl({
    baseUrl,
    datasetId: experiment.datasetId,
  });
  const experimentUrl = getExperimentUrl({
    baseUrl,
    datasetId: experiment.datasetId,
    experimentId: experiment.id,
  });

  logger.info(`📺 View dataset experiments: ${datasetExperimentsUrl}`);
  logger.info(`🔗 View this experiment: ${experimentUrl}`);

  // Bounded buffer between the producer and the workers
  const taskChannel = new Channel<TaskItem>(
    pageSize * CHANNEL_CAPACITY_MULTIPLIER
  );

  // Abort controller for stopOnFirstError coordination
  const abortController = new AbortController();
  const { signal } = abortController;

  let totalProcessed = 0;
  let totalCompleted = 0;
  let totalFailed = 0;

  // Producer: Fetch incomplete runs and send to channel
  async function fetchIncompleteRuns(): Promise<void> {
    let cursor: string | null = null;

    try {
      do {
        // Stop fetching if abort signal received
        if (signal.aborted) {
          logger.info("🛑 Stopping fetch due to error in task");
          break;
        }

        let res: {
          data?: components["schemas"]["GetIncompleteExperimentRunsResponseBody"];
        };

        try {
          res = await client.GET(
            "/v1/experiments/{experiment_id}/incomplete-runs",
            {
              params: {
                path: {
                  experiment_id: experimentId,
                },
                query: {
                  cursor,
                  limit: pageSize,
                },
              },
            }
          );
        } catch (error: unknown) {
          // Check for version compatibility issues and throw helpful error
          try {
            await handleFetchError(error, client, "resume_experiment");
            // handleFetchError always rejects; this throw is a safety net
            throw new Error("handleFetchError should never return");
          } catch (handledError) {
            // Wrap the error (from handleFetchError or original) in semantic error type
            throw new TaskFetchError(
              "Failed to fetch incomplete runs from server",
              handledError instanceof Error ? handledError : undefined
            );
          }
        }

        cursor = res.data?.next_cursor ?? null;
        const batchIncomplete = res.data?.data;
        invariant(batchIncomplete, "Failed to fetch incomplete runs");

        if (batchIncomplete.length === 0) {
          break;
        }

        // Send tasks to channel (send waits while the channel is full)
        let batchCount = 0;
        for (const incomplete of batchIncomplete) {
          // Stop sending items if abort signal received
          if (signal.aborted) {
            break;
          }

          const example = buildExampleFromApiResponse(
            incomplete.dataset_example
          );
          for (const repNum of incomplete.repetition_numbers) {
            // Stop sending items if abort signal received
            if (signal.aborted) {
              break;
            }

            await taskChannel.send({ example, repetitionNumber: repNum });
            batchCount++;
            totalProcessed++;
          }
        }

        logger.info(
          `Fetched batch of ${batchCount} incomplete runs (channel buffer: ${taskChannel.length})`
        );
      } while (cursor !== null && !signal.aborted);
    } catch (error) {
      // Re-throw with context preservation
      if (error instanceof TaskFetchError) {
        throw error;
      }
      // ChannelError from a blocked send() should bubble up naturally
      // (happens when the channel closes while the producer is blocked)
      if (error instanceof ChannelError) {
        throw error;
      }
      // Wrap any other unexpected error raised while producing tasks
      throw new TaskFetchError(
        "Unexpected error during task fetch",
        error instanceof Error ? error : undefined
      );
    } finally {
      taskChannel.close(); // Signal workers we're done
    }
  }

  // Worker: Process tasks from channel
  async function processTasksFromChannel(): Promise<void> {
    for await (const item of taskChannel) {
      // Stop processing if abort signal received
      if (signal.aborted) {
        break;
      }

      try {
        await runSingleTask({
          client,
          experimentId,
          task,
          example: item.example,
          repetitionNumber: item.repetitionNumber,
          tracer: taskTracer,
        });
        totalCompleted++;
      } catch (error) {
        totalFailed++;
        logger.error(
          `Failed to run task for example ${item.example.id}, repetition ${item.repetitionNumber}: ${error}`
        );

        // If stopOnFirstError is enabled, abort and re-throw
        if (stopOnFirstError) {
          logger.error("🛑 Stopping on first error");
          abortController.abort();
          throw error;
        }
      }
    }
  }

  // Start concurrent execution
  // Wrap in try-finally to ensure channel is always closed, even if Promise.all throws
  let executionError: Error | null = null;
  try {
    const producerTask = fetchIncompleteRuns();
    const workerTasks = Array.from({ length: workerCount }, () =>
      processTasksFromChannel()
    );

    // Wait for producer and all workers to finish
    await Promise.all([producerTask, ...workerTasks]);
  } catch (error) {
    // Classify and handle errors based on their nature
    const err = error instanceof Error ? error : new Error(String(error));

    // Always surface producer/infrastructure errors
    if (error instanceof TaskFetchError) {
      // Producer failed - this is ALWAYS critical regardless of stopOnFirstError
      logger.error(`❌ Critical: Failed to fetch incomplete runs from server`);
      executionError = err;
    } else if (error instanceof ChannelError && signal.aborted) {
      // Channel closed due to intentional abort - wrap in semantic error
      executionError = new TaskAbortedError(
        "Task execution stopped due to error in concurrent worker",
        err
      );
    } else if (stopOnFirstError) {
      // Worker error in stopOnFirstError mode - already logged by worker.
      // The failing worker's own rejection is what Promise.all reports, so it
      // is wrapped here to honour the documented TaskAbortedError contract;
      // the original error stays available through `cause`.
      executionError = signal.aborted
        ? new TaskAbortedError(
            `Task execution stopped on first error: ${err.message}`,
            err
          )
        : err;
    } else {
      // Unexpected error (not from worker, not from producer fetch)
      // This could be a bug in our code or infrastructure failure
      logger.error(`❌ Unexpected error during task execution: ${err.message}`);
      executionError = err;
    }
  } finally {
    // Ensure channel is closed even if there are unexpected errors
    // This is a safety net in case producer's finally block didn't execute
    if (!taskChannel.isClosed) {
      taskChannel.close();
    }
  }

  // Only show completion message if we didn't stop on error
  if (!executionError) {
    logger.info(`✅ Task runs completed.`);
  }

  if (totalFailed > 0 && !executionError) {
    logger.info(
      `⚠️ Warning: ${totalFailed} out of ${totalProcessed} runs failed.`
    );
  }

  try {
    // Run evaluators if provided (only on runs missing evaluations)
    // Skip evaluators if we stopped on error
    if (evaluators && evaluators.length > 0 && !executionError) {
      logger.info(`\n🔬 Running evaluators...`);
      await resumeEvaluation({
        experimentId,
        evaluators: [...evaluators],
        client,
        logger,
        concurrency: workerCount,
        setGlobalTracerProvider,
        useBatchSpanProcessor,
        diagLogLevel,
        stopOnFirstError,
      });
    }

    // Print summary
    printExperimentSummary({
      logger,
      experimentId: experiment.id,
      totalProcessed,
      totalCompleted,
    });
  } finally {
    // Flush spans (if tracer was initialized), also when evaluation throws,
    // so spans recorded by the task runs are still exported.
    if (provider) {
      await provider.forceFlush();
    }
  }

  // Re-throw the error captured during task execution, if any
  if (executionError) {
    throw executionError;
  }
}
586
+
587
/**
 * Record a task result to the API (without executing the task).
 *
 * Posts one run for the given example and repetition. A 409 Conflict response
 * is treated as success because it means a result already exists for the run.
 *
 * @param client - Client used to POST the run
 * @param experimentId - Experiment the run belongs to
 * @param example - Dataset example the task was run on; only `id` is sent
 * @param repetitionNumber - Repetition of the example this run covers
 * @param output - Task output, sent as the run's `output`
 * @param error - Error description for a failed run. Any provided string,
 *   including an empty one, is sent; only `undefined`/`null` omit the field
 * @param startTime - When task execution started
 * @param endTime - When task execution finished
 * @param traceId - Trace ID associated with the run; defaults to `null`
 * @throws Re-throws any error from the request other than a 409 Conflict
 */
async function recordTaskResult({
  client,
  experimentId,
  example,
  repetitionNumber,
  output,
  error,
  startTime,
  endTime,
  traceId = null,
}: {
  readonly client: PhoenixClient;
  readonly experimentId: string;
  readonly example: ExampleWithId;
  readonly repetitionNumber: number;
  readonly output: unknown;
  readonly error?: string;
  readonly startTime: Date;
  readonly endTime: Date;
  readonly traceId?: string | null;
}): Promise<void> {
  try {
    await client.POST("/v1/experiments/{experiment_id}/runs", {
      params: {
        path: {
          experiment_id: experimentId,
        },
      },
      body: {
        dataset_example_id: example.id,
        repetition_number: repetitionNumber,
        output: output as Record<string, unknown>,
        start_time: startTime.toISOString(),
        end_time: endTime.toISOString(),
        // Nullish check rather than truthiness: an empty error string still
        // marks the run as failed and must not be dropped.
        error: error == null ? undefined : ensureString(error),
        trace_id: traceId,
      },
    });
  } catch (err: unknown) {
    // Ignore 409 Conflict - result already exists (idempotency)
    if (isHttpErrorWithStatus(err, 409)) {
      return;
    }
    throw err; // Re-throw other errors
  }
}
636
+
637
/**
 * Produces a non-empty description for a value thrown by a task.
 * Falls back to the error's name (or "Unknown error") when the message is
 * empty, so a failed run is never recorded with an empty error string.
 */
function describeTaskError(err: unknown): string {
  const message = err instanceof Error ? err.message : String(err);
  if (message) {
    return message;
  }
  return err instanceof Error && err.name ? err.name : "Unknown error";
}

/**
 * Run a single task and record the result with optional tracing.
 *
 * The task is invoked exactly once. Its outcome is always passed to
 * `recordTaskResult` (from a `finally` block), with `output` left as `null`
 * and `error` set to a non-empty description when the task throws. The task's
 * error is then re-thrown to the caller.
 *
 * @param client - Client used to record the run
 * @param experimentId - Experiment the run belongs to
 * @param task - Task to execute on the example
 * @param example - Dataset example passed to the task
 * @param repetitionNumber - Repetition of the example being run
 * @param tracer - Tracer used to wrap the execution in a span; when `null`
 *   the task runs without tracing and no trace ID is recorded
 * @throws Re-throws whatever the task throws; an error raised while recording
 *   the result also propagates
 */
async function runSingleTask({
  client,
  experimentId,
  task,
  example,
  repetitionNumber,
  tracer,
}: {
  readonly client: PhoenixClient;
  readonly experimentId: string;
  readonly task: ExperimentTask;
  readonly example: ExampleWithId;
  readonly repetitionNumber: number;
  readonly tracer: Tracer | null;
}): Promise<void> {
  const startTime = new Date();

  // If no tracer (no project_name), execute without tracing
  if (!tracer) {
    let output: unknown = null; // Stays null when the task fails
    let error: string | undefined;

    try {
      output = await task(example);
    } catch (err) {
      error = describeTaskError(err);
      throw err;
    } finally {
      const endTime = new Date();
      await recordTaskResult({
        client,
        experimentId,
        example,
        repetitionNumber,
        output,
        error,
        startTime,
        endTime,
      });
    }
    return;
  }

  // With tracer: wrap execution in a span for observability
  return tracer.startActiveSpan(
    `Task: ${task.name || "anonymous"}`,
    async (span) => {
      // Set span attributes
      span.setAttributes({
        [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
          OpenInferenceSpanKind.CHAIN,
        [SemanticConventions.INPUT_VALUE]: ensureString(example.input),
        [SemanticConventions.INPUT_MIME_TYPE]: MimeType.JSON,
        ...objectAsAttributes({
          experiment_id: experimentId,
          dataset_example_id: example.id,
          repetition_number: repetitionNumber,
        }),
      });

      let output: unknown = null; // Stays null when the task fails
      let error: string | undefined;

      try {
        // Execute the task (only once!)
        output = await task(example);

        // Set output attributes
        span.setAttributes({
          [SemanticConventions.OUTPUT_VALUE]: ensureString(output),
          [SemanticConventions.OUTPUT_MIME_TYPE]: MimeType.JSON,
        });
        span.setStatus({ code: SpanStatusCode.OK });
      } catch (err) {
        error = describeTaskError(err);

        span.setStatus({
          code: SpanStatusCode.ERROR,
          message: error,
        });
        span.recordException(err as Error);

        throw err;
      } finally {
        const endTime = new Date();
        // End the span before recording so the span does not include the API call
        span.end();

        // Record result to API
        await recordTaskResult({
          client,
          experimentId,
          example,
          repetitionNumber,
          output,
          error,
          startTime,
          endTime,
          traceId: span.spanContext().traceId,
        });
      }
    }
  );
}