@arizeai/phoenix-client 5.2.1 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. package/README.md +3 -3
  2. package/dist/esm/__generated__/api/v1.d.ts +321 -9
  3. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  4. package/dist/esm/experiments/createExperiment.d.ts +39 -0
  5. package/dist/esm/experiments/createExperiment.d.ts.map +1 -0
  6. package/dist/esm/experiments/createExperiment.js +43 -0
  7. package/dist/esm/experiments/createExperiment.js.map +1 -0
  8. package/dist/esm/experiments/deleteExperiment.d.ts +36 -0
  9. package/dist/esm/experiments/deleteExperiment.d.ts.map +1 -0
  10. package/dist/esm/experiments/deleteExperiment.js +49 -0
  11. package/dist/esm/experiments/deleteExperiment.js.map +1 -0
  12. package/dist/esm/experiments/getExperimentInfo.d.ts.map +1 -1
  13. package/dist/esm/experiments/getExperimentInfo.js +9 -2
  14. package/dist/esm/experiments/getExperimentInfo.js.map +1 -1
  15. package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
  16. package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
  17. package/dist/esm/experiments/helpers/asExperimentEvaluator.js +19 -0
  18. package/dist/esm/experiments/helpers/asExperimentEvaluator.js.map +1 -0
  19. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
  20. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
  21. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js +18 -0
  22. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
  23. package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
  24. package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
  25. package/dist/esm/experiments/helpers/getExperimentEvaluators.js +58 -0
  26. package/dist/esm/experiments/helpers/getExperimentEvaluators.js.map +1 -0
  27. package/dist/esm/experiments/helpers/index.d.ts +4 -0
  28. package/dist/esm/experiments/helpers/index.d.ts.map +1 -0
  29. package/dist/esm/experiments/helpers/index.js +4 -0
  30. package/dist/esm/experiments/helpers/index.js.map +1 -0
  31. package/dist/esm/experiments/index.d.ts +6 -0
  32. package/dist/esm/experiments/index.d.ts.map +1 -1
  33. package/dist/esm/experiments/index.js +6 -0
  34. package/dist/esm/experiments/index.js.map +1 -1
  35. package/dist/esm/experiments/listExperiments.d.ts +29 -0
  36. package/dist/esm/experiments/listExperiments.d.ts.map +1 -0
  37. package/dist/esm/experiments/listExperiments.js +59 -0
  38. package/dist/esm/experiments/listExperiments.js.map +1 -0
  39. package/dist/esm/experiments/resumeEvaluation.d.ts +105 -0
  40. package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -0
  41. package/dist/esm/experiments/resumeEvaluation.js +559 -0
  42. package/dist/esm/experiments/resumeEvaluation.js.map +1 -0
  43. package/dist/esm/experiments/resumeExperiment.d.ts +102 -0
  44. package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -0
  45. package/dist/esm/experiments/resumeExperiment.js +517 -0
  46. package/dist/esm/experiments/resumeExperiment.js.map +1 -0
  47. package/dist/esm/experiments/runExperiment.d.ts +4 -3
  48. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  49. package/dist/esm/experiments/runExperiment.js +32 -3
  50. package/dist/esm/experiments/runExperiment.js.map +1 -1
  51. package/dist/esm/prompts/createPrompt.d.ts +19 -1
  52. package/dist/esm/prompts/createPrompt.d.ts.map +1 -1
  53. package/dist/esm/prompts/createPrompt.js +14 -1
  54. package/dist/esm/prompts/createPrompt.js.map +1 -1
  55. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  56. package/dist/esm/types/experiments.d.ts +66 -3
  57. package/dist/esm/types/experiments.d.ts.map +1 -1
  58. package/dist/esm/utils/channel.d.ts +229 -0
  59. package/dist/esm/utils/channel.d.ts.map +1 -0
  60. package/dist/esm/utils/channel.js +352 -0
  61. package/dist/esm/utils/channel.js.map +1 -0
  62. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  63. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  64. package/dist/esm/utils/isHttpError.d.ts +21 -0
  65. package/dist/esm/utils/isHttpError.d.ts.map +1 -0
  66. package/dist/esm/utils/isHttpError.js +33 -0
  67. package/dist/esm/utils/isHttpError.js.map +1 -0
  68. package/dist/src/__generated__/api/v1.d.ts +321 -9
  69. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  70. package/dist/src/experiments/createExperiment.d.ts +39 -0
  71. package/dist/src/experiments/createExperiment.d.ts.map +1 -0
  72. package/dist/src/experiments/createExperiment.js +43 -0
  73. package/dist/src/experiments/createExperiment.js.map +1 -0
  74. package/dist/src/experiments/deleteExperiment.d.ts +36 -0
  75. package/dist/src/experiments/deleteExperiment.d.ts.map +1 -0
  76. package/dist/src/experiments/deleteExperiment.js +52 -0
  77. package/dist/src/experiments/deleteExperiment.js.map +1 -0
  78. package/dist/src/experiments/getExperimentInfo.d.ts.map +1 -1
  79. package/dist/src/experiments/getExperimentInfo.js +9 -2
  80. package/dist/src/experiments/getExperimentInfo.js.map +1 -1
  81. package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
  82. package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
  83. package/dist/src/experiments/helpers/asExperimentEvaluator.js +22 -0
  84. package/dist/src/experiments/helpers/asExperimentEvaluator.js.map +1 -0
  85. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
  86. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
  87. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js +21 -0
  88. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
  89. package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
  90. package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
  91. package/dist/src/experiments/helpers/getExperimentEvaluators.js +61 -0
  92. package/dist/src/experiments/helpers/getExperimentEvaluators.js.map +1 -0
  93. package/dist/src/experiments/helpers/index.d.ts +4 -0
  94. package/dist/src/experiments/helpers/index.d.ts.map +1 -0
  95. package/dist/src/experiments/helpers/index.js +20 -0
  96. package/dist/src/experiments/helpers/index.js.map +1 -0
  97. package/dist/src/experiments/index.d.ts +6 -0
  98. package/dist/src/experiments/index.d.ts.map +1 -1
  99. package/dist/src/experiments/index.js +6 -0
  100. package/dist/src/experiments/index.js.map +1 -1
  101. package/dist/src/experiments/listExperiments.d.ts +29 -0
  102. package/dist/src/experiments/listExperiments.d.ts.map +1 -0
  103. package/dist/src/experiments/listExperiments.js +66 -0
  104. package/dist/src/experiments/listExperiments.js.map +1 -0
  105. package/dist/src/experiments/resumeEvaluation.d.ts +105 -0
  106. package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -0
  107. package/dist/src/experiments/resumeEvaluation.js +585 -0
  108. package/dist/src/experiments/resumeEvaluation.js.map +1 -0
  109. package/dist/src/experiments/resumeExperiment.d.ts +102 -0
  110. package/dist/src/experiments/resumeExperiment.d.ts.map +1 -0
  111. package/dist/src/experiments/resumeExperiment.js +540 -0
  112. package/dist/src/experiments/resumeExperiment.js.map +1 -0
  113. package/dist/src/experiments/runExperiment.d.ts +4 -3
  114. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  115. package/dist/src/experiments/runExperiment.js +32 -3
  116. package/dist/src/experiments/runExperiment.js.map +1 -1
  117. package/dist/src/prompts/createPrompt.d.ts +19 -1
  118. package/dist/src/prompts/createPrompt.d.ts.map +1 -1
  119. package/dist/src/prompts/createPrompt.js +14 -1
  120. package/dist/src/prompts/createPrompt.js.map +1 -1
  121. package/dist/src/types/experiments.d.ts +66 -3
  122. package/dist/src/types/experiments.d.ts.map +1 -1
  123. package/dist/src/utils/channel.d.ts +229 -0
  124. package/dist/src/utils/channel.d.ts.map +1 -0
  125. package/dist/src/utils/channel.js +385 -0
  126. package/dist/src/utils/channel.js.map +1 -0
  127. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  128. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  129. package/dist/src/utils/isHttpError.d.ts +21 -0
  130. package/dist/src/utils/isHttpError.d.ts.map +1 -0
  131. package/dist/src/utils/isHttpError.js +37 -0
  132. package/dist/src/utils/isHttpError.js.map +1 -0
  133. package/dist/tsconfig.tsbuildinfo +1 -1
  134. package/package.json +6 -5
  135. package/src/__generated__/api/v1.ts +321 -9
  136. package/src/experiments/createExperiment.ts +90 -0
  137. package/src/experiments/deleteExperiment.ts +67 -0
  138. package/src/experiments/getExperimentInfo.ts +9 -2
  139. package/src/experiments/helpers/asExperimentEvaluator.ts +29 -0
  140. package/src/experiments/helpers/fromPhoenixLLMEvaluator.ts +24 -0
  141. package/src/experiments/helpers/getExperimentEvaluators.ts +74 -0
  142. package/src/experiments/helpers/index.ts +3 -0
  143. package/src/experiments/index.ts +6 -0
  144. package/src/experiments/listExperiments.ts +83 -0
  145. package/src/experiments/resumeEvaluation.ts +804 -0
  146. package/src/experiments/resumeExperiment.ts +745 -0
  147. package/src/experiments/runExperiment.ts +37 -5
  148. package/src/prompts/createPrompt.ts +19 -1
  149. package/src/types/experiments.ts +72 -3
  150. package/src/utils/channel.ts +397 -0
  151. package/src/utils/isHttpError.ts +45 -0
@@ -0,0 +1,745 @@
1
+ import {
2
+ MimeType,
3
+ OpenInferenceSpanKind,
4
+ SemanticConventions,
5
+ } from "@arizeai/openinference-semantic-conventions";
6
+ import {
7
+ type DiagLogLevel,
8
+ NodeTracerProvider,
9
+ objectAsAttributes,
10
+ register,
11
+ SpanStatusCode,
12
+ Tracer,
13
+ } from "@arizeai/phoenix-otel";
14
+
15
+ import { components } from "../__generated__/api/v1";
16
+ import { createClient, type PhoenixClient } from "../client";
17
+ import { ClientFn } from "../types/core";
18
+ import { ExampleWithId } from "../types/datasets";
19
+ import type {
20
+ ExperimentEvaluatorLike,
21
+ ExperimentTask,
22
+ } from "../types/experiments";
23
+ import { type Logger } from "../types/logger";
24
+ import { Channel, ChannelError } from "../utils/channel";
25
+ import { ensureString } from "../utils/ensureString";
26
+ import { isHttpErrorWithStatus } from "../utils/isHttpError";
27
+ import { toObjectHeaders } from "../utils/toObjectHeaders";
28
+ import { getDatasetExperimentsUrl, getExperimentUrl } from "../utils/urlUtils";
29
+
30
+ import { getExperimentInfo } from "./getExperimentInfo.js";
31
+ import { resumeEvaluation } from "./resumeEvaluation";
32
+
33
+ import invariant from "tiny-invariant";
34
+
35
/**
 * Raised when task execution is deliberately halted because a task failed
 * while `stopOnFirstError` was enabled. The dedicated name lets callers tell an
 * intentional stop apart from an infrastructure failure.
 *
 * The optional `cause` argument is stored on the instance as `cause`
 * (it is `undefined` when no cause is supplied).
 * @internal - Not exported to minimize API surface area
 */
class TaskAbortedError extends Error {
  constructor(message: string, cause?: Error) {
    super(message);
    // Plain property assignment, in the order: name, then cause.
    Object.assign(this, { name: "TaskAbortedError", cause });
  }
}
47
+
48
/**
 * Raised when the producer cannot retrieve the list of incomplete runs from
 * the server. Callers treat this as critical and surface it even when
 * `stopOnFirstError` is `false`.
 *
 * The optional `cause` argument is stored on the instance as `cause`
 * (it is `undefined` when no cause is supplied).
 * @internal - Not exported to minimize API surface area
 */
class TaskFetchError extends Error {
  constructor(message: string, cause?: Error) {
    super(message);
    // Plain property assignment, in the order: name, then cause.
    Object.assign(this, { name: "TaskFetchError", cause });
  }
}
60
+
61
/**
 * Options accepted by `resumeExperiment`, in addition to the shared
 * `ClientFn` options (e.g. an optional pre-configured client).
 */
export type ResumeExperimentParams = ClientFn & {
  /**
   * ID of the experiment whose incomplete runs should be re-executed.
   */
  readonly experimentId: string;
  /**
   * Task invoked once for every incomplete (example, repetition) pair.
   */
  readonly task: ExperimentTask;
  /**
   * Evaluators to run after the task phase finishes without an error.
   * @default undefined
   */
  readonly evaluators?: readonly ExperimentEvaluatorLike[];
  /**
   * Logger that receives progress, warning and error messages.
   * @default console
   */
  readonly logger?: Logger;
  /**
   * How many task executions may be in flight at the same time.
   * @default 5
   */
  readonly concurrency?: number;
  /**
   * Whether the tracer provider created for the run is registered globally.
   * @default true
   */
  readonly setGlobalTracerProvider?: boolean;
  /**
   * Whether spans are exported through a batch span processor.
   * @default true
   */
  readonly useBatchSpanProcessor?: boolean;
  /**
   * Log level for the default DiagConsoleLogger used while tracing.
   */
  readonly diagLogLevel?: DiagLogLevel;
  /**
   * When `true`, stop processing and exit as soon as any task fails.
   * @default false
   */
  readonly stopOnFirstError?: boolean;
};
105
+
106
/**
 * Number of incomplete runs requested from the server per page.
 */
const DEFAULT_PAGE_SIZE = 50 as const;

/**
 * Factor applied to the page size to obtain the task channel's capacity.
 * With a factor of 2 the producer can fetch page N+1 while workers are still
 * draining page N, so workers are not left idle between pages, while memory
 * stays bounded. Once the channel is full the producer blocks, which provides
 * backpressure.
 */
const CHANNEL_CAPACITY_MULTIPLIER = 2 as const;
114
+
115
/**
 * Unit of work passed from the producer to the workers through the channel:
 * one dataset example together with the repetition number to execute.
 */
type TaskItem = Readonly<{
  example: ExampleWithId;
  repetitionNumber: number;
}>;
122
+
123
/**
 * Converts a `DatasetExample` as returned by the API into the client-side
 * `ExampleWithId` shape.
 *
 * @param apiExample - the example payload from the incomplete-runs response
 * @returns the example with a falsy `output` replaced by `null`, a falsy
 * `metadata` replaced by `{}`, and `updated_at` wrapped in a `Date`
 */
function buildExampleFromApiResponse(
  apiExample: components["schemas"]["DatasetExample"]
): ExampleWithId {
  const {
    id,
    input,
    output: rawOutput,
    metadata: rawMetadata,
    updated_at: updatedAtRaw,
  } = apiExample;

  return {
    id,
    input,
    output: rawOutput || null,
    metadata: rawMetadata || {},
    updatedAt: new Date(updatedAtRaw),
  };
}
137
+
138
/**
 * Translates a failed fetch into a more helpful error when the failure looks
 * like an unsupported-feature response; otherwise re-throws the error as-is.
 *
 * A `SyntaxError` whose message mentions "json" is taken as a sign that the
 * server answered with something other than JSON (likely a 404 HTML page from
 * an older server). In that case the server version is looked up on a
 * best-effort basis and appended to the upgrade hint.
 *
 * @param error - the error raised by the original request
 * @param client - client whose `config.baseUrl` is used for the version lookup
 * @param featureName - feature name interpolated into the error message
 * @throws always: either the original `error` or the upgrade-hint `Error`
 */
async function handleFetchError(
  error: unknown,
  client: PhoenixClient,
  featureName: string
): Promise<never> {
  const looksLikeJsonParseFailure =
    error instanceof SyntaxError &&
    error.message.toLowerCase().includes("json");

  // Anything else is not a compatibility problem: hand it back untouched.
  if (!looksLikeJsonParseFailure) {
    throw error;
  }

  let versionSuffix = "";
  try {
    const root = client.config.baseUrl || "";
    const response = await fetch(`${root}/arize_phoenix_version`);
    if (response.ok) {
      const serverVersion = await response.text();
      versionSuffix = ` Your current server version is ${serverVersion}.`;
    }
  } catch {
    // Best effort only: the version is optional context for the message.
  }

  throw new Error(
    `The ${featureName} feature is not available on this Phoenix server. ` +
      "Please upgrade your Phoenix server to use this feature." +
      versionSuffix
  );
}
173
+
174
/**
 * Registers an OpenTelemetry tracer provider for the experiment's project and
 * returns it together with a tracer named after that project.
 *
 * @returns `null` when `projectName` is falsy (nothing is registered),
 * otherwise the registered provider and its tracer
 */
function setupTracer({
  projectName,
  baseUrl,
  headers,
  useBatchSpanProcessor,
  diagLogLevel,
  setGlobalTracerProvider,
}: {
  projectName: string | null;
  baseUrl: string;
  headers?: Record<string, string>;
  useBatchSpanProcessor: boolean;
  diagLogLevel?: DiagLogLevel;
  setGlobalTracerProvider: boolean;
}): { provider: NodeTracerProvider; tracer: Tracer } | null {
  if (projectName) {
    const registeredProvider = register({
      projectName,
      url: baseUrl,
      headers,
      batch: useBatchSpanProcessor,
      diagLogLevel,
      global: setGlobalTracerProvider,
    });

    return {
      provider: registeredProvider,
      tracer: registeredProvider.getTracer(projectName),
    };
  }

  // Without a project name there is nowhere to send spans.
  return null;
}
208
+
209
/**
 * Writes the resume summary to `logger.info`, one call per line, framed by
 * 70-character `=` rules.
 *
 * @param logger - receives the summary lines
 * @param experimentId - experiment identifier shown in the summary
 * @param totalProcessed - value reported as "Incomplete runs processed"
 * @param totalCompleted - value reported as "Successfully completed"
 */
function printExperimentSummary({
  logger,
  experimentId,
  totalProcessed,
  totalCompleted,
}: {
  logger: Logger;
  experimentId: string;
  totalProcessed: number;
  totalCompleted: number;
}): void {
  const rule = "=".repeat(70);
  const summaryLines: readonly string[] = [
    "\n" + rule,
    "📊 Experiment Resume Summary",
    rule,
    `Experiment ID: ${experimentId}`,
    `Incomplete runs processed: ${totalProcessed}`,
    `Successfully completed: ${totalCompleted}`,
    rule,
  ];

  for (const line of summaryLines) {
    logger.info(line);
  }
}
231
+
232
/**
 * Resume an incomplete experiment by running only the missing or failed runs.
 *
 * This function identifies which (example, repetition) pairs have not been completed
 * (either missing or failed) and re-runs the task only for those pairs. Optionally,
 * evaluators can be run on the completed runs after task execution.
 *
 * Incomplete runs are fetched page by page by a single producer and handed to
 * `concurrency` workers through a bounded channel, so memory usage stays bounded.
 *
 * @throws {Error} Throws different error types based on failure:
 * - "TaskFetchError": Unable to fetch incomplete runs from the server.
 *   Always thrown regardless of stopOnFirstError, as it indicates critical infrastructure failure.
 * - "TaskAbortedError": stopOnFirstError=true and a task failed.
 *   The failing task's error is preserved in the `cause` property.
 * - Generic Error: invalid `concurrency` (must be a finite number >= 1),
 *   or other unexpected failures.
 *
 * @example
 * ```ts
 * import { resumeExperiment } from "@arizeai/phoenix-client/experiments";
 *
 * // Resume an interrupted experiment
 * try {
 *   await resumeExperiment({
 *     experimentId: "exp_123",
 *     task: myTask,
 *   });
 * } catch (error) {
 *   // Handle by error name (no instanceof needed)
 *   if (error.name === "TaskFetchError") {
 *     console.error("Failed to connect to server:", error.cause);
 *   } else if (error.name === "TaskAbortedError") {
 *     console.error("Task stopped due to error:", error.cause);
 *   } else {
 *     console.error("Unexpected error:", error);
 *   }
 * }
 *
 * // Resume with evaluators
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   evaluators: [correctnessEvaluator, relevanceEvaluator],
 * });
 *
 * // Stop on first error (useful for debugging)
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   stopOnFirstError: true, // Exit immediately on first task failure
 * });
 * ```
 */
export async function resumeExperiment({
  client: _client,
  experimentId,
  task,
  evaluators,
  logger = console,
  concurrency = 5,
  setGlobalTracerProvider = true,
  useBatchSpanProcessor = true,
  diagLogLevel,
  stopOnFirstError = false,
}: ResumeExperimentParams): Promise<void> {
  const client = _client ?? createClient();
  const pageSize = DEFAULT_PAGE_SIZE;

  // Get experiment info
  logger.info(`🔍 Fetching experiment info...`);
  const experiment = await getExperimentInfo({ client, experimentId });

  // Check if there are incomplete runs
  const totalExpected = experiment.exampleCount * experiment.repetitions;
  const incompleteCount = totalExpected - experiment.successfulRunCount;

  if (incompleteCount === 0) {
    // NOTE(review): evaluators are not run on this early-return path, even if
    // provided — confirm this is intended.
    logger.info("✅ No incomplete runs found. Experiment is already complete.");
    return;
  }

  logger.info(
    `🧪 Resuming experiment with ${incompleteCount} incomplete runs...`
  );

  // With zero workers nothing drains the channel and the producer would block
  // forever once it is full; a non-finite count cannot size the worker array.
  invariant(
    Number.isFinite(concurrency) && concurrency >= 1,
    "concurrency must be a finite number greater than or equal to 1"
  );
  const workerCount = Math.floor(concurrency);

  // Get base URL for tracing and URL generation
  const baseUrl = client.config.baseUrl;
  invariant(
    baseUrl,
    "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client."
  );

  // Initialize tracer (only if experiment has a project_name)
  const tracerSetup = setupTracer({
    projectName: experiment.projectName,
    baseUrl,
    headers: client.config.headers
      ? toObjectHeaders(client.config.headers)
      : undefined,
    useBatchSpanProcessor,
    diagLogLevel,
    setGlobalTracerProvider,
  });

  const provider = tracerSetup?.provider ?? null;
  const taskTracer = tracerSetup?.tracer ?? null;

  // Display URLs
  const datasetExperimentsUrl = getDatasetExperimentsUrl({
    baseUrl,
    datasetId: experiment.datasetId,
  });
  const experimentUrl = getExperimentUrl({
    baseUrl,
    datasetId: experiment.datasetId,
    experimentId: experiment.id,
  });

  logger.info(`📺 View dataset experiments: ${datasetExperimentsUrl}`);
  logger.info(`🔗 View this experiment: ${experimentUrl}`);

  // Create a CSP-style bounded buffer for task distribution
  const taskChannel = new Channel<TaskItem>(
    pageSize * CHANNEL_CAPACITY_MULTIPLIER
  );

  // Abort controller for stopOnFirstError coordination
  const abortController = new AbortController();
  const { signal } = abortController;

  // Remembers the task error that triggered the abort so it can be reported
  // as the `cause` of the TaskAbortedError. Kept in an object because it is
  // written from inside the worker closures.
  const abortState: { cause: Error | null } = { cause: null };

  let totalProcessed = 0;
  let totalCompleted = 0;
  let totalFailed = 0;

  // Producer: Fetch incomplete runs and send to channel
  async function fetchIncompleteRuns(): Promise<void> {
    let cursor: string | null = null;

    try {
      do {
        // Stop fetching if abort signal received
        if (signal.aborted) {
          logger.info("🛑 Stopping fetch due to error in task");
          break;
        }

        let res: {
          data?: components["schemas"]["GetIncompleteExperimentRunsResponseBody"];
        };

        try {
          res = await client.GET(
            "/v1/experiments/{experiment_id}/incomplete-runs",
            {
              params: {
                path: {
                  experiment_id: experimentId,
                },
                query: {
                  cursor,
                  limit: pageSize,
                },
              },
            }
          );
        } catch (error: unknown) {
          // Check for version compatibility issues and throw helpful error
          try {
            await handleFetchError(error, client, "resume_experiment");
            // TypeScript: handleFetchError never returns, but add throw for safety
            throw new Error("handleFetchError should never return");
          } catch (handledError) {
            // Wrap the error (from handleFetchError or original) in semantic error type
            throw new TaskFetchError(
              "Failed to fetch incomplete runs from server",
              handledError instanceof Error ? handledError : undefined
            );
          }
        }

        cursor = res.data?.next_cursor ?? null;
        const batchIncomplete = res.data?.data;
        invariant(batchIncomplete, "Failed to fetch incomplete runs");

        if (batchIncomplete.length === 0) {
          break;
        }

        // Send tasks to channel (blocks if channel is full - natural backpressure!)
        let batchCount = 0;
        for (const incomplete of batchIncomplete) {
          // Stop sending items if abort signal received
          if (signal.aborted) {
            break;
          }

          const example = buildExampleFromApiResponse(
            incomplete.dataset_example
          );
          for (const repNum of incomplete.repetition_numbers) {
            // Stop sending items if abort signal received
            if (signal.aborted) {
              break;
            }

            await taskChannel.send({ example, repetitionNumber: repNum });
            batchCount++;
            totalProcessed++;
          }
        }

        logger.info(
          `Fetched batch of ${batchCount} incomplete runs (channel buffer: ${taskChannel.length})`
        );
      } while (cursor !== null && !signal.aborted);
    } catch (error) {
      // Re-throw with context preservation
      if (error instanceof TaskFetchError) {
        throw error;
      }
      // ChannelError from blocked send() should bubble up naturally
      // (happens when channel closes while producer is blocked)
      if (error instanceof ChannelError) {
        throw error;
      }
      // Wrap any unexpected errors from channel operations
      throw new TaskFetchError(
        "Unexpected error during task fetch",
        error instanceof Error ? error : undefined
      );
    } finally {
      taskChannel.close(); // Signal workers we're done
    }
  }

  // Worker: Process tasks from channel
  async function processTasksFromChannel(): Promise<void> {
    for await (const item of taskChannel) {
      // Stop processing if abort signal received
      if (signal.aborted) {
        break;
      }

      try {
        await runSingleTask({
          client,
          experimentId,
          task,
          example: item.example,
          repetitionNumber: item.repetitionNumber,
          tracer: taskTracer,
        });
        totalCompleted++;
      } catch (error) {
        totalFailed++;
        logger.error(
          `Failed to run task for example ${item.example.id}, repetition ${item.repetitionNumber}: ${error}`
        );

        // If stopOnFirstError is enabled, abort and re-throw
        if (stopOnFirstError) {
          logger.error("🛑 Stopping on first error");
          // Only the first failure is kept as the abort cause.
          if (abortState.cause === null) {
            abortState.cause =
              error instanceof Error ? error : new Error(String(error));
          }
          abortController.abort();
          throw error;
        }
      }
    }
  }

  // Start concurrent execution
  // Wrap in try-finally to ensure channel is always closed, even if Promise.all throws
  let executionError: Error | null = null;
  try {
    const producerTask = fetchIncompleteRuns();
    const workerTasks = Array.from({ length: workerCount }, () =>
      processTasksFromChannel()
    );

    // Wait for producer and all workers to finish.
    // Promise.all rejects with the first rejection it observes.
    await Promise.all([producerTask, ...workerTasks]);
  } catch (error) {
    // Classify and handle errors based on their nature
    const err = error instanceof Error ? error : new Error(String(error));

    // Always surface producer/infrastructure errors
    if (error instanceof TaskFetchError) {
      // Producer failed - this is ALWAYS critical regardless of stopOnFirstError
      logger.error(`❌ Critical: Failed to fetch incomplete runs from server`);
      executionError = err;
    } else if (error instanceof ChannelError && signal.aborted) {
      // Channel closed due to intentional abort - wrap in semantic error,
      // preferring the task failure that triggered the abort as the cause
      executionError = new TaskAbortedError(
        "Task execution stopped due to error in concurrent worker",
        abortState.cause ?? err
      );
    } else if (stopOnFirstError && signal.aborted) {
      // A worker failed in stopOnFirstError mode (already logged by the worker).
      // Surface the documented TaskAbortedError; the original message is kept
      // in the wrapper's message and the original error in `cause`.
      const cause = abortState.cause ?? err;
      executionError = new TaskAbortedError(
        `Task execution stopped on first error: ${cause.message}`,
        cause
      );
    } else if (stopOnFirstError) {
      // Error in stopOnFirstError mode that did not come from an abort
      executionError = err;
    } else {
      // Unexpected error (not from worker, not from producer fetch)
      // This could be a bug in our code or infrastructure failure
      logger.error(`❌ Unexpected error during task execution: ${err.message}`);
      executionError = err;
    }
  } finally {
    // Ensure channel is closed even if there are unexpected errors
    // This is a safety net in case producer's finally block didn't execute
    if (!taskChannel.isClosed) {
      taskChannel.close();
    }
  }

  // Only show completion message if we didn't stop on error
  if (!executionError) {
    logger.info(`✅ Task runs completed.`);
  }

  if (totalFailed > 0 && !executionError) {
    logger.info(
      `⚠️ Warning: ${totalFailed} out of ${totalProcessed} runs failed.`
    );
  }

  try {
    // Run evaluators if provided (only on runs missing evaluations)
    // Skip evaluators if we stopped on error
    if (evaluators && evaluators.length > 0 && !executionError) {
      logger.info(`\n🔬 Running evaluators...`);
      await resumeEvaluation({
        experimentId,
        evaluators: [...evaluators],
        client,
        logger,
        concurrency: workerCount,
        setGlobalTracerProvider,
        useBatchSpanProcessor,
        diagLogLevel,
        stopOnFirstError,
      });
    }

    // Print summary
    printExperimentSummary({
      logger,
      experimentId: experiment.id,
      totalProcessed,
      totalCompleted,
    });
  } finally {
    // Flush spans (if tracer was initialized). Done in `finally` so task spans
    // are still flushed when the evaluation phase throws.
    if (provider) {
      await provider.forceFlush();
    }
  }

  // Re-throw the error captured during the task phase, if any
  if (executionError) {
    throw executionError;
  }
}
589
+
590
/**
 * Posts the outcome of one task run to the experiment's runs endpoint.
 * The task itself is not executed here.
 *
 * A 409 Conflict response is ignored: it means a result for this run already
 * exists, so the call is treated as a no-op. Any other failure is re-thrown.
 *
 * @param output - task output, sent as the run's `output`
 * @param error - error text for a failed run; omitted from the request when falsy
 * @param traceId - trace identifier to associate with the run, `null` if none
 */
async function recordTaskResult({
  client,
  experimentId,
  example,
  repetitionNumber,
  output,
  error,
  startTime,
  endTime,
  traceId = null,
}: {
  readonly client: PhoenixClient;
  readonly experimentId: string;
  readonly example: ExampleWithId;
  readonly repetitionNumber: number;
  readonly output: unknown;
  readonly error?: string;
  readonly startTime: Date;
  readonly endTime: Date;
  readonly traceId?: string | null;
}): Promise<void> {
  try {
    await client.POST("/v1/experiments/{experiment_id}/runs", {
      params: { path: { experiment_id: experimentId } },
      body: {
        dataset_example_id: example.id,
        repetition_number: repetitionNumber,
        output: output as Record<string, unknown>,
        start_time: startTime.toISOString(),
        end_time: endTime.toISOString(),
        error: error ? ensureString(error) : undefined,
        trace_id: traceId,
      },
    });
  } catch (postError: unknown) {
    const alreadyRecorded = isHttpErrorWithStatus(postError, 409);
    if (!alreadyRecorded) {
      throw postError;
    }
    // 409 Conflict: the result already exists, nothing more to do.
  }
}
639
+
640
/**
 * Executes the task once for a single (example, repetition) pair and records
 * the outcome through `recordTaskResult`, whether the task succeeded or threw.
 *
 * When a tracer is supplied the execution is wrapped in an active span that
 * carries the input, the output (on success) and the error status (on
 * failure); the span's trace ID is recorded with the result. Without a tracer
 * the task runs untraced and no trace ID is recorded.
 *
 * @throws re-throws whatever the task throws, after the result was recorded
 */
async function runSingleTask({
  client,
  experimentId,
  task,
  example,
  repetitionNumber,
  tracer,
}: {
  readonly client: PhoenixClient;
  readonly experimentId: string;
  readonly task: ExperimentTask;
  readonly example: ExampleWithId;
  readonly repetitionNumber: number;
  readonly tracer: Tracer | null;
}): Promise<void> {
  const startTime = new Date();

  // Traced path: wrap execution in a span for observability
  if (tracer) {
    return tracer.startActiveSpan(
      `Task: ${task.name || "anonymous"}`,
      async (span) => {
        span.setAttributes({
          [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
            OpenInferenceSpanKind.CHAIN,
          [SemanticConventions.INPUT_VALUE]: ensureString(example.input),
          [SemanticConventions.INPUT_MIME_TYPE]: MimeType.JSON,
          ...objectAsAttributes({
            experiment_id: experimentId,
            dataset_example_id: example.id,
            repetition_number: repetitionNumber,
          }),
        });

        // Stays null when the task fails
        let taskOutput: unknown = null;
        let failureMessage: string | undefined;

        try {
          // The task is executed exactly once
          taskOutput = await task(example);

          span.setAttributes({
            [SemanticConventions.OUTPUT_VALUE]: ensureString(taskOutput),
            [SemanticConventions.OUTPUT_MIME_TYPE]: MimeType.JSON,
          });
          span.setStatus({ code: SpanStatusCode.OK });
        } catch (taskError) {
          failureMessage =
            taskError instanceof Error ? taskError.message : String(taskError);

          span.setStatus({
            code: SpanStatusCode.ERROR,
            message: failureMessage,
          });
          span.recordException(taskError as Error);

          throw taskError;
        } finally {
          const endTime = new Date();
          span.end();

          // Record the result together with the span's trace ID
          await recordTaskResult({
            client,
            experimentId,
            example,
            repetitionNumber,
            output: taskOutput,
            error: failureMessage,
            startTime,
            endTime,
            traceId: span.spanContext().traceId,
          });
        }
      }
    );
  }

  // Untraced path (no project_name, hence no tracer)
  let taskOutput: unknown = null; // Stays null when the task fails
  let failureMessage: string | undefined;

  try {
    taskOutput = await task(example);
  } catch (taskError) {
    failureMessage =
      taskError instanceof Error ? taskError.message : String(taskError);
    throw taskError;
  } finally {
    const endTime = new Date();
    await recordTaskResult({
      client,
      experimentId,
      example,
      repetitionNumber,
      output: taskOutput,
      error: failureMessage,
      startTime,
      endTime,
    });
  }
}