@arizeai/phoenix-client 6.5.3 → 6.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +45 -0
  2. package/dist/esm/__generated__/api/v1.d.ts +244 -0
  3. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  4. package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -1
  5. package/dist/esm/experiments/resumeEvaluation.js +179 -170
  6. package/dist/esm/experiments/resumeEvaluation.js.map +1 -1
  7. package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -1
  8. package/dist/esm/experiments/resumeExperiment.js +201 -185
  9. package/dist/esm/experiments/resumeExperiment.js.map +1 -1
  10. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  11. package/dist/esm/experiments/runExperiment.js +238 -207
  12. package/dist/esm/experiments/runExperiment.js.map +1 -1
  13. package/dist/esm/experiments/tracing.d.ts +10 -0
  14. package/dist/esm/experiments/tracing.d.ts.map +1 -0
  15. package/dist/esm/experiments/tracing.js +21 -0
  16. package/dist/esm/experiments/tracing.js.map +1 -0
  17. package/dist/esm/prompts/sdks/toSDK.d.ts +2 -2
  18. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  19. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  20. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  21. package/dist/src/__generated__/api/v1.d.ts +244 -0
  22. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  23. package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -1
  24. package/dist/src/experiments/resumeEvaluation.js +192 -183
  25. package/dist/src/experiments/resumeEvaluation.js.map +1 -1
  26. package/dist/src/experiments/resumeExperiment.d.ts.map +1 -1
  27. package/dist/src/experiments/resumeExperiment.js +214 -198
  28. package/dist/src/experiments/resumeExperiment.js.map +1 -1
  29. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  30. package/dist/src/experiments/runExperiment.js +228 -197
  31. package/dist/src/experiments/runExperiment.js.map +1 -1
  32. package/dist/src/experiments/tracing.d.ts +10 -0
  33. package/dist/src/experiments/tracing.d.ts.map +1 -0
  34. package/dist/src/experiments/tracing.js +24 -0
  35. package/dist/src/experiments/tracing.js.map +1 -0
  36. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  37. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  38. package/dist/tsconfig.tsbuildinfo +1 -1
  39. package/docs/annotations.mdx +83 -0
  40. package/docs/datasets.mdx +77 -0
  41. package/docs/document-annotations.mdx +208 -0
  42. package/docs/experiments.mdx +271 -0
  43. package/docs/overview.mdx +176 -0
  44. package/docs/prompts.mdx +73 -0
  45. package/docs/session-annotations.mdx +158 -0
  46. package/docs/sessions.mdx +87 -0
  47. package/docs/span-annotations.mdx +283 -0
  48. package/docs/spans.mdx +76 -0
  49. package/docs/traces.mdx +63 -0
  50. package/package.json +11 -5
  51. package/src/__generated__/api/v1.ts +244 -0
  52. package/src/experiments/resumeEvaluation.ts +224 -206
  53. package/src/experiments/resumeExperiment.ts +237 -213
  54. package/src/experiments/runExperiment.ts +281 -243
  55. package/src/experiments/tracing.ts +30 -0
@@ -3,8 +3,13 @@ import {
3
3
  OpenInferenceSpanKind,
4
4
  SemanticConventions,
5
5
  } from "@arizeai/openinference-semantic-conventions";
6
- import type { NodeTracerProvider, Tracer } from "@arizeai/phoenix-otel";
6
+ import type {
7
+ GlobalTracerProviderRegistration,
8
+ NodeTracerProvider,
9
+ Tracer,
10
+ } from "@arizeai/phoenix-otel";
7
11
  import {
12
+ attachGlobalTracerProvider,
8
13
  type DiagLogLevel,
9
14
  objectAsAttributes,
10
15
  register,
@@ -33,6 +38,7 @@ import {
33
38
  PROGRESS_PREFIX,
34
39
  } from "./logging";
35
40
  import { resumeEvaluation } from "./resumeEvaluation";
41
+ import { cleanupOwnedTracerProvider } from "./tracing";
36
42
 
37
43
  /**
38
44
  * Error thrown when task is aborted due to a failure in stopOnFirstError mode.
@@ -190,7 +196,11 @@ function setupTracer({
190
196
  useBatchSpanProcessor: boolean;
191
197
  diagLogLevel?: DiagLogLevel;
192
198
  setGlobalTracerProvider: boolean;
193
- }): { provider: NodeTracerProvider; tracer: Tracer } | null {
199
+ }): {
200
+ provider: NodeTracerProvider;
201
+ tracer: Tracer;
202
+ globalRegistration: GlobalTracerProviderRegistration | null;
203
+ } | null {
194
204
  if (!projectName) {
195
205
  return null;
196
206
  }
@@ -201,11 +211,14 @@ function setupTracer({
201
211
  headers,
202
212
  batch: useBatchSpanProcessor,
203
213
  diagLogLevel,
204
- global: setGlobalTracerProvider,
214
+ global: false,
205
215
  });
216
+ const globalRegistration = setGlobalTracerProvider
217
+ ? attachGlobalTracerProvider(provider)
218
+ : null;
206
219
 
207
220
  const tracer = provider.getTracer(projectName);
208
- return { provider, tracer };
221
+ return { provider, tracer, globalRegistration };
209
222
  }
210
223
 
211
224
  /**
@@ -313,256 +326,267 @@ export async function resumeExperiment({
313
326
  setGlobalTracerProvider,
314
327
  });
315
328
 
316
- const provider = tracerSetup?.provider ?? null;
329
+ let provider = tracerSetup?.provider ?? null;
330
+ let globalRegistration = tracerSetup?.globalRegistration ?? null;
317
331
  const taskTracer = tracerSetup?.tracer ?? null;
318
332
 
319
- // Display URLs
320
- const datasetExperimentsUrl = getDatasetExperimentsUrl({
321
- baseUrl,
322
- datasetId: experiment.datasetId,
323
- });
324
- const experimentUrl = getExperimentUrl({
325
- baseUrl,
326
- datasetId: experiment.datasetId,
327
- experimentId: experiment.id,
328
- });
333
+ try {
334
+ // Display URLs
335
+ const datasetExperimentsUrl = getDatasetExperimentsUrl({
336
+ baseUrl,
337
+ datasetId: experiment.datasetId,
338
+ });
339
+ const experimentUrl = getExperimentUrl({
340
+ baseUrl,
341
+ datasetId: experiment.datasetId,
342
+ experimentId: experiment.id,
343
+ });
329
344
 
330
- // Create a CSP-style bounded buffer for task distribution
331
- const taskChannel = new Channel<TaskItem>(
332
- pageSize * CHANNEL_CAPACITY_MULTIPLIER
333
- );
345
+ // Create a CSP-style bounded buffer for task distribution
346
+ const taskChannel = new Channel<TaskItem>(
347
+ pageSize * CHANNEL_CAPACITY_MULTIPLIER
348
+ );
334
349
 
335
- // Abort controller for stopOnFirstError coordination
336
- const abortController = new AbortController();
337
- const { signal } = abortController;
350
+ // Abort controller for stopOnFirstError coordination
351
+ const abortController = new AbortController();
352
+ const { signal } = abortController;
338
353
 
339
- let totalProcessed = 0;
340
- let totalCompleted = 0;
341
- let totalFailed = 0;
354
+ let totalProcessed = 0;
355
+ let totalCompleted = 0;
356
+ let totalFailed = 0;
342
357
 
343
- // Producer: Fetch incomplete runs and send to channel
344
- async function fetchIncompleteRuns(): Promise<void> {
345
- let cursor: string | null = null;
358
+ // Producer: Fetch incomplete runs and send to channel
359
+ async function fetchIncompleteRuns(): Promise<void> {
360
+ let cursor: string | null = null;
346
361
 
347
- try {
348
- do {
349
- // Stop fetching if abort signal received
350
- if (signal.aborted) {
351
- logger.debug(`${PROGRESS_PREFIX.progress}Stopping fetch.`);
352
- break;
353
- }
362
+ try {
363
+ do {
364
+ // Stop fetching if abort signal received
365
+ if (signal.aborted) {
366
+ logger.debug(`${PROGRESS_PREFIX.progress}Stopping fetch.`);
367
+ break;
368
+ }
354
369
 
355
- let res: {
356
- data?: components["schemas"]["GetIncompleteExperimentRunsResponseBody"];
357
- };
370
+ let res: {
371
+ data?: components["schemas"]["GetIncompleteExperimentRunsResponseBody"];
372
+ };
358
373
 
359
- try {
360
- res = await client.GET(
361
- "/v1/experiments/{experiment_id}/incomplete-runs",
362
- {
363
- params: {
364
- path: {
365
- experiment_id: experimentId,
366
- },
367
- query: {
368
- cursor,
369
- limit: pageSize,
370
- },
371
- },
372
- }
373
- );
374
- } catch (error: unknown) {
375
- // Check for version compatibility issues and throw helpful error
376
374
  try {
377
- await handleFetchError(error, client, "resume_experiment");
378
- // TypeScript: handleFetchError never returns, but add throw for safety
379
- throw new Error("handleFetchError should never return");
380
- } catch (handledError) {
381
- // Wrap the error (from handleFetchError or original) in semantic error type
382
- throw new TaskFetchError(
383
- "Failed to fetch incomplete runs from server",
384
- handledError instanceof Error ? handledError : undefined
375
+ res = await client.GET(
376
+ "/v1/experiments/{experiment_id}/incomplete-runs",
377
+ {
378
+ params: {
379
+ path: {
380
+ experiment_id: experimentId,
381
+ },
382
+ query: {
383
+ cursor,
384
+ limit: pageSize,
385
+ },
386
+ },
387
+ }
385
388
  );
389
+ } catch (error: unknown) {
390
+ // Check for version compatibility issues and throw helpful error
391
+ try {
392
+ await handleFetchError(error, client, "resume_experiment");
393
+ // TypeScript: handleFetchError never returns, but add throw for safety
394
+ throw new Error("handleFetchError should never return");
395
+ } catch (handledError) {
396
+ // Wrap the error (from handleFetchError or original) in semantic error type
397
+ throw new TaskFetchError(
398
+ "Failed to fetch incomplete runs from server",
399
+ handledError instanceof Error ? handledError : undefined
400
+ );
401
+ }
386
402
  }
387
- }
388
403
 
389
- cursor = res.data?.next_cursor ?? null;
390
- const batchIncomplete = res.data?.data;
391
- invariant(batchIncomplete, "Failed to fetch incomplete runs");
392
-
393
- if (batchIncomplete.length === 0) {
394
- break;
395
- }
404
+ cursor = res.data?.next_cursor ?? null;
405
+ const batchIncomplete = res.data?.data;
406
+ invariant(batchIncomplete, "Failed to fetch incomplete runs");
396
407
 
397
- // Send tasks to channel (blocks if channel is full - natural backpressure!)
398
- let batchCount = 0;
399
- for (const incomplete of batchIncomplete) {
400
- // Stop sending items if abort signal received
401
- if (signal.aborted) {
408
+ if (batchIncomplete.length === 0) {
402
409
  break;
403
410
  }
404
411
 
405
- const example = buildExampleFromApiResponse(
406
- incomplete.dataset_example
407
- );
408
- for (const repNum of incomplete.repetition_numbers) {
412
+ // Send tasks to channel (blocks if channel is full - natural backpressure!)
413
+ let batchCount = 0;
414
+ for (const incomplete of batchIncomplete) {
409
415
  // Stop sending items if abort signal received
410
416
  if (signal.aborted) {
411
417
  break;
412
418
  }
413
419
 
414
- await taskChannel.send({ example, repetitionNumber: repNum });
415
- batchCount++;
416
- totalProcessed++;
420
+ const example = buildExampleFromApiResponse(
421
+ incomplete.dataset_example
422
+ );
423
+ for (const repNum of incomplete.repetition_numbers) {
424
+ // Stop sending items if abort signal received
425
+ if (signal.aborted) {
426
+ break;
427
+ }
428
+
429
+ await taskChannel.send({ example, repetitionNumber: repNum });
430
+ batchCount++;
431
+ totalProcessed++;
432
+ }
417
433
  }
418
- }
419
434
 
420
- logger.debug(
421
- `${PROGRESS_PREFIX.progress}Fetched batch of ${batchCount} incomplete runs.`
435
+ logger.debug(
436
+ `${PROGRESS_PREFIX.progress}Fetched batch of ${batchCount} incomplete runs.`
437
+ );
438
+ } while (cursor !== null && !signal.aborted);
439
+ } catch (error) {
440
+ // Re-throw with context preservation
441
+ if (error instanceof TaskFetchError) {
442
+ throw error;
443
+ }
444
+ // ChannelError from blocked send() should bubble up naturally
445
+ // (happens when channel closes while producer is blocked)
446
+ if (error instanceof ChannelError) {
447
+ throw error;
448
+ }
449
+ // Wrap any unexpected errors from channel operations
450
+ throw new TaskFetchError(
451
+ "Unexpected error during task fetch",
452
+ error instanceof Error ? error : undefined
422
453
  );
423
- } while (cursor !== null && !signal.aborted);
424
- } catch (error) {
425
- // Re-throw with context preservation
426
- if (error instanceof TaskFetchError) {
427
- throw error;
428
- }
429
- // ChannelError from blocked send() should bubble up naturally
430
- // (happens when channel closes while producer is blocked)
431
- if (error instanceof ChannelError) {
432
- throw error;
454
+ } finally {
455
+ taskChannel.close(); // Signal workers we're done
433
456
  }
434
- // Wrap any unexpected errors from channel operations
435
- throw new TaskFetchError(
436
- "Unexpected error during task fetch",
437
- error instanceof Error ? error : undefined
438
- );
439
- } finally {
440
- taskChannel.close(); // Signal workers we're done
441
457
  }
442
- }
443
458
 
444
- // Worker: Process tasks from channel
445
- async function processTasksFromChannel(): Promise<void> {
446
- for await (const item of taskChannel) {
447
- // Stop processing if abort signal received
448
- if (signal.aborted) {
449
- break;
450
- }
459
+ // Worker: Process tasks from channel
460
+ async function processTasksFromChannel(): Promise<void> {
461
+ for await (const item of taskChannel) {
462
+ // Stop processing if abort signal received
463
+ if (signal.aborted) {
464
+ break;
465
+ }
451
466
 
452
- try {
453
- await runSingleTask({
454
- client,
455
- experimentId,
456
- task,
457
- example: item.example,
458
- repetitionNumber: item.repetitionNumber,
459
- tracer: taskTracer,
460
- });
461
- totalCompleted++;
462
- } catch (error) {
463
- totalFailed++;
464
- logger.error(
465
- `Failed to run task for example ${item.example.id}, repetition ${item.repetitionNumber}: ${error}`
466
- );
467
+ try {
468
+ await runSingleTask({
469
+ client,
470
+ experimentId,
471
+ task,
472
+ example: item.example,
473
+ repetitionNumber: item.repetitionNumber,
474
+ tracer: taskTracer,
475
+ });
476
+ totalCompleted++;
477
+ } catch (error) {
478
+ totalFailed++;
479
+ logger.error(
480
+ `Failed to run task for example ${item.example.id}, repetition ${item.repetitionNumber}: ${error}`
481
+ );
467
482
 
468
- // If stopOnFirstError is enabled, abort and re-throw
469
- if (stopOnFirstError) {
470
- logger.warn("Stopping on first error");
471
- abortController.abort();
472
- throw error;
483
+ // If stopOnFirstError is enabled, abort and re-throw
484
+ if (stopOnFirstError) {
485
+ logger.warn("Stopping on first error");
486
+ abortController.abort();
487
+ throw error;
488
+ }
473
489
  }
474
490
  }
475
491
  }
476
- }
477
-
478
- // Start concurrent execution
479
- // Wrap in try-finally to ensure channel is always closed, even if Promise.all throws
480
- let executionError: Error | null = null;
481
- try {
482
- const producerTask = fetchIncompleteRuns();
483
- const workerTasks = Array.from({ length: concurrency }, () =>
484
- processTasksFromChannel()
485
- );
486
492
 
487
- // Wait for producer and all workers to finish
488
- await Promise.all([producerTask, ...workerTasks]);
489
- } catch (error) {
490
- // Classify and handle errors based on their nature
491
- const err = error instanceof Error ? error : new Error(String(error));
492
-
493
- // Always surface producer/infrastructure errors
494
- if (error instanceof TaskFetchError) {
495
- // Producer failed - this is ALWAYS critical regardless of stopOnFirstError
496
- logger.error(`Critical: Failed to fetch incomplete runs from server`);
497
- executionError = err;
498
- } else if (error instanceof ChannelError && signal.aborted) {
499
- // Channel closed due to intentional abort - wrap in semantic error
500
- executionError = new TaskAbortedError(
501
- "Task execution stopped due to error in concurrent worker",
502
- err
493
+ // Start concurrent execution
494
+ // Wrap in try-finally to ensure channel is always closed, even if Promise.all throws
495
+ let executionError: Error | null = null;
496
+ try {
497
+ const producerTask = fetchIncompleteRuns();
498
+ const workerTasks = Array.from({ length: concurrency }, () =>
499
+ processTasksFromChannel()
503
500
  );
504
- } else if (stopOnFirstError) {
505
- // Worker error in stopOnFirstError mode - already logged by worker
506
- executionError = err;
507
- } else {
508
- // Unexpected error (not from worker, not from producer fetch)
509
- // This could be a bug in our code or infrastructure failure
510
- logger.error(`Unexpected error during task execution: ${err.message}`);
511
- executionError = err;
512
- }
513
- } finally {
514
- // Ensure channel is closed even if there are unexpected errors
515
- // This is a safety net in case producer's finally block didn't execute
516
- if (!taskChannel.isClosed) {
517
- taskChannel.close();
501
+
502
+ // Wait for producer and all workers to finish
503
+ await Promise.all([producerTask, ...workerTasks]);
504
+ } catch (error) {
505
+ // Classify and handle errors based on their nature
506
+ const err = error instanceof Error ? error : new Error(String(error));
507
+
508
+ // Always surface producer/infrastructure errors
509
+ if (error instanceof TaskFetchError) {
510
+ // Producer failed - this is ALWAYS critical regardless of stopOnFirstError
511
+ logger.error(`Critical: Failed to fetch incomplete runs from server`);
512
+ executionError = err;
513
+ } else if (error instanceof ChannelError && signal.aborted) {
514
+ // Channel closed due to intentional abort - wrap in semantic error
515
+ executionError = new TaskAbortedError(
516
+ "Task execution stopped due to error in concurrent worker",
517
+ err
518
+ );
519
+ } else if (stopOnFirstError) {
520
+ // Worker error in stopOnFirstError mode - already logged by worker
521
+ executionError = err;
522
+ } else {
523
+ // Unexpected error (not from worker, not from producer fetch)
524
+ // This could be a bug in our code or infrastructure failure
525
+ logger.error(`Unexpected error during task execution: ${err.message}`);
526
+ executionError = err;
527
+ }
528
+ } finally {
529
+ // Ensure channel is closed even if there are unexpected errors
530
+ // This is a safety net in case producer's finally block didn't execute
531
+ if (!taskChannel.isClosed) {
532
+ taskChannel.close();
533
+ }
518
534
  }
519
- }
520
535
 
521
- // Only show completion message if we didn't stop on error
522
- if (!executionError) {
523
- logger.info(`${PROGRESS_PREFIX.completed}Task runs completed.`);
524
- }
536
+ // Only show completion message if we didn't stop on error
537
+ if (!executionError) {
538
+ logger.info(`${PROGRESS_PREFIX.completed}Task runs completed.`);
539
+ }
525
540
 
526
- if (totalFailed > 0 && !executionError) {
527
- logger.warn(`${totalFailed} out of ${totalProcessed} runs failed.`);
528
- }
541
+ if (totalFailed > 0 && !executionError) {
542
+ logger.warn(`${totalFailed} out of ${totalProcessed} runs failed.`);
543
+ }
529
544
 
530
- // Run evaluators if provided (only on runs missing evaluations)
531
- // Skip evaluators if we stopped on error
532
- if (evaluators && evaluators.length > 0 && !executionError) {
533
- logger.info(`${PROGRESS_PREFIX.start}Running evaluators.`);
534
- await resumeEvaluation({
535
- experimentId,
536
- evaluators: [...evaluators],
537
- client,
538
- logger,
539
- concurrency,
540
- setGlobalTracerProvider,
541
- useBatchSpanProcessor,
542
- diagLogLevel,
543
- stopOnFirstError,
544
- });
545
- }
545
+ if (evaluators && evaluators.length > 0 && !executionError) {
546
+ await cleanupOwnedTracerProvider({
547
+ provider,
548
+ globalRegistration,
549
+ });
550
+ provider = null;
551
+ globalRegistration = null;
546
552
 
547
- logExperimentResumeSummary(logger, {
548
- experimentId: experiment.id,
549
- processed: totalProcessed,
550
- completed: totalCompleted,
551
- failed: totalFailed,
552
- });
553
- logLinks(logger, [
554
- { label: "Experiments", url: datasetExperimentsUrl },
555
- { label: "Experiment", url: experimentUrl },
556
- ]);
557
-
558
- // Flush spans (if tracer was initialized)
559
- if (provider) {
560
- await provider.forceFlush();
561
- }
553
+ logger.info(`${PROGRESS_PREFIX.start}Running evaluators.`);
554
+ await resumeEvaluation({
555
+ experimentId,
556
+ evaluators: [...evaluators],
557
+ client,
558
+ logger,
559
+ concurrency,
560
+ setGlobalTracerProvider,
561
+ useBatchSpanProcessor,
562
+ diagLogLevel,
563
+ stopOnFirstError,
564
+ });
565
+ }
562
566
 
563
- // Re-throw error if stopOnFirstError was triggered
564
- if (executionError) {
565
- throw executionError;
567
+ logExperimentResumeSummary(logger, {
568
+ experimentId: experiment.id,
569
+ processed: totalProcessed,
570
+ completed: totalCompleted,
571
+ failed: totalFailed,
572
+ });
573
+ logLinks(logger, [
574
+ { label: "Experiments", url: datasetExperimentsUrl },
575
+ { label: "Experiment", url: experimentUrl },
576
+ ]);
577
+
578
+ // Re-throw error if stopOnFirstError was triggered
579
+ if (executionError) {
580
+ throw executionError;
581
+ }
582
+ } finally {
583
+ // Safety net: on error paths the happy-path cleanup above is skipped,
584
+ // so ensure the provider is always cleaned up. On the happy path
585
+ // provider is already null (no-op).
586
+ await cleanupOwnedTracerProvider({
587
+ provider,
588
+ globalRegistration,
589
+ });
566
590
  }
567
591
  }
568
592