@arizeai/phoenix-client 4.0.2 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@arizeai/phoenix-client",
-  "version": "4.0.2",
+  "version": "4.0.3",
   "description": "A client for the Phoenix API",
   "main": "dist/src/index.js",
   "module": "dist/esm/index.js",
@@ -61,7 +61,8 @@
     "openai": "^4.77.0",
     "openapi-typescript": "^7.6.1",
     "tsx": "^4.19.3",
-    "vitest": "^2.1.9"
+    "vitest": "^2.1.9",
+    "@arizeai/phoenix-evals": "0.2.1"
   },
   "dependencies": {
     "@arizeai/openinference-semantic-conventions": "^1.1.0",
@@ -23,7 +23,7 @@ import { pluralize } from "../utils/pluralize";
 import { promisifyResult } from "../utils/promisifyResult";
 import { AnnotatorKind } from "../types/annotations";
 import { createProvider, createNoOpProvider } from "./instrumentation";
-import { SpanStatusCode, Tracer } from "@opentelemetry/api";
+import { SpanStatusCode, Tracer, trace } from "@opentelemetry/api";
 import {
   MimeType,
   OpenInferenceSpanKind,
@@ -290,11 +290,6 @@ export async function runExperiment({
     runs,
   };
 
-  // Shut down the provider so that the experiments run
-  if (provider) {
-    await provider.shutdown?.();
-  }
-
   const { evaluationRuns } = await evaluateExperiment({
     experiment: ranExperiment,
     evaluators: evaluators ?? [],
@@ -302,8 +297,7 @@
     logger,
     concurrency,
     dryRun,
-    setGlobalTracerProvider,
-    useBatchSpanProcessor,
+    tracerProvider: provider,
   });
   ranExperiment.evaluationRuns = evaluationRuns;
 
@@ -473,6 +467,7 @@ export async function evaluateExperiment({
   dryRun = false,
   setGlobalTracerProvider = true,
   useBatchSpanProcessor = true,
+  tracerProvider: paramsTracerProvider,
 }: {
   /**
    * The experiment to evaluate
@@ -502,6 +497,11 @@
    * @default true
    */
   useBatchSpanProcessor?: boolean;
+  /**
+   * The tracer provider to use. If set, the other parameters will be ignored and the passed tracer provider will get used
+   * Intended as a pass-through from runExperiment
+   */
+  tracerProvider?: NodeTracerProvider | null;
 }): Promise<RanExperiment> {
   const isDryRun = typeof dryRun === "number" || dryRun === true;
   const client = _client ?? createClient();
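
The new `tracerProvider` parameter lets a caller (typically `runExperiment`) hand an existing provider through instead of letting `evaluateExperiment` construct and globally register its own. A minimal sketch of caller-side usage; the import path, the `reEvaluate` helper, and the argument set are illustrative, not the package's documented API:

```typescript
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
// Import path is an assumption; adjust to where evaluateExperiment and the
// RanExperiment type are exported in your build of @arizeai/phoenix-client.
import { evaluateExperiment, type RanExperiment } from "@arizeai/phoenix-client/experiments";

// Re-evaluate an already-ran experiment on a provider the caller owns,
// instead of letting evaluateExperiment create and register its own.
async function reEvaluate(ranExperiment: RanExperiment, provider: NodeTracerProvider) {
  return evaluateExperiment({
    experiment: ranExperiment,
    evaluators: [],
    // New in 4.0.3: when set, the other tracing parameters are ignored
    // and this provider is used as-is.
    tracerProvider: provider,
  });
}
```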
@@ -511,7 +511,11 @@
       "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client."
     );
   let provider: NodeTracerProvider;
-  if (!isDryRun) {
+
+  // Always allow changing of tracer providers
+  if (paramsTracerProvider) {
+    provider = paramsTracerProvider;
+  } else if (!isDryRun) {
     provider = createProvider({
       projectName: "evaluators",
       baseUrl,
@@ -668,7 +672,11 @@ export async function evaluateExperiment({
   logger.info(`✅ Evaluation runs completed`);
 
   if (provider) {
-    await provider.shutdown?.();
+    await provider.shutdown();
+    // Make sure it's not set globally anymore
+    if (setGlobalTracerProvider) {
+      trace.disable();
+    }
   }
 
   return {
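
`trace.disable()` from `@opentelemetry/api` clears the globally registered tracer provider, so shutting down the evaluators provider no longer leaves a dead global registration behind. A standalone sketch of the same register/shutdown/disable pattern, using only standard OpenTelemetry APIs:

```typescript
import { trace } from "@opentelemetry/api";
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";

async function tracedRun() {
  const provider = new NodeTracerProvider();
  provider.register(); // install as the global tracer provider

  // ... spans created via trace.getTracer("evaluators") flow through `provider` ...

  await provider.shutdown(); // flush exporters and release resources
  trace.disable(); // clear the global registration so no stale provider lingers
}
```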
@@ -51,7 +51,7 @@ export interface ExperimentRun extends Node {
   traceId: string | null;
 }
 
-export type EvaluatorParams = {
+export type EvaluatorParams<TaskOutputType = TaskOutput> = {
   /**
    * The input field of the Dataset Example
    */
@@ -59,7 +59,7 @@ export type EvaluatorParams = {
   /**
    * The output of the task
    */
-  output: TaskOutput;
+  output: TaskOutputType;
   /**
    * The expected or reference output of the Dataset Example
    */
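
`EvaluatorParams` is now generic over the task output, so an evaluator can declare a concrete `output` type rather than relying on the loose `TaskOutput` default. A sketch; the import path and the `SummaryOutput` shape are assumptions for illustration:

```typescript
// Import path is an assumption; adjust to where the experiment types are exported.
import type { EvaluatorParams } from "@arizeai/phoenix-client/types/experiments";

// Illustrative task output shape.
type SummaryOutput = { summary: string; sourceLength: number };

// `output` is typed as SummaryOutput rather than the TaskOutput default.
function compressionScore({ output }: EvaluatorParams<SummaryOutput>) {
  return { score: output.summary.length / output.sourceLength };
}
```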
@@ -79,10 +79,10 @@ export type Evaluator = {
 };
 
 export type EvaluationResult = {
-  score: number | null;
-  label: string | null;
-  metadata: Record<string, unknown>;
-  explanation: string | null;
+  score?: number | null;
+  label?: string | null;
+  metadata?: Record<string, unknown>;
+  explanation?: string | null;
 };
 
 export interface ExperimentEvaluationRun extends Node {
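
With every field of `EvaluationResult` now optional, an evaluator can return just the fields it produces instead of padding the result with explicit nulls. For example (the type import path is an assumption):

```typescript
// Import path is an assumption; adjust to your build of the client.
import type { EvaluationResult } from "@arizeai/phoenix-client/types/experiments";

// 4.0.3: a score alone is now a valid result.
const minimal: EvaluationResult = { score: 1 };

// The fully populated pre-4.0.3 shape still type-checks.
const full: EvaluationResult = {
  score: 0,
  label: "incorrect",
  explanation: "Output did not match the expected value.",
  metadata: { evaluator: "exact-match" },
};
```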