npm - @arizeai/phoenix-client - Versions diffs - 5.2.1 → 5.4.0 - Mend

@arizeai/phoenix-client 5.2.1 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@arizeai/phoenix-client",
-  "version": "5.2.1",
+  "version": "5.4.0",
   "description": "A client for the Phoenix API",
   "main": "dist/src/index.js",
   "module": "dist/esm/index.js",
@@ -66,8 +66,8 @@
     "openapi-typescript": "^7.6.1",
     "tsx": "^4.19.3",
     "typescript": "^5.8.2",
-    "vitest": "^2.1.9",
-    "@arizeai/phoenix-evals": "0.3.0"
+    "vitest": "^4.0.10",
+    "@arizeai/phoenix-evals": "0.4.0"
   },
   "dependencies": {
     "@arizeai/openinference-semantic-conventions": "^1.1.0",
@@ -75,7 +75,7 @@
     "async": "^3.2.6",
     "openapi-fetch": "^0.12.5",
     "tiny-invariant": "^1.3.3",
-    "zod": "^3.24.2",
+    "zod": "^3.24.3",
     "zod-to-json-schema": "^3.24.3",
     "@arizeai/phoenix-otel": "0.3.0"
   },
@@ -94,6 +94,7 @@
     "build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
     "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
     "type:check": "tsc --noEmit",
-    "test": "vitest --typecheck"
+    "test": "vitest run",
+    "test:watch": "vitest watch"
   }
 }

package/src/__generated__/api/v1.ts CHANGED Viewed

@@ -255,7 +255,10 @@ export interface paths {
             path?: never;
             cookie?: never;
         };
-        /** List experiments by dataset */
+        /**
+         * List experiments by dataset
+         * @description Retrieve a paginated list of experiments for the specified dataset.
+         */
         get: operations["listExperiments"];
         put?: never;
         /** Create experiment on a dataset */
@@ -277,6 +280,39 @@ export interface paths {
         get: operations["getExperiment"];
         put?: never;
         post?: never;
+        /** Delete experiment by ID */
+        delete: operations["deleteExperiment"];
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/v1/experiments/{experiment_id}/incomplete-runs": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /**
+         * Get incomplete runs for an experiment
+         * @description Get runs that need to be completed for this experiment.
+         *
+         *     Returns all incomplete runs, including both missing runs (not yet attempted)
+         *     and failed runs (attempted but have errors).
+         *
+         *     Args:
+         *         experiment_id: The ID of the experiment
+         *         cursor: Cursor for pagination
+         *         limit: Maximum number of results to return
+         *
+         *     Returns:
+         *         Paginated list of incomplete runs grouped by dataset example,
+         *         with repetition numbers that need to be run
+         */
+        get: operations["getIncompleteExperimentRuns"];
+        put?: never;
+        post?: never;
         delete?: never;
         options?: never;
         head?: never;
@@ -338,6 +374,39 @@ export interface paths {
         patch?: never;
         trace?: never;
     };
+    "/v1/experiments/{experiment_id}/incomplete-evaluations": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /**
+         * Get incomplete evaluations for an experiment
+         * @description Get experiment runs that have incomplete evaluations.
+         *
+         *     Returns runs with:
+         *     - Missing evaluations (evaluator has not been run)
+         *     - Failed evaluations (evaluator ran but has errors)
+         *
+         *     Args:
+         *         experiment_id: The ID of the experiment
+         *         evaluation_name: List of evaluation names to check (required, at least one)
+         *         cursor: Cursor for pagination
+         *         limit: Maximum number of results to return
+         *
+         *     Returns:
+         *         Paginated list of runs with incomplete evaluations
+         */
+        get: operations["getIncompleteExperimentEvaluations"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
     "/v1/experiment_evaluations": {
         parameters: {
             query?: never;
@@ -1199,6 +1268,26 @@ export interface components {
              * @description The last update timestamp of the experiment
              */
             updated_at: string;
+            /**
+             * Example Count
+             * @description Number of examples in the experiment
+             */
+            example_count: number;
+            /**
+             * Successful Run Count
+             * @description Number of successful runs in the experiment
+             */
+            successful_run_count: number;
+            /**
+             * Failed Run Count
+             * @description Number of failed runs in the experiment
+             */
+            failed_run_count: number;
+            /**
+             * Missing Run Count
+             * @description Number of missing (not yet executed) runs in the experiment
+             */
+            missing_run_count: number;
         };
         /** ExperimentEvaluationResult */
         ExperimentEvaluationResult: {
@@ -1218,8 +1307,8 @@ export interface components {
              */
             explanation?: string | null;
         };
-        /** ExperimentRunResponse */
-        ExperimentRunResponse: {
+        /** ExperimentRun */
+        ExperimentRun: {
             /**
              * Dataset Example Id
              * @description The ID of the dataset example used in the experiment run
@@ -1314,6 +1403,20 @@ export interface components {
         GetExperimentResponseBody: {
             data: components["schemas"]["Experiment"];
         };
+        /** GetIncompleteEvaluationsResponseBody */
+        GetIncompleteEvaluationsResponseBody: {
+            /** Data */
+            data: components["schemas"]["IncompleteExperimentEvaluation"][];
+            /** Next Cursor */
+            next_cursor: string | null;
+        };
+        /** GetIncompleteExperimentRunsResponseBody */
+        GetIncompleteExperimentRunsResponseBody: {
+            /** Data */
+            data: components["schemas"]["IncompleteExperimentRun"][];
+            /** Next Cursor */
+            next_cursor: string | null;
+        };
         /** GetProjectResponseBody */
         GetProjectResponseBody: {
             data: components["schemas"]["Project"];
@@ -1364,6 +1467,34 @@ export interface components {
         };
         /** Identifier */
         Identifier: string;
+        /**
+         * IncompleteExperimentEvaluation
+         * @description Information about an experiment run with incomplete evaluations
+         */
+        IncompleteExperimentEvaluation: {
+            /** @description The experiment run */
+            experiment_run: components["schemas"]["ExperimentRun"];
+            /** @description The dataset example */
+            dataset_example: components["schemas"]["DatasetExample"];
+            /**
+             * Evaluation Names
+             * @description List of evaluation names that are incomplete (either missing or failed)
+             */
+            evaluation_names: string[];
+        };
+        /**
+         * IncompleteExperimentRun
+         * @description Information about incomplete runs for a dataset example
+         */
+        IncompleteExperimentRun: {
+            /** @description The dataset example */
+            dataset_example: components["schemas"]["DatasetExample"];
+            /**
+             * Repetition Numbers
+             * @description List of repetition numbers that need to be run
+             */
+            repetition_numbers: number[];
+        };
         /** InsertedSessionAnnotation */
         InsertedSessionAnnotation: {
             /**
@@ -1428,7 +1559,7 @@ export interface components {
         /** ListExperimentRunsResponseBody */
         ListExperimentRunsResponseBody: {
             /** Data */
-            data: components["schemas"]["ExperimentRunResponse"][];
+            data: components["schemas"]["ExperimentRun"][];
             /** Next Cursor */
             next_cursor: string | null;
         };
@@ -1436,6 +1567,8 @@ export interface components {
         ListExperimentsResponseBody: {
             /** Data */
             data: components["schemas"]["Experiment"][];
+            /** Next Cursor */
+            next_cursor: string | null;
         };
         /** LocalUser */
         LocalUser: {
@@ -1774,6 +1907,10 @@ export interface components {
             description?: string | null;
             /** Source Prompt Id */
             source_prompt_id?: string | null;
+            /** Metadata */
+            metadata?: {
+                [key: string]: unknown;
+            } | null;
             /** Id */
             id: string;
         };
@@ -1883,6 +2020,10 @@ export interface components {
             description?: string | null;
             /** Source Prompt Id */
             source_prompt_id?: string | null;
+            /** Metadata */
+            metadata?: {
+                [key: string]: unknown;
+            } | null;
         };
         /** PromptDeepSeekInvocationParameters */
         PromptDeepSeekInvocationParameters: {
@@ -3628,7 +3769,12 @@ export interface operations {
     };
     listExperiments: {
         parameters: {
-            query?: never;
+            query?: {
+                /** @description Cursor for pagination (base64-encoded experiment ID) */
+                cursor?: string | null;
+                /** @description The max number of experiments to return at a time. */
+                limit?: number;
+            };
             header?: never;
             path: {
                 dataset_id: string;
@@ -3637,7 +3783,7 @@ export interface operations {
         };
         requestBody?: never;
         responses: {
-            /** @description Experiments retrieved successfully */
+            /** @description Paginated list of experiments for the dataset */
             200: {
                 headers: {
                     [name: string]: unknown;
@@ -3655,13 +3801,13 @@ export interface operations {
                     "text/plain": string;
                 };
             };
-            /** @description Validation Error */
+            /** @description Unprocessable Entity */
             422: {
                 headers: {
                     [name: string]: unknown;
                 };
                 content: {
-                    "application/json": components["schemas"]["HTTPValidationError"];
+                    "text/plain": string;
                 };
             };
         };
@@ -3768,6 +3914,107 @@ export interface operations {
             };
         };
     };
+    deleteExperiment: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                experiment_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Experiment deleted successfully */
+            204: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content?: never;
+            };
+            /** @description Forbidden */
+            403: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Experiment not found */
+            404: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    getIncompleteExperimentRuns: {
+        parameters: {
+            query?: {
+                /** @description Cursor for pagination */
+                cursor?: string | null;
+                /** @description Maximum number of examples with incomplete runs to return */
+                limit?: number;
+            };
+            header?: never;
+            path: {
+                experiment_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Incomplete runs retrieved successfully */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["GetIncompleteExperimentRunsResponseBody"];
+                };
+            };
+            /** @description Forbidden */
+            403: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Experiment not found */
+            404: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Invalid cursor format */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+        };
+    };
     getExperimentJSON: {
         parameters: {
             query?: never;
@@ -3954,7 +4201,7 @@ export interface operations {
                     "text/plain": string;
                 };
             };
-            /** @description This experiment run has already been submitted */
+            /** @description Experiment run already exists with a successful result and cannot be updated */
             409: {
                 headers: {
                     [name: string]: unknown;
@@ -3974,6 +4221,71 @@ export interface operations {
             };
         };
     };
+    getIncompleteExperimentEvaluations: {
+        parameters: {
+            query?: {
+                /** @description Evaluation names to check */
+                evaluation_name?: string[];
+                /** @description Cursor for pagination */
+                cursor?: string | null;
+                /** @description Maximum number of runs with incomplete evaluations to return */
+                limit?: number;
+            };
+            header?: never;
+            path: {
+                experiment_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Incomplete evaluations retrieved successfully */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["GetIncompleteEvaluationsResponseBody"];
+                };
+            };
+            /** @description No evaluator names provided */
+            400: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Forbidden */
+            403: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Experiment not found */
+            404: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+            /** @description Invalid cursor format */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "text/plain": string;
+                };
+            };
+        };
+    };
     upsertExperimentEvaluation: {
         parameters: {
             query?: never;

package/src/experiments/createExperiment.ts ADDED Viewed

@@ -0,0 +1,90 @@
+import { createClient } from "../client";
+import { ClientFn } from "../types/core";
+import { ExperimentInfo } from "../types/experiments";
+import invariant from "tiny-invariant";
+/**
+ * Parameters accepted by `createExperiment`.
+ *
+ * Combines `ClientFn` (presumably the source of the optional `client`
+ * property that `createExperiment` reads) with the experiment-specific
+ * fields below.
+ */
+export type CreateExperimentParams = ClientFn & {
+  /**
+   * The ID of the dataset to create the experiment for.
+   * Sent as the `dataset_id` path parameter.
+   */
+  datasetId: string;
+  /**
+   * The dataset version ID (if omitted, the latest version will be used).
+   * Only included in the request body (as `version_id`) when provided.
+   */
+  datasetVersionId?: string;
+  /**
+   * The name of the experiment (if omitted, a random name will be generated)
+   */
+  experimentName?: string;
+  /**
+   * An optional description of the experiment
+   */
+  experimentDescription?: string;
+  /**
+   * Metadata for the experiment
+   * @default {}
+   */
+  experimentMetadata?: Record<string, unknown>;
+  /**
+   * List of dataset split identifiers (GlobalIDs or names) to filter by.
+   * Only included in the request body when provided.
+   */
+  splits?: readonly string[];
+  /**
+   * Number of times the experiment should be repeated for each example
+   * @default 1
+   */
+  repetitions?: number;
+};
+/**
+ * Create a new experiment without running it.
+ * This creates an experiment record that can later be executed using resumeExperiment.
+ *
+ * @param params - The parameters for creating the experiment
+ * @param params.client - Optional client; a default one is created when omitted
+ * @param params.datasetId - The dataset to create the experiment for
+ * @param params.datasetVersionId - Optional dataset version, sent only when provided
+ * @param params.experimentName - Optional experiment name
+ * @param params.experimentDescription - Optional experiment description
+ * @param params.experimentMetadata - Experiment metadata, defaults to `{}`
+ * @param params.splits - Optional dataset splits, sent only when provided
+ * @param params.repetitions - Repetitions per example, defaults to `1`
+ * @returns Info about the created experiment, mapped from the API response.
+ * `datasetSplits` echoes the `splits` argument (empty array when omitted).
+ * @throws Error (via `invariant`) if the response carries no experiment data
+ */
+export async function createExperiment({
+  client: _client,
+  datasetId,
+  datasetVersionId,
+  experimentName,
+  experimentDescription,
+  experimentMetadata = {},
+  splits,
+  repetitions = 1,
+}: CreateExperimentParams): Promise<ExperimentInfo> {
+  const client = _client ?? createClient();
+  const { data: responseBody } = await client.POST(
+    "/v1/datasets/{dataset_id}/experiments",
+    {
+      params: {
+        path: {
+          dataset_id: datasetId,
+        },
+      },
+      body: {
+        name: experimentName,
+        description: experimentDescription,
+        metadata: experimentMetadata,
+        repetitions,
+        // Optional fields are left out entirely rather than sent as undefined.
+        ...(datasetVersionId ? { version_id: datasetVersionId } : {}),
+        // Copy so the readonly input array is never handed to the client.
+        ...(splits ? { splits: [...splits] } : {}),
+      },
+    }
+  );
+  const experimentResponse = responseBody?.data;
+  invariant(experimentResponse, `Failed to create experiment`);
+  return {
+    id: experimentResponse.id,
+    datasetId: experimentResponse.dataset_id,
+    datasetVersionId: experimentResponse.dataset_version_id,
+    datasetSplits: splits ? [...splits] : [],
+    repetitions: experimentResponse.repetitions,
+    metadata: experimentResponse.metadata ?? {},
+    projectName: experimentResponse.project_name ?? null,
+    createdAt: experimentResponse.created_at,
+    updatedAt: experimentResponse.updated_at,
+    exampleCount: experimentResponse.example_count,
+    successfulRunCount: experimentResponse.successful_run_count,
+    failedRunCount: experimentResponse.failed_run_count,
+    missingRunCount: experimentResponse.missing_run_count,
+  };
+}

package/src/experiments/deleteExperiment.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import { createClient } from "../client";
+import { ClientFn } from "../types/core";
+import { ensureString } from "../utils/ensureString";
+/**
+ * Parameters to delete an experiment.
+ *
+ * Extends `ClientFn` (presumably the source of the optional `client`
+ * property that `deleteExperiment` reads).
+ */
+export interface DeleteExperimentParams extends ClientFn {
+  /**
+   * The ID of the experiment to delete.
+   * Sent as the `experiment_id` path parameter.
+   */
+  experimentId: string;
+}
+/**
+ * Delete an experiment by ID.
+ *
+ * **Important**: This operation permanently deletes the experiment and all its associated
+ * runs, evaluations, and annotations.
+ *
+ * Behavior:
+ * - Deletes the experiment and all its data
+ * - Returns successfully if experiment is found and deleted
+ * - Throws error if experiment is not found (404) or other errors occur
+ *
+ * Failure is detected from the HTTP response status as well as the error body,
+ * so a failed request with an empty body is still reported.
+ *
+ * @param params - The parameters to delete an experiment
+ * @param params.client - Optional client; a default one is created when omitted
+ * @param params.experimentId - The ID of the experiment to delete
+ * @returns Promise that resolves when the experiment is successfully deleted
+ * @throws Error if the experiment is not found or deletion fails
+ *
+ * @example
+ * ```ts
+ * import { deleteExperiment } from "@arizeai/phoenix-client/experiments";
+ *
+ * await deleteExperiment({
+ *   experimentId: "exp_123",
+ * });
+ * ```
+ */
+export async function deleteExperiment({
+  client: _client,
+  experimentId,
+}: DeleteExperimentParams): Promise<void> {
+  const client = _client ?? createClient();
+  const { error, response } = await client.DELETE(
+    "/v1/experiments/{experiment_id}",
+    {
+      params: {
+        path: {
+          experiment_id: experimentId,
+        },
+      },
+    }
+  );
+  // `error` is the parsed response body, so it can be empty or undefined even
+  // when the request failed; the HTTP status is the authoritative signal.
+  const requestFailed = Boolean(error) || response?.ok === false;
+  if (!requestFailed) {
+    return;
+  }
+  // Fallback for error bodies that carry their own `status` field.
+  const bodyReportsNotFound =
+    typeof error === "object" &&
+    error !== null &&
+    "status" in error &&
+    error.status === 404;
+  if (response?.status === 404 || bodyReportsNotFound) {
+    throw new Error(`Experiment not found: ${experimentId}`);
+  }
+  // Extract meaningful error information, using the status when the body is empty
+  const errorMessage = error
+    ? ensureString(error)
+    : `HTTP ${response?.status}`;
+  throw new Error(`Failed to delete experiment: ${errorMessage}`);
+}

package/src/experiments/getExperimentInfo.ts CHANGED Viewed

@@ -32,7 +32,14 @@ export async function getExperimentInfo({
     id: experimentData.id,
     datasetId: experimentData.dataset_id,
     datasetVersionId: experimentData.dataset_version_id,
-    projectName: experimentData.project_name || "", // This will never happen
-    metadata: experimentData.metadata,
+    repetitions: experimentData.repetitions,
+    metadata: experimentData.metadata || {},
+    projectName: experimentData.project_name || null,
+    createdAt: experimentData.created_at,
+    updatedAt: experimentData.updated_at,
+    exampleCount: experimentData.example_count,
+    successfulRunCount: experimentData.successful_run_count,
+    failedRunCount: experimentData.failed_run_count,
+    missingRunCount: experimentData.missing_run_count,
   };
 }

package/src/experiments/helpers/asExperimentEvaluator.ts ADDED Viewed

@@ -0,0 +1,29 @@
+import { AnnotatorKind } from "../../types/annotations";
+import { Evaluator } from "../../types/experiments";
+/**
+ * Build an evaluator object from a name, a kind, and an evaluate function.
+ *
+ * @experimental This feature is not complete, and will change in the future.
+ *
+ * @param params - The pieces that make up the evaluator
+ * @param params.name - The name to give the evaluator.
+ * @param params.kind - The kind of evaluator (e.g., "CODE", "LLM")
+ * @param params.evaluate - The function that performs the evaluation.
+ * @returns A new object holding the supplied `name`, `kind`, and `evaluate`.
+ */
+export function asExperimentEvaluator(params: {
+  name: string;
+  kind: AnnotatorKind;
+  evaluate: Evaluator["evaluate"];
+}): Evaluator {
+  const { name, kind, evaluate } = params;
+  const evaluator: Evaluator = { name, kind, evaluate };
+  return evaluator;
+}