@arizeai/phoenix-client 5.3.0 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +3 -3
  2. package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
  3. package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
  4. package/dist/esm/experiments/helpers/asExperimentEvaluator.js +19 -0
  5. package/dist/esm/experiments/helpers/asExperimentEvaluator.js.map +1 -0
  6. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
  7. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
  8. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js +18 -0
  9. package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
  10. package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
  11. package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
  12. package/dist/esm/experiments/helpers/getExperimentEvaluators.js +58 -0
  13. package/dist/esm/experiments/helpers/getExperimentEvaluators.js.map +1 -0
  14. package/dist/esm/experiments/helpers/index.d.ts +4 -0
  15. package/dist/esm/experiments/helpers/index.d.ts.map +1 -0
  16. package/dist/esm/experiments/helpers/index.js +4 -0
  17. package/dist/esm/experiments/helpers/index.js.map +1 -0
  18. package/dist/esm/experiments/index.d.ts +1 -0
  19. package/dist/esm/experiments/index.d.ts.map +1 -1
  20. package/dist/esm/experiments/index.js +1 -0
  21. package/dist/esm/experiments/index.js.map +1 -1
  22. package/dist/esm/experiments/resumeEvaluation.d.ts +2 -2
  23. package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -1
  24. package/dist/esm/experiments/resumeEvaluation.js +2 -1
  25. package/dist/esm/experiments/resumeEvaluation.js.map +1 -1
  26. package/dist/esm/experiments/resumeExperiment.d.ts +2 -2
  27. package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -1
  28. package/dist/esm/experiments/resumeExperiment.js.map +1 -1
  29. package/dist/esm/experiments/runExperiment.d.ts +4 -3
  30. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  31. package/dist/esm/experiments/runExperiment.js +4 -1
  32. package/dist/esm/experiments/runExperiment.js.map +1 -1
  33. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  34. package/dist/esm/types/experiments.d.ts +6 -0
  35. package/dist/esm/types/experiments.d.ts.map +1 -1
  36. package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
  37. package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
  38. package/dist/src/experiments/helpers/asExperimentEvaluator.js +22 -0
  39. package/dist/src/experiments/helpers/asExperimentEvaluator.js.map +1 -0
  40. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
  41. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
  42. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js +21 -0
  43. package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
  44. package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
  45. package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
  46. package/dist/src/experiments/helpers/getExperimentEvaluators.js +61 -0
  47. package/dist/src/experiments/helpers/getExperimentEvaluators.js.map +1 -0
  48. package/dist/src/experiments/helpers/index.d.ts +4 -0
  49. package/dist/src/experiments/helpers/index.d.ts.map +1 -0
  50. package/dist/src/experiments/helpers/index.js +20 -0
  51. package/dist/src/experiments/helpers/index.js.map +1 -0
  52. package/dist/src/experiments/index.d.ts +1 -0
  53. package/dist/src/experiments/index.d.ts.map +1 -1
  54. package/dist/src/experiments/index.js +1 -0
  55. package/dist/src/experiments/index.js.map +1 -1
  56. package/dist/src/experiments/resumeEvaluation.d.ts +2 -2
  57. package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -1
  58. package/dist/src/experiments/resumeEvaluation.js +2 -1
  59. package/dist/src/experiments/resumeEvaluation.js.map +1 -1
  60. package/dist/src/experiments/resumeExperiment.d.ts +2 -2
  61. package/dist/src/experiments/resumeExperiment.d.ts.map +1 -1
  62. package/dist/src/experiments/resumeExperiment.js.map +1 -1
  63. package/dist/src/experiments/runExperiment.d.ts +4 -3
  64. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  65. package/dist/src/experiments/runExperiment.js +4 -1
  66. package/dist/src/experiments/runExperiment.js.map +1 -1
  67. package/dist/src/types/experiments.d.ts +6 -0
  68. package/dist/src/types/experiments.d.ts.map +1 -1
  69. package/dist/tsconfig.tsbuildinfo +1 -1
  70. package/package.json +6 -5
  71. package/src/experiments/helpers/asExperimentEvaluator.ts +29 -0
  72. package/src/experiments/helpers/fromPhoenixLLMEvaluator.ts +24 -0
  73. package/src/experiments/helpers/getExperimentEvaluators.ts +74 -0
  74. package/src/experiments/helpers/index.ts +3 -0
  75. package/src/experiments/index.ts +1 -0
  76. package/src/experiments/resumeEvaluation.ts +8 -3
  77. package/src/experiments/resumeExperiment.ts +5 -2
  78. package/src/experiments/runExperiment.ts +7 -3
  79. package/src/types/experiments.ts +10 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-client",
3
- "version": "5.3.0",
3
+ "version": "5.4.0",
4
4
  "description": "A client for the Phoenix API",
5
5
  "main": "dist/src/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -66,8 +66,8 @@
66
66
  "openapi-typescript": "^7.6.1",
67
67
  "tsx": "^4.19.3",
68
68
  "typescript": "^5.8.2",
69
- "vitest": "^2.1.9",
70
- "@arizeai/phoenix-evals": "0.3.0"
69
+ "vitest": "^4.0.10",
70
+ "@arizeai/phoenix-evals": "0.4.0"
71
71
  },
72
72
  "dependencies": {
73
73
  "@arizeai/openinference-semantic-conventions": "^1.1.0",
@@ -75,7 +75,7 @@
75
75
  "async": "^3.2.6",
76
76
  "openapi-fetch": "^0.12.5",
77
77
  "tiny-invariant": "^1.3.3",
78
- "zod": "^3.24.2",
78
+ "zod": "^3.24.3",
79
79
  "zod-to-json-schema": "^3.24.3",
80
80
  "@arizeai/phoenix-otel": "0.3.0"
81
81
  },
@@ -94,6 +94,7 @@
94
94
  "build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
95
95
  "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
96
96
  "type:check": "tsc --noEmit",
97
- "test": "vitest --typecheck"
97
+ "test": "vitest run",
98
+ "test:watch": "vitest watch"
98
99
  }
99
100
  }
@@ -0,0 +1,29 @@
1
+ import { AnnotatorKind } from "../../types/annotations";
2
+ import { Evaluator } from "../../types/experiments";
3
+
4
/**
 * Bundle an evaluation function together with its name and kind so that it
 * has the shape of an experiment `Evaluator`.
 *
 * @experimental This feature is not complete, and will change in the future.
 *
 * @param params - The pieces that make up the evaluator
 * @param params.name - The name of the evaluator.
 * @param params.kind - The kind of evaluator (e.g., "CODE", "LLM")
 * @param params.evaluate - The function that performs the evaluation.
 * @returns A new object holding exactly `name`, `kind` and `evaluate`.
 */
export function asExperimentEvaluator(params: {
  name: string;
  kind: AnnotatorKind;
  evaluate: Evaluator["evaluate"];
}): Evaluator {
  const { name, kind, evaluate } = params;
  // Values are passed through untouched; no validation or wrapping happens here.
  const evaluator: Evaluator = { name, kind, evaluate };
  return evaluator;
}
@@ -0,0 +1,24 @@
1
+ import type { LLMEvaluator } from "@arizeai/phoenix-evals";
2
+
3
+ import { Evaluator } from "../../types/experiments";
4
+
5
+ import { asExperimentEvaluator } from "./asExperimentEvaluator";
6
+
7
/**
 * Bridge a phoenix-evals `LLMEvaluator` into an experiment-compatible
 * `Evaluator`.
 *
 * The resulting evaluator reuses the phoenix evaluator's `name`, is always of
 * kind "LLM", and forwards whatever it receives to the phoenix evaluator's
 * `evaluate` method.
 *
 * @param phoenixLLMEvaluator - The phoenix-evals LLM evaluator to adapt
 * @returns an experiment compatible Evaluator
 */
export function fromPhoenixLLMEvaluator<
  RecordType extends Record<string, unknown>,
>(phoenixLLMEvaluator: LLMEvaluator<RecordType>): Evaluator {
  // The experiment argument is not checked against RecordType; for now the
  // types are blindly coerced when delegating.
  const evaluate: Evaluator["evaluate"] = (example) =>
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    phoenixLLMEvaluator.evaluate(example as any);

  return asExperimentEvaluator({
    name: phoenixLLMEvaluator.name,
    kind: "LLM",
    evaluate,
  });
}
@@ -0,0 +1,74 @@
1
+ import type { LLMEvaluator } from "@arizeai/phoenix-evals";
2
+
3
+ import { Evaluator } from "../../types/experiments";
4
+
5
+ import { fromPhoenixLLMEvaluator } from "./fromPhoenixLLMEvaluator";
6
+
7
/**
 * Type guard that recognises phoenix-evals `LLMEvaluator` class instances.
 *
 * A value qualifies when it is a non-null object exposing a function
 * `evaluate`, a string `name`, and a `kind` equal to "LLM", AND it is a class
 * instance rather than a plain object literal.
 *
 * Note: this is a heuristic, not foolproof, and may need to be updated as
 * phoenix-evals evolves.
 */
function isPhoenixLLMEvaluator(
  evaluator: unknown
): evaluator is LLMEvaluator<Record<string, unknown>> {
  if (typeof evaluator !== "object" || evaluator === null) {
    return false;
  }
  if (!("evaluate" in evaluator) || typeof evaluator.evaluate !== "function") {
    return false;
  }
  if (!("name" in evaluator) || typeof evaluator.name !== "string") {
    return false;
  }
  if (
    !("kind" in evaluator) ||
    typeof evaluator.kind !== "string" ||
    evaluator.kind !== "LLM"
  ) {
    return false;
  }

  // Phoenix evaluators are class instances, whereas plain experiment
  // evaluators are object literals whose constructor is `Object` (or missing
  // entirely for prototype-less objects).
  const ctor = evaluator.constructor;
  return ctor !== Object && ctor !== undefined;
}
41
+
42
/**
 * Type guard for experiment `Evaluator` objects.
 *
 * A value qualifies when it is a non-null object exposing a function
 * `evaluate`, a string `name`, and a `kind` of either "CODE" or "LLM".
 *
 * Note: this is a structural heuristic, not foolproof, and may need to be
 * updated as the package evolves.
 */
function isExperimentEvaluator(evaluator: unknown): evaluator is Evaluator {
  if (typeof evaluator !== "object" || evaluator === null) {
    return false;
  }
  if (!("evaluate" in evaluator) || typeof evaluator.evaluate !== "function") {
    return false;
  }
  if (!("name" in evaluator) || typeof evaluator.name !== "string") {
    return false;
  }
  if (!("kind" in evaluator) || typeof evaluator.kind !== "string") {
    return false;
  }
  return evaluator.kind === "CODE" || evaluator.kind === "LLM";
}
59
+
60
/**
 * Produce a printable description of a rejected evaluator for error messages.
 *
 * JSON is preferred so that ordinary objects keep a readable message, but
 * `JSON.stringify` throws on circular structures and BigInt values and returns
 * `undefined` for functions, symbols and `undefined`. Those cases fall back to
 * a description that can never throw.
 */
function describeUnsupportedEvaluator(value: unknown): string {
  try {
    const serialized: string | undefined = JSON.stringify(value);
    if (serialized !== undefined) {
      return serialized;
    }
  } catch {
    // Not serializable (e.g. circular reference or BigInt); describe it below.
  }
  if (typeof value === "function") {
    return `[Function: ${value.name || "anonymous"}]`;
  }
  if (typeof value === "object" && value !== null) {
    // Safe even for prototype-less objects, unlike String(value).
    return Object.prototype.toString.call(value);
  }
  return String(value);
}

/**
 * Normalizes evaluators so they are runnable by experiments. This is a best
 * effort to support a variety of evaluator types.
 *
 * Each entry is handled as follows:
 * - phoenix-evals LLM evaluators are converted via `fromPhoenixLLMEvaluator`
 * - experiment `Evaluator` objects are returned as-is
 *
 * @param evaluators - The evaluators to normalize
 * @returns One experiment `Evaluator` per input entry, in the same order
 * @throws Error with an "Unsupported evaluator: ..." message when an entry
 * matches neither supported shape. The message is always produced, even if
 * the offending value cannot be serialized to JSON.
 */
export function getExperimentEvaluators(evaluators: unknown[]): Evaluator[] {
  return evaluators.map((evaluator) => {
    // Check phoenix evaluators first, as they are more specific
    if (isPhoenixLLMEvaluator(evaluator)) {
      return fromPhoenixLLMEvaluator(evaluator);
    }
    if (isExperimentEvaluator(evaluator)) {
      return evaluator;
    }
    throw new Error(
      `Unsupported evaluator: ${describeUnsupportedEvaluator(evaluator)}`
    );
  });
}
@@ -0,0 +1,3 @@
1
+ export * from "./asExperimentEvaluator";
2
+ export * from "./getExperimentEvaluators";
3
+ export * from "./fromPhoenixLLMEvaluator";
@@ -7,3 +7,4 @@ export * from "./listExperiments";
7
7
  export * from "./deleteExperiment";
8
8
  export * from "./resumeExperiment";
9
9
  export * from "./resumeEvaluation";
10
+ export * from "./helpers";
@@ -18,6 +18,7 @@ import { ClientFn } from "../types/core";
18
18
  import type {
19
19
  EvaluationResult,
20
20
  Evaluator,
21
+ ExperimentEvaluatorLike,
21
22
  IncompleteEvaluation,
22
23
  TaskOutput,
23
24
  } from "../types/experiments";
@@ -27,6 +28,7 @@ import { ensureString } from "../utils/ensureString";
27
28
  import { toObjectHeaders } from "../utils/toObjectHeaders";
28
29
 
29
30
  import { getExperimentInfo } from "./getExperimentInfo.js";
31
+ import { getExperimentEvaluators } from "./helpers";
30
32
 
31
33
  import invariant from "tiny-invariant";
32
34
 
@@ -64,7 +66,9 @@ export type ResumeEvaluationParams = ClientFn & {
64
66
  /**
65
67
  * A single evaluator or list of evaluators to run on incomplete evaluations
66
68
  */
67
- readonly evaluators: Evaluator | readonly Evaluator[];
69
+ readonly evaluators:
70
+ | ExperimentEvaluatorLike
71
+ | readonly ExperimentEvaluatorLike[];
68
72
  /**
69
73
  * The logger to use
70
74
  * @default console
@@ -321,8 +325,9 @@ export async function resumeEvaluation({
321
325
  const pageSize = DEFAULT_PAGE_SIZE;
322
326
 
323
327
  // Normalize evaluators to array
324
- const evaluators = Array.isArray(_evaluators) ? _evaluators : [_evaluators];
325
-
328
+ const evaluators = getExperimentEvaluators(
329
+ Array.isArray(_evaluators) ? _evaluators : [_evaluators]
330
+ );
326
331
  // Validate inputs
327
332
  invariant(evaluators.length > 0, "Must specify at least one evaluator");
328
333
 
@@ -16,7 +16,10 @@ import { components } from "../__generated__/api/v1";
16
16
  import { createClient, type PhoenixClient } from "../client";
17
17
  import { ClientFn } from "../types/core";
18
18
  import { ExampleWithId } from "../types/datasets";
19
- import type { Evaluator, ExperimentTask } from "../types/experiments";
19
+ import type {
20
+ ExperimentEvaluatorLike,
21
+ ExperimentTask,
22
+ } from "../types/experiments";
20
23
  import { type Logger } from "../types/logger";
21
24
  import { Channel, ChannelError } from "../utils/channel";
22
25
  import { ensureString } from "../utils/ensureString";
@@ -68,7 +71,7 @@ export type ResumeExperimentParams = ClientFn & {
68
71
  * Optional evaluators to run on completed task runs
69
72
  * @default undefined
70
73
  */
71
- readonly evaluators?: readonly Evaluator[];
74
+ readonly evaluators?: readonly ExperimentEvaluatorLike[];
72
75
  /**
73
76
  * The logger to use
74
77
  * @default console
@@ -27,6 +27,7 @@ import {
27
27
  import type {
28
28
  Evaluator,
29
29
  ExperimentEvaluationRun,
30
+ ExperimentEvaluatorLike,
30
31
  ExperimentInfo,
31
32
  ExperimentRun,
32
33
  ExperimentRunID,
@@ -45,6 +46,7 @@ import {
45
46
  } from "../utils/urlUtils";
46
47
 
47
48
  import { getExperimentInfo } from "./getExperimentInfo";
49
+ import { getExperimentEvaluators } from "./helpers";
48
50
 
49
51
  import assert from "assert";
50
52
  import { queue } from "async";
@@ -87,7 +89,7 @@ export type RunExperimentParams = ClientFn & {
87
89
  /**
88
90
  * The evaluators to use
89
91
  */
90
- evaluators?: Evaluator[];
92
+ evaluators?: ExperimentEvaluatorLike[];
91
93
  /**
92
94
  * The logger to use
93
95
  */
@@ -534,7 +536,7 @@ export async function evaluateExperiment({
534
536
  **/
535
537
  experiment: RanExperiment;
536
538
  /** The evaluators to use */
537
- evaluators: Evaluator[];
539
+ evaluators: ExperimentEvaluatorLike[];
538
540
  /** The client to use */
539
541
  client?: PhoenixClient;
540
542
  /** The logger to use */
@@ -652,7 +654,8 @@ export async function evaluateExperiment({
652
654
 
653
655
  // Run evaluators against all runs
654
656
  // Flat list of evaluator + run tuples
655
- const evaluatorsAndRuns = evaluators.flatMap((evaluator) =>
657
+ const normalizedEvaluators = getExperimentEvaluators(evaluators);
658
+ const evaluatorsAndRuns = normalizedEvaluators.flatMap((evaluator) =>
656
659
  runsToEvaluate.map((run) => ({
657
660
  evaluator,
658
661
  run,
@@ -825,6 +828,7 @@ async function runEvaluator({
825
828
  * @param params.kind - The kind of evaluator (e.g., "CODE", "LLM")
826
829
  * @param params.evaluate - The evaluator function.
827
830
  * @returns The evaluator object.
831
+ * @deprecated use asExperimentEvaluator instead
828
832
  */
829
833
  export function asEvaluator({
830
834
  name,
@@ -1,3 +1,5 @@
1
+ import type { LLMEvaluator } from "@arizeai/phoenix-evals";
2
+
1
3
  import { AnnotatorKind } from "./annotations";
2
4
  import { Node } from "./core";
3
5
  import { Example, ExampleWithId } from "./datasets";
@@ -176,3 +178,11 @@ export interface ExperimentParameters {
176
178
  */
177
179
  nExamples: number;
178
180
  }
181
+
182
+ /**
183
+ * A type that represents any type of evaluator that can be used in an experiment.
184
+ * LLMEvaluator is included so that evaluators from an external library such as phoenix-evals are accepted alongside native Evaluator objects.
185
+ */
186
+ export type ExperimentEvaluatorLike =
187
+ | Evaluator
188
+ | LLMEvaluator<Record<string, unknown>>;