@arizeai/phoenix-client 5.3.0 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
- package/dist/esm/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
- package/dist/esm/experiments/helpers/asExperimentEvaluator.js +19 -0
- package/dist/esm/experiments/helpers/asExperimentEvaluator.js.map +1 -0
- package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
- package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
- package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js +18 -0
- package/dist/esm/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
- package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
- package/dist/esm/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
- package/dist/esm/experiments/helpers/getExperimentEvaluators.js +58 -0
- package/dist/esm/experiments/helpers/getExperimentEvaluators.js.map +1 -0
- package/dist/esm/experiments/helpers/index.d.ts +4 -0
- package/dist/esm/experiments/helpers/index.d.ts.map +1 -0
- package/dist/esm/experiments/helpers/index.js +4 -0
- package/dist/esm/experiments/helpers/index.js.map +1 -0
- package/dist/esm/experiments/index.d.ts +1 -0
- package/dist/esm/experiments/index.d.ts.map +1 -1
- package/dist/esm/experiments/index.js +1 -0
- package/dist/esm/experiments/index.js.map +1 -1
- package/dist/esm/experiments/resumeEvaluation.d.ts +2 -2
- package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -1
- package/dist/esm/experiments/resumeEvaluation.js +2 -1
- package/dist/esm/experiments/resumeEvaluation.js.map +1 -1
- package/dist/esm/experiments/resumeExperiment.d.ts +2 -2
- package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/resumeExperiment.js.map +1 -1
- package/dist/esm/experiments/runExperiment.d.ts +4 -3
- package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/runExperiment.js +4 -1
- package/dist/esm/experiments/runExperiment.js.map +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/experiments.d.ts +6 -0
- package/dist/esm/types/experiments.d.ts.map +1 -1
- package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts +19 -0
- package/dist/src/experiments/helpers/asExperimentEvaluator.d.ts.map +1 -0
- package/dist/src/experiments/helpers/asExperimentEvaluator.js +22 -0
- package/dist/src/experiments/helpers/asExperimentEvaluator.js.map +1 -0
- package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts +9 -0
- package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.d.ts.map +1 -0
- package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js +21 -0
- package/dist/src/experiments/helpers/fromPhoenixLLMEvaluator.js.map +1 -0
- package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts +6 -0
- package/dist/src/experiments/helpers/getExperimentEvaluators.d.ts.map +1 -0
- package/dist/src/experiments/helpers/getExperimentEvaluators.js +61 -0
- package/dist/src/experiments/helpers/getExperimentEvaluators.js.map +1 -0
- package/dist/src/experiments/helpers/index.d.ts +4 -0
- package/dist/src/experiments/helpers/index.d.ts.map +1 -0
- package/dist/src/experiments/helpers/index.js +20 -0
- package/dist/src/experiments/helpers/index.js.map +1 -0
- package/dist/src/experiments/index.d.ts +1 -0
- package/dist/src/experiments/index.d.ts.map +1 -1
- package/dist/src/experiments/index.js +1 -0
- package/dist/src/experiments/index.js.map +1 -1
- package/dist/src/experiments/resumeEvaluation.d.ts +2 -2
- package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -1
- package/dist/src/experiments/resumeEvaluation.js +2 -1
- package/dist/src/experiments/resumeEvaluation.js.map +1 -1
- package/dist/src/experiments/resumeExperiment.d.ts +2 -2
- package/dist/src/experiments/resumeExperiment.d.ts.map +1 -1
- package/dist/src/experiments/resumeExperiment.js.map +1 -1
- package/dist/src/experiments/runExperiment.d.ts +4 -3
- package/dist/src/experiments/runExperiment.d.ts.map +1 -1
- package/dist/src/experiments/runExperiment.js +4 -1
- package/dist/src/experiments/runExperiment.js.map +1 -1
- package/dist/src/types/experiments.d.ts +6 -0
- package/dist/src/types/experiments.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +6 -5
- package/src/experiments/helpers/asExperimentEvaluator.ts +29 -0
- package/src/experiments/helpers/fromPhoenixLLMEvaluator.ts +24 -0
- package/src/experiments/helpers/getExperimentEvaluators.ts +74 -0
- package/src/experiments/helpers/index.ts +3 -0
- package/src/experiments/index.ts +1 -0
- package/src/experiments/resumeEvaluation.ts +8 -3
- package/src/experiments/resumeExperiment.ts +5 -2
- package/src/experiments/runExperiment.ts +7 -3
- package/src/types/experiments.ts +10 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arizeai/phoenix-client",
|
|
3
|
-
"version": "5.3.0",
|
|
3
|
+
"version": "5.4.0",
|
|
4
4
|
"description": "A client for the Phoenix API",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -66,8 +66,8 @@
|
|
|
66
66
|
"openapi-typescript": "^7.6.1",
|
|
67
67
|
"tsx": "^4.19.3",
|
|
68
68
|
"typescript": "^5.8.2",
|
|
69
|
-
"vitest": "^
|
|
70
|
-
"@arizeai/phoenix-evals": "0.
|
|
69
|
+
"vitest": "^4.0.10",
|
|
70
|
+
"@arizeai/phoenix-evals": "0.4.0"
|
|
71
71
|
},
|
|
72
72
|
"dependencies": {
|
|
73
73
|
"@arizeai/openinference-semantic-conventions": "^1.1.0",
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
"async": "^3.2.6",
|
|
76
76
|
"openapi-fetch": "^0.12.5",
|
|
77
77
|
"tiny-invariant": "^1.3.3",
|
|
78
|
-
"zod": "^3.24.
|
|
78
|
+
"zod": "^3.24.3",
|
|
79
79
|
"zod-to-json-schema": "^3.24.3",
|
|
80
80
|
"@arizeai/phoenix-otel": "0.3.0"
|
|
81
81
|
},
|
|
@@ -94,6 +94,7 @@
|
|
|
94
94
|
"build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
|
|
95
95
|
"postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
|
|
96
96
|
"type:check": "tsc --noEmit",
|
|
97
|
-
"test": "vitest
|
|
97
|
+
"test": "vitest run",
|
|
98
|
+
"test:watch": "vitest watch"
|
|
98
99
|
}
|
|
99
100
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { AnnotatorKind } from "../../types/annotations";
|
|
2
|
+
import { Evaluator } from "../../types/experiments";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Wrap an evaluator function in an object with a name property.
|
|
6
|
+
*
|
|
7
|
+
* @experimental This feature is not complete, and will change in the future.
|
|
8
|
+
*
|
|
9
|
+
* @param params - The parameters for creating the evaluator
|
|
10
|
+
* @param params.name - The name of the evaluator.
|
|
11
|
+
* @param params.kind - The kind of evaluator (e.g., "CODE", "LLM")
|
|
12
|
+
* @param params.evaluate - The evaluator function.
|
|
13
|
+
* @returns The evaluator object.
|
|
14
|
+
*/
|
|
15
|
+
export function asExperimentEvaluator({
|
|
16
|
+
name,
|
|
17
|
+
kind,
|
|
18
|
+
evaluate,
|
|
19
|
+
}: {
|
|
20
|
+
name: string;
|
|
21
|
+
kind: AnnotatorKind;
|
|
22
|
+
evaluate: Evaluator["evaluate"];
|
|
23
|
+
}): Evaluator {
|
|
24
|
+
return {
|
|
25
|
+
name,
|
|
26
|
+
kind,
|
|
27
|
+
evaluate,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { LLMEvaluator } from "@arizeai/phoenix-evals";
|
|
2
|
+
|
|
3
|
+
import { Evaluator } from "../../types/experiments";
|
|
4
|
+
|
|
5
|
+
import { asExperimentEvaluator } from "./asExperimentEvaluator";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* A function that acts as a bridge, converting phoenix-evals to be experiment evaluator compatible
|
|
9
|
+
* @param phoenixEvaluator
|
|
10
|
+
* @returns an experiment compatible Evaluator
|
|
11
|
+
*/
|
|
12
|
+
export function fromPhoenixLLMEvaluator<
|
|
13
|
+
RecordType extends Record<string, unknown>,
|
|
14
|
+
>(phoenixLLMEvaluator: LLMEvaluator<RecordType>): Evaluator {
|
|
15
|
+
return asExperimentEvaluator({
|
|
16
|
+
name: phoenixLLMEvaluator.name,
|
|
17
|
+
kind: "LLM",
|
|
18
|
+
evaluate: (example) => {
|
|
19
|
+
// For now blindly coerce the types
|
|
20
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
21
|
+
return phoenixLLMEvaluator.evaluate(example as any);
|
|
22
|
+
},
|
|
23
|
+
});
|
|
24
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import type { LLMEvaluator } from "@arizeai/phoenix-evals";
|
|
2
|
+
|
|
3
|
+
import { Evaluator } from "../../types/experiments";
|
|
4
|
+
|
|
5
|
+
import { fromPhoenixLLMEvaluator } from "./fromPhoenixLLMEvaluator";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* A type guard for LLMEvaluator classes.
|
|
9
|
+
* Note: this is not fool proof, and may need to be updated as phoenix-evals evolves.
|
|
10
|
+
*/
|
|
11
|
+
function isPhoenixLLMEvaluator(
|
|
12
|
+
evaluator: unknown
|
|
13
|
+
): evaluator is LLMEvaluator<Record<string, unknown>> {
|
|
14
|
+
if (
|
|
15
|
+
typeof evaluator !== "object" ||
|
|
16
|
+
evaluator === null ||
|
|
17
|
+
!("evaluate" in evaluator) ||
|
|
18
|
+
typeof evaluator.evaluate !== "function" ||
|
|
19
|
+
!("name" in evaluator) ||
|
|
20
|
+
typeof evaluator.name !== "string" ||
|
|
21
|
+
!("kind" in evaluator) ||
|
|
22
|
+
typeof evaluator.kind !== "string" ||
|
|
23
|
+
evaluator.kind !== "LLM"
|
|
24
|
+
) {
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Check if it's a class instance (not a plain object)
|
|
29
|
+
// Phoenix evaluators are class instances, plain evaluators are objects
|
|
30
|
+
const isClassInstance =
|
|
31
|
+
evaluator.constructor !== Object && evaluator.constructor !== undefined;
|
|
32
|
+
|
|
33
|
+
// If it's a class instance, it's definitely a phoenix evaluator
|
|
34
|
+
if (isClassInstance) {
|
|
35
|
+
return true;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Otherwise, it's a plain Evaluator object, not a phoenix evaluator
|
|
39
|
+
return false;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* A type guard for Evaluator objects.
|
|
44
|
+
* Note: this is not fool proof, and may need to be updated as the package evolves
|
|
45
|
+
*/
|
|
46
|
+
function isExperimentEvaluator(evaluator: unknown): evaluator is Evaluator {
|
|
47
|
+
return (
|
|
48
|
+
typeof evaluator === "object" &&
|
|
49
|
+
evaluator !== null &&
|
|
50
|
+
"evaluate" in evaluator &&
|
|
51
|
+
typeof evaluator.evaluate === "function" &&
|
|
52
|
+
"name" in evaluator &&
|
|
53
|
+
typeof evaluator.name === "string" &&
|
|
54
|
+
"kind" in evaluator &&
|
|
55
|
+
typeof evaluator.kind === "string" &&
|
|
56
|
+
(evaluator.kind === "CODE" || evaluator.kind === "LLM")
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* A function that normalizes evaluators to be runnable by experiments. This is a best effort to support a variety of evaluator types.
|
|
62
|
+
*/
|
|
63
|
+
export function getExperimentEvaluators(evaluators: unknown[]): Evaluator[] {
|
|
64
|
+
return evaluators.map((evaluator) => {
|
|
65
|
+
// Check phoenix evaluators first, as they are more specific
|
|
66
|
+
if (isPhoenixLLMEvaluator(evaluator)) {
|
|
67
|
+
return fromPhoenixLLMEvaluator(evaluator);
|
|
68
|
+
}
|
|
69
|
+
if (isExperimentEvaluator(evaluator)) {
|
|
70
|
+
return evaluator;
|
|
71
|
+
}
|
|
72
|
+
throw new Error(`Unsupported evaluator: ${JSON.stringify(evaluator)}`);
|
|
73
|
+
});
|
|
74
|
+
}
|
package/src/experiments/index.ts
CHANGED
|
@@ -18,6 +18,7 @@ import { ClientFn } from "../types/core";
|
|
|
18
18
|
import type {
|
|
19
19
|
EvaluationResult,
|
|
20
20
|
Evaluator,
|
|
21
|
+
ExperimentEvaluatorLike,
|
|
21
22
|
IncompleteEvaluation,
|
|
22
23
|
TaskOutput,
|
|
23
24
|
} from "../types/experiments";
|
|
@@ -27,6 +28,7 @@ import { ensureString } from "../utils/ensureString";
|
|
|
27
28
|
import { toObjectHeaders } from "../utils/toObjectHeaders";
|
|
28
29
|
|
|
29
30
|
import { getExperimentInfo } from "./getExperimentInfo.js";
|
|
31
|
+
import { getExperimentEvaluators } from "./helpers";
|
|
30
32
|
|
|
31
33
|
import invariant from "tiny-invariant";
|
|
32
34
|
|
|
@@ -64,7 +66,9 @@ export type ResumeEvaluationParams = ClientFn & {
|
|
|
64
66
|
/**
|
|
65
67
|
* A single evaluator or list of evaluators to run on incomplete evaluations
|
|
66
68
|
*/
|
|
67
|
-
readonly evaluators:
|
|
69
|
+
readonly evaluators:
|
|
70
|
+
| ExperimentEvaluatorLike
|
|
71
|
+
| readonly ExperimentEvaluatorLike[];
|
|
68
72
|
/**
|
|
69
73
|
* The logger to use
|
|
70
74
|
* @default console
|
|
@@ -321,8 +325,9 @@ export async function resumeEvaluation({
|
|
|
321
325
|
const pageSize = DEFAULT_PAGE_SIZE;
|
|
322
326
|
|
|
323
327
|
// Normalize evaluators to array
|
|
324
|
-
const evaluators =
|
|
325
|
-
|
|
328
|
+
const evaluators = getExperimentEvaluators(
|
|
329
|
+
Array.isArray(_evaluators) ? _evaluators : [_evaluators]
|
|
330
|
+
);
|
|
326
331
|
// Validate inputs
|
|
327
332
|
invariant(evaluators.length > 0, "Must specify at least one evaluator");
|
|
328
333
|
|
|
@@ -16,7 +16,10 @@ import { components } from "../__generated__/api/v1";
|
|
|
16
16
|
import { createClient, type PhoenixClient } from "../client";
|
|
17
17
|
import { ClientFn } from "../types/core";
|
|
18
18
|
import { ExampleWithId } from "../types/datasets";
|
|
19
|
-
import type {
|
|
19
|
+
import type {
|
|
20
|
+
ExperimentEvaluatorLike,
|
|
21
|
+
ExperimentTask,
|
|
22
|
+
} from "../types/experiments";
|
|
20
23
|
import { type Logger } from "../types/logger";
|
|
21
24
|
import { Channel, ChannelError } from "../utils/channel";
|
|
22
25
|
import { ensureString } from "../utils/ensureString";
|
|
@@ -68,7 +71,7 @@ export type ResumeExperimentParams = ClientFn & {
|
|
|
68
71
|
* Optional evaluators to run on completed task runs
|
|
69
72
|
* @default undefined
|
|
70
73
|
*/
|
|
71
|
-
readonly evaluators?: readonly
|
|
74
|
+
readonly evaluators?: readonly ExperimentEvaluatorLike[];
|
|
72
75
|
/**
|
|
73
76
|
* The logger to use
|
|
74
77
|
* @default console
|
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
import type {
|
|
28
28
|
Evaluator,
|
|
29
29
|
ExperimentEvaluationRun,
|
|
30
|
+
ExperimentEvaluatorLike,
|
|
30
31
|
ExperimentInfo,
|
|
31
32
|
ExperimentRun,
|
|
32
33
|
ExperimentRunID,
|
|
@@ -45,6 +46,7 @@ import {
|
|
|
45
46
|
} from "../utils/urlUtils";
|
|
46
47
|
|
|
47
48
|
import { getExperimentInfo } from "./getExperimentInfo";
|
|
49
|
+
import { getExperimentEvaluators } from "./helpers";
|
|
48
50
|
|
|
49
51
|
import assert from "assert";
|
|
50
52
|
import { queue } from "async";
|
|
@@ -87,7 +89,7 @@ export type RunExperimentParams = ClientFn & {
|
|
|
87
89
|
/**
|
|
88
90
|
* The evaluators to use
|
|
89
91
|
*/
|
|
90
|
-
evaluators?:
|
|
92
|
+
evaluators?: ExperimentEvaluatorLike[];
|
|
91
93
|
/**
|
|
92
94
|
* The logger to use
|
|
93
95
|
*/
|
|
@@ -534,7 +536,7 @@ export async function evaluateExperiment({
|
|
|
534
536
|
**/
|
|
535
537
|
experiment: RanExperiment;
|
|
536
538
|
/** The evaluators to use */
|
|
537
|
-
evaluators:
|
|
539
|
+
evaluators: ExperimentEvaluatorLike[];
|
|
538
540
|
/** The client to use */
|
|
539
541
|
client?: PhoenixClient;
|
|
540
542
|
/** The logger to use */
|
|
@@ -652,7 +654,8 @@ export async function evaluateExperiment({
|
|
|
652
654
|
|
|
653
655
|
// Run evaluators against all runs
|
|
654
656
|
// Flat list of evaluator + run tuples
|
|
655
|
-
const
|
|
657
|
+
const normalizedEvaluators = getExperimentEvaluators(evaluators);
|
|
658
|
+
const evaluatorsAndRuns = normalizedEvaluators.flatMap((evaluator) =>
|
|
656
659
|
runsToEvaluate.map((run) => ({
|
|
657
660
|
evaluator,
|
|
658
661
|
run,
|
|
@@ -825,6 +828,7 @@ async function runEvaluator({
|
|
|
825
828
|
* @param params.kind - The kind of evaluator (e.g., "CODE", "LLM")
|
|
826
829
|
* @param params.evaluate - The evaluator function.
|
|
827
830
|
* @returns The evaluator object.
|
|
831
|
+
* @deprecated use asExperimentEvaluator instead
|
|
828
832
|
*/
|
|
829
833
|
export function asEvaluator({
|
|
830
834
|
name,
|
package/src/types/experiments.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import type { LLMEvaluator } from "@arizeai/phoenix-evals";
|
|
2
|
+
|
|
1
3
|
import { AnnotatorKind } from "./annotations";
|
|
2
4
|
import { Node } from "./core";
|
|
3
5
|
import { Example, ExampleWithId } from "./datasets";
|
|
@@ -176,3 +178,11 @@ export interface ExperimentParameters {
|
|
|
176
178
|
*/
|
|
177
179
|
nExamples: number;
|
|
178
180
|
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* A type that represents any type of evaluator that can be used in an experiment.
|
|
184
|
+
* Unknown is used to capture evaluators from an external library such as phoenix-evals.
|
|
185
|
+
*/
|
|
186
|
+
export type ExperimentEvaluatorLike =
|
|
187
|
+
| Evaluator
|
|
188
|
+
| LLMEvaluator<Record<string, unknown>>;
|