@arizeai/phoenix-client 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/esm/__generated__/api/v1.d.ts +529 -4
  2. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  3. package/dist/esm/experiments/getExperimentRuns.d.ts +8 -2
  4. package/dist/esm/experiments/getExperimentRuns.d.ts.map +1 -1
  5. package/dist/esm/experiments/getExperimentRuns.js +34 -23
  6. package/dist/esm/experiments/getExperimentRuns.js.map +1 -1
  7. package/dist/esm/experiments/runExperiment.d.ts +7 -1
  8. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  9. package/dist/esm/experiments/runExperiment.js +13 -10
  10. package/dist/esm/experiments/runExperiment.js.map +1 -1
  11. package/dist/esm/sessions/addSessionAnnotation.d.ts +44 -0
  12. package/dist/esm/sessions/addSessionAnnotation.d.ts.map +1 -0
  13. package/dist/esm/sessions/addSessionAnnotation.js +44 -0
  14. package/dist/esm/sessions/addSessionAnnotation.js.map +1 -0
  15. package/dist/esm/sessions/index.d.ts +3 -0
  16. package/dist/esm/sessions/index.d.ts.map +1 -0
  17. package/dist/esm/sessions/index.js +3 -0
  18. package/dist/esm/sessions/index.js.map +1 -0
  19. package/dist/esm/sessions/logSessionAnnotations.d.ts +56 -0
  20. package/dist/esm/sessions/logSessionAnnotations.d.ts.map +1 -0
  21. package/dist/esm/sessions/logSessionAnnotations.js +53 -0
  22. package/dist/esm/sessions/logSessionAnnotations.js.map +1 -0
  23. package/dist/esm/sessions/types.d.ts +19 -0
  24. package/dist/esm/sessions/types.d.ts.map +1 -0
  25. package/dist/esm/sessions/types.js +37 -0
  26. package/dist/esm/sessions/types.js.map +1 -0
  27. package/dist/esm/spans/types.d.ts +3 -50
  28. package/dist/esm/spans/types.d.ts.map +1 -1
  29. package/dist/esm/spans/types.js.map +1 -1
  30. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  31. package/dist/esm/types/annotations.d.ts +37 -0
  32. package/dist/esm/types/annotations.d.ts.map +1 -1
  33. package/dist/esm/types/experiments.d.ts +6 -6
  34. package/dist/esm/types/experiments.d.ts.map +1 -1
  35. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  36. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  37. package/dist/src/__generated__/api/v1.d.ts +529 -4
  38. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  39. package/dist/src/experiments/getExperimentRuns.d.ts +8 -2
  40. package/dist/src/experiments/getExperimentRuns.d.ts.map +1 -1
  41. package/dist/src/experiments/getExperimentRuns.js +35 -23
  42. package/dist/src/experiments/getExperimentRuns.js.map +1 -1
  43. package/dist/src/experiments/runExperiment.d.ts +7 -1
  44. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  45. package/dist/src/experiments/runExperiment.js +14 -11
  46. package/dist/src/experiments/runExperiment.js.map +1 -1
  47. package/dist/src/sessions/addSessionAnnotation.d.ts +44 -0
  48. package/dist/src/sessions/addSessionAnnotation.d.ts.map +1 -0
  49. package/dist/src/sessions/addSessionAnnotation.js +48 -0
  50. package/dist/src/sessions/addSessionAnnotation.js.map +1 -0
  51. package/dist/src/sessions/index.d.ts +3 -0
  52. package/dist/src/sessions/index.d.ts.map +1 -0
  53. package/dist/src/sessions/index.js +19 -0
  54. package/dist/src/sessions/index.js.map +1 -0
  55. package/dist/src/sessions/logSessionAnnotations.d.ts +56 -0
  56. package/dist/src/sessions/logSessionAnnotations.d.ts.map +1 -0
  57. package/dist/src/sessions/logSessionAnnotations.js +56 -0
  58. package/dist/src/sessions/logSessionAnnotations.js.map +1 -0
  59. package/dist/src/sessions/types.d.ts +19 -0
  60. package/dist/src/sessions/types.d.ts.map +1 -0
  61. package/dist/src/sessions/types.js +41 -0
  62. package/dist/src/sessions/types.js.map +1 -0
  63. package/dist/src/spans/types.d.ts +3 -50
  64. package/dist/src/spans/types.d.ts.map +1 -1
  65. package/dist/src/spans/types.js.map +1 -1
  66. package/dist/src/types/annotations.d.ts +37 -0
  67. package/dist/src/types/annotations.d.ts.map +1 -1
  68. package/dist/src/types/experiments.d.ts +6 -6
  69. package/dist/src/types/experiments.d.ts.map +1 -1
  70. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  71. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  72. package/dist/tsconfig.tsbuildinfo +1 -1
  73. package/package.json +8 -2
  74. package/src/__generated__/api/v1.ts +529 -4
  75. package/src/experiments/getExperimentRuns.ts +44 -15
  76. package/src/experiments/runExperiment.ts +18 -10
  77. package/src/sessions/addSessionAnnotation.ts +65 -0
  78. package/src/sessions/index.ts +2 -0
  79. package/src/sessions/logSessionAnnotations.ts +77 -0
  80. package/src/sessions/types.ts +67 -0
  81. package/src/spans/types.ts +3 -50
  82. package/src/types/annotations.ts +39 -0
  83. package/src/types/experiments.ts +6 -6
@@ -2,35 +2,60 @@ import { createClient } from "../client";
2
2
  import invariant from "tiny-invariant";
3
3
  import { ClientFn } from "../types/core";
4
4
  import { ExperimentRun } from "../types/experiments";
5
+ import { components } from "../__generated__/api/v1";
5
6
 
6
7
  export type GetExperimentRunsParams = ClientFn & {
7
8
  /**
8
9
  * The experiment ID.
9
10
  */
10
11
  experimentId: string;
12
+ /**
13
+ * The pagination size by which to pull runs
14
+ * Exposed for controlling the rate at which runs are pulled
15
+ * @default 100
16
+ */
17
+ pageSize?: number;
11
18
  };
12
19
 
20
+ const DEFAULT_PAGE_SIZE = 100;
21
+
13
22
  /**
14
- * A function that gets the runs (e.g. the results) of a experiment
23
+ * A function that gets all the runs (e.g. the results) of an experiment
15
24
  */
16
25
  export async function getExperimentRuns({
17
26
  client: _client,
18
27
  experimentId,
28
+ pageSize = DEFAULT_PAGE_SIZE,
19
29
  }: GetExperimentRunsParams): Promise<{ runs: ExperimentRun[] }> {
20
30
  const client = _client || createClient();
21
- const getRunsPromise = client.GET("/v1/experiments/{experiment_id}/runs", {
22
- params: {
23
- path: {
24
- experiment_id: experimentId,
31
+
32
+ // Validate that pageSize is a positive integer and fail fast before making any requests
33
+ invariant(
34
+ Number.isInteger(pageSize) && pageSize > 0,
35
+ "pageSize must be a positive integer greater than 0"
36
+ );
37
+ const runs: ExperimentRun[] = [];
38
+ let cursor: string | null = null;
39
+ do {
40
+ const res: {
41
+ data?: components["schemas"]["ListExperimentRunsResponseBody"];
42
+ } = await client.GET("/v1/experiments/{experiment_id}/runs", {
43
+ params: {
44
+ path: {
45
+ experiment_id: experimentId,
46
+ },
47
+ query: {
48
+ cursor,
49
+ limit: pageSize,
50
+ },
25
51
  },
26
- },
27
- });
28
- const [experimentRunResponse] = await Promise.all([getRunsPromise]);
29
- const { data: { data: experimentRunsData } = {} } = experimentRunResponse;
30
- invariant(experimentRunsData, "Failed to retrieve experiment runs");
31
- return {
32
- runs: experimentRunsData.map((run) => {
33
- return {
52
+ });
53
+ // NB: older versions of phoenix simply don't respond with a cursor and fetch all
54
+ cursor = res.data?.next_cursor || null;
55
+ const data = res.data?.data;
56
+ invariant(data, "Failed to fetch runs");
57
+ runs.push(
58
+ ...data.map((run) => ({
34
59
  id: run.id,
35
60
  traceId: run.trace_id || null,
36
61
  experimentId: run.experiment_id,
@@ -39,7 +64,11 @@ export async function getExperimentRuns({
39
64
  endTime: new Date(run.end_time),
40
65
  output: run.output as ExperimentRun["output"],
41
66
  error: run.error || null,
42
- };
43
- }),
67
+ }))
68
+ );
69
+ } while (cursor != null);
70
+
71
+ return {
72
+ runs,
44
73
  };
45
74
  }
@@ -23,7 +23,7 @@ import { pluralize } from "../utils/pluralize";
23
23
  import { promisifyResult } from "../utils/promisifyResult";
24
24
  import { AnnotatorKind } from "../types/annotations";
25
25
  import { createProvider, createNoOpProvider } from "./instrumentation";
26
- import { SpanStatusCode, Tracer } from "@opentelemetry/api";
26
+ import { SpanStatusCode, Tracer, trace } from "@opentelemetry/api";
27
27
  import {
28
28
  MimeType,
29
29
  OpenInferenceSpanKind,
@@ -290,11 +290,6 @@ export async function runExperiment({
290
290
  runs,
291
291
  };
292
292
 
293
- // Shut down the provider so that the experiments run
294
- if (provider) {
295
- await provider.shutdown?.();
296
- }
297
-
298
293
  const { evaluationRuns } = await evaluateExperiment({
299
294
  experiment: ranExperiment,
300
295
  evaluators: evaluators ?? [],
@@ -302,8 +297,7 @@ export async function runExperiment({
302
297
  logger,
303
298
  concurrency,
304
299
  dryRun,
305
- setGlobalTracerProvider,
306
- useBatchSpanProcessor,
300
+ tracerProvider: provider,
307
301
  });
308
302
  ranExperiment.evaluationRuns = evaluationRuns;
309
303
 
@@ -473,6 +467,7 @@ export async function evaluateExperiment({
473
467
  dryRun = false,
474
468
  setGlobalTracerProvider = true,
475
469
  useBatchSpanProcessor = true,
470
+ tracerProvider: paramsTracerProvider,
476
471
  }: {
477
472
  /**
478
473
  * The experiment to evaluate
@@ -502,6 +497,11 @@ export async function evaluateExperiment({
502
497
  * @default true
503
498
  */
504
499
  useBatchSpanProcessor?: boolean;
500
+ /**
501
+ * The tracer provider to use. If set, the other parameters will be ignored and the passed tracer provider will get used
502
+ * Intended as a pass-through from runExperiment
503
+ */
504
+ tracerProvider?: NodeTracerProvider | null;
505
505
  }): Promise<RanExperiment> {
506
506
  const isDryRun = typeof dryRun === "number" || dryRun === true;
507
507
  const client = _client ?? createClient();
@@ -511,7 +511,11 @@ export async function evaluateExperiment({
511
511
  "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client."
512
512
  );
513
513
  let provider: NodeTracerProvider;
514
- if (!isDryRun) {
514
+
515
+ // A caller-supplied tracer provider takes precedence, even during a dry run
516
+ if (paramsTracerProvider) {
517
+ provider = paramsTracerProvider;
518
+ } else if (!isDryRun) {
515
519
  provider = createProvider({
516
520
  projectName: "evaluators",
517
521
  baseUrl,
@@ -668,7 +672,11 @@ export async function evaluateExperiment({
668
672
  logger.info(`✅ Evaluation runs completed`);
669
673
 
670
674
  if (provider) {
671
- await provider.shutdown?.();
675
+ await provider.shutdown();
676
+ // Make sure it's not set globally anymore
677
+ if (setGlobalTracerProvider) {
678
+ trace.disable();
679
+ }
672
680
  }
673
681
 
674
682
  return {
@@ -0,0 +1,65 @@
1
+ import { createClient } from "../client";
2
+ import { ClientFn } from "../types/core";
3
+ import { SessionAnnotation, toSessionAnnotationData } from "./types";
4
+
5
+ /**
6
+ * Parameters to add a session annotation
7
+ */
8
+ export interface AddSessionAnnotationParams extends ClientFn {
9
+ sessionAnnotation: SessionAnnotation;
10
+ /**
11
+ * If true, the request will be fulfilled synchronously and return the annotation ID.
12
+ * If false, the request will be processed asynchronously and return null.
13
+ * @default false
14
+ */
15
+ sync?: boolean;
16
+ }
17
+
18
+ /**
19
+ * Add an annotation to a session.
20
+ *
21
+ * The annotation can be of type "LLM", "CODE", or "HUMAN" and can include a label, score, and metadata.
22
+ * If an identifier is provided and an annotation with that identifier already exists, it will be updated.
23
+ *
24
+ * @param params - The parameters to add a session annotation
25
+ * @returns The ID of the created or updated annotation, or null if the response contains no annotation (e.g. when sync is false)
26
+ *
27
+ * @example
28
+ * ```ts
29
+ * const result = await addSessionAnnotation({
30
+ * sessionAnnotation: {
31
+ * sessionId: "123abc",
32
+ * name: "quality_score",
33
+ * label: "good",
34
+ * score: 0.95,
35
+ * annotatorKind: "LLM",
36
+ * identifier: "custom_id_123",
37
+ * metadata: {
38
+ * model: "gpt-4"
39
+ * }
40
+ * }
41
+ * });
42
+ * ```
43
+ */
44
+ export async function addSessionAnnotation({
45
+ client: _client,
46
+ sessionAnnotation,
47
+ sync = false,
48
+ }: AddSessionAnnotationParams): Promise<{ id: string } | null> {
49
+ const client = _client ?? createClient();
50
+
51
+ const { data, error } = await client.POST("/v1/session_annotations", {
52
+ params: {
53
+ query: { sync },
54
+ },
55
+ body: {
56
+ data: [toSessionAnnotationData(sessionAnnotation)],
57
+ },
58
+ });
59
+
60
+ if (error) {
61
+ throw new Error(`Failed to add session annotation: ${error}`);
62
+ }
63
+
64
+ return data?.data?.[0] || null;
65
+ }
@@ -0,0 +1,2 @@
1
+ export * from "./addSessionAnnotation";
2
+ export * from "./logSessionAnnotations";
@@ -0,0 +1,77 @@
1
+ import { createClient } from "../client";
2
+ import { ClientFn } from "../types/core";
3
+ import { SessionAnnotation, toSessionAnnotationData } from "./types";
4
+
5
+ /**
6
+ * Parameters to log multiple session annotations
7
+ */
8
+ export interface LogSessionAnnotationsParams extends ClientFn {
9
+ /**
10
+ * The session annotations to log
11
+ */
12
+ sessionAnnotations: SessionAnnotation[];
13
+ /**
14
+ * If true, the request will be fulfilled synchronously and return the annotation IDs.
15
+ * If false, the request will be processed asynchronously and no IDs may be returned (this function falls back to an empty array, never null).
16
+ * @default false
17
+ */
18
+ sync?: boolean;
19
+ }
20
+
21
+ /**
22
+ * Log multiple session annotations in a single request.
23
+ *
24
+ * Each annotation can be of type "LLM", "CODE", or "HUMAN" and can include a label, score, and metadata.
25
+ * If an identifier is provided and an annotation with that identifier already exists, it will be updated.
26
+ *
27
+ * @param params - The parameters to log session annotations
28
+ * @returns The IDs of the created or updated annotations, or an empty array if the response contains none
29
+ *
30
+ * @example
31
+ * ```ts
32
+ * const results = await logSessionAnnotations({
33
+ * sessionAnnotations: [
34
+ * {
35
+ * sessionId: "123abc",
36
+ * name: "quality_score",
37
+ * label: "good",
38
+ * score: 0.95,
39
+ * annotatorKind: "LLM",
40
+ * identifier: "custom_id_123",
41
+ * metadata: {
42
+ * model: "gpt-4"
43
+ * }
44
+ * },
45
+ * {
46
+ * sessionId: "456def",
47
+ * name: "sentiment",
48
+ * label: "positive",
49
+ * score: 0.8,
50
+ * annotatorKind: "CODE"
51
+ * }
52
+ * ]
53
+ * });
54
+ * ```
55
+ */
56
+ export async function logSessionAnnotations({
57
+ client: _client,
58
+ sessionAnnotations,
59
+ sync = false,
60
+ }: LogSessionAnnotationsParams): Promise<{ id: string }[]> {
61
+ const client = _client ?? createClient();
62
+
63
+ const { data, error } = await client.POST("/v1/session_annotations", {
64
+ params: {
65
+ query: { sync },
66
+ },
67
+ body: {
68
+ data: sessionAnnotations.map(toSessionAnnotationData),
69
+ },
70
+ });
71
+
72
+ if (error) {
73
+ throw new Error(`Failed to log session annotations: ${error}`);
74
+ }
75
+
76
+ return data?.data || [];
77
+ }
@@ -0,0 +1,67 @@
1
+ import { paths } from "../__generated__/api/v1";
2
+ import { Annotation, AnnotationResult } from "../types/annotations";
3
+
4
+ type SessionAnnotationData =
5
+ paths["/v1/session_annotations"]["post"]["requestBody"]["content"]["application/json"]["data"][0];
6
+
7
+ /**
8
+ * Parameters for a single session annotation
9
+ */
10
+ export interface SessionAnnotation extends Annotation {
11
+ /**
12
+ * The session ID used to track a conversation, thread, or session
13
+ */
14
+ sessionId: string;
15
+ /**
16
+ * The entity that performed the annotation
17
+ */
18
+ annotatorKind?: SessionAnnotationData["annotator_kind"];
19
+ }
20
+
21
+ /**
22
+ * Build and validate annotation result fields
23
+ */
24
+ function buildSessionAnnotationResult(
25
+ annotation: Pick<SessionAnnotation, "label" | "score" | "explanation">
26
+ ): AnnotationResult {
27
+ const result: AnnotationResult = {};
28
+
29
+ // Build result with trimming for string fields
30
+ if (annotation.label !== undefined) {
31
+ result.label = annotation.label.trim() || null;
32
+ }
33
+ if (annotation.score !== undefined) {
34
+ result.score = annotation.score;
35
+ }
36
+ if (annotation.explanation !== undefined) {
37
+ result.explanation = annotation.explanation.trim() || null;
38
+ }
39
+
40
+ // Validate that at least one result field is provided
41
+ const hasValidResult =
42
+ result.label || result.score !== undefined || result.explanation;
43
+ if (!hasValidResult) {
44
+ throw new Error(
45
+ `At least one of label, score, or explanation must be provided for session annotation`
46
+ );
47
+ }
48
+ return result;
49
+ }
50
+
51
+ /**
52
+ * Convert a SessionAnnotation to the API format
53
+ */
54
+ export function toSessionAnnotationData(
55
+ annotation: SessionAnnotation
56
+ ): SessionAnnotationData {
57
+ const result = buildSessionAnnotationResult(annotation);
58
+
59
+ return {
60
+ session_id: annotation.sessionId.trim(),
61
+ name: annotation.name.trim(),
62
+ annotator_kind: annotation.annotatorKind ?? "HUMAN",
63
+ result,
64
+ metadata: annotation.metadata ?? null,
65
+ identifier: annotation.identifier?.trim() ?? "",
66
+ };
67
+ }
@@ -1,4 +1,5 @@
1
1
  import { paths } from "../__generated__/api/v1";
2
+ import { Annotation } from "../types/annotations";
2
3
 
3
4
  type SpanAnnotationData =
4
5
  paths["/v1/span_annotations"]["post"]["requestBody"]["content"]["application/json"]["data"][0];
@@ -9,35 +10,11 @@ type SpanDocumentAnnotationData =
9
10
  /**
10
11
  * Parameters for a single span annotation
11
12
  */
12
- export interface SpanAnnotation {
13
+ export interface SpanAnnotation extends Annotation {
13
14
  /**
14
15
  * The OpenTelemetry Span ID (hex format without 0x prefix)
15
16
  */
16
17
  spanId: string;
17
- /**
18
- * The name of the annotation
19
- */
20
- name: string;
21
- /**
22
- * The label assigned by the annotation
23
- */
24
- label?: string;
25
- /**
26
- * The score assigned by the annotation
27
- */
28
- score?: number;
29
- /**
30
- * Explanation of the annotation result
31
- */
32
- explanation?: string;
33
- /**
34
- * The identifier of the annotation. If provided, the annotation will be updated if it already exists.
35
- */
36
- identifier?: string;
37
- /**
38
- * Metadata for the annotation
39
- */
40
- metadata?: Record<string, unknown>;
41
18
  /**
42
19
  * The kind of annotator used for the annotation
43
20
  * Can be "HUMAN", "LLM", or "CODE"
@@ -49,35 +26,11 @@ export interface SpanAnnotation {
49
26
  /**
50
27
  * Parameters for a single document annotation
51
28
  */
52
- export interface DocumentAnnotation {
53
- /**
54
- * The OpenTelemetry Span ID (hex format without 0x prefix)
55
- */
56
- spanId: string;
29
+ export interface DocumentAnnotation extends SpanAnnotation {
57
30
  /**
58
31
  * The 0-based index of the document within the span
59
32
  */
60
33
  documentPosition: number;
61
- /**
62
- * The name of the annotation
63
- */
64
- name: string;
65
- /**
66
- * The label assigned by the annotation
67
- */
68
- label?: string;
69
- /**
70
- * The score assigned by the annotation
71
- */
72
- score?: number;
73
- /**
74
- * Explanation of the annotation result
75
- */
76
- explanation?: string;
77
- /**
78
- * Metadata for the annotation
79
- */
80
- metadata?: Record<string, unknown>;
81
34
  /**
82
35
  * The kind of annotator used for the annotation
83
36
  * Can be "HUMAN", "LLM", or "CODE"
@@ -2,3 +2,42 @@ import { components } from "../__generated__/api/v1";
2
2
 
3
3
  export type AnnotatorKind =
4
4
  components["schemas"]["SpanAnnotationData"]["annotator_kind"];
5
+
6
+ /**
7
+ * The result of an annotation from an author (e.g. an LLM or human)
8
+ */
9
+ export type AnnotationResult = {
10
+ label?: string | null;
11
+ score?: number | null;
12
+ explanation?: string | null;
13
+ };
14
+
15
+ /**
16
+ * The base interface for all kinds of annotations (span, trace, session)
17
+ */
18
+ export interface Annotation {
19
+ /**
20
+ * The name of the annotation
21
+ */
22
+ name: string;
23
+ /**
24
+ * The label assigned by the annotation
25
+ */
26
+ label?: string;
27
+ /**
28
+ * The score assigned by the annotation
29
+ */
30
+ score?: number;
31
+ /**
32
+ * Explanation of the annotation result
33
+ */
34
+ explanation?: string;
35
+ /**
36
+ * The identifier of the annotation. If provided, the annotation will be updated if it already exists.
37
+ */
38
+ identifier?: string;
39
+ /**
40
+ * Metadata for the annotation
41
+ */
42
+ metadata?: Record<string, unknown>;
43
+ }
@@ -51,7 +51,7 @@ export interface ExperimentRun extends Node {
51
51
  traceId: string | null;
52
52
  }
53
53
 
54
- export type EvaluatorParams = {
54
+ export type EvaluatorParams<TaskOutputType = TaskOutput> = {
55
55
  /**
56
56
  * The input field of the Dataset Example
57
57
  */
@@ -59,7 +59,7 @@ export type EvaluatorParams = {
59
59
  /**
60
60
  * The output of the task
61
61
  */
62
- output: TaskOutput;
62
+ output: TaskOutputType;
63
63
  /**
64
64
  * The expected or reference output of the Dataset Example
65
65
  */
@@ -79,10 +79,10 @@ export type Evaluator = {
79
79
  };
80
80
 
81
81
  export type EvaluationResult = {
82
- score: number | null;
83
- label: string | null;
84
- metadata: Record<string, unknown>;
85
- explanation: string | null;
82
+ score?: number | null;
83
+ label?: string | null;
84
+ metadata?: Record<string, unknown>;
85
+ explanation?: string | null;
86
86
  };
87
87
 
88
88
  export interface ExperimentEvaluationRun extends Node {