@arizeai/phoenix-client 4.1.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +1 -1
  2. package/dist/esm/__generated__/api/v1.d.ts +9 -0
  3. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  4. package/dist/esm/datasets/createOrGetDataset.d.ts +18 -0
  5. package/dist/esm/datasets/createOrGetDataset.d.ts.map +1 -0
  6. package/dist/esm/datasets/createOrGetDataset.js +29 -0
  7. package/dist/esm/datasets/createOrGetDataset.js.map +1 -0
  8. package/dist/esm/datasets/getDataset.d.ts +2 -3
  9. package/dist/esm/datasets/getDataset.d.ts.map +1 -1
  10. package/dist/esm/datasets/getDataset.js +2 -3
  11. package/dist/esm/datasets/getDataset.js.map +1 -1
  12. package/dist/esm/datasets/getDatasetExamples.d.ts +4 -4
  13. package/dist/esm/datasets/getDatasetExamples.d.ts.map +1 -1
  14. package/dist/esm/datasets/getDatasetExamples.js +11 -11
  15. package/dist/esm/datasets/getDatasetExamples.js.map +1 -1
  16. package/dist/esm/datasets/index.d.ts +1 -0
  17. package/dist/esm/datasets/index.d.ts.map +1 -1
  18. package/dist/esm/datasets/index.js +1 -0
  19. package/dist/esm/datasets/index.js.map +1 -1
  20. package/dist/esm/experiments/getExperimentInfo.d.ts.map +1 -1
  21. package/dist/esm/experiments/getExperimentInfo.js +1 -1
  22. package/dist/esm/experiments/getExperimentInfo.js.map +1 -1
  23. package/dist/esm/experiments/instrumentation.d.ts +7 -1
  24. package/dist/esm/experiments/instrumentation.d.ts.map +1 -1
  25. package/dist/esm/experiments/instrumentation.js +5 -3
  26. package/dist/esm/experiments/instrumentation.js.map +1 -1
  27. package/dist/esm/experiments/runExperiment.d.ts +13 -2
  28. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  29. package/dist/esm/experiments/runExperiment.js +25 -5
  30. package/dist/esm/experiments/runExperiment.js.map +1 -1
  31. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  32. package/dist/esm/types/datasets.d.ts +8 -10
  33. package/dist/esm/types/datasets.d.ts.map +1 -1
  34. package/dist/esm/types/experiments.d.ts +1 -0
  35. package/dist/esm/types/experiments.d.ts.map +1 -1
  36. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  37. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  38. package/dist/src/__generated__/api/v1.d.ts +9 -0
  39. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  40. package/dist/src/datasets/createOrGetDataset.d.ts +18 -0
  41. package/dist/src/datasets/createOrGetDataset.d.ts.map +1 -0
  42. package/dist/src/datasets/createOrGetDataset.js +32 -0
  43. package/dist/src/datasets/createOrGetDataset.js.map +1 -0
  44. package/dist/src/datasets/getDataset.d.ts +2 -3
  45. package/dist/src/datasets/getDataset.d.ts.map +1 -1
  46. package/dist/src/datasets/getDataset.js +2 -3
  47. package/dist/src/datasets/getDataset.js.map +1 -1
  48. package/dist/src/datasets/getDatasetExamples.d.ts +4 -4
  49. package/dist/src/datasets/getDatasetExamples.d.ts.map +1 -1
  50. package/dist/src/datasets/getDatasetExamples.js +8 -11
  51. package/dist/src/datasets/getDatasetExamples.js.map +1 -1
  52. package/dist/src/datasets/index.d.ts +1 -0
  53. package/dist/src/datasets/index.d.ts.map +1 -1
  54. package/dist/src/datasets/index.js +1 -0
  55. package/dist/src/datasets/index.js.map +1 -1
  56. package/dist/src/experiments/getExperimentInfo.d.ts.map +1 -1
  57. package/dist/src/experiments/getExperimentInfo.js +1 -1
  58. package/dist/src/experiments/getExperimentInfo.js.map +1 -1
  59. package/dist/src/experiments/instrumentation.d.ts +7 -1
  60. package/dist/src/experiments/instrumentation.d.ts.map +1 -1
  61. package/dist/src/experiments/instrumentation.js +4 -2
  62. package/dist/src/experiments/instrumentation.js.map +1 -1
  63. package/dist/src/experiments/runExperiment.d.ts +13 -2
  64. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  65. package/dist/src/experiments/runExperiment.js +26 -15
  66. package/dist/src/experiments/runExperiment.js.map +1 -1
  67. package/dist/src/prompts/sdks/toSDK.d.ts +2 -2
  68. package/dist/src/types/datasets.d.ts +8 -10
  69. package/dist/src/types/datasets.d.ts.map +1 -1
  70. package/dist/src/types/experiments.d.ts +1 -0
  71. package/dist/src/types/experiments.d.ts.map +1 -1
  72. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  73. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  74. package/dist/tsconfig.tsbuildinfo +1 -1
  75. package/package.json +4 -4
  76. package/src/__generated__/api/v1.ts +9 -0
  77. package/src/datasets/createOrGetDataset.ts +39 -0
  78. package/src/datasets/getDataset.ts +2 -4
  79. package/src/datasets/getDatasetExamples.ts +13 -13
  80. package/src/datasets/index.ts +1 -0
  81. package/src/experiments/getExperimentInfo.ts +3 -5
  82. package/src/experiments/instrumentation.ts +9 -1
  83. package/src/experiments/runExperiment.ts +41 -4
  84. package/src/types/datasets.ts +5 -9
  85. package/src/types/experiments.ts +2 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-client",
3
- "version": "4.1.0",
3
+ "version": "5.0.0",
4
4
  "description": "A client for the Phoenix API",
5
5
  "main": "dist/src/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -67,7 +67,7 @@
67
67
  "tsx": "^4.19.3",
68
68
  "typescript": "^5.8.2",
69
69
  "vitest": "^2.1.9",
70
- "@arizeai/phoenix-evals": "0.2.1"
70
+ "@arizeai/phoenix-evals": "0.2.2"
71
71
  },
72
72
  "dependencies": {
73
73
  "@arizeai/openinference-semantic-conventions": "^1.1.0",
@@ -90,7 +90,7 @@
90
90
  },
91
91
  "optionalDependencies": {
92
92
  "@anthropic-ai/sdk": "^0.35.0",
93
- "ai": "^5.0.38",
93
+ "ai": "^5.0.76",
94
94
  "openai": "^5.12.1"
95
95
  },
96
96
  "scripts": {
@@ -98,7 +98,7 @@
98
98
  "prebuild": "pnpm run clean && pnpm run generate",
99
99
  "generate": "openapi-typescript --empty-objects-unknown=true --default-non-nullable=false ../../../schemas/openapi.json -o ./src/__generated__/api/v1.ts",
100
100
  "build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
101
- "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json && rimraf dist/test dist/examples",
101
+ "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
102
102
  "type:check": "tsc --noEmit",
103
103
  "test": "vitest --typecheck"
104
104
  }
@@ -939,6 +939,11 @@ export interface components {
939
939
  * @description ID of the dataset version over which the experiment will be run (if omitted, the latest version will be used)
940
940
  */
941
941
  version_id?: string | null;
942
+ /**
943
+ * Splits
944
+ * @description List of dataset split identifiers (GlobalIDs or names) to filter by
945
+ */
946
+ splits?: string[] | null;
942
947
  /**
943
948
  * Repetitions
944
949
  * @description Number of times the experiment should be repeated for each example
@@ -1397,6 +1402,8 @@ export interface components {
1397
1402
  dataset_id: string;
1398
1403
  /** Version Id */
1399
1404
  version_id: string;
1405
+ /** Filtered Splits */
1406
+ filtered_splits?: string[];
1400
1407
  /** Examples */
1401
1408
  examples: components["schemas"]["DatasetExample"][];
1402
1409
  };
@@ -3437,6 +3444,8 @@ export interface operations {
3437
3444
  query?: {
3438
3445
  /** @description The ID of the dataset version (if omitted, returns data from the latest version) */
3439
3446
  version_id?: string | null;
3447
+ /** @description List of dataset split identifiers (GlobalIDs or names) to filter by */
3448
+ split?: string[] | null;
3440
3449
  };
3441
3450
  header?: never;
3442
3451
  path: {
@@ -0,0 +1,39 @@
1
+ import { createClient } from "../client";
2
+ import { CreateDatasetParams, createDataset } from "./createDataset";
3
+ import { getDatasetInfoByName } from "./getDatasetInfoByName";
4
+
5
+ export type CreateOrGetDatasetParams = CreateDatasetParams;
6
+
7
+ export type CreateOrGetDatasetResponse = {
8
+ datasetId: string;
9
+ };
10
+
11
+ /**
12
+ * Given the parameters to create a dataset, this function will either
13
+ * retrieve an existing dataset by name or create a new one with the provided parameters.
14
+ *
15
+ * This is useful in cases where you would like to re-run a pipeline like:
16
+ * - ensure dataset exists
17
+ * - create a task
18
+ * - run experiment
19
+ * - evaluate experiment
20
+ * without having to create a new dataset each time.
21
+ */
22
+ export async function createOrGetDataset({
23
+ name,
24
+ description,
25
+ examples,
26
+ client: _client,
27
+ }: CreateOrGetDatasetParams): Promise<CreateOrGetDatasetResponse> {
28
+ const client = _client || createClient();
29
+ // start by fetching an existing dataset by name, catching any errors that occur
30
+ try {
31
+ const dataset = await getDatasetInfoByName({ datasetName: name, client });
32
+ return {
33
+ datasetId: dataset.id,
34
+ };
35
+ } catch {
36
+ // If the dataset doesn't exist, create it, falling back to the error handling inside createDataset
37
+ return await createDataset({ name, description, examples, client });
38
+ }
39
+ }
@@ -5,24 +5,22 @@ import { getDatasetExamples } from "./getDatasetExamples";
5
5
  import { getDatasetInfo } from "./getDatasetInfo";
6
6
 
7
7
  export type GetDatasetParams = ClientFn & {
8
+ /** Dataset selector (ID or name) */
8
9
  dataset: DatasetSelector;
9
- versionId?: string;
10
10
  };
11
11
 
12
12
  /**
13
13
  * Get dataset info and examples from the dataset
14
14
  * @param dataset - Dataset selector (ID or name)
15
- * @param versionId - Optional specific version ID (if omitted, returns data from the latest version)
16
15
  */
17
16
  export async function getDataset({
18
17
  client: _client,
19
18
  dataset,
20
- versionId,
21
19
  }: GetDatasetParams): Promise<Dataset> {
22
20
  const client = _client || createClient();
23
21
  const [datasetInfo, datasetExamples] = await Promise.all([
24
22
  getDatasetInfo({ client, dataset }),
25
- getDatasetExamples({ client, dataset, versionId }),
23
+ getDatasetExamples({ client, dataset }),
26
24
  ]);
27
25
  return {
28
26
  ...datasetInfo,
@@ -5,44 +5,44 @@ import { DatasetSelector, DatasetExamples } from "../types/datasets";
5
5
  import { getDatasetInfoByName } from "./getDatasetInfoByName";
6
6
 
7
7
  export type GetDatasetExamplesParams = ClientFn & {
8
+ /** Dataset selector (ID, name, or version ID) */
8
9
  dataset: DatasetSelector;
9
- versionId?: string;
10
10
  };
11
11
 
12
12
  /**
13
13
  * Get examples from a dataset
14
- * @param dataset - Dataset selector (ID, name, or version ID)
15
- * @param versionId - Optional specific version ID (ignored if dataset selector is datasetVersionId)
14
+ * @param dataset - Dataset selector (ID, name, version ID, or splits)
15
+ * @returns Dataset examples
16
16
  */
17
17
  export async function getDatasetExamples({
18
18
  client: _client,
19
- dataset,
20
- versionId,
19
+ dataset: datasetSelector,
21
20
  }: GetDatasetExamplesParams): Promise<DatasetExamples> {
22
21
  const client = _client || createClient();
23
22
 
24
23
  let datasetId: string;
25
24
 
26
- if ("datasetName" in dataset) {
25
+ if ("datasetName" in datasetSelector) {
27
26
  const datasetInfo = await getDatasetInfoByName({
28
27
  client,
29
- datasetName: dataset.datasetName,
28
+ datasetName: datasetSelector.datasetName,
30
29
  });
31
30
  datasetId = datasetInfo.id;
32
31
  } else {
33
- datasetId = dataset.datasetId;
32
+ datasetId = datasetSelector.datasetId;
34
33
  }
35
34
 
35
+ const { versionId, splits } = datasetSelector;
36
+
36
37
  const response = await client.GET("/v1/datasets/{id}/examples", {
37
38
  params: {
38
39
  path: {
39
40
  id: datasetId,
40
41
  },
41
- query: versionId
42
- ? {
43
- version_id: versionId,
44
- }
45
- : undefined,
42
+ query: {
43
+ ...(versionId ? { version_id: versionId } : {}),
44
+ ...(splits ? { split: splits } : {}),
45
+ },
46
46
  },
47
47
  });
48
48
 
@@ -3,3 +3,4 @@ export * from "./getDataset";
3
3
  export * from "./getDatasetExamples";
4
4
  export * from "./appendDatasetExamples";
5
5
  export * from "./getDatasetInfo";
6
+ export * from "./createOrGetDataset";
@@ -18,16 +18,14 @@ export async function getExperimentInfo({
18
18
  experimentId: experiment_id,
19
19
  }: GetExperimentParams): Promise<ExperimentInfo> {
20
20
  const client = _client || createClient();
21
- const { data: { data: experimentData } = {} } = await client.GET(
22
- "/v1/experiments/{experiment_id}",
23
- {
21
+ const { data: { data: experimentData } = { data: undefined } } =
22
+ await client.GET("/v1/experiments/{experiment_id}", {
24
23
  params: {
25
24
  path: {
26
25
  experiment_id,
27
26
  },
28
27
  },
29
- }
30
- );
28
+ });
31
29
  invariant(experimentData, "Failed to get experiment");
32
30
  return {
33
31
  id: experimentData.id,
@@ -20,6 +20,7 @@ export function createProvider({
20
20
  baseUrl,
21
21
  headers,
22
22
  useBatchSpanProcessor = true,
23
+ diagLogLevel,
23
24
  }: {
24
25
  projectName: string;
25
26
  headers: HeadersOptions;
@@ -32,8 +33,15 @@ export function createProvider({
32
33
  * The base URL of the Phoenix. Doesn't include the /v1/traces path.
33
34
  */
34
35
  baseUrl: string;
36
+ /**
37
+ * The diag log level to set for the built in DiagConsoleLogger instance.
38
+ * Omit to disable built in logging.
39
+ */
40
+ diagLogLevel?: DiagLogLevel;
35
41
  }) {
36
- diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR);
42
+ if (diagLogLevel) {
43
+ diag.setLogger(new DiagConsoleLogger(), diagLogLevel);
44
+ }
37
45
 
38
46
  const exporter = new OTLPTraceExporter({
39
47
  url: `${baseUrl}/v1/traces`,
@@ -23,7 +23,12 @@ import { pluralize } from "../utils/pluralize";
23
23
  import { promisifyResult } from "../utils/promisifyResult";
24
24
  import { AnnotatorKind } from "../types/annotations";
25
25
  import { createProvider, createNoOpProvider } from "./instrumentation";
26
- import { SpanStatusCode, Tracer, trace } from "@opentelemetry/api";
26
+ import {
27
+ type DiagLogLevel,
28
+ SpanStatusCode,
29
+ Tracer,
30
+ trace,
31
+ } from "@opentelemetry/api";
27
32
  import {
28
33
  MimeType,
29
34
  OpenInferenceSpanKind,
@@ -111,6 +116,11 @@ export type RunExperimentParams = ClientFn & {
111
116
  * @default true
112
117
  */
113
118
  useBatchSpanProcessor?: boolean;
119
+ /**
120
+ * Log level to set for the default DiagConsoleLogger when tracing.
121
+ * Omit to disable default diag logging, or to bring your own.
122
+ */
123
+ diagLogLevel?: DiagLogLevel;
114
124
  };
115
125
 
116
126
  /**
@@ -150,7 +160,7 @@ export async function runExperiment({
150
160
  experimentDescription,
151
161
  experimentMetadata = {},
152
162
  client: _client,
153
- dataset: DatasetSelector,
163
+ dataset: datasetSelector,
154
164
  task,
155
165
  evaluators,
156
166
  logger = console,
@@ -160,6 +170,7 @@ export async function runExperiment({
160
170
  setGlobalTracerProvider = true,
161
171
  repetitions = 1,
162
172
  useBatchSpanProcessor = true,
173
+ diagLogLevel,
163
174
  }: RunExperimentParams): Promise<RanExperiment> {
164
175
  // Validation
165
176
  assert(
@@ -169,7 +180,10 @@ export async function runExperiment({
169
180
  let provider: NodeTracerProvider | undefined;
170
181
  const isDryRun = typeof dryRun === "number" || dryRun === true;
171
182
  const client = _client ?? createClient();
172
- const dataset = await getDataset({ dataset: DatasetSelector, client });
183
+ const dataset = await getDataset({
184
+ dataset: datasetSelector,
185
+ client,
186
+ });
173
187
  invariant(dataset, `Dataset not found`);
174
188
  invariant(dataset.examples.length > 0, `Dataset has no examples`);
175
189
  const nExamples =
@@ -186,6 +200,8 @@ export async function runExperiment({
186
200
  id: localId(),
187
201
  datasetId: dataset.id,
188
202
  datasetVersionId: dataset.versionId,
203
+ // @todo: the dataset should return splits in response body
204
+ datasetSplits: datasetSelector?.splits ?? [],
189
205
  projectName,
190
206
  metadata: experimentMetadata,
191
207
  };
@@ -204,6 +220,11 @@ export async function runExperiment({
204
220
  metadata: experimentMetadata,
205
221
  project_name: projectName,
206
222
  repetitions,
223
+ // @todo: the dataset should return splits in response body
224
+ ...(datasetSelector?.splits
225
+ ? { splits: datasetSelector.splits }
226
+ : {}),
227
+ ...(dataset?.versionId ? { version_id: dataset.versionId } : {}),
207
228
  },
208
229
  })
209
230
  .then((res) => res.data?.data);
@@ -213,6 +234,8 @@ export async function runExperiment({
213
234
  id: experimentResponse.id,
214
235
  datasetId: experimentResponse.dataset_id,
215
236
  datasetVersionId: experimentResponse.dataset_version_id,
237
+ // @todo: the dataset should return splits in response body
238
+ datasetSplits: datasetSelector?.splits ?? [],
216
239
  projectName,
217
240
  metadata: experimentResponse.metadata,
218
241
  };
@@ -227,6 +250,7 @@ export async function runExperiment({
227
250
  baseUrl,
228
251
  headers: client.config.headers ?? {},
229
252
  useBatchSpanProcessor,
253
+ diagLogLevel,
230
254
  });
231
255
  // Register the provider
232
256
  if (setGlobalTracerProvider) {
@@ -298,6 +322,8 @@ export async function runExperiment({
298
322
  concurrency,
299
323
  dryRun,
300
324
  tracerProvider: provider,
325
+ diagLogLevel,
326
+ useBatchSpanProcessor,
301
327
  });
302
328
  ranExperiment.evaluationRuns = evaluationRuns;
303
329
 
@@ -468,6 +494,7 @@ export async function evaluateExperiment({
468
494
  setGlobalTracerProvider = true,
469
495
  useBatchSpanProcessor = true,
470
496
  tracerProvider: paramsTracerProvider,
497
+ diagLogLevel,
471
498
  }: {
472
499
  /**
473
500
  * The experiment to evaluate
@@ -502,6 +529,11 @@ export async function evaluateExperiment({
502
529
  * Intended as a pass-through from runExperiment
503
530
  */
504
531
  tracerProvider?: NodeTracerProvider | null;
532
+ /**
533
+ * Log level to set for the default DiagConsoleLogger when tracing.
534
+ * Omit to disable default diag logging, or to bring your own.
535
+ */
536
+ diagLogLevel?: DiagLogLevel;
505
537
  }): Promise<RanExperiment> {
506
538
  const isDryRun = typeof dryRun === "number" || dryRun === true;
507
539
  const client = _client ?? createClient();
@@ -521,6 +553,7 @@ export async function evaluateExperiment({
521
553
  baseUrl,
522
554
  headers: client.config.headers ?? {},
523
555
  useBatchSpanProcessor,
556
+ diagLogLevel,
524
557
  });
525
558
  if (setGlobalTracerProvider) {
526
559
  provider.register();
@@ -536,7 +569,11 @@ export async function evaluateExperiment({
536
569
  ? Math.min(dryRun, Object.keys(experiment.runs).length)
537
570
  : Object.keys(experiment.runs).length;
538
571
  const dataset = await getDataset({
539
- dataset: { datasetId: experiment.datasetId },
572
+ dataset: {
573
+ datasetId: experiment.datasetId,
574
+ versionId: experiment.datasetVersionId,
575
+ splits: experiment.datasetSplits,
576
+ },
540
577
  client,
541
578
  });
542
579
  invariant(dataset, `Dataset "${experiment.datasetId}" not found`);
@@ -1,17 +1,13 @@
1
1
  import { Node } from "./core";
2
2
 
3
- /**
4
- * A dataset can be identified by its datasetId, datasetName, or datasetVersionId
5
- */
6
- export type DatasetSelector = { datasetId: string } | { datasetName: string };
3
+ type DatasetSelectorBase = { versionId?: string; splits?: string[] };
7
4
 
8
5
  /**
9
- * Parameters for selecting a specific version of a dataset
6
+ * A dataset can be identified by its datasetId, datasetName, or datasetVersionId
10
7
  */
11
- export interface DatasetVersionSelector {
12
- dataset: DatasetSelector;
13
- versionId?: string;
14
- }
8
+ export type DatasetSelector =
9
+ | (DatasetSelectorBase & { datasetId: string })
10
+ | (DatasetSelectorBase & { datasetName: string });
15
11
 
16
12
  /**
17
13
  * Overview information about a dataset
@@ -8,6 +8,8 @@ import { Example } from "./datasets";
8
8
  export interface ExperimentInfo extends Node {
9
9
  datasetId: string;
10
10
  datasetVersionId: string;
11
+ // @todo: mark this as required when experiment API returns it
12
+ datasetSplits?: string[];
11
13
  /**
12
14
  * The project under which the experiment task traces are recorded
13
15
  */