@arizeai/phoenix-client 4.0.1 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/dist/esm/experiments/{instrumention.d.ts → instrumentation.d.ts} +1 -1
  2. package/dist/esm/experiments/instrumentation.d.ts.map +1 -0
  3. package/dist/esm/experiments/{instrumention.js → instrumentation.js} +1 -1
  4. package/dist/esm/experiments/instrumentation.js.map +1 -0
  5. package/dist/esm/experiments/runExperiment.d.ts +7 -1
  6. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  7. package/dist/esm/experiments/runExperiment.js +26 -12
  8. package/dist/esm/experiments/runExperiment.js.map +1 -1
  9. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  10. package/dist/esm/types/experiments.d.ts +6 -6
  11. package/dist/esm/types/experiments.d.ts.map +1 -1
  12. package/dist/src/experiments/{instrumention.d.ts → instrumentation.d.ts} +1 -1
  13. package/dist/src/experiments/instrumentation.d.ts.map +1 -0
  14. package/dist/src/experiments/{instrumention.js → instrumentation.js} +1 -1
  15. package/dist/src/experiments/instrumentation.js.map +1 -0
  16. package/dist/src/experiments/runExperiment.d.ts +7 -1
  17. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  18. package/dist/src/experiments/runExperiment.js +31 -17
  19. package/dist/src/experiments/runExperiment.js.map +1 -1
  20. package/dist/src/types/experiments.d.ts +6 -6
  21. package/dist/src/types/experiments.d.ts.map +1 -1
  22. package/dist/tsconfig.tsbuildinfo +1 -1
  23. package/package.json +3 -2
  24. package/src/experiments/runExperiment.ts +38 -12
  25. package/src/types/experiments.ts +6 -6
  26. package/dist/esm/experiments/instrumention.d.ts.map +0 -1
  27. package/dist/esm/experiments/instrumention.js.map +0 -1
  28. package/dist/src/experiments/instrumention.d.ts.map +0 -1
  29. package/dist/src/experiments/instrumention.js.map +0 -1
  30. package/src/experiments/{instrumention.ts → instrumentation.ts} +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-client",
3
- "version": "4.0.1",
3
+ "version": "4.0.3",
4
4
  "description": "A client for the Phoenix API",
5
5
  "main": "dist/src/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -61,7 +61,8 @@
61
61
  "openai": "^4.77.0",
62
62
  "openapi-typescript": "^7.6.1",
63
63
  "tsx": "^4.19.3",
64
- "vitest": "^2.1.9"
64
+ "vitest": "^2.1.9",
65
+ "@arizeai/phoenix-evals": "0.2.1"
65
66
  },
66
67
  "dependencies": {
67
68
  "@arizeai/openinference-semantic-conventions": "^1.1.0",
@@ -22,8 +22,8 @@ import { getDataset } from "../datasets/getDataset";
22
22
  import { pluralize } from "../utils/pluralize";
23
23
  import { promisifyResult } from "../utils/promisifyResult";
24
24
  import { AnnotatorKind } from "../types/annotations";
25
- import { createProvider, createNoOpProvider } from "./instrumention";
26
- import { SpanStatusCode, Tracer } from "@opentelemetry/api";
25
+ import { createProvider, createNoOpProvider } from "./instrumentation";
26
+ import { SpanStatusCode, Tracer, trace } from "@opentelemetry/api";
27
27
  import {
28
28
  MimeType,
29
29
  OpenInferenceSpanKind,
@@ -37,7 +37,14 @@ import {
37
37
  getDatasetExperimentsUrl,
38
38
  getExperimentUrl,
39
39
  } from "../utils/urlUtils";
40
+ import assert from "assert";
40
41
 
42
+ /**
43
+ * Validate that a repetition is valid
44
+ */
45
+ function isValidRepetitionParam(repetitions: number) {
46
+ return Number.isInteger(repetitions) && repetitions > 0;
47
+ }
41
48
  /**
42
49
  * Parameters for running an experiment.
43
50
  *
@@ -154,6 +161,11 @@ export async function runExperiment({
154
161
  repetitions = 1,
155
162
  useBatchSpanProcessor = true,
156
163
  }: RunExperimentParams): Promise<RanExperiment> {
164
+ // Validation
165
+ assert(
166
+ isValidRepetitionParam(repetitions),
167
+ "repetitions must be an integer greater than 0"
168
+ );
157
169
  let provider: NodeTracerProvider | undefined;
158
170
  const isDryRun = typeof dryRun === "number" || dryRun === true;
159
171
  const client = _client ?? createClient();
@@ -278,11 +290,6 @@ export async function runExperiment({
278
290
  runs,
279
291
  };
280
292
 
281
- // Shut down the provider so that the experiments run
282
- if (provider) {
283
- await provider.shutdown?.();
284
- }
285
-
286
293
  const { evaluationRuns } = await evaluateExperiment({
287
294
  experiment: ranExperiment,
288
295
  evaluators: evaluators ?? [],
@@ -290,8 +297,7 @@ export async function runExperiment({
290
297
  logger,
291
298
  concurrency,
292
299
  dryRun,
293
- setGlobalTracerProvider,
294
- useBatchSpanProcessor,
300
+ tracerProvider: provider,
295
301
  });
296
302
  ranExperiment.evaluationRuns = evaluationRuns;
297
303
 
@@ -348,6 +354,12 @@ function runTaskWithExamples({
348
354
  /** Number of repetitions per example */
349
355
  repetitions?: number;
350
356
  }): Promise<void> {
357
+ // Validate the input
358
+ assert(
359
+ isValidRepetitionParam(repetitions),
360
+ "repetitions must be an integer greater than 0"
361
+ );
362
+
351
363
  logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
352
364
  const run = async ({
353
365
  example,
@@ -426,7 +438,7 @@ function runTaskWithExamples({
426
438
  .flatMap((example) =>
427
439
  Array.from({ length: repetitions }, (_, index) => ({
428
440
  example,
429
- repetitionNumber: index,
441
+ repetitionNumber: index + 1, // Repetitions start at 1
430
442
  }))
431
443
  )
432
444
  .forEach((exampleWithRepetition) =>
@@ -455,6 +467,7 @@ export async function evaluateExperiment({
455
467
  dryRun = false,
456
468
  setGlobalTracerProvider = true,
457
469
  useBatchSpanProcessor = true,
470
+ tracerProvider: paramsTracerProvider,
458
471
  }: {
459
472
  /**
460
473
  * The experiment to evaluate
@@ -484,6 +497,11 @@ export async function evaluateExperiment({
484
497
  * @default true
485
498
  */
486
499
  useBatchSpanProcessor?: boolean;
500
+ /**
501
+ * The tracer provider to use. If set, the other parameters will be ignored and the passed tracer provider will get used
502
+ * Intended as a pass-through from runExperiment
503
+ */
504
+ tracerProvider?: NodeTracerProvider | null;
487
505
  }): Promise<RanExperiment> {
488
506
  const isDryRun = typeof dryRun === "number" || dryRun === true;
489
507
  const client = _client ?? createClient();
@@ -493,7 +511,11 @@ export async function evaluateExperiment({
493
511
  "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client."
494
512
  );
495
513
  let provider: NodeTracerProvider;
496
- if (!isDryRun) {
514
+
515
+ // Always allow changing of tracer providers
516
+ if (paramsTracerProvider) {
517
+ provider = paramsTracerProvider;
518
+ } else if (!isDryRun) {
497
519
  provider = createProvider({
498
520
  projectName: "evaluators",
499
521
  baseUrl,
@@ -650,7 +672,11 @@ export async function evaluateExperiment({
650
672
  logger.info(`✅ Evaluation runs completed`);
651
673
 
652
674
  if (provider) {
653
- await provider.shutdown?.();
675
+ await provider.shutdown();
676
+ // Make sure it's not set globally anymore
677
+ if (setGlobalTracerProvider) {
678
+ trace.disable();
679
+ }
654
680
  }
655
681
 
656
682
  return {
@@ -51,7 +51,7 @@ export interface ExperimentRun extends Node {
51
51
  traceId: string | null;
52
52
  }
53
53
 
54
- export type EvaluatorParams = {
54
+ export type EvaluatorParams<TaskOutputType = TaskOutput> = {
55
55
  /**
56
56
  * The input field of the Dataset Example
57
57
  */
@@ -59,7 +59,7 @@ export type EvaluatorParams = {
59
59
  /**
60
60
  * The output of the task
61
61
  */
62
- output: TaskOutput;
62
+ output: TaskOutputType;
63
63
  /**
64
64
  * The expected or reference output of the Dataset Example
65
65
  */
@@ -79,10 +79,10 @@ export type Evaluator = {
79
79
  };
80
80
 
81
81
  export type EvaluationResult = {
82
- score: number | null;
83
- label: string | null;
84
- metadata: Record<string, unknown>;
85
- explanation: string | null;
82
+ score?: number | null;
83
+ label?: string | null;
84
+ metadata?: Record<string, unknown>;
85
+ explanation?: string | null;
86
86
  };
87
87
 
88
88
  export interface ExperimentEvaluationRun extends Node {
@@ -1 +0,0 @@
1
- {"version":3,"file":"instrumention.d.ts","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,kBAAkB,EAEnB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAM/C;;GAEG;AACH,wBAAgB,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,EACP,qBAA4B,GAC7B,EAAE;IACD,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;IACxB;;;OAGG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAC/B;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB,sBAuBA;AAED;;GAEG;AACH,wBAAgB,kBAAkB,uBAIjC"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"instrumention.js","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAC7E,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EACL,kBAAkB,GAEnB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6CAA6C,CAAC;AAEvF,OAAO,EACL,+BAA+B,EAC/B,gCAAgC,GACjC,MAAM,+BAA+B,CAAC;AAEvC;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,EACP,qBAAqB,GAAG,IAAI,GAa7B;IACC,IAAI,CAAC,SAAS,CAAC,IAAI,iBAAiB,EAAE,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC;QACrC,GAAG,EAAE,GAAG,OAAO,YAAY;QAC3B,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO;KACxE,CAAC,CAAC;IAEH,IAAI,aAA4B,CAAC;IACjC,IAAI,qBAAqB,EAAE,CAAC;QAC1B,aAAa,GAAG,IAAI,+BAA+B,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IACpE,CAAC;SAAM,CAAC;QACN,aAAa,GAAG,IAAI,gCAAgC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC;QACtC,QAAQ,EAAE,sBAAsB,CAAC;YAC/B,CAAC,wBAAwB,CAAC,EAAE,WAAW;SACxC,CAAC;QACF,cAAc,EAAE,CAAC,aAAa,CAAC;KAChC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,EAAE,CAAC,CAAC;IAE5C,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"instrumention.d.ts","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,kBAAkB,EAEnB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAM/C;;GAEG;AACH,wBAAgB,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,EACP,qBAA4B,GAC7B,EAAE;IACD,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;IACxB;;;OAGG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAC/B;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB,sBAuBA;AAED;;GAEG;AACH,wBAAgB,kBAAkB,uBAIjC"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"instrumention.js","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":";;AAiBA,wCAwCC;AAKD,gDAIC;AAlED,4CAA2E;AAC3E,wFAA6E;AAC7E,wDAAkE;AAClE,kEAGuC;AACvC,oGAAuF;AAEvF,wEAGuC;AAEvC;;GAEG;AACH,SAAgB,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,EACP,qBAAqB,GAAG,IAAI,GAa7B;IACC,UAAI,CAAC,SAAS,CAAC,IAAI,uBAAiB,EAAE,EAAE,kBAAY,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAG,IAAI,6CAAiB,CAAC;QACrC,GAAG,EAAE,GAAG,OAAO,YAAY;QAC3B,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO;KACxE,CAAC,CAAC;IAEH,IAAI,aAA4B,CAAC;IACjC,IAAI,qBAAqB,EAAE,CAAC;QAC1B,aAAa,GAAG,IAAI,sDAA+B,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IACpE,CAAC;SAAM,CAAC;QACN,aAAa,GAAG,IAAI,uDAAgC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,mCAAkB,CAAC;QACtC,QAAQ,EAAE,IAAA,kCAAsB,EAAC;YAC/B,CAAC,6DAAwB,CAAC,EAAE,WAAW;SACxC,CAAC;QACF,cAAc,EAAE,CAAC,aAAa,CAAC;KAChC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB;IAChC,MAAM,QAAQ,GAAG,IAAI,mCAAkB,CAAC,EAAE,CAAC,CAAC;IAE5C,OAAO,QAAQ,CAAC;AAClB,CAAC"}