@arizeai/phoenix-client 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/README.md +17 -5
  2. package/dist/esm/__generated__/api/v1.d.ts +1809 -295
  3. package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
  4. package/dist/esm/experiments/runExperiment.d.ts +48 -13
  5. package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
  6. package/dist/esm/experiments/runExperiment.js +139 -36
  7. package/dist/esm/experiments/runExperiment.js.map +1 -1
  8. package/dist/esm/schemas/llm/constants.d.ts +1 -1
  9. package/dist/esm/schemas/llm/converters.d.ts +4 -4
  10. package/dist/esm/schemas/llm/openai/converters.d.ts +1 -1
  11. package/dist/esm/schemas/llm/phoenixPrompt/converters.d.ts +8 -8
  12. package/dist/esm/schemas/llm/phoenixPrompt/messagePartSchemas.d.ts +5 -5
  13. package/dist/esm/schemas/llm/phoenixPrompt/messageSchemas.d.ts +8 -8
  14. package/dist/esm/schemas/llm/schemas.d.ts +4 -4
  15. package/dist/esm/spans/addSpanAnnotation.d.ts +39 -0
  16. package/dist/esm/spans/addSpanAnnotation.d.ts.map +1 -0
  17. package/dist/esm/spans/addSpanAnnotation.js +44 -0
  18. package/dist/esm/spans/addSpanAnnotation.js.map +1 -0
  19. package/dist/esm/spans/index.d.ts +3 -0
  20. package/dist/esm/spans/index.d.ts.map +1 -0
  21. package/dist/esm/spans/index.js +3 -0
  22. package/dist/esm/spans/index.js.map +1 -0
  23. package/dist/esm/spans/logSpanAnnotations.d.ts +51 -0
  24. package/dist/esm/spans/logSpanAnnotations.d.ts.map +1 -0
  25. package/dist/esm/spans/logSpanAnnotations.js +53 -0
  26. package/dist/esm/spans/logSpanAnnotations.js.map +1 -0
  27. package/dist/esm/spans/types.d.ts +43 -0
  28. package/dist/esm/spans/types.d.ts.map +1 -0
  29. package/dist/esm/spans/types.js +18 -0
  30. package/dist/esm/spans/types.js.map +1 -0
  31. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  32. package/dist/esm/types/annotations.d.ts +2 -1
  33. package/dist/esm/types/annotations.d.ts.map +1 -1
  34. package/dist/esm/types/experiments.d.ts +1 -7
  35. package/dist/esm/types/experiments.d.ts.map +1 -1
  36. package/dist/esm/utils/formatPromptMessages.d.ts +3 -1
  37. package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
  38. package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
  39. package/dist/src/__generated__/api/v1.d.ts +1809 -295
  40. package/dist/src/__generated__/api/v1.d.ts.map +1 -1
  41. package/dist/src/experiments/runExperiment.d.ts +48 -13
  42. package/dist/src/experiments/runExperiment.d.ts.map +1 -1
  43. package/dist/src/experiments/runExperiment.js +138 -36
  44. package/dist/src/experiments/runExperiment.js.map +1 -1
  45. package/dist/src/schemas/llm/constants.d.ts +1 -1
  46. package/dist/src/schemas/llm/converters.d.ts +4 -4
  47. package/dist/src/schemas/llm/openai/converters.d.ts +1 -1
  48. package/dist/src/schemas/llm/phoenixPrompt/converters.d.ts +8 -8
  49. package/dist/src/schemas/llm/phoenixPrompt/messagePartSchemas.d.ts +5 -5
  50. package/dist/src/schemas/llm/phoenixPrompt/messageSchemas.d.ts +8 -8
  51. package/dist/src/schemas/llm/schemas.d.ts +4 -4
  52. package/dist/src/spans/addSpanAnnotation.d.ts +39 -0
  53. package/dist/src/spans/addSpanAnnotation.d.ts.map +1 -0
  54. package/dist/src/spans/addSpanAnnotation.js +59 -0
  55. package/dist/src/spans/addSpanAnnotation.js.map +1 -0
  56. package/dist/src/spans/index.d.ts +3 -0
  57. package/dist/src/spans/index.d.ts.map +1 -0
  58. package/dist/src/spans/index.js +19 -0
  59. package/dist/src/spans/index.js.map +1 -0
  60. package/dist/src/spans/logSpanAnnotations.d.ts +51 -0
  61. package/dist/src/spans/logSpanAnnotations.d.ts.map +1 -0
  62. package/dist/src/spans/logSpanAnnotations.js +68 -0
  63. package/dist/src/spans/logSpanAnnotations.js.map +1 -0
  64. package/dist/src/spans/types.d.ts +43 -0
  65. package/dist/src/spans/types.d.ts.map +1 -0
  66. package/dist/src/spans/types.js +22 -0
  67. package/dist/src/spans/types.js.map +1 -0
  68. package/dist/src/types/annotations.d.ts +2 -1
  69. package/dist/src/types/annotations.d.ts.map +1 -1
  70. package/dist/src/types/experiments.d.ts +1 -7
  71. package/dist/src/types/experiments.d.ts.map +1 -1
  72. package/dist/src/utils/formatPromptMessages.d.ts +3 -1
  73. package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
  74. package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
  75. package/dist/tsconfig.tsbuildinfo +1 -1
  76. package/package.json +8 -2
  77. package/src/__generated__/api/v1.ts +1809 -295
  78. package/src/experiments/runExperiment.ts +211 -74
  79. package/src/spans/addSpanAnnotation.ts +59 -0
  80. package/src/spans/index.ts +2 -0
  81. package/src/spans/logSpanAnnotations.ts +71 -0
  82. package/src/spans/types.ts +60 -0
  83. package/src/types/annotations.ts +4 -1
  84. package/src/types/experiments.ts +1 -7
@@ -1,5 +1,8 @@
1
- import { Dataset, Example } from "../types/datasets";
1
+ import { queue } from "async";
2
+ import invariant from "tiny-invariant";
2
3
  import { createClient, type PhoenixClient } from "../client";
4
+ import { ClientFn } from "../types/core";
5
+ import { Dataset, Example } from "../types/datasets";
3
6
  import type {
4
7
  Evaluator,
5
8
  Experiment,
@@ -9,18 +12,16 @@ import type {
9
12
  ExperimentTask,
10
13
  RanExperiment,
11
14
  } from "../types/experiments";
12
- import { promisifyResult } from "../utils/promisifyResult";
13
- import invariant from "tiny-invariant";
14
- import { pluralize } from "../utils/pluralize";
15
- import { ClientFn } from "../types/core";
16
- import { getDatasetBySelector } from "../utils/getDatasetBySelector";
17
15
  import { type Logger } from "../types/logger";
16
+ import { getDatasetBySelector } from "../utils/getDatasetBySelector";
17
+ import { pluralize } from "../utils/pluralize";
18
+ import { promisifyResult } from "../utils/promisifyResult";
19
+ import { AnnotatorKind } from "../types/annotations";
18
20
 
19
21
  /**
20
22
  * Parameters for running an experiment.
21
23
  *
22
24
  * @experimental This feature is not complete, and will change in the future.
23
- * @deprecated This function will be un-marked as deprecated once the experimental feature flag is removed.
24
25
  */
25
26
  export type RunExperimentParams = ClientFn & {
26
27
  /**
@@ -28,6 +29,14 @@ export type RunExperimentParams = ClientFn & {
28
29
  * Defaults to the dataset name + a timestamp
29
30
  */
30
31
  experimentName?: string;
32
+ /**
33
+ * The description of the experiment
34
+ */
35
+ experimentDescription?: string;
36
+ /**
37
+ * Experiment metadata
38
+ */
39
+ experimentMetadata?: Record<string, unknown>;
31
40
  /**
32
41
  * The dataset to run the experiment on
33
42
  */
@@ -40,10 +49,6 @@ export type RunExperimentParams = ClientFn & {
40
49
  * The evaluators to use
41
50
  */
42
51
  evaluators?: Evaluator[];
43
- /**
44
- * The number of repetitions to run
45
- */
46
- repetitions?: number;
47
52
  /**
48
53
  * The project under which the experiment task traces are recorded
49
54
  */
@@ -56,43 +61,96 @@ export type RunExperimentParams = ClientFn & {
56
61
  * Whether to record the experiment results
57
62
  */
58
63
  record?: boolean;
64
+ /**
65
+ * The number of dataset examples to run in parallel
66
+ */
67
+ concurrency?: number;
68
+ /**
69
+ * Whether or not to run the experiment as a dry run. If a number is provided, n examples will be run.
70
+ * @default false
71
+ */
72
+ dryRun?: number | boolean;
59
73
  };
60
74
 
61
75
  /**
62
76
  * Run an experiment.
63
77
  *
78
+ * @example
79
+ * ```ts
80
+ * import { asEvaluator, runExperiment } from "@phoenix/client/experiments";
81
+ *
82
+ * const experiment = await runExperiment({
83
+ * dataset: "my-dataset",
84
+ * task: async (example) => example.input,
85
+ * evaluators: [
86
+ * asEvaluator({ name: "my-evaluator", kind: "CODE", evaluate: async (params) => params.output }),
87
+ * ],
88
+ * });
89
+ * ```
90
+ *
64
91
  * @experimental This feature is not complete, and will change in the future.
65
- * @deprecated This function will be un-marked as deprecated once the experimental feature flag is removed.
66
92
  */
67
93
  export async function runExperiment({
68
94
  experimentName: _experimentName,
95
+ experimentDescription,
96
+ experimentMetadata,
69
97
  client: _client,
70
98
  dataset: _dataset,
71
99
  task,
72
100
  evaluators,
73
- repetitions = 1,
74
101
  projectName = "default",
75
102
  logger = console,
76
103
  record = true,
104
+ concurrency = 5,
105
+ dryRun = false,
77
106
  }: RunExperimentParams): Promise<RanExperiment> {
107
+ const isDryRun = typeof dryRun === "number" || dryRun === true;
78
108
  const client = _client ?? createClient();
79
109
  const dataset = await getDatasetBySelector({ dataset: _dataset, client });
80
110
  invariant(dataset, `Dataset not found`);
81
111
  invariant(dataset.examples.length > 0, `Dataset has no examples`);
112
+ const nExamples =
113
+ typeof dryRun === "number"
114
+ ? Math.max(dryRun, dataset.examples.length)
115
+ : dataset.examples.length;
116
+
82
117
  const experimentName =
83
118
  _experimentName ?? `${dataset.name}-${new Date().toISOString()}`;
84
119
  const experimentParams: ExperimentParameters = {
85
- nRepetitions: repetitions,
86
- // TODO: Make configurable?
87
- nExamples: dataset.examples.length,
88
- };
89
- const experiment: Experiment = {
90
- id: id(),
91
- datasetId: dataset.id,
92
- datasetVersionId: dataset.versionId,
93
- repetitions,
94
- projectName,
120
+ nExamples,
95
121
  };
122
+ let experiment: Experiment;
123
+ if (isDryRun) {
124
+ experiment = {
125
+ id: id(),
126
+ datasetId: dataset.id,
127
+ datasetVersionId: dataset.versionId,
128
+ projectName,
129
+ };
130
+ } else {
131
+ const experimentResponse = await client
132
+ .POST("/v1/datasets/{dataset_id}/experiments", {
133
+ params: {
134
+ path: {
135
+ dataset_id: dataset.id,
136
+ },
137
+ },
138
+ body: {
139
+ name: experimentName,
140
+ description: experimentDescription,
141
+ metadata: experimentMetadata,
142
+ project_name: projectName,
143
+ },
144
+ })
145
+ .then((res) => res.data?.data);
146
+ invariant(experimentResponse, `Failed to create experiment`);
147
+ experiment = {
148
+ id: experimentResponse.id,
149
+ datasetId: dataset.id,
150
+ datasetVersionId: dataset.versionId,
151
+ projectName,
152
+ };
153
+ }
96
154
 
97
155
  if (!record) {
98
156
  logger.info(
@@ -104,30 +162,25 @@ export async function runExperiment({
104
162
  `🧪 Starting experiment "${experimentName}" on dataset "${dataset.id}" with task "${task.name}" and ${evaluators?.length ?? 0} ${pluralize(
105
163
  "evaluator",
106
164
  evaluators?.length ?? 0
107
- )}`
108
- );
109
-
110
- logger.info(
111
- `🔁 Running ${repetitions} ${pluralize("repetition", repetitions)} of task "${task.name}"`
165
+ )} and ${concurrency} concurrent runs`
112
166
  );
113
167
 
114
168
  // Run task against the selected dataset examples
115
169
  type ExperimentRunId = string;
116
170
  const runs: Record<ExperimentRunId, ExperimentRun> = {};
117
- await Promise.all(
118
- Array.from({ length: repetitions }, (_, i) =>
119
- runTask({
120
- repetition: i + 1,
121
- experimentId: experiment.id,
122
- task,
123
- dataset,
124
- logger,
125
- onComplete: (run) => {
126
- runs[run.id] = run;
127
- },
128
- })
129
- )
130
- );
171
+ await runTask({
172
+ client,
173
+ experimentId: experiment.id,
174
+ task,
175
+ dataset,
176
+ logger,
177
+ onComplete: (run) => {
178
+ runs[run.id] = run;
179
+ },
180
+ concurrency,
181
+ isDryRun,
182
+ nExamples,
183
+ });
131
184
  logger.info(`✅ Task runs completed`);
132
185
 
133
186
  const ranExperiment: RanExperiment = {
@@ -141,6 +194,8 @@ export async function runExperiment({
141
194
  evaluators: evaluators ?? [],
142
195
  client,
143
196
  logger,
197
+ concurrency,
198
+ dryRun,
144
199
  });
145
200
  ranExperiment.evaluationRuns = evaluationRuns;
146
201
 
@@ -150,40 +205,49 @@ export async function runExperiment({
150
205
  }
151
206
 
152
207
  /**
153
- * Run a task against all examples in a dataset.
208
+ * Run a task against n examples in a dataset.
154
209
  */
155
210
  function runTask({
211
+ client,
156
212
  experimentId,
157
213
  task,
158
214
  dataset,
159
- repetition,
160
215
  onComplete,
161
216
  logger,
217
+ concurrency = 5,
218
+ isDryRun,
219
+ nExamples,
162
220
  }: {
221
+ /** The client to use */
222
+ client: PhoenixClient;
163
223
  /** The id of the experiment */
164
224
  experimentId: string;
165
225
  /** The task to run */
166
226
  task: ExperimentTask;
167
227
  /** The dataset to run the task on */
168
228
  dataset: Dataset;
169
- /** The repetition number */
170
- repetition: number;
171
229
  /** A callback to call when the task is complete */
172
230
  onComplete: (run: ExperimentRun) => void;
173
231
  /** The logger to use */
174
232
  logger: Logger;
233
+ /** The number of examples to run in parallel */
234
+ concurrency: number;
235
+ /** Whether to run the task as a dry run */
236
+ isDryRun: boolean;
237
+ /** The number of examples to run */
238
+ nExamples: number;
175
239
  }) {
176
- logger.info(
177
- `🔧 (${repetition}) Running task "${task.name}" on dataset "${dataset.id}"`
178
- );
240
+ logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
179
241
  const run = async (example: Example) => {
242
+ logger.info(
243
+ `🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`
244
+ );
180
245
  const thisRun: ExperimentRun = {
181
246
  id: id(),
182
- traceId: id(),
247
+ traceId: null, // TODO: fill this in once we trace experiments
183
248
  experimentId,
184
249
  datasetExampleId: example.id,
185
250
  startTime: new Date(),
186
- repetitionNumber: repetition,
187
251
  endTime: new Date(), // will get replaced with actual end time
188
252
  output: null,
189
253
  error: null,
@@ -199,22 +263,49 @@ function runTask({
199
263
  thisRun.error = error instanceof Error ? error.message : "Unknown error";
200
264
  }
201
265
  thisRun.endTime = new Date();
266
+ if (!isDryRun) {
267
+ // Log the run to the server
268
+ // This request is awaited so the server-assigned run id can be read from the response
269
+ const res = await client.POST("/v1/experiments/{experiment_id}/runs", {
270
+ params: {
271
+ path: {
272
+ experiment_id: experimentId,
273
+ },
274
+ },
275
+ body: {
276
+ dataset_example_id: example.id,
277
+ output: thisRun.output,
278
+ repetition_number: 0,
279
+ start_time: thisRun.startTime.toISOString(),
280
+ end_time: thisRun.endTime.toISOString(),
281
+ trace_id: thisRun.traceId,
282
+ error: thisRun.error,
283
+ },
284
+ });
285
+ // replace the local run id with the server-assigned id
286
+ thisRun.id = res.data?.data.id ?? thisRun.id;
287
+ }
202
288
  onComplete(thisRun);
289
+ return thisRun;
203
290
  };
204
- return Promise.all(dataset.examples.map(run));
291
+ const q = queue(run, concurrency);
292
+ const examplesToUse = dataset.examples.slice(0, nExamples);
293
+ examplesToUse.forEach((example) => q.push(example));
294
+ return q.drain();
205
295
  }
206
296
 
207
297
  /**
208
298
  * Evaluate an experiment.
209
299
  *
210
300
  * @experimental This feature is not complete, and will change in the future.
211
- * @deprecated This function will be un-marked as deprecated once the experimental feature flag is removed.
212
301
  */
213
302
  export async function evaluateExperiment({
214
303
  experiment,
215
304
  evaluators,
216
305
  client: _client,
217
306
  logger,
307
+ concurrency = 5,
308
+ dryRun = false,
218
309
  }: {
219
310
  /**
220
311
  * The experiment to evaluate
@@ -227,7 +318,20 @@ export async function evaluateExperiment({
227
318
  client?: PhoenixClient;
228
319
  /** The logger to use */
229
320
  logger: Logger;
321
+ /** The number of evaluators to run in parallel */
322
+ concurrency: number;
323
+ /**
324
+ * Whether to run the evaluation as a dry run
325
+ * If a number is provided, the evaluation will be run for the first n runs
326
+ * @default false
327
+ * */
328
+ dryRun?: boolean | number;
230
329
  }): Promise<RanExperiment> {
330
+ const isDryRun = typeof dryRun === "number" || dryRun === true;
331
+ const nRuns =
332
+ typeof dryRun === "number"
333
+ ? Math.max(dryRun, Object.keys(experiment.runs).length)
334
+ : Object.keys(experiment.runs).length;
231
335
  const client = _client ?? createClient();
232
336
  const dataset = await getDatasetBySelector({
233
337
  dataset: experiment.datasetId,
@@ -240,6 +344,8 @@ export async function evaluateExperiment({
240
344
  );
241
345
  invariant(experiment.runs, `Experiment "${experiment.id}" has no runs`);
242
346
 
347
+ const runsToEvaluate = Object.values(experiment.runs).slice(0, nRuns);
348
+
243
349
  if (evaluators?.length === 0) {
244
350
  return {
245
351
  ...experiment,
@@ -266,21 +372,47 @@ export async function evaluateExperiment({
266
372
  };
267
373
 
268
374
  // Run evaluators against all runs
269
- await Promise.all(
270
- evaluators.map((evaluator) =>
271
- Promise.all(
272
- Object.values(experiment.runs).map((run) =>
273
- runEvaluator({
274
- evaluator,
275
- run,
276
- exampleCache: examplesById,
277
- onComplete: onEvaluationComplete,
278
- })
279
- )
280
- )
281
- )
375
+ // Flat list of evaluator + run tuples
376
+ const evaluatorsAndRuns = evaluators.flatMap((evaluator) =>
377
+ runsToEvaluate.map((run) => ({
378
+ evaluator,
379
+ run,
380
+ }))
282
381
  );
283
-
382
+ const evaluatorsQueue = queue(
383
+ async (evaluatorAndRun: { evaluator: Evaluator; run: ExperimentRun }) => {
384
+ const evalResult = await runEvaluator({
385
+ evaluator: evaluatorAndRun.evaluator,
386
+ run: evaluatorAndRun.run,
387
+ exampleCache: examplesById,
388
+ onComplete: onEvaluationComplete,
389
+ });
390
+ if (!isDryRun) {
391
+ logger.info(`📝 Logging evaluation ${evalResult.id}`);
392
+ // Log the evaluation to the server
393
+ // We log this without awaiting (i.e. best effort)
394
+ client.POST("/v1/experiment_evaluations", {
395
+ body: {
396
+ experiment_run_id: evaluatorAndRun.run.id,
397
+ name: evaluatorAndRun.evaluator.name,
398
+ annotator_kind: evaluatorAndRun.evaluator.kind,
399
+ start_time: evalResult.startTime.toISOString(),
400
+ end_time: evalResult.endTime.toISOString(),
401
+ result: {
402
+ ...evalResult.result,
403
+ },
404
+ error: evalResult.error,
405
+ trace_id: evalResult.traceId,
406
+ },
407
+ });
408
+ }
409
+ },
410
+ concurrency
411
+ );
412
+ evaluatorsAndRuns.forEach((evaluatorAndRun) =>
413
+ evaluatorsQueue.push(evaluatorAndRun)
414
+ );
415
+ await evaluatorsQueue.drain();
284
416
  logger.info(`✅ Evaluation runs completed`);
285
417
 
286
418
  return {
@@ -293,7 +425,6 @@ export async function evaluateExperiment({
293
425
  * Run an evaluator against a run.
294
426
  *
295
427
  * @experimental This feature is not complete, and will change in the future.
296
- * @deprecated This function will be un-marked as deprecated once the experimental feature flag is removed.
297
428
  */
298
429
  async function runEvaluator({
299
430
  evaluator,
@@ -311,7 +442,7 @@ async function runEvaluator({
311
442
  const evaluate = async () => {
312
443
  const thisEval: ExperimentEvaluationRun = {
313
444
  id: id(),
314
- traceId: id(),
445
+ traceId: null, // TODO: fill this in once we trace experiments
315
446
  experimentRunId: run.id,
316
447
  startTime: new Date(),
317
448
  endTime: new Date(), // will get replaced with actual end time
@@ -333,6 +464,7 @@ async function runEvaluator({
333
464
  }
334
465
  thisEval.endTime = new Date();
335
466
  onComplete(thisEval);
467
+ return thisEval;
336
468
  };
337
469
 
338
470
  return evaluate();
@@ -342,18 +474,23 @@ async function runEvaluator({
342
474
  * Wrap an evaluator function in an object with a name property.
343
475
  *
344
476
  * @experimental This feature is not complete, and will change in the future.
345
- * @deprecated This function will be un-marked as deprecated once the experimental feature flag is removed.
346
477
  *
347
478
  * @param name - The name of the evaluator.
348
479
  * @param evaluate - The evaluator function.
349
480
  * @returns The evaluator object.
350
481
  */
351
- export function asEvaluator(
352
- name: string,
353
- evaluate: Evaluator["evaluate"]
354
- ): Evaluator {
482
+ export function asEvaluator({
483
+ name,
484
+ kind,
485
+ evaluate,
486
+ }: {
487
+ name: string;
488
+ kind: AnnotatorKind;
489
+ evaluate: Evaluator["evaluate"];
490
+ }): Evaluator {
355
491
  return {
356
492
  name,
493
+ kind,
357
494
  evaluate,
358
495
  };
359
496
  }
@@ -0,0 +1,59 @@
1
+ import { createClient } from "../client";
2
+ import { ClientFn } from "../types/core";
3
+ import { SpanAnnotation, toSpanAnnotationData } from "./types";
4
+
5
+ /**
6
+ * Parameters to add a span annotation
7
+ */
8
+ interface AddSpanAnnotationParams extends ClientFn {
9
+ spanAnnotation: SpanAnnotation;
10
+ }
11
+
12
+ /**
13
+ * Add an annotation to a span.
14
+ *
15
+ * The annotation can be of type "LLM", "CODE", or "HUMAN" and can include a label, score, and metadata.
16
+ * If an identifier is provided and an annotation with that identifier already exists, it will be updated.
17
+ *
18
+ * @param params - The parameters to add a span annotation
19
+ * @returns The ID of the created or updated annotation
20
+ *
21
+ * @example
22
+ * ```ts
23
+ * const result = await addSpanAnnotation({
24
+ * spanAnnotation: {
25
+ * spanId: "123abc",
26
+ * name: "quality_score",
27
+ * label: "good",
28
+ * score: 0.95,
29
+ * annotatorKind: "LLM",
30
+ * identifier: "custom_id_123",
31
+ * metadata: {
32
+ * model: "gpt-4"
33
+ * }
34
+ * }
35
+ * });
36
+ * ```
37
+ */
38
+ export async function addSpanAnnotation({
39
+ client: _client,
40
+ spanAnnotation,
41
+ }: AddSpanAnnotationParams): Promise<{ id: string }> {
42
+ const client = _client ?? createClient();
43
+
44
+ const { data, error } = await client.POST("/v1/span_annotations", {
45
+ body: {
46
+ data: [toSpanAnnotationData(spanAnnotation)],
47
+ },
48
+ });
49
+
50
+ if (error) {
51
+ throw new Error(`Failed to add span annotation: ${error}`);
52
+ }
53
+
54
+ if (!data?.data?.[0]?.id) {
55
+ throw new Error("No annotation ID returned from server");
56
+ }
57
+
58
+ return data.data[0];
59
+ }
@@ -0,0 +1,2 @@
1
+ export * from "./addSpanAnnotation";
2
+ export * from "./logSpanAnnotations";
@@ -0,0 +1,71 @@
1
+ import { createClient } from "../client";
2
+ import { ClientFn } from "../types/core";
3
+ import { SpanAnnotation, toSpanAnnotationData } from "./types";
4
+
5
+ /**
6
+ * Parameters to log multiple span annotations
7
+ */
8
+ interface LogSpanAnnotationsParams extends ClientFn {
9
+ /**
10
+ * The span annotations to log
11
+ */
12
+ spanAnnotations: SpanAnnotation[];
13
+ }
14
+
15
+ /**
16
+ * Log multiple span annotations in a single request.
17
+ *
18
+ * Each annotation can be of type "LLM", "CODE", or "HUMAN" and can include a label, score, and metadata.
19
+ * If an identifier is provided and an annotation with that identifier already exists, it will be updated.
20
+ *
21
+ * @param params - The parameters to log span annotations
22
+ * @returns The IDs of the created or updated annotations
23
+ *
24
+ * @example
25
+ * ```ts
26
+ * const results = await logSpanAnnotations({
27
+ * spanAnnotations: [
28
+ * {
29
+ * spanId: "123abc",
30
+ * name: "quality_score",
31
+ * label: "good",
32
+ * score: 0.95,
33
+ * annotatorKind: "LLM",
34
+ * identifier: "custom_id_123",
35
+ * metadata: {
36
+ * model: "gpt-4"
37
+ * }
38
+ * },
39
+ * {
40
+ * spanId: "456def",
41
+ * name: "sentiment",
42
+ * label: "positive",
43
+ * score: 0.8,
44
+ * annotatorKind: "CODE"
45
+ * }
46
+ * ]
47
+ * });
48
+ * ```
49
+ */
50
+ export async function logSpanAnnotations({
51
+ client: _client,
52
+ spanAnnotations,
53
+ }: LogSpanAnnotationsParams): Promise<{ id: string }[]> {
54
+ const client = _client ?? createClient();
55
+
56
+ const { data, error } = await client.POST("/v1/span_annotations", {
57
+ body: {
58
+ data: spanAnnotations.map(toSpanAnnotationData),
59
+ },
60
+ });
61
+
62
+ if (error) {
63
+ throw new Error(`Failed to log span annotations: ${error}`);
64
+ }
65
+
66
+ if (!data?.data?.length) {
67
+ throw new Error("No annotation IDs returned from server");
68
+ }
69
+
70
+ return data.data;
71
+ }
@@ -0,0 +1,60 @@
1
+ import { paths } from "../__generated__/api/v1";
2
+
3
+ type SpanAnnotationData =
4
+ paths["/v1/span_annotations"]["post"]["requestBody"]["content"]["application/json"]["data"][0];
5
+
6
+ /**
7
+ * Parameters for a single span annotation
8
+ */
9
+ export interface SpanAnnotation {
10
+ /**
11
+ * The OpenTelemetry Span ID (hex format without 0x prefix)
12
+ */
13
+ spanId: string;
14
+ /**
15
+ * The name of the annotation
16
+ */
17
+ name: string;
18
+ /**
19
+ * The label assigned by the annotation
20
+ */
21
+ label?: string;
22
+ /**
23
+ * The score assigned by the annotation
24
+ */
25
+ score?: number;
26
+ /**
27
+ * The identifier of the annotation. If provided, the annotation will be updated if it already exists.
28
+ */
29
+ identifier?: string;
30
+ /**
31
+ * Metadata for the annotation
32
+ */
33
+ metadata?: Record<string, unknown>;
34
+ /**
35
+ * The kind of annotator used for the annotation
36
+ * Can be "HUMAN", "LLM", or "CODE"
37
+ * @default "HUMAN"
38
+ */
39
+ annotatorKind?: SpanAnnotationData["annotator_kind"];
40
+ }
41
+
42
+ /**
43
+ * Convert a SpanAnnotation to the API format
44
+ */
45
+ export function toSpanAnnotationData(
46
+ annotation: SpanAnnotation
47
+ ): SpanAnnotationData {
48
+ return {
49
+ span_id: annotation.spanId,
50
+ name: annotation.name,
51
+ annotator_kind: annotation.annotatorKind ?? "HUMAN",
52
+ result: {
53
+ label: annotation.label ?? null,
54
+ score: annotation.score ?? null,
55
+ explanation: null,
56
+ },
57
+ metadata: annotation.metadata ?? null,
58
+ identifier: annotation.identifier ?? "",
59
+ };
60
+ }
@@ -1 +1,4 @@
1
- export type AnnotatorKind = "HUMAN" | "LLM";
1
+ import { components } from "../__generated__/api/v1";
2
+
3
+ export type AnnotatorKind =
4
+ components["schemas"]["SpanAnnotationData"]["annotator_kind"];