@arizeai/phoenix-client 6.5.4 → 6.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/__generated__/api/v1.d.ts +244 -0
- package/dist/esm/__generated__/api/v1.d.ts.map +1 -1
- package/dist/esm/experiments/resumeEvaluation.d.ts.map +1 -1
- package/dist/esm/experiments/resumeEvaluation.js +181 -170
- package/dist/esm/experiments/resumeEvaluation.js.map +1 -1
- package/dist/esm/experiments/resumeExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/resumeExperiment.js +201 -185
- package/dist/esm/experiments/resumeExperiment.js.map +1 -1
- package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/runExperiment.js +239 -207
- package/dist/esm/experiments/runExperiment.js.map +1 -1
- package/dist/esm/experiments/tracing.d.ts +10 -0
- package/dist/esm/experiments/tracing.d.ts.map +1 -0
- package/dist/esm/experiments/tracing.js +21 -0
- package/dist/esm/experiments/tracing.js.map +1 -0
- package/dist/esm/prompts/sdks/toSDK.d.ts +2 -2
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/experiments.d.ts +6 -0
- package/dist/esm/types/experiments.d.ts.map +1 -1
- package/dist/esm/utils/formatPromptMessages.d.ts.map +1 -1
- package/dist/esm/utils/getPromptBySelector.d.ts.map +1 -1
- package/dist/src/__generated__/api/v1.d.ts +244 -0
- package/dist/src/__generated__/api/v1.d.ts.map +1 -1
- package/dist/src/experiments/resumeEvaluation.d.ts.map +1 -1
- package/dist/src/experiments/resumeEvaluation.js +194 -183
- package/dist/src/experiments/resumeEvaluation.js.map +1 -1
- package/dist/src/experiments/resumeExperiment.d.ts.map +1 -1
- package/dist/src/experiments/resumeExperiment.js +214 -198
- package/dist/src/experiments/resumeExperiment.js.map +1 -1
- package/dist/src/experiments/runExperiment.d.ts.map +1 -1
- package/dist/src/experiments/runExperiment.js +229 -197
- package/dist/src/experiments/runExperiment.js.map +1 -1
- package/dist/src/experiments/tracing.d.ts +10 -0
- package/dist/src/experiments/tracing.d.ts.map +1 -0
- package/dist/src/experiments/tracing.js +24 -0
- package/dist/src/experiments/tracing.js.map +1 -0
- package/dist/src/types/experiments.d.ts +6 -0
- package/dist/src/types/experiments.d.ts.map +1 -1
- package/dist/src/utils/formatPromptMessages.d.ts.map +1 -1
- package/dist/src/utils/getPromptBySelector.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/docs/annotations.mdx +83 -0
- package/docs/datasets.mdx +77 -0
- package/docs/document-annotations.mdx +208 -0
- package/docs/experiments.mdx +376 -0
- package/docs/overview.mdx +176 -0
- package/docs/prompts.mdx +73 -0
- package/docs/session-annotations.mdx +158 -0
- package/docs/sessions.mdx +87 -0
- package/docs/span-annotations.mdx +283 -0
- package/docs/spans.mdx +76 -0
- package/docs/traces.mdx +63 -0
- package/package.json +9 -3
- package/src/__generated__/api/v1.ts +244 -0
- package/src/experiments/resumeEvaluation.ts +226 -206
- package/src/experiments/resumeExperiment.ts +237 -213
- package/src/experiments/runExperiment.ts +282 -243
- package/src/experiments/tracing.ts +30 -0
- package/src/types/experiments.ts +6 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Experiments"
|
|
3
|
+
description: "Run experiments with @arizeai/phoenix-client"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
The experiments module runs tasks over dataset examples, records experiment runs in Phoenix, and can evaluate each run with either plain experiment evaluators or `@arizeai/phoenix-evals` evaluators.
|
|
7
|
+
|
|
8
|
+
<section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
|
|
9
|
+
<h2>Relevant Source Files</h2>
|
|
10
|
+
<ul>
|
|
11
|
+
<li><code>src/experiments/runExperiment.ts</code> for the task execution flow and return shape</li>
|
|
12
|
+
<li><code>src/experiments/helpers/getExperimentEvaluators.ts</code> for evaluator normalization</li>
|
|
13
|
+
<li><code>src/experiments/helpers/fromPhoenixLLMEvaluator.ts</code> for the phoenix-evals bridge</li>
|
|
14
|
+
<li><code>src/experiments/getExperimentRuns.ts</code> for reading runs back after execution</li>
|
|
15
|
+
<li><code>src/types/experiments.ts</code> for <code>EvaluatorParams</code> including <code>traceId</code></li>
|
|
16
|
+
<li><code>src/spans/getSpans.ts</code> for fetching spans by trace ID and span kind</li>
|
|
17
|
+
</ul>
|
|
18
|
+
</section>
|
|
19
|
+
|
|
20
|
+
## Two Common Patterns
|
|
21
|
+
|
|
22
|
+
Use `asExperimentEvaluator()` when your evaluation logic is plain TypeScript.
|
|
23
|
+
|
|
24
|
+
Use `@arizeai/phoenix-evals` evaluators directly when you want model-backed judging.
|
|
25
|
+
|
|
26
|
+
## Code-Based Example
|
|
27
|
+
|
|
28
|
+
If you just want to compare task output against a reference answer or apply deterministic checks, use `asExperimentEvaluator()`:
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
/* eslint-disable no-console */
|
|
32
|
+
import { createDataset } from "@arizeai/phoenix-client/datasets";
|
|
33
|
+
import {
|
|
34
|
+
asExperimentEvaluator,
|
|
35
|
+
runExperiment,
|
|
36
|
+
} from "@arizeai/phoenix-client/experiments";
|
|
37
|
+
|
|
38
|
+
async function main() {
|
|
39
|
+
const { datasetId } = await createDataset({
|
|
40
|
+
name: `simple-dataset-${Date.now()}`,
|
|
41
|
+
description: "a simple dataset",
|
|
42
|
+
examples: [
|
|
43
|
+
{
|
|
44
|
+
input: { name: "John" },
|
|
45
|
+
output: { text: "Hello, John!" },
|
|
46
|
+
metadata: {},
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
input: { name: "Jane" },
|
|
50
|
+
output: { text: "Hello, Jane!" },
|
|
51
|
+
metadata: {},
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
input: { name: "Bill" },
|
|
55
|
+
output: { text: "Hello, Bill!" },
|
|
56
|
+
metadata: {},
|
|
57
|
+
},
|
|
58
|
+
],
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
const experiment = await runExperiment({
|
|
62
|
+
dataset: { datasetId },
|
|
63
|
+
task: async (example) => `hello ${example.input.name}`,
|
|
64
|
+
evaluators: [
|
|
65
|
+
asExperimentEvaluator({
|
|
66
|
+
name: "matches",
|
|
67
|
+
kind: "CODE",
|
|
68
|
+
evaluate: async ({ output, expected }) => {
|
|
69
|
+
const matches = output === expected?.text;
|
|
70
|
+
return {
|
|
71
|
+
label: matches ? "matches" : "does not match",
|
|
72
|
+
score: matches ? 1 : 0,
|
|
73
|
+
explanation: matches
|
|
74
|
+
? "output matches expected"
|
|
75
|
+
: "output does not match expected",
|
|
76
|
+
metadata: {},
|
|
77
|
+
};
|
|
78
|
+
},
|
|
79
|
+
}),
|
|
80
|
+
asExperimentEvaluator({
|
|
81
|
+
name: "contains-hello",
|
|
82
|
+
kind: "CODE",
|
|
83
|
+
evaluate: async ({ output }) => {
|
|
84
|
+
const matches =
|
|
85
|
+
typeof output === "string" && output.includes("hello");
|
|
86
|
+
return {
|
|
87
|
+
label: matches ? "contains hello" : "does not contain hello",
|
|
88
|
+
score: matches ? 1 : 0,
|
|
89
|
+
explanation: matches
|
|
90
|
+
? "output contains hello"
|
|
91
|
+
: "output does not contain hello",
|
|
92
|
+
metadata: {},
|
|
93
|
+
};
|
|
94
|
+
},
|
|
95
|
+
}),
|
|
96
|
+
],
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
console.table(experiment.runs);
|
|
100
|
+
console.table(experiment.evaluationRuns);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
main().catch(console.error);
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
This pattern is useful when:
|
|
107
|
+
|
|
108
|
+
- you already know the exact correctness rule
|
|
109
|
+
- you want fast, deterministic evaluation
|
|
110
|
+
- you do not want to call another model during evaluation
|
|
111
|
+
|
|
112
|
+
## Model-Backed Example
|
|
113
|
+
|
|
114
|
+
If you want a model-backed experiment with automatic tracing and an LLM-as-a-judge evaluator, this is the core pattern:
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
import { openai } from "@ai-sdk/openai";
|
|
118
|
+
import { createOrGetDataset } from "@arizeai/phoenix-client/datasets";
|
|
119
|
+
import { runExperiment } from "@arizeai/phoenix-client/experiments";
|
|
120
|
+
import type { ExperimentTask } from "@arizeai/phoenix-client/types/experiments";
|
|
121
|
+
import { createClassificationEvaluator } from "@arizeai/phoenix-evals";
|
|
122
|
+
import { generateText } from "ai";
|
|
123
|
+
|
|
124
|
+
const model = openai("gpt-4o-mini");
|
|
125
|
+
|
|
126
|
+
const main = async () => {
|
|
127
|
+
const answersQuestion = createClassificationEvaluator({
|
|
128
|
+
name: "answersQuestion",
|
|
129
|
+
model,
|
|
130
|
+
promptTemplate:
|
|
131
|
+
"Does the following answer the user's question: <question>{{input.question}}</question><answer>{{output}}</answer>",
|
|
132
|
+
choices: {
|
|
133
|
+
correct: 1,
|
|
134
|
+
incorrect: 0,
|
|
135
|
+
},
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
const dataset = await createOrGetDataset({
|
|
139
|
+
name: "correctness-eval",
|
|
140
|
+
description: "Evaluate the correctness of the model",
|
|
141
|
+
examples: [
|
|
142
|
+
{
|
|
143
|
+
input: {
|
|
144
|
+
question: "Is ArizeAI Phoenix Open-Source?",
|
|
145
|
+
context: "ArizeAI Phoenix is Open-Source.",
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
// ... more examples
|
|
149
|
+
],
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
const task: ExperimentTask = async (example) => {
|
|
153
|
+
if (typeof example.input.question !== "string") {
|
|
154
|
+
throw new Error("Invalid input: question must be a string");
|
|
155
|
+
}
|
|
156
|
+
if (typeof example.input.context !== "string") {
|
|
157
|
+
throw new Error("Invalid input: context must be a string");
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return generateText({
|
|
161
|
+
model,
|
|
162
|
+
experimental_telemetry: {
|
|
163
|
+
isEnabled: true,
|
|
164
|
+
},
|
|
165
|
+
prompt: [
|
|
166
|
+
{
|
|
167
|
+
role: "system",
|
|
168
|
+
content: `You answer questions based on this context: ${example.input.context}`,
|
|
169
|
+
},
|
|
170
|
+
{
|
|
171
|
+
role: "user",
|
|
172
|
+
content: example.input.question,
|
|
173
|
+
},
|
|
174
|
+
],
|
|
175
|
+
}).then((response) => {
|
|
176
|
+
if (response.text) {
|
|
177
|
+
return response.text;
|
|
178
|
+
}
|
|
179
|
+
throw new Error("Invalid response: text is required");
|
|
180
|
+
});
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
const experiment = await runExperiment({
|
|
184
|
+
experimentName: "answers-question-eval",
|
|
185
|
+
experimentDescription:
|
|
186
|
+
"Evaluate the ability of the model to answer questions based on the context",
|
|
187
|
+
dataset,
|
|
188
|
+
task,
|
|
189
|
+
evaluators: [answersQuestion],
|
|
190
|
+
repetitions: 3,
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
console.log(experiment.id);
|
|
194
|
+
console.log(Object.values(experiment.runs).length);
|
|
195
|
+
console.log(experiment.evaluationRuns?.length ?? 0);
|
|
196
|
+
};
|
|
197
|
+
|
|
198
|
+
main().catch(console.error);
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## What This Example Shows
|
|
202
|
+
|
|
203
|
+
- `createOrGetDataset()` creates or reuses the dataset the experiment will run against
|
|
204
|
+
- `task` receives the full dataset example object
|
|
205
|
+
- `generateText()` emits traces that Phoenix can attach to the experiment when telemetry is enabled
|
|
206
|
+
- `createClassificationEvaluator()` from `@arizeai/phoenix-evals` can be passed directly to `runExperiment()`
|
|
207
|
+
- `runExperiment()` records both task runs and evaluation runs in Phoenix
|
|
208
|
+
|
|
209
|
+
## Task Inputs
|
|
210
|
+
|
|
211
|
+
`runExperiment()` calls your task with the full dataset example, not just `example.input`.
|
|
212
|
+
|
|
213
|
+
That means your task should usually read:
|
|
214
|
+
|
|
215
|
+
- `example.input` for the task inputs
|
|
216
|
+
- `example.output` for any reference answer
|
|
217
|
+
- `example.metadata` for additional context
|
|
218
|
+
|
|
219
|
+
In the model-backed example above, the task checks that `example.input.question` and `example.input.context` are strings before generating a response.
|
|
220
|
+
|
|
221
|
+
## Evaluator Inputs
|
|
222
|
+
|
|
223
|
+
When an evaluator runs, it receives a normalized object with these fields:
|
|
224
|
+
|
|
225
|
+
| Field | Description |
|
|
226
|
+
|--------|-------------|
|
|
227
|
+
| `input` | The dataset example's `input` object |
|
|
228
|
+
| `output` | The task output for that run |
|
|
229
|
+
| `expected` | The dataset example's `output` object |
|
|
230
|
+
| `metadata` | The dataset example's `metadata` object |
|
|
231
|
+
| `traceId` | The OpenTelemetry trace ID of the task run (optional, `string \| null`) |
|
|
232
|
+
|
|
233
|
+
This is why the `createClassificationEvaluator()` prompt can reference `{{input.question}}` and `{{output}}`.
|
|
234
|
+
|
|
235
|
+
For code-based evaluators created with `asExperimentEvaluator()`, those same fields are available inside `evaluate({ input, output, expected, metadata, traceId })`.
|
|
236
|
+
|
|
237
|
+
## Trace-Based Evaluation
|
|
238
|
+
|
|
239
|
+
Each task run captures an OpenTelemetry trace ID. Evaluators can use `traceId` to fetch the task's spans from Phoenix and evaluate the execution trajectory — for example, verifying that specific tool calls were made or inspecting intermediate steps.
|
|
240
|
+
|
|
241
|
+
This pattern works best with `evaluateExperiment()` as a separate step after `runExperiment()`, so that all task spans are ingested into Phoenix before the evaluator queries them.
|
|
242
|
+
|
|
243
|
+
```ts
|
|
244
|
+
import { traceTool } from "@arizeai/openinference-core";
|
|
245
|
+
import { createClient } from "@arizeai/phoenix-client";
|
|
246
|
+
import { createDataset } from "@arizeai/phoenix-client/datasets";
|
|
247
|
+
import {
|
|
248
|
+
asExperimentEvaluator,
|
|
249
|
+
evaluateExperiment,
|
|
250
|
+
runExperiment,
|
|
251
|
+
} from "@arizeai/phoenix-client/experiments";
|
|
252
|
+
import { getSpans } from "@arizeai/phoenix-client/spans";
|
|
253
|
+
|
|
254
|
+
const client = createClient();
|
|
255
|
+
|
|
256
|
+
const { datasetId } = await createDataset({
|
|
257
|
+
client,
|
|
258
|
+
name: "tool-call-dataset",
|
|
259
|
+
description: "Questions that require tool use",
|
|
260
|
+
examples: [
|
|
261
|
+
{
|
|
262
|
+
input: { question: "What is the weather in San Francisco?" },
|
|
263
|
+
output: { expectedTool: "getWeather" },
|
|
264
|
+
metadata: {},
|
|
265
|
+
},
|
|
266
|
+
],
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
// Step 1: Run the experiment with traced tool calls
|
|
270
|
+
const experiment = await runExperiment({
|
|
271
|
+
client,
|
|
272
|
+
dataset: { datasetId },
|
|
273
|
+
setGlobalTracerProvider: true,
|
|
274
|
+
task: async (example) => {
|
|
275
|
+
// traceTool wraps a function with a TOOL span
|
|
276
|
+
const getWeather = traceTool(
|
|
277
|
+
({ location }: { location: string }) => ({
|
|
278
|
+
location,
|
|
279
|
+
temperature: 72,
|
|
280
|
+
condition: "sunny",
|
|
281
|
+
}),
|
|
282
|
+
{ name: "getWeather" }
|
|
283
|
+
);
|
|
284
|
+
|
|
285
|
+
const city = (example.input.question as string).match(/in (.+)\?/)?.[1];
|
|
286
|
+
const result = getWeather({ location: city ?? "Unknown" });
|
|
287
|
+
return `The weather in ${result.location} is ${result.temperature}F.`;
|
|
288
|
+
},
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
const projectName = experiment.projectName!;
|
|
292
|
+
|
|
293
|
+
// Step 2: Evaluate using traceId to inspect the task's spans
|
|
294
|
+
const evaluated = await evaluateExperiment({
|
|
295
|
+
client,
|
|
296
|
+
experiment,
|
|
297
|
+
evaluators: [
|
|
298
|
+
asExperimentEvaluator({
|
|
299
|
+
name: "has-expected-tool-call",
|
|
300
|
+
kind: "CODE",
|
|
301
|
+
evaluate: async ({ traceId, expected }) => {
|
|
302
|
+
if (!traceId) {
|
|
303
|
+
return { label: "no trace", score: 0 };
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// Fetch TOOL spans from this task's trace
|
|
307
|
+
const { spans: toolSpans } = await getSpans({
|
|
308
|
+
client,
|
|
309
|
+
project: { projectName },
|
|
310
|
+
traceIds: [traceId],
|
|
311
|
+
spanKind: "TOOL",
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
const expectedTool = (expected as { expectedTool?: string })
|
|
315
|
+
?.expectedTool;
|
|
316
|
+
const toolNames = toolSpans.map((s) => s.name);
|
|
317
|
+
const found = toolNames.some((name) => name.includes(expectedTool!));
|
|
318
|
+
|
|
319
|
+
return {
|
|
320
|
+
label: found ? "tool called" : "no tool call",
|
|
321
|
+
score: found ? 1 : 0,
|
|
322
|
+
explanation: found
|
|
323
|
+
? `Found: ${toolNames.join(", ")}`
|
|
324
|
+
: `Expected "${expectedTool}" but found none`,
|
|
325
|
+
};
|
|
326
|
+
},
|
|
327
|
+
}),
|
|
328
|
+
],
|
|
329
|
+
});
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
Key points:
|
|
333
|
+
|
|
334
|
+
- Use `setGlobalTracerProvider: true` on `runExperiment()` so that child spans from `traceTool` or other OTel instrumentation land in the same trace as the task
|
|
335
|
+
- Use `evaluateExperiment()` as a separate step so spans are ingested before querying
|
|
336
|
+
- Use `getSpans()` with `traceIds` and `spanKind` filters to fetch specific spans from the task trace
|
|
337
|
+
- `traceId` is `null` in dry-run mode since no real traces are recorded
|
|
338
|
+
|
|
339
|
+
## What `runExperiment()` Returns
|
|
340
|
+
|
|
341
|
+
The returned object includes the experiment metadata plus the task and evaluation results from the run.
|
|
342
|
+
|
|
343
|
+
- `experiment.id` is the experiment ID in Phoenix
|
|
344
|
+
- `experiment.projectName` is the Phoenix project that received the task traces
|
|
345
|
+
- `experiment.runs` is a map of run IDs to task run objects
|
|
346
|
+
- `experiment.evaluationRuns` contains evaluator results when evaluators are provided
|
|
347
|
+
|
|
348
|
+
## Follow-Up Helpers
|
|
349
|
+
|
|
350
|
+
Use these exports for follow-up workflows:
|
|
351
|
+
|
|
352
|
+
- `createExperiment`
|
|
353
|
+
- `getExperiment`
|
|
354
|
+
- `getExperimentInfo`
|
|
355
|
+
- `getExperimentRuns`
|
|
356
|
+
- `listExperiments`
|
|
357
|
+
- `resumeExperiment`
|
|
358
|
+
- `resumeEvaluation`
|
|
359
|
+
- `deleteExperiment`
|
|
360
|
+
|
|
361
|
+
## Tracing Behavior
|
|
362
|
+
|
|
363
|
+
`runExperiment()` can register a tracer provider for the task run so that task spans and evaluator spans show up in Phoenix during the experiment. This is how tasks that call the AI SDK emit traces to Phoenix when global tracing is enabled via `setGlobalTracerProvider`.
|
|
364
|
+
|
|
365
|
+
<section className="hidden" data-agent-context="source-map" aria-label="Source map">
|
|
366
|
+
<h2>Source Map</h2>
|
|
367
|
+
<ul>
|
|
368
|
+
<li><code>src/experiments/runExperiment.ts</code></li>
|
|
369
|
+
<li><code>src/experiments/createExperiment.ts</code></li>
|
|
370
|
+
<li><code>src/experiments/getExperiment.ts</code></li>
|
|
371
|
+
<li><code>src/experiments/getExperimentRuns.ts</code></li>
|
|
372
|
+
<li><code>src/experiments/helpers/getExperimentEvaluators.ts</code></li>
|
|
373
|
+
<li><code>src/experiments/helpers/fromPhoenixLLMEvaluator.ts</code></li>
|
|
374
|
+
<li><code>src/experiments/helpers/asExperimentEvaluator.ts</code></li>
|
|
375
|
+
</ul>
|
|
376
|
+
</section>
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Overview"
|
|
3
|
+
description: "Typed TypeScript client for Phoenix platform APIs"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
`@arizeai/phoenix-client` is the typed TypeScript client for Phoenix platform APIs. It ships a small root REST client plus focused module entrypoints for prompts, datasets, experiments, spans, sessions, and traces.
|
|
7
|
+
|
|
8
|
+
## Install
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
npm install @arizeai/phoenix-client
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Minimal Example
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
import { createClient } from "@arizeai/phoenix-client";
|
|
18
|
+
import { listDatasets } from "@arizeai/phoenix-client/datasets";
|
|
19
|
+
|
|
20
|
+
const client = createClient();
|
|
21
|
+
const datasets = await listDatasets({ client });
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Docs And Source In `node_modules`
|
|
25
|
+
|
|
26
|
+
After install, a coding agent can inspect the installed package directly:
|
|
27
|
+
|
|
28
|
+
```text
|
|
29
|
+
node_modules/@arizeai/phoenix-client/docs/
|
|
30
|
+
node_modules/@arizeai/phoenix-client/src/
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
That gives the agent version-matched docs plus the exact implementation and generated API types for the package version installed in your project.
|
|
34
|
+
|
|
35
|
+
## Module Map
|
|
36
|
+
|
|
37
|
+
| Import | Purpose |
|
|
38
|
+
|--------|---------|
|
|
39
|
+
| `@arizeai/phoenix-client` | `createClient`, generated OpenAPI types, config helpers |
|
|
40
|
+
| `@arizeai/phoenix-client/prompts` | Prompt CRUD plus `toSDK` conversion |
|
|
41
|
+
| `@arizeai/phoenix-client/datasets` | Dataset creation and retrieval |
|
|
42
|
+
| `@arizeai/phoenix-client/experiments` | Experiment execution and lifecycle |
|
|
43
|
+
| `@arizeai/phoenix-client/spans` | Span search, notes, and span/document annotations |
|
|
44
|
+
| `@arizeai/phoenix-client/sessions` | Session listing, retrieval, and session annotations |
|
|
45
|
+
| `@arizeai/phoenix-client/traces` | Project trace retrieval |
|
|
46
|
+
|
|
47
|
+
## Configuration
|
|
48
|
+
|
|
49
|
+
`createClient()` resolves Phoenix client options in this order: library defaults, environment variables, then explicit options, with each later source overriding the earlier ones. In most applications, the normal setup is to set `PHOENIX_HOST` and `PHOENIX_API_KEY` in the environment and call `createClient()` with no overrides.
|
|
50
|
+
|
|
51
|
+
### Recommended Setup
|
|
52
|
+
|
|
53
|
+
Use the environment-driven path unless you have a specific reason to override client options in code.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
export PHOENIX_HOST=http://localhost:6006
|
|
57
|
+
export PHOENIX_API_KEY=<your-api-key>
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
If you're using Phoenix Cloud, `PHOENIX_HOST` may look like `https://app.phoenix.arize.com/s/my-space`.
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { createClient } from "@arizeai/phoenix-client";
|
|
64
|
+
|
|
65
|
+
const client = createClient();
|
|
66
|
+
|
|
67
|
+
const datasets = await client.GET("/v1/datasets");
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`PHOENIX_API_KEY` is converted into `Authorization: Bearer <key>` automatically. You do not need to build that header yourself unless you are explicitly overriding `headers`.
|
|
71
|
+
|
|
72
|
+
### Explicit Overrides
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
import { createClient } from "@arizeai/phoenix-client";
|
|
76
|
+
|
|
77
|
+
const client = createClient({
|
|
78
|
+
options: {
|
|
79
|
+
baseUrl: "https://phoenix.example.com",
|
|
80
|
+
headers: {
|
|
81
|
+
Authorization: `Bearer ${process.env.PHOENIX_API_KEY}`,
|
|
82
|
+
},
|
|
83
|
+
},
|
|
84
|
+
});
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Use explicit options when you want configuration to live in code or when you need to override the environment for a specific client instance.
|
|
88
|
+
|
|
89
|
+
### createClient Parameters
|
|
90
|
+
|
|
91
|
+
| Field | Type | Description |
|
|
92
|
+
|--------|------|-------------|
|
|
93
|
+
| `options` | `Partial<ClientOptions>` | Explicit options passed to the underlying `openapi-fetch` client. |
|
|
94
|
+
| `getEnvironmentOptions` | `() => Partial<ClientOptions>` | Optional resolver for environment-derived options. The default implementation reads `process.env` when available. |
|
|
95
|
+
|
|
96
|
+
### Resolved Phoenix Options
|
|
97
|
+
|
|
98
|
+
These are the Phoenix-specific options this package resolves before creating the underlying OpenAPI client:
|
|
99
|
+
|
|
100
|
+
| Option | Type | Description |
|
|
101
|
+
|--------|------|-------------|
|
|
102
|
+
| `baseUrl` | `string` | Base Phoenix URL. Defaults to `http://localhost:6006`, or `PHOENIX_HOST` when that environment variable is set. |
|
|
103
|
+
| `headers` | `ClientOptions["headers"]` | Headers sent on every request. `PHOENIX_API_KEY` populates `Authorization` automatically. Explicit `headers` replace environment-derived headers. |
|
|
104
|
+
|
|
105
|
+
### Header Override Rule
|
|
106
|
+
|
|
107
|
+
If you pass `options.headers`, that object replaces the environment-derived headers entirely instead of being merged with them. That means if you override `headers` and still want API key authentication, you must include `Authorization` yourself:
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
const client = createClient({
|
|
111
|
+
options: {
|
|
112
|
+
headers: {
|
|
113
|
+
Authorization: `Bearer ${process.env.PHOENIX_API_KEY}`,
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
});
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Environment Variables
|
|
120
|
+
|
|
121
|
+
| Variable | Maps to | Description |
|
|
122
|
+
|--------|---------|-------------|
|
|
123
|
+
| `PHOENIX_HOST` | `options.baseUrl` | Base Phoenix URL, for example `http://localhost:6006`. |
|
|
124
|
+
| `PHOENIX_API_KEY` | `options.headers.Authorization` | Bearer token for authenticated environments. |
|
|
125
|
+
| `PHOENIX_CLIENT_HEADERS` | `options.headers` | Optional JSON-encoded object of additional headers to send on every request. Most setups do not need this. |
|
|
126
|
+
|
|
127
|
+
## API Client
|
|
128
|
+
|
|
129
|
+
`createClient()` returns an `openapi-fetch` client that is typed against Phoenix's generated OpenAPI schema. Use this layer when you need an endpoint that does not yet have a high-level helper.
|
|
130
|
+
|
|
131
|
+
```ts
|
|
132
|
+
import { createClient } from "@arizeai/phoenix-client";
|
|
133
|
+
|
|
134
|
+
const client = createClient();
|
|
135
|
+
|
|
136
|
+
const datasets = await client.GET("/v1/datasets");
|
|
137
|
+
|
|
138
|
+
const prompt = await client.GET("/v1/prompts/{prompt_identifier}/latest", {
|
|
139
|
+
params: {
|
|
140
|
+
path: {
|
|
141
|
+
prompt_identifier: "support-response",
|
|
142
|
+
},
|
|
143
|
+
},
|
|
144
|
+
});
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
The root export exposes generated API types: `pathsV1`, `componentsV1`, `operationsV1`, `Types`, and `PhoenixClient`.
|
|
148
|
+
|
|
149
|
+
Prefer this layer when:
|
|
150
|
+
|
|
151
|
+
- you need a newly added endpoint before a helper exists
|
|
152
|
+
- you want direct control over route, body, and query params
|
|
153
|
+
- you are building thin wrappers around Phoenix routes in your own codebase
|
|
154
|
+
|
|
155
|
+
## Where To Start
|
|
156
|
+
|
|
157
|
+
- [Prompts](./prompts), [Datasets](./datasets), [Experiments](./experiments) — higher-level workflows
|
|
158
|
+
- [Annotations](./annotations) — annotation concepts, then [Span](./span-annotations), [Document](./document-annotations), and [Session](./session-annotations) annotations for detailed usage
|
|
159
|
+
- [Spans](./spans), [Sessions](./sessions), [Traces](./traces) — retrieval and maintenance
|
|
160
|
+
|
|
161
|
+
<section className="hidden" data-agent-context="source-map" aria-label="Source map">
|
|
162
|
+
<h2>Source Map</h2>
|
|
163
|
+
<ul>
|
|
164
|
+
<li><code>src/client.ts</code></li>
|
|
165
|
+
<li><code>src/config.ts</code></li>
|
|
166
|
+
<li><code>src/__generated__/api/v1.ts</code></li>
|
|
167
|
+
<li><code>src/types/core.ts</code></li>
|
|
168
|
+
<li><code>src/prompts/</code></li>
|
|
169
|
+
<li><code>src/datasets/</code></li>
|
|
170
|
+
<li><code>src/experiments/</code></li>
|
|
171
|
+
<li><code>src/spans/</code></li>
|
|
172
|
+
<li><code>src/sessions/</code></li>
|
|
173
|
+
<li><code>src/traces/</code></li>
|
|
174
|
+
<li><code>src/types/</code></li>
|
|
175
|
+
</ul>
|
|
176
|
+
</section>
|
package/docs/prompts.mdx
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Prompts"
|
|
3
|
+
description: "Manage prompts with @arizeai/phoenix-client"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
The prompts module lets you create prompt versions in Phoenix, fetch them back by selector, list prompts, and adapt prompt versions to supported provider SDKs.
|
|
7
|
+
|
|
8
|
+
<section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
|
|
9
|
+
<h2>Relevant Source Files</h2>
|
|
10
|
+
<ul>
|
|
11
|
+
<li><code>src/prompts/getPrompt.ts</code> for the exact selector shape</li>
|
|
12
|
+
</ul>
|
|
13
|
+
</section>
|
|
14
|
+
|
|
15
|
+
## Create A Prompt
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import {
|
|
19
|
+
createPrompt,
|
|
20
|
+
promptVersion,
|
|
21
|
+
} from "@arizeai/phoenix-client/prompts";
|
|
22
|
+
|
|
23
|
+
await createPrompt({
|
|
24
|
+
name: "support-response",
|
|
25
|
+
description: "Customer support reply prompt",
|
|
26
|
+
version: promptVersion({
|
|
27
|
+
modelProvider: "OPENAI",
|
|
28
|
+
modelName: "gpt-4o-mini",
|
|
29
|
+
template: [{ role: "user", content: "Reply to {{question}}" }],
|
|
30
|
+
}),
|
|
31
|
+
});
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Fetch By Selector
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
import { getPrompt } from "@arizeai/phoenix-client/prompts";
|
|
38
|
+
|
|
39
|
+
const prompt = await getPrompt({
|
|
40
|
+
prompt: { name: "support-response", tag: "production" },
|
|
41
|
+
});
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
The `prompt` selector accepts `{ name }`, `{ name, tag }`, or `{ versionId }`.
|
|
45
|
+
|
|
46
|
+
## Convert To Another SDK
|
|
47
|
+
|
|
48
|
+
```ts
|
|
49
|
+
import { toSDK } from "@arizeai/phoenix-client/prompts";
|
|
50
|
+
|
|
51
|
+
const promptAsAI = toSDK({
|
|
52
|
+
sdk: "ai",
|
|
53
|
+
prompt,
|
|
54
|
+
variables: { question: "Where is my order?" },
|
|
55
|
+
});
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Supported `sdk` targets:
|
|
59
|
+
|
|
60
|
+
- `ai`
|
|
61
|
+
- `openai`
|
|
62
|
+
- `anthropic`
|
|
63
|
+
|
|
64
|
+
<section className="hidden" data-agent-context="source-map" aria-label="Source map">
|
|
65
|
+
<h2>Source Map</h2>
|
|
66
|
+
<ul>
|
|
67
|
+
<li><code>src/prompts/createPrompt.ts</code></li>
|
|
68
|
+
<li><code>src/prompts/getPrompt.ts</code></li>
|
|
69
|
+
<li><code>src/prompts/listPrompts.ts</code></li>
|
|
70
|
+
<li><code>src/prompts/sdks/toSDK.ts</code></li>
|
|
71
|
+
<li><code>src/types/prompts.ts</code></li>
|
|
72
|
+
</ul>
|
|
73
|
+
</section>
|