@arizeai/phoenix-mcp 3.1.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,23 @@
1
1
  import z from "zod";
2
+ import { MAX_LIST_LIMIT } from "./constants.js";
3
+ import { resolveDatasetId } from "./datasetUtils.js";
4
+ import { fetchAllPages } from "./pagination.js";
5
+ import { getResponseData } from "./responseUtils.js";
6
+ import { jsonResponse } from "./toolResults.js";
7
+ // ---------------------------------------------------------------------------
8
+ // Tool descriptions
9
+ // ---------------------------------------------------------------------------
2
10
  const LIST_EXPERIMENTS_DESCRIPTION = `Get a list of all the experiments run on a given dataset.
3
11
 
4
- Experiments are collections of experiment runs, each experiment run corresponds to a single
5
- dataset example. The dataset example is passed to an implied \`task\` which in turn
12
+ Experiments are collections of experiment runs, each experiment run corresponds to a single
13
+ dataset example. The dataset example is passed to an implied \`task\` which in turn
6
14
  produces an output.
7
15
 
8
16
  Example usage:
9
17
  Show me all the experiments I've run on dataset RGF0YXNldDox
10
18
 
11
19
  Expected return:
12
- Array of experiment objects with metadata.
20
+ Array of experiment objects with metadata.
13
21
  Example: [
14
22
  {
15
23
  "id": "experimentid1234",
@@ -24,101 +32,74 @@ Expected return:
24
32
  ]`;
25
33
  const GET_EXPERIMENT_DESCRIPTION = `Get an experiment by its ID.
26
34
 
27
- The tool returns experiment metadata in the first content block and a JSON object with the
28
- experiment data in the second. The experiment data contains both the results of each
29
- experiment run and the annotations made by an evaluator to score or label the results,
30
- for example, comparing the output of an experiment run to the expected output from the
35
+ The tool returns experiment metadata in the first content block and a JSON object with the
36
+ experiment data in the second. The experiment data contains both the results of each
37
+ experiment run and the annotations made by an evaluator to score or label the results,
38
+ for example, comparing the output of an experiment run to the expected output from the
31
39
  dataset example.
32
40
 
33
41
  Example usage:
34
42
  Show me the experiment results for experiment RXhwZXJpbWVudDo4
35
43
 
36
44
  Expected return:
37
- Object containing experiment metadata and results.
38
- Example: {
39
- "metadata": {
40
- "id": "experimentid1234",
41
- "dataset_id": "datasetid1234",
42
- "dataset_version_id": "datasetversionid1234",
43
- "repetitions": 1,
44
- "metadata": {},
45
- "project_name": "Experiment-abc123",
46
- "created_at": "YYYY-MM-DDTHH:mm:ssZ",
47
- "updated_at": "YYYY-MM-DDTHH:mm:ssZ"
48
- },
49
- "experimentResult": [
50
- {
51
- "example_id": "exampleid1234",
52
- "repetition_number": 0,
53
- "input": "Sample input text",
54
- "reference_output": "Expected output text",
55
- "output": "Actual output text",
56
- "error": null,
57
- "latency_ms": 1000,
58
- "start_time": "2025-03-20T12:00:00Z",
59
- "end_time": "2025-03-20T12:00:01Z",
60
- "trace_id": "trace-123",
61
- "prompt_token_count": 10,
62
- "completion_token_count": 20,
63
- "annotations": [
64
- {
65
- "name": "quality",
66
- "annotator_kind": "HUMAN",
67
- "label": "good",
68
- "score": 0.9,
69
- "explanation": "Output matches expected format",
70
- "trace_id": "trace-456",
71
- "error": null,
72
- "metadata": {},
73
- "start_time": "YYYY-MM-DDTHH:mm:ssZ",
74
- "end_time": "YYYY-MM-DDTHH:mm:ssZ"
75
- }
76
- ]
77
- }
78
- ]
79
- }`;
45
+ Object containing experiment metadata and results.`;
46
+ // ---------------------------------------------------------------------------
47
+ // Tool registration
48
+ // ---------------------------------------------------------------------------
49
+ /**
50
+ * Register experiment-related MCP tools on the given server.
51
+ */
80
52
  export const initializeExperimentTools = ({ client, server, }) => {
81
53
  server.tool("list-experiments-for-dataset", LIST_EXPERIMENTS_DESCRIPTION, {
82
- dataset_id: z.string(),
83
- }, async ({ dataset_id }) => {
84
- const response = await client.GET("/v1/datasets/{dataset_id}/experiments", {
85
- params: {
86
- path: {
87
- dataset_id,
88
- },
54
+ dataset_id: z.string().optional(),
55
+ dataset_name: z.string().optional(),
56
+ limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
57
+ }, async ({ dataset_id, dataset_name, limit = 100 }) => {
58
+ const resolvedDatasetId = await resolveDatasetId({
59
+ client,
60
+ datasetId: dataset_id,
61
+ datasetName: dataset_name,
62
+ });
63
+ const experiments = await fetchAllPages({
64
+ limit,
65
+ fetchPage: async (cursor, pageSize) => {
66
+ const response = await client.GET("/v1/datasets/{dataset_id}/experiments", {
67
+ params: {
68
+ path: { dataset_id: resolvedDatasetId },
69
+ query: { cursor, limit: pageSize },
70
+ },
71
+ });
72
+ const data = getResponseData({
73
+ response,
74
+ errorPrefix: `Failed to fetch experiments for dataset "${resolvedDatasetId}"`,
75
+ });
76
+ return { data: data.data, nextCursor: data.next_cursor || undefined };
89
77
  },
90
78
  });
91
- return {
92
- content: [
93
- { type: "text", text: JSON.stringify(response.data?.data, null, 2) },
94
- ],
95
- };
79
+ return jsonResponse(experiments);
96
80
  });
97
81
  server.tool("get-experiment-by-id", GET_EXPERIMENT_DESCRIPTION, {
98
82
  experiment_id: z.string(),
99
83
  }, async ({ experiment_id }) => {
100
84
  const [experimentMetadataResponse, experimentDataResponse] = await Promise.all([
101
85
  client.GET("/v1/experiments/{experiment_id}", {
102
- params: {
103
- path: {
104
- experiment_id,
105
- },
106
- },
86
+ params: { path: { experiment_id } },
107
87
  }),
108
88
  client.GET("/v1/experiments/{experiment_id}/json", {
109
- params: {
110
- path: {
111
- experiment_id,
112
- },
113
- },
89
+ params: { path: { experiment_id } },
114
90
  }),
115
91
  ]);
116
- const text = JSON.stringify({
117
- metadata: experimentMetadataResponse.data?.data,
118
- experimentResult: experimentDataResponse.data,
92
+ const metadata = getResponseData({
93
+ response: experimentMetadataResponse,
94
+ errorPrefix: `Failed to fetch experiment "${experiment_id}" metadata`,
95
+ });
96
+ const experimentResult = getResponseData({
97
+ response: experimentDataResponse,
98
+ errorPrefix: `Failed to fetch experiment "${experiment_id}" JSON`,
99
+ });
100
+ return jsonResponse({
101
+ metadata: metadata.data,
102
+ experimentResult,
119
103
  });
120
- return {
121
- content: [{ type: "text", text }],
122
- };
123
104
  });
124
105
  };
@@ -0,0 +1,77 @@
1
/**
 * Decode a base64 string to UTF-8 text.
 *
 * The decoded text is re-encoded and compared with the input (trailing `=`
 * padding ignored on both sides); any mismatch means the input was not
 * clean base64 and `null` is returned. Any error raised along the way
 * (for example a non-string input) also results in `null`.
 *
 * @param value - Candidate base64 string.
 * @returns The decoded text, or `null` when the input does not round-trip.
 */
function decodeBase64(value) {
    const stripPadding = (text) => text.replace(/=+$/, "");
    try {
        const text = Buffer.from(value, "base64").toString("utf8");
        // Canonical re-encoding of what was decoded; lossy decodes will differ.
        const canonical = stripPadding(Buffer.from(text, "utf8").toString("base64"));
        if (canonical !== stripPadding(value)) {
            return null;
        }
        return text;
    }
    catch {
        return null;
    }
}
20
/**
 * Strip leading and trailing whitespace from an identifier.
 *
 * @param identifier - Raw identifier string.
 * @returns The identifier without surrounding whitespace.
 */
function getNormalizedIdentifier(identifier) {
    const trimmed = identifier.trim();
    return trimmed;
}
26
/**
 * Require a non-empty identifier value and return it trimmed.
 *
 * A missing or non-string identifier is treated the same as an empty one, so
 * callers always receive the `<label> is required` error rather than a
 * `TypeError` raised by calling `.trim()` on `undefined` or `null`.
 *
 * @param options.identifier - The raw identifier string to validate.
 * @param options.label - A human-readable label used in the error message
 *   (e.g. `"projectIdentifier"`).
 * @returns The identifier with surrounding whitespace removed.
 * @throws When the identifier is missing, not a string, empty, or whitespace-only.
 */
export function requireIdentifier({ identifier, label, }) {
    // Guard the type before trimming: optional tool arguments arrive as
    // `undefined`, which has no `.trim()`.
    const normalizedIdentifier = typeof identifier === "string" ? identifier.trim() : "";
    if (!normalizedIdentifier) {
        throw new Error(`${label} is required`);
    }
    return normalizedIdentifier;
}
41
/**
 * Split a Relay GlobalID into the `TypeName` and `nodeId` parts of its
 * base64-decoded `TypeName:nodeId` form.
 *
 * The identifier is trimmed and base64-decoded first. The decoded text must
 * contain a `:` that is neither its first nor its last character; the split
 * happens at the first `:`.
 *
 * @param identifier - Candidate Relay GlobalID.
 * @returns `{ typeName, nodeId }`, or `null` if the string is not a valid Relay GlobalID.
 */
export function parseRelayGlobalId(identifier) {
    const trimmed = getNormalizedIdentifier(identifier);
    const decoded = trimmed ? decodeBase64(trimmed) : null;
    if (!decoded) {
        return null;
    }
    const colonAt = decoded.indexOf(":");
    const hasTypeName = colonAt > 0;
    const hasNodeId = colonAt !== decoded.length - 1;
    if (!(hasTypeName && hasNodeId)) {
        return null;
    }
    const typeName = decoded.slice(0, colonAt);
    const nodeId = decoded.slice(colonAt + 1);
    return { typeName, nodeId };
}
64
/**
 * Return the normalized Relay GlobalID when it matches the expected type,
 * or `null` otherwise.
 *
 * Useful for distinguishing a human-readable name from a Relay ID so that
 * the correct API call path can be chosen.
 *
 * @param options.identifier - Candidate identifier (a name or a Relay GlobalID).
 * @param options.expectedTypeName - Type name the decoded GlobalID must carry.
 * @returns The trimmed identifier when it parses as a Relay GlobalID of the
 *   expected type, otherwise `null`.
 */
export function getRelayGlobalIdIfType({ identifier, expectedTypeName, }) {
    const normalizedIdentifier = getNormalizedIdentifier(identifier);
    const relayGlobalId = parseRelayGlobalId(normalizedIdentifier);
    // Handle a failed parse explicitly. With `relayGlobalId?.typeName`, a
    // non-Relay identifier evaluates to `undefined`, which compares equal to
    // a missing `expectedTypeName` and would be reported as a match.
    if (!relayGlobalId) {
        return null;
    }
    return relayGlobalId.typeName === expectedTypeName
        ? normalizedIdentifier
        : null;
}
package/build/index.js CHANGED
@@ -1,40 +1,42 @@
1
1
  #!/usr/bin/env node
2
2
  /* eslint-disable no-console */
3
- import { createClient } from "@arizeai/phoenix-client";
4
3
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
4
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
5
  import minimist from "minimist";
6
+ import { initializeAnnotationConfigTools } from "./annotationConfigTools.js";
7
+ import { createPhoenixClient } from "./client.js";
8
+ import { resolveConfig } from "./config.js";
7
9
  import { initializeDatasetTools } from "./datasetTools.js";
8
10
  import { initializeExperimentTools } from "./experimentTools.js";
9
11
  import { initializeProjectTools } from "./projectTools.js";
10
12
  import { initializePromptTools } from "./promptTools.js";
11
13
  import { initializeReadmeResources } from "./readmeResource.js";
14
+ import { initializeSessionTools } from "./sessionTools.js";
12
15
  import { initializeSpanTools } from "./spanTools.js";
13
16
  import { initializeSupportTools } from "./supportTools.js";
17
+ import { initializeTraceTools } from "./traceTools.js";
14
18
  const argv = minimist(process.argv.slice(2));
15
- const headers = argv.apiKey
16
- ? {
17
- Authorization: `Bearer ${argv.apiKey}`,
18
- api_key: argv.apiKey, // For hosted phoenix
19
- }
20
- : {};
21
- // Initialize Phoenix client
22
- const client = createClient({
23
- options: {
24
- baseUrl: argv.baseUrl || "http://localhost:6006",
25
- headers,
19
+ const config = resolveConfig({
20
+ commandLineOptions: {
21
+ apiKey: argv.apiKey,
22
+ baseUrl: argv.baseUrl,
23
+ project: argv.project,
26
24
  },
27
25
  });
26
+ const client = createPhoenixClient({ config });
28
27
  // Create server instance
29
28
  const server = new McpServer({
30
29
  name: "phoenix-mcp-server",
31
- version: "1.0.0",
30
+ version: "1.1.0",
32
31
  });
33
32
  initializePromptTools({ client, server });
34
33
  initializeExperimentTools({ client, server });
35
34
  initializeDatasetTools({ client, server });
36
35
  initializeProjectTools({ client, server });
37
- initializeSpanTools({ client, server });
36
+ initializeTraceTools({ client, server, defaultProject: config.project });
37
+ initializeSpanTools({ client, server, defaultProject: config.project });
38
+ initializeSessionTools({ client, server, defaultProject: config.project });
39
+ initializeAnnotationConfigTools({ client, server });
38
40
  initializeSupportTools({ server });
39
41
  async function main() {
40
42
  // Initialize readme resources first
@@ -0,0 +1,25 @@
1
+ import { DEFAULT_PAGE_SIZE } from "./constants.js";
2
/**
 * Fetch items across multiple API pages until the requested `limit` is reached
 * or no more pages remain.
 *
 * Each call site supplies a `fetchPage` callback that encapsulates the
 * endpoint-specific request and response parsing. The helper handles cursor
 * propagation, page-size clamping, and limit enforcement.
 *
 * A `limit` that is not a positive number (zero, negative, `NaN`, or missing)
 * yields an empty array without issuing any request.
 *
 * @param options.fetchPage - Callback `(cursor, pageSize)` that retrieves a single
 *   page and resolves to `{ data, nextCursor }`.
 * @param options.limit - Maximum total items to collect across all pages.
 * @param options.initialCursor - Optional cursor to resume pagination from.
 * @returns Collected items, truncated to `limit`.
 */
export async function fetchAllPages({ fetchPage, limit, initialCursor, }) {
    // `!(limit > 0)` also catches NaN and undefined. Without this guard the
    // loop would still request one page with a non-positive page size, and a
    // negative limit would make `slice(0, limit)` drop items from the end
    // instead of returning nothing.
    if (!(limit > 0)) {
        return [];
    }
    const items = [];
    let cursor = initialCursor;
    do {
        // Ask only for what is still missing, capped at DEFAULT_PAGE_SIZE.
        const pageSize = Math.min(limit - items.length, DEFAULT_PAGE_SIZE);
        const page = await fetchPage(cursor, pageSize);
        items.push(...page.data);
        cursor = page.nextCursor;
    } while (cursor && items.length < limit);
    // Enforce the limit on the way out in case a page held more than requested.
    return items.slice(0, limit);
}
@@ -1,7 +1,14 @@
1
1
  import z from "zod";
2
+ import { MAX_LIST_LIMIT } from "./constants.js";
3
+ import { fetchAllPages } from "./pagination.js";
4
+ import { getResponseData } from "./responseUtils.js";
5
+ import { jsonResponse } from "./toolResults.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Tool descriptions
8
+ // ---------------------------------------------------------------------------
2
9
  const LIST_PROJECTS_DESCRIPTION = `Get a list of all projects.
3
10
 
4
- Projects are containers for organizing traces, spans, and other observability data.
11
+ Projects are containers for organizing traces, spans, and other observability data.
5
12
  Each project has a unique name and can contain traces from different applications or experiments.
6
13
 
7
14
  Example usage:
@@ -16,33 +23,64 @@ Expected return:
16
23
  "description": "Default project for traces"
17
24
  },
18
25
  {
19
- "id": "UHJvamVjdDoy",
26
+ "id": "UHJvamVjdDoy",
20
27
  "name": "my-experiment",
21
28
  "description": "Project for my ML experiment"
22
29
  }
23
30
  ]`;
31
+ const GET_PROJECT_DESCRIPTION = `Get a project by name or ID.
32
+
33
+ Example usage:
34
+ Show me the project "default"
35
+
36
+ Expected return:
37
+ A single project object with metadata.`;
38
+ // ---------------------------------------------------------------------------
39
+ // Tool registration
40
+ // ---------------------------------------------------------------------------
41
+ /**
42
+ * Register project-related MCP tools on the given server.
43
+ */
24
44
  export const initializeProjectTools = ({ client, server, }) => {
25
45
  server.tool("list-projects", LIST_PROJECTS_DESCRIPTION, {
26
- limit: z.number().min(1).max(100).default(100).optional(),
46
+ limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
27
47
  cursor: z.string().optional(),
28
- includeExperimentProjects: z.boolean().default(false).optional(),
29
- }, async ({ limit = 100, cursor, includeExperimentProjects = false }) => {
30
- const response = await client.GET("/v1/projects", {
48
+ include_experiment_projects: z.boolean().default(false).optional(),
49
+ }, async ({ limit = 100, cursor, include_experiment_projects = false }) => {
50
+ const projects = await fetchAllPages({
51
+ limit,
52
+ initialCursor: cursor,
53
+ fetchPage: async (pageCursor, pageSize) => {
54
+ const response = await client.GET("/v1/projects", {
55
+ params: {
56
+ query: {
57
+ limit: pageSize,
58
+ cursor: pageCursor,
59
+ include_experiment_projects,
60
+ },
61
+ },
62
+ });
63
+ const data = getResponseData({
64
+ response,
65
+ errorPrefix: "Failed to fetch projects",
66
+ });
67
+ return { data: data.data, nextCursor: data.next_cursor || undefined };
68
+ },
69
+ });
70
+ return jsonResponse(projects);
71
+ });
72
+ server.tool("get-project", GET_PROJECT_DESCRIPTION, {
73
+ project_identifier: z.string(),
74
+ }, async ({ project_identifier }) => {
75
+ const response = await client.GET("/v1/projects/{project_identifier}", {
31
76
  params: {
32
- query: {
33
- limit,
34
- cursor,
35
- include_experiment_projects: includeExperimentProjects,
36
- },
77
+ path: { project_identifier },
37
78
  },
38
79
  });
39
- return {
40
- content: [
41
- {
42
- type: "text",
43
- text: JSON.stringify(response.data?.data, null, 2),
44
- },
45
- ],
46
- };
80
+ const project = getResponseData({
81
+ response,
82
+ errorPrefix: `Failed to fetch project "${project_identifier}"`,
83
+ }).data;
84
+ return jsonResponse(project);
47
85
  });
48
86
  };
@@ -0,0 +1,14 @@
1
/**
 * Pick the project identifier a project-scoped MCP tool should use.
 *
 * Candidates are tried in order and the first one that is non-empty after
 * trimming wins:
 * 1. Explicit `projectIdentifier` from the tool call
 * 2. Configured default from `PHOENIX_PROJECT` env var or `--project` CLI flag
 *
 * @param options.projectIdentifier - Identifier supplied with the tool call, if any.
 * @param options.defaultProjectIdentifier - Configured fallback identifier, if any.
 * @returns The trimmed identifier that was selected.
 * @throws When neither candidate is a non-empty string.
 */
export function resolveProjectIdentifier({ projectIdentifier, defaultProjectIdentifier, }) {
    for (const candidate of [projectIdentifier, defaultProjectIdentifier]) {
        const trimmed = candidate?.trim();
        if (trimmed) {
            return trimmed;
        }
    }
    throw new Error("projectIdentifier is required. Pass projectIdentifier or configure PHOENIX_PROJECT/--project.");
}
@@ -1,42 +1,54 @@
1
1
  import z from "zod";
2
+ import { DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, } from "./constants.js";
2
3
  export const listPromptsSchema = z.object({
3
4
  limit: z.number().min(1).max(100).default(100),
4
5
  });
5
6
  export const getLatestPromptSchema = z.object({
6
7
  prompt_identifier: z.string(),
7
8
  });
9
+ export const getPromptSchema = z.object({
10
+ prompt_identifier: z.string(),
11
+ tag: z.string().optional(),
12
+ version_id: z.string().optional(),
13
+ });
8
14
  export const getPromptByIdentifierSchema = z.object({
9
15
  prompt_identifier: z.string(),
10
16
  });
11
17
  export const getPromptVersionSchema = z.object({
12
18
  prompt_version_id: z.string(),
13
19
  });
20
+ /**
21
+ * Name transformation applied to prompt names:
22
+ * - lowercase
23
+ * - spaces → underscores
24
+ * - strip non-alphanumeric / non-underscore characters
25
+ */
26
+ const promptNameSchema = z
27
+ .string()
28
+ .transform((val) => val
29
+ .toLowerCase()
30
+ .replace(/\s+/g, "_")
31
+ .replace(/[^\w_]/g, ""))
32
+ .refine((val) => val.length > 0, {
33
+ message: "Name cannot be empty after transformation",
34
+ });
14
35
  export const createPromptSchema = z.object({
15
- name: z
16
- .string()
17
- .transform((val) => val
18
- .toLowerCase()
19
- .replace(/\s+/g, "_") // Replace spaces with underscores
20
- .replace(/[^\w_]/g, "") // Remove anything that's not alphanumeric or underscore
21
- )
22
- .refine((val) => val.length > 0, {
23
- message: "Name cannot be empty after transformation",
24
- }),
36
+ name: promptNameSchema,
25
37
  description: z.string().optional(),
26
38
  template: z.string(),
27
39
  model_provider: z
28
40
  .enum(["OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE"])
29
41
  .optional()
30
- .default("OPENAI"),
31
- model_name: z.string().optional().default("gpt-4"),
32
- temperature: z.number().optional().default(0.7),
42
+ .default(DEFAULT_MODEL_PROVIDER),
43
+ model_name: z.string().optional().default(DEFAULT_MODEL_NAME),
44
+ temperature: z.number().optional().default(DEFAULT_TEMPERATURE),
33
45
  });
34
46
  export const updatePromptSchema = z.object({
35
47
  prompt_identifier: z.string(),
36
48
  name: z.string().optional(),
37
49
  description: z.string().optional(),
38
50
  template: z.string().optional(),
39
- metadata: z.record(z.string(), z.any()).optional(),
51
+ metadata: z.record(z.string(), z.unknown()).optional(),
40
52
  });
41
53
  export const deletePromptSchema = z.object({
42
54
  prompt_identifier: z.string(),