@arizeai/phoenix-mcp 3.1.5 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -2
- package/build/annotationConfigTools.js +43 -0
- package/build/client.js +22 -0
- package/build/config.js +47 -0
- package/build/constants.js +61 -0
- package/build/datasetTools.js +123 -102
- package/build/datasetUtils.js +59 -0
- package/build/experimentTools.js +59 -78
- package/build/identifiers.js +77 -0
- package/build/index.js +16 -14
- package/build/pagination.js +25 -0
- package/build/projectTools.js +57 -19
- package/build/projectUtils.js +14 -0
- package/build/promptSchemas.js +26 -14
- package/build/promptTools.js +184 -303
- package/build/responseUtils.js +16 -0
- package/build/sessionTools.js +126 -0
- package/build/spanTools.js +92 -62
- package/build/spanUtils.js +232 -0
- package/build/supportTools.js +12 -9
- package/build/toolResults.js +32 -0
- package/build/traceTools.js +141 -0
- package/build/traceUtils.js +57 -0
- package/package.json +9 -6
package/README.md
CHANGED
|
@@ -31,7 +31,8 @@ Phoenix MCP Server is an implementation of the Model Context Protocol for the Ar
|
|
|
31
31
|
You can use Phoenix MCP Server for:
|
|
32
32
|
|
|
33
33
|
- **Projects Management**: List and explore projects that organize your observability data
|
|
34
|
-
- **Spans & Annotations**: Retrieve spans and
|
|
34
|
+
- **Traces, Spans & Annotations**: Retrieve traces, spans, and annotation configs for analysis and debugging
|
|
35
|
+
- **Sessions**: Explore conversation flows and session-level annotations
|
|
35
36
|
- **Prompts Management**: Create, list, update, and iterate on prompts
|
|
36
37
|
- **Datasets**: Explore datasets and synthesize new examples
|
|
37
38
|
- **Experiments**: Pull experiment results and visualize them with the help of an LLM
|
|
@@ -103,10 +104,27 @@ pnpm inspect
|
|
|
103
104
|
When developing, the server requires the following environment variables:
|
|
104
105
|
|
|
105
106
|
- `PHOENIX_API_KEY`: Your Phoenix API key
|
|
106
|
-
- `
|
|
107
|
+
- `PHOENIX_HOST`: The base URL for Phoenix
|
|
108
|
+
- `PHOENIX_PROJECT`: Optional default project for project-scoped tools
|
|
109
|
+
- `PHOENIX_CLIENT_HEADERS`: Optional JSON-encoded request headers
|
|
107
110
|
|
|
108
111
|
Make sure to set these in a `.env` file. See `.env.example`.
|
|
109
112
|
|
|
113
|
+
## Tool Coverage
|
|
114
|
+
|
|
115
|
+
The MCP server now covers the main operational Phoenix workflows:
|
|
116
|
+
|
|
117
|
+
- `list-projects`, `get-project`
|
|
118
|
+
- `list-traces`, `get-trace`
|
|
119
|
+
- `get-spans`, `get-span-annotations`
|
|
120
|
+
- `list-sessions`, `get-session`
|
|
121
|
+
- `list-annotation-configs`
|
|
122
|
+
- `list-datasets`, `get-dataset`, `get-dataset-examples`, `get-dataset-experiments`, `add-dataset-examples`
|
|
123
|
+
- `list-experiments-for-dataset`, `get-experiment-by-id`
|
|
124
|
+
- `list-prompts`, `get-prompt`, legacy prompt getter aliases, prompt version/tag tools, `upsert-prompt`
|
|
125
|
+
|
|
126
|
+
For Phoenix documentation search, use the separate Phoenix Docs MCP server instead of this package.
|
|
127
|
+
|
|
110
128
|
## Community
|
|
111
129
|
|
|
112
130
|
Join our community to connect with thousands of AI builders:
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import z from "zod";
import { MAX_LIST_LIMIT } from "./constants.js";
import { fetchAllPages } from "./pagination.js";
import { getResponseData } from "./responseUtils.js";
import { jsonResponse } from "./toolResults.js";
// ---------------------------------------------------------------------------
// Tool descriptions
// ---------------------------------------------------------------------------
const LIST_ANNOTATION_CONFIGS_DESCRIPTION = `List Phoenix annotation configs.

Annotation configs define the available human or automated labels, scores, and freeform annotation types.

Example usage:
Show me all annotation configs

Expected return:
Array of annotation config objects.`;
// ---------------------------------------------------------------------------
// Tool registration
// ---------------------------------------------------------------------------
/**
 * Register annotation-config-related MCP tools on the given server.
 */
export const initializeAnnotationConfigTools = ({ client, server }) => {
  // Fetch one page of annotation configs from the Phoenix REST API.
  const fetchConfigPage = async (cursor, pageSize) => {
    const response = await client.GET("/v1/annotation_configs", {
      params: { query: { cursor, limit: pageSize } },
    });
    const payload = getResponseData({
      response,
      errorPrefix: "Failed to fetch annotation configs",
    });
    return { data: payload.data, nextCursor: payload.next_cursor || undefined };
  };
  server.tool(
    "list-annotation-configs",
    LIST_ANNOTATION_CONFIGS_DESCRIPTION,
    {
      limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
    },
    async ({ limit = 100 }) => {
      // Exhaust cursor pagination up to `limit` items before responding.
      const allConfigs = await fetchAllPages({
        limit,
        fetchPage: fetchConfigPage,
      });
      return jsonResponse(allConfigs);
    }
  );
};
|
package/build/client.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { createClient } from "@arizeai/phoenix-client";
/**
 * Create a Phoenix REST client for MCP tool handlers.
 *
 * The MCP package sends both bearer and `api_key` auth headers because Phoenix
 * deployments may rely on either convention.
 */
export function createPhoenixClient({ config }) {
  const requestHeaders = { ...(config.headers || {}) };
  const { apiKey } = config;
  if (apiKey) {
    // Send both auth conventions so either deployment style accepts the call.
    requestHeaders.Authorization = `Bearer ${apiKey}`;
    requestHeaders.api_key = apiKey;
  }
  return createClient({
    options: {
      baseUrl: config.baseUrl,
      headers: requestHeaders,
    },
  });
}
|
package/build/config.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { DEFAULT_PHOENIX_BASE_URL, ENV_PHOENIX_API_KEY, ENV_PHOENIX_CLIENT_HEADERS, ENV_PHOENIX_HOST, ENV_PHOENIX_PROJECT, getHeadersFromEnvironment, getStrFromEnvironment, } from "@arizeai/phoenix-config";
export const DEFAULT_PHOENIX_ENDPOINT = DEFAULT_PHOENIX_BASE_URL;
/**
 * Load Phoenix MCP configuration from environment variables.
 *
 * Unset values resolve to `undefined`, except the base URL which falls back
 * to the default Phoenix endpoint.
 */
export function loadConfigFromEnvironment() {
  const host = getStrFromEnvironment(ENV_PHOENIX_HOST);
  const key = getStrFromEnvironment(ENV_PHOENIX_API_KEY);
  const clientHeaders = getHeadersFromEnvironment(ENV_PHOENIX_CLIENT_HEADERS);
  const projectName = getStrFromEnvironment(ENV_PHOENIX_PROJECT);
  return {
    baseUrl: host || DEFAULT_PHOENIX_ENDPOINT,
    apiKey: key || undefined,
    headers: clientHeaders || undefined,
    project: projectName || undefined,
  };
}
|
|
18
|
+
/**
 * Extract only the string-valued command-line options, ignoring bare boolean
 * flags that `minimist` produces when a flag is used without a value.
 */
function getStringCommandLineOptions(commandLineOptions) {
  const overrides = {};
  // Only these three option names are recognized as config overrides.
  for (const key of ["baseUrl", "apiKey", "project"]) {
    const value = commandLineOptions[key];
    if (typeof value === "string") {
      overrides[key] = value;
    }
  }
  return overrides;
}
|
|
35
|
+
/**
 * Merge environment-derived Phoenix MCP configuration with command-line overrides.
 *
 * Only string command-line values are treated as overrides so that bare flags
 * parsed by `minimist` do not replace valid environment defaults with boolean `true`.
 */
export function resolveConfig({ commandLineOptions }) {
  const fromEnvironment = loadConfigFromEnvironment();
  const fromCommandLine = getStringCommandLineOptions(commandLineOptions);
  // Later spread wins: command-line values override environment values.
  return {
    ...fromEnvironment,
    ...fromCommandLine,
  };
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// ============================================================
// Pagination
// ============================================================
/** Default number of items fetched per API page request. */
export const DEFAULT_PAGE_SIZE = 100;
// ============================================================
// Span queries
// ============================================================
/** Maximum number of spans that a single query may return. */
export const MAX_SPAN_QUERY_LIMIT = 1000;
// ============================================================
// List queries (datasets, experiments, projects, configs)
// ============================================================
/** Upper bound for the `limit` parameter on list endpoints. */
export const MAX_LIST_LIMIT = 500;
// ============================================================
// Annotation fetching
// ============================================================
/** Number of span IDs included in each annotation chunk request. */
export const ANNOTATION_CHUNK_SIZE = 100;
/** Maximum number of annotation chunk requests executed concurrently. */
export const MAX_CONCURRENT_ANNOTATION_REQUESTS = 5;
/** Page size used when exhausting annotation pages within a single chunk. */
export const ANNOTATION_PAGE_SIZE = 1000;
// ============================================================
// Trace queries
// ============================================================
/** Default number of traces returned by the list-traces tool. */
export const DEFAULT_TRACE_PAGE_SIZE = 10;
/** Maximum number of traces the list-traces tool may return. */
export const MAX_TRACE_PAGE_SIZE = 100;
// ============================================================
// Session queries
// ============================================================
/** Maximum number of sessions the list-sessions tool may return. */
export const MAX_SESSION_PAGE_SIZE = 100;
// ============================================================
// Prompt defaults
// ============================================================
/** Default model provider when creating a prompt version. */
export const DEFAULT_MODEL_PROVIDER = "OPENAI";
/** Default model name when creating a prompt version. */
export const DEFAULT_MODEL_NAME = "gpt-4";
/** Default sampling temperature when creating a prompt version. */
export const DEFAULT_TEMPERATURE = 0.7;
/**
 * Default `max_tokens` for Anthropic prompt versions.
 *
 * Anthropic models require an explicit `max_tokens` invocation parameter.
 */
export const ANTHROPIC_DEFAULT_MAX_TOKENS = 1000;
// ============================================================
// Time
// ============================================================
/** Number of milliseconds in one minute. */
export const MS_PER_MINUTE = 60_000;
// ============================================================
// MCP metadata
// ============================================================
/** Provenance tag applied to dataset examples created through the MCP server. */
export const MCP_SYNTHETIC_SOURCE = "Synthetic Example added via MCP";
|
package/build/datasetTools.js
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
import z from "zod";
|
|
2
|
+
import { MAX_LIST_LIMIT, MCP_SYNTHETIC_SOURCE } from "./constants.js";
|
|
3
|
+
import { resolveDatasetId } from "./datasetUtils.js";
|
|
4
|
+
import { fetchAllPages } from "./pagination.js";
|
|
5
|
+
import { getResponseData } from "./responseUtils.js";
|
|
6
|
+
import { jsonResponse } from "./toolResults.js";
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Tool descriptions
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
2
10
|
const LIST_DATASETS_DESCRIPTION = `Get a list of all datasets.
|
|
3
11
|
|
|
4
|
-
Datasets are collections of 'dataset examples' that each example includes an input,
|
|
12
|
+
Datasets are collections of 'dataset examples' that each example includes an input,
|
|
5
13
|
(expected) output, and optional metadata. They are primarily used as inputs for experiments.
|
|
6
14
|
|
|
7
15
|
Example usage:
|
|
@@ -21,52 +29,23 @@ Expected return:
|
|
|
21
29
|
]`;
|
|
22
30
|
const GET_DATASET_EXAMPLES_DESCRIPTION = `Get examples from a dataset.
|
|
23
31
|
|
|
24
|
-
Dataset examples are an array of objects that each include an input,
|
|
25
|
-
(expected) output, and optional metadata. These examples are typically used to represent
|
|
26
|
-
input to an application or model (e.g. prompt template variables, a code file, or image)
|
|
32
|
+
Dataset examples are an array of objects that each include an input,
|
|
33
|
+
(expected) output, and optional metadata. These examples are typically used to represent
|
|
34
|
+
input to an application or model (e.g. prompt template variables, a code file, or image)
|
|
27
35
|
and used to test or benchmark changes.
|
|
28
36
|
|
|
29
37
|
Example usage:
|
|
30
38
|
Show me all examples from dataset RGF0YXNldDox
|
|
31
39
|
|
|
32
40
|
Expected return:
|
|
33
|
-
Object containing dataset ID, version ID, and array of examples
|
|
34
|
-
Example: {
|
|
35
|
-
"dataset_id": "datasetid1234",
|
|
36
|
-
"version_id": "datasetversionid1234",
|
|
37
|
-
"examples": [
|
|
38
|
-
{
|
|
39
|
-
"id": "exampleid1234",
|
|
40
|
-
"input": {
|
|
41
|
-
"text": "Sample input text"
|
|
42
|
-
},
|
|
43
|
-
"output": {
|
|
44
|
-
"text": "Expected output text"
|
|
45
|
-
},
|
|
46
|
-
"metadata": {},
|
|
47
|
-
"updated_at": "YYYY-MM-DDTHH:mm:ssZ"
|
|
48
|
-
}
|
|
49
|
-
]
|
|
50
|
-
}`;
|
|
41
|
+
Object containing dataset ID, version ID, and array of examples.`;
|
|
51
42
|
const GET_DATASET_EXPERIMENTS_DESCRIPTION = `List experiments run on a dataset.
|
|
52
43
|
|
|
53
44
|
Example usage:
|
|
54
45
|
Show me all experiments run on dataset RGF0YXNldDox
|
|
55
46
|
|
|
56
47
|
Expected return:
|
|
57
|
-
Array of experiment objects with metadata
|
|
58
|
-
Example: [
|
|
59
|
-
{
|
|
60
|
-
"id": "experimentid1234",
|
|
61
|
-
"dataset_id": "datasetid1234",
|
|
62
|
-
"dataset_version_id": "datasetversionid1234",
|
|
63
|
-
"repetitions": 1,
|
|
64
|
-
"metadata": {},
|
|
65
|
-
"project_name": "Experiment-abc123",
|
|
66
|
-
"created_at": "YYYY-MM-DDTHH:mm:ssZ",
|
|
67
|
-
"updated_at": "YYYY-MM-DDTHH:mm:ssZ"
|
|
68
|
-
}
|
|
69
|
-
]`;
|
|
48
|
+
Array of experiment objects with metadata.`;
|
|
70
49
|
const ADD_DATASET_EXAMPLES_DESCRIPTION = `Add examples to an existing dataset.
|
|
71
50
|
|
|
72
51
|
This tool adds one or more examples to an existing dataset. Each example includes an input,
|
|
@@ -79,107 +58,149 @@ Example usage:
|
|
|
79
58
|
Look at the analyze "my-dataset" and augment them with new examples to cover relevant edge cases
|
|
80
59
|
|
|
81
60
|
Expected return:
|
|
82
|
-
Confirmation of successful addition of examples to the dataset
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
61
|
+
Confirmation of successful addition of examples to the dataset.`;
|
|
62
|
+
const GET_DATASET_DESCRIPTION = `Get dataset metadata by name or ID.
|
|
63
|
+
|
|
64
|
+
Example usage:
|
|
65
|
+
Show me the dataset "my-dataset"
|
|
66
|
+
|
|
67
|
+
Expected return:
|
|
68
|
+
A dataset object with metadata and version information.`;
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Shared schema
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
const datasetSelectorSchema = z
|
|
73
|
+
.object({
|
|
74
|
+
dataset_id: z.string().optional(),
|
|
75
|
+
dataset_name: z.string().optional(),
|
|
76
|
+
})
|
|
77
|
+
.refine(({ dataset_id, dataset_name }) => Boolean(dataset_id || dataset_name), { message: "Provide dataset_id or dataset_name" });
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Tool registration
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
/**
|
|
82
|
+
* Register dataset-related MCP tools on the given server.
|
|
83
|
+
*/
|
|
87
84
|
export const initializeDatasetTools = ({ client, server, }) => {
|
|
88
85
|
server.tool("list-datasets", LIST_DATASETS_DESCRIPTION, {
|
|
89
|
-
limit: z.number().min(1).max(
|
|
86
|
+
limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100),
|
|
90
87
|
}, async ({ limit }) => {
|
|
91
|
-
const
|
|
92
|
-
|
|
93
|
-
|
|
88
|
+
const datasets = await fetchAllPages({
|
|
89
|
+
limit,
|
|
90
|
+
fetchPage: async (cursor, pageSize) => {
|
|
91
|
+
const response = await client.GET("/v1/datasets", {
|
|
92
|
+
params: { query: { cursor, limit: pageSize } },
|
|
93
|
+
});
|
|
94
|
+
const data = getResponseData({
|
|
95
|
+
response,
|
|
96
|
+
errorPrefix: "Failed to fetch datasets",
|
|
97
|
+
});
|
|
98
|
+
return { data: data.data, nextCursor: data.next_cursor || undefined };
|
|
94
99
|
},
|
|
95
100
|
});
|
|
96
|
-
return
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
};
|
|
101
|
+
return jsonResponse(datasets);
|
|
102
|
+
});
|
|
103
|
+
server.tool("get-dataset", GET_DATASET_DESCRIPTION, datasetSelectorSchema.shape, async ({ dataset_id, dataset_name }) => {
|
|
104
|
+
const resolvedId = await resolveDatasetId({
|
|
105
|
+
client,
|
|
106
|
+
datasetId: dataset_id,
|
|
107
|
+
datasetName: dataset_name,
|
|
108
|
+
});
|
|
109
|
+
const response = await client.GET("/v1/datasets/{id}", {
|
|
110
|
+
params: { path: { id: resolvedId } },
|
|
111
|
+
});
|
|
112
|
+
const dataset = getResponseData({
|
|
113
|
+
response,
|
|
114
|
+
errorPrefix: `Failed to fetch dataset "${resolvedId}"`,
|
|
115
|
+
}).data;
|
|
116
|
+
return jsonResponse(dataset);
|
|
104
117
|
});
|
|
105
118
|
server.tool("get-dataset-examples", GET_DATASET_EXAMPLES_DESCRIPTION, {
|
|
106
|
-
|
|
107
|
-
|
|
119
|
+
...datasetSelectorSchema.shape,
|
|
120
|
+
version_id: z.string().optional(),
|
|
121
|
+
splits: z.array(z.string()).optional(),
|
|
122
|
+
}, async ({ dataset_id, dataset_name, version_id, splits }) => {
|
|
123
|
+
const resolvedId = await resolveDatasetId({
|
|
124
|
+
client,
|
|
125
|
+
datasetId: dataset_id,
|
|
126
|
+
datasetName: dataset_name,
|
|
127
|
+
});
|
|
108
128
|
const response = await client.GET("/v1/datasets/{id}/examples", {
|
|
109
129
|
params: {
|
|
110
|
-
path: { id:
|
|
130
|
+
path: { id: resolvedId },
|
|
131
|
+
query: { version_id, split: splits },
|
|
111
132
|
},
|
|
112
133
|
});
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
},
|
|
119
|
-
],
|
|
120
|
-
};
|
|
134
|
+
const datasetExamples = getResponseData({
|
|
135
|
+
response,
|
|
136
|
+
errorPrefix: `Failed to fetch examples for dataset "${resolvedId}"`,
|
|
137
|
+
});
|
|
138
|
+
return jsonResponse(datasetExamples);
|
|
121
139
|
});
|
|
122
140
|
server.tool("get-dataset-experiments", GET_DATASET_EXPERIMENTS_DESCRIPTION, {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
141
|
+
...datasetSelectorSchema.shape,
|
|
142
|
+
limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
|
|
143
|
+
}, async ({ dataset_id, dataset_name, limit = 100 }) => {
|
|
144
|
+
const resolvedId = await resolveDatasetId({
|
|
145
|
+
client,
|
|
146
|
+
datasetId: dataset_id,
|
|
147
|
+
datasetName: dataset_name,
|
|
148
|
+
});
|
|
149
|
+
const experiments = await fetchAllPages({
|
|
150
|
+
limit,
|
|
151
|
+
fetchPage: async (cursor, pageSize) => {
|
|
152
|
+
const response = await client.GET("/v1/datasets/{dataset_id}/experiments", {
|
|
153
|
+
params: {
|
|
154
|
+
path: { dataset_id: resolvedId },
|
|
155
|
+
query: { cursor, limit: pageSize },
|
|
156
|
+
},
|
|
157
|
+
});
|
|
158
|
+
const data = getResponseData({
|
|
159
|
+
response,
|
|
160
|
+
errorPrefix: `Failed to fetch experiments for dataset "${resolvedId}"`,
|
|
161
|
+
});
|
|
162
|
+
return { data: data.data, nextCursor: data.next_cursor || undefined };
|
|
128
163
|
},
|
|
129
164
|
});
|
|
130
|
-
return
|
|
131
|
-
content: [
|
|
132
|
-
{
|
|
133
|
-
type: "text",
|
|
134
|
-
text: JSON.stringify(response.data, null, 2),
|
|
135
|
-
},
|
|
136
|
-
],
|
|
137
|
-
};
|
|
165
|
+
return jsonResponse(experiments);
|
|
138
166
|
});
|
|
139
167
|
server.tool("add-dataset-examples", ADD_DATASET_EXAMPLES_DESCRIPTION, {
|
|
140
|
-
|
|
168
|
+
dataset_name: z.string(),
|
|
141
169
|
examples: z.array(z.object({
|
|
142
|
-
input: z.record(z.string(), z.
|
|
143
|
-
output: z.record(z.string(), z.
|
|
144
|
-
metadata: z.record(z.string(), z.
|
|
170
|
+
input: z.record(z.string(), z.unknown()),
|
|
171
|
+
output: z.record(z.string(), z.unknown()),
|
|
172
|
+
metadata: z.record(z.string(), z.unknown()).optional(),
|
|
145
173
|
})),
|
|
146
|
-
}, async ({
|
|
147
|
-
// Add MCP metadata to each example
|
|
174
|
+
}, async ({ dataset_name, examples }) => {
|
|
148
175
|
const examplesWithMetadata = examples.map((example) => ({
|
|
149
176
|
...example,
|
|
150
177
|
metadata: {
|
|
151
178
|
...example.metadata,
|
|
152
|
-
source:
|
|
179
|
+
source: MCP_SYNTHETIC_SOURCE,
|
|
153
180
|
},
|
|
154
181
|
}));
|
|
155
182
|
const response = await client.POST("/v1/datasets/upload", {
|
|
156
183
|
body: {
|
|
157
184
|
action: "append",
|
|
158
|
-
name:
|
|
185
|
+
name: dataset_name,
|
|
159
186
|
inputs: examplesWithMetadata.map((e) => e.input),
|
|
160
187
|
outputs: examplesWithMetadata.map((e) => e.output),
|
|
161
188
|
metadata: examplesWithMetadata.map((e) => e.metadata),
|
|
162
189
|
},
|
|
163
|
-
params: {
|
|
164
|
-
query: {
|
|
165
|
-
sync: true,
|
|
166
|
-
},
|
|
167
|
-
},
|
|
190
|
+
params: { query: { sync: true } },
|
|
168
191
|
});
|
|
169
|
-
|
|
192
|
+
const uploadResponse = getResponseData({
|
|
193
|
+
response,
|
|
194
|
+
errorPrefix: `Failed to add examples to dataset "${dataset_name}"`,
|
|
195
|
+
});
|
|
196
|
+
const uploadData = uploadResponse?.data;
|
|
197
|
+
if (!uploadData?.dataset_id) {
|
|
170
198
|
throw new Error("Failed to add examples to dataset: No dataset ID received");
|
|
171
199
|
}
|
|
172
|
-
return {
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
dataset_name: datasetName,
|
|
178
|
-
dataset_id: response.data.data.dataset_id,
|
|
179
|
-
message: "Successfully added examples to dataset",
|
|
180
|
-
}, null, 2),
|
|
181
|
-
},
|
|
182
|
-
],
|
|
183
|
-
};
|
|
200
|
+
return jsonResponse({
|
|
201
|
+
dataset_name,
|
|
202
|
+
dataset_id: uploadData.dataset_id,
|
|
203
|
+
message: "Successfully added examples to dataset",
|
|
204
|
+
});
|
|
184
205
|
});
|
|
185
206
|
};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { getRelayGlobalIdIfType, requireIdentifier } from "./identifiers.js";
import { getResponseData } from "./responseUtils.js";
/**
 * Determine whether a dataset identifier is already a Phoenix Relay GlobalID.
 */
export function isPhoenixDatasetId(identifier) {
  const relayId = getRelayGlobalIdIfType({
    identifier,
    expectedTypeName: "Dataset",
  });
  return relayId !== null;
}
/**
 * Resolve a dataset name or Relay GlobalID to the dataset's canonical ID.
 *
 * When `datasetId` is provided and is a valid Relay GlobalID, it is returned
 * directly without an API call. Otherwise `datasetName` is looked up via the
 * datasets list endpoint.
 */
export async function resolveDatasetId({ client, datasetId, datasetName }) {
  // Prefer datasetId when provided
  if (datasetId) {
    const normalizedId = requireIdentifier({
      identifier: datasetId,
      label: "datasetId",
    });
    const relayId = getRelayGlobalIdIfType({
      identifier: normalizedId,
      expectedTypeName: "Dataset",
    });
    if (relayId) {
      return relayId;
    }
    // datasetId might be a name if caller used the wrong field — fall through
  }
  const candidateName = datasetName || datasetId;
  if (!candidateName?.trim()) {
    throw new Error("datasetName or datasetId is required");
  }
  const normalizedName = requireIdentifier({
    identifier: candidateName,
    label: "datasetName",
  });
  const response = await client.GET("/v1/datasets", {
    params: {
      query: {
        name: normalizedName,
        limit: 1,
      },
    },
  });
  const payload = getResponseData({
    response,
    errorPrefix: `Failed to resolve dataset "${normalizedName}"`,
  });
  const [match] = payload.data;
  if (!match) {
    throw new Error(`Dataset not found: "${normalizedName}"`);
  }
  return match.id;
}
|