npm - @arizeai/phoenix-mcp - Versions diffs - 3.1.5 → 4.0.0 - Mend

@arizeai/phoenix-mcp 3.1.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +20 -2
package/build/annotationConfigTools.js +43 -0
package/build/client.js +22 -0
package/build/config.js +47 -0
package/build/constants.js +61 -0
package/build/datasetTools.js +123 -102
package/build/datasetUtils.js +59 -0
package/build/experimentTools.js +59 -78
package/build/identifiers.js +77 -0
package/build/index.js +16 -14
package/build/pagination.js +25 -0
package/build/projectTools.js +57 -19
package/build/projectUtils.js +14 -0
package/build/promptSchemas.js +26 -14
package/build/promptTools.js +184 -303
package/build/responseUtils.js +16 -0
package/build/sessionTools.js +126 -0
package/build/spanTools.js +92 -62
package/build/spanUtils.js +232 -0
package/build/supportTools.js +12 -9
package/build/toolResults.js +32 -0
package/build/traceTools.js +141 -0
package/build/traceUtils.js +57 -0
package/package.json +9 -6

package/build/experimentTools.js CHANGED Viewed

@@ -1,15 +1,23 @@
 import z from "zod";
+import { MAX_LIST_LIMIT } from "./constants.js";
+import { resolveDatasetId } from "./datasetUtils.js";
+import { fetchAllPages } from "./pagination.js";
+import { getResponseData } from "./responseUtils.js";
+import { jsonResponse } from "./toolResults.js";
+// ---------------------------------------------------------------------------
+// Tool descriptions
+// ---------------------------------------------------------------------------
 const LIST_EXPERIMENTS_DESCRIPTION = `Get a list of all the experiments run on a given dataset.
-Experiments are collections of experiment runs, each experiment run corresponds to a single
-dataset example. The dataset example is passed to an implied \`task\` which in turn
+Experiments are collections of experiment runs, each experiment run corresponds to a single
+dataset example. The dataset example is passed to an implied \`task\` which in turn
 produces an output.
 Example usage:
   Show me all the experiments I've run on dataset RGF0YXNldDox
 Expected return:
-  Array of experiment objects with metadata.
+  Array of experiment objects with metadata.
   Example: [
     {
       "id": "experimentid1234",
@@ -24,101 +32,74 @@ Expected return:
   ]`;
 const GET_EXPERIMENT_DESCRIPTION = `Get an experiment by its ID.
-The tool returns experiment metadata in the first content block and a JSON object with the
-experiment data in the second. The experiment data contains both the results of each
-experiment run and the annotations made by an evaluator to score or label the results,
-for example, comparing the output of an experiment run to the expected output from the
+The tool returns experiment metadata in the first content block and a JSON object with the
+experiment data in the second. The experiment data contains both the results of each
+experiment run and the annotations made by an evaluator to score or label the results,
+for example, comparing the output of an experiment run to the expected output from the
 dataset example.
 Example usage:
   Show me the experiment results for experiment RXhwZXJpbWVudDo4
 Expected return:
-  Object containing experiment metadata and results.
-  Example: {
-    "metadata": {
-      "id": "experimentid1234",
-      "dataset_id": "datasetid1234",
-      "dataset_version_id": "datasetversionid1234",
-      "repetitions": 1,
-      "metadata": {},
-      "project_name": "Experiment-abc123",
-      "created_at": "YYYY-MM-DDTHH:mm:ssZ",
-      "updated_at": "YYYY-MM-DDTHH:mm:ssZ"
-    },
-    "experimentResult": [
-      {
-        "example_id": "exampleid1234",
-        "repetition_number": 0,
-        "input": "Sample input text",
-        "reference_output": "Expected output text",
-        "output": "Actual output text",
-        "error": null,
-        "latency_ms": 1000,
-        "start_time": "2025-03-20T12:00:00Z",
-        "end_time": "2025-03-20T12:00:01Z",
-        "trace_id": "trace-123",
-        "prompt_token_count": 10,
-        "completion_token_count": 20,
-        "annotations": [
-          {
-            "name": "quality",
-            "annotator_kind": "HUMAN",
-            "label": "good",
-            "score": 0.9,
-            "explanation": "Output matches expected format",
-            "trace_id": "trace-456",
-            "error": null,
-            "metadata": {},
-            "start_time": "YYYY-MM-DDTHH:mm:ssZ",
-            "end_time": "YYYY-MM-DDTHH:mm:ssZ"
-          }
-        ]
-      }
-    ]
-  }`;
+  Object containing experiment metadata and results.`;
+// ---------------------------------------------------------------------------
+// Tool registration
+// ---------------------------------------------------------------------------
+/**
+ * Register experiment-related MCP tools on the given server.
+ */
 export const initializeExperimentTools = ({ client, server, }) => {
     server.tool("list-experiments-for-dataset", LIST_EXPERIMENTS_DESCRIPTION, {
-        dataset_id: z.string(),
-    }, async ({ dataset_id }) => {
-        const response = await client.GET("/v1/datasets/{dataset_id}/experiments", {
-            params: {
-                path: {
-                    dataset_id,
-                },
+        dataset_id: z.string().optional(),
+        dataset_name: z.string().optional(),
+        limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
+    }, async ({ dataset_id, dataset_name, limit = 100 }) => {
+        const resolvedDatasetId = await resolveDatasetId({
+            client,
+            datasetId: dataset_id,
+            datasetName: dataset_name,
+        });
+        const experiments = await fetchAllPages({
+            limit,
+            fetchPage: async (cursor, pageSize) => {
+                const response = await client.GET("/v1/datasets/{dataset_id}/experiments", {
+                    params: {
+                        path: { dataset_id: resolvedDatasetId },
+                        query: { cursor, limit: pageSize },
+                    },
+                });
+                const data = getResponseData({
+                    response,
+                    errorPrefix: `Failed to fetch experiments for dataset "${resolvedDatasetId}"`,
+                });
+                return { data: data.data, nextCursor: data.next_cursor || undefined };
             },
         });
-        return {
-            content: [
-                { type: "text", text: JSON.stringify(response.data?.data, null, 2) },
-            ],
-        };
+        return jsonResponse(experiments);
     });
     server.tool("get-experiment-by-id", GET_EXPERIMENT_DESCRIPTION, {
         experiment_id: z.string(),
     }, async ({ experiment_id }) => {
         const [experimentMetadataResponse, experimentDataResponse] = await Promise.all([
             client.GET("/v1/experiments/{experiment_id}", {
-                params: {
-                    path: {
-                        experiment_id,
-                    },
-                },
+                params: { path: { experiment_id } },
             }),
             client.GET("/v1/experiments/{experiment_id}/json", {
-                params: {
-                    path: {
-                        experiment_id,
-                    },
-                },
+                params: { path: { experiment_id } },
             }),
         ]);
-        const text = JSON.stringify({
-            metadata: experimentMetadataResponse.data?.data,
-            experimentResult: experimentDataResponse.data,
+        const metadata = getResponseData({
+            response: experimentMetadataResponse,
+            errorPrefix: `Failed to fetch experiment "${experiment_id}" metadata`,
+        });
+        const experimentResult = getResponseData({
+            response: experimentDataResponse,
+            errorPrefix: `Failed to fetch experiment "${experiment_id}" JSON`,
+        });
+        return jsonResponse({
+            metadata: metadata.data,
+            experimentResult,
         });
-        return {
-            content: [{ type: "text", text }],
-        };
     });
 };

package/build/identifiers.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Decode a base64 string to UTF-8 text, accepting the result only when the
+ * decoded text re-encodes to the same base64 string.
+ *
+ * The decoded text is re-encoded and compared with the input after trailing
+ * `=` padding has been stripped from both sides, so padded and unpadded
+ * spellings of the same value are treated alike. Any exception raised inside
+ * the `try` (for example a `value` that has no `.replace` method) is
+ * converted to `null` rather than propagated.
+ *
+ * Note: an empty string round-trips to itself, so `""` is returned as `""`
+ * and not as `null`; callers that need a non-empty result must check for it.
+ *
+ * @param value - Candidate base64 string.
+ * @returns The decoded UTF-8 text, or `null` when the round-trip comparison
+ *   fails or an exception is thrown.
+ */
+function decodeBase64(value) {
+    try {
+        const decoded = Buffer.from(value, "base64").toString("utf8");
+        // Round-trip check: re-encode the decoded text and compare it with the
+        // input (padding ignored); a mismatch means the input was not clean base64.
+        const reEncoded = Buffer.from(decoded, "utf8")
+            .toString("base64")
+            .replace(/=+$/, "");
+        const unpadded = value.replace(/=+$/, "");
+        return reEncoded === unpadded ? decoded : null;
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Normalize an identifier by removing leading and trailing whitespace.
+ *
+ * There is no null/undefined guard: `.trim()` is called directly on the
+ * argument, so it must already be a string.
+ *
+ * @param identifier - Raw identifier string.
+ * @returns The identifier with surrounding whitespace removed.
+ */
+function getNormalizedIdentifier(identifier) {
+    return identifier.trim();
+}
+/**
+ * Require a non-empty identifier value and return it trimmed.
+ *
+ * The identifier is trimmed first, so a whitespace-only string is rejected
+ * in the same way as an empty one.
+ *
+ * @param options.identifier - The raw identifier string to validate.
+ * @param options.label - A human-readable label used in the error message
+ *   (e.g. `"projectIdentifier"`); the message reads `"<label> is required"`.
+ * @returns The identifier with surrounding whitespace removed.
+ * @throws When the identifier is empty or whitespace-only.
+ */
+export function requireIdentifier({ identifier, label, }) {
+    const normalizedIdentifier = getNormalizedIdentifier(identifier);
+    if (!normalizedIdentifier) {
+        throw new Error(`${label} is required`);
+    }
+    return normalizedIdentifier;
+}
+/**
+ * Parse a Relay GlobalID into its `TypeName:nodeId` components.
+ *
+ * Steps: trim the input, base64-decode it via `decodeBase64`, then split the
+ * decoded text at the FIRST `:`. Because only the first colon is used as the
+ * separator, any further colons remain part of `nodeId`.
+ *
+ * `null` is returned when:
+ * - the trimmed input is empty,
+ * - decoding fails or yields an empty string,
+ * - there is no `:` or it is the first character (empty type name), or
+ * - the `:` is the last character (empty node id).
+ *
+ * @param identifier - Candidate Relay GlobalID (base64 text).
+ * @returns `{ typeName, nodeId }` with both parts non-empty, or `null` if the
+ *   string is not a valid Relay GlobalID.
+ */
+export function parseRelayGlobalId(identifier) {
+    const normalizedIdentifier = getNormalizedIdentifier(identifier);
+    if (!normalizedIdentifier) {
+        return null;
+    }
+    const decodedIdentifier = decodeBase64(normalizedIdentifier);
+    if (!decodedIdentifier) {
+        return null;
+    }
+    const separatorIndex = decodedIdentifier.indexOf(":");
+    if (separatorIndex <= 0 || separatorIndex === decodedIdentifier.length - 1) {
+        return null;
+    }
+    return {
+        typeName: decodedIdentifier.slice(0, separatorIndex),
+        nodeId: decodedIdentifier.slice(separatorIndex + 1),
+    };
+}
+/**
+ * Return the normalized Relay GlobalID when it matches the expected type,
+ * or `null` otherwise.
+ *
+ * Useful for distinguishing a human-readable name from a Relay ID so that
+ * the correct API call path can be chosen.
+ *
+ * The value returned on success is the trimmed input itself (still base64
+ * encoded), not the decoded `TypeName:nodeId` text. The type comparison is
+ * an exact, case-sensitive `===` against the decoded type name. Input that
+ * does not parse as a Relay GlobalID also produces `null`.
+ *
+ * @param options.identifier - Candidate identifier (name or Relay GlobalID).
+ * @param options.expectedTypeName - Type name the decoded ID must carry.
+ * @returns The trimmed identifier when its decoded type matches, else `null`.
+ */
+export function getRelayGlobalIdIfType({ identifier, expectedTypeName, }) {
+    const normalizedIdentifier = getNormalizedIdentifier(identifier);
+    const relayGlobalId = parseRelayGlobalId(normalizedIdentifier);
+    return relayGlobalId?.typeName === expectedTypeName
+        ? normalizedIdentifier
+        : null;
+}

package/build/index.js CHANGED Viewed

@@ -1,40 +1,42 @@
 #!/usr/bin/env node
 /* eslint-disable no-console */
-import { createClient } from "@arizeai/phoenix-client";
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import minimist from "minimist";
+import { initializeAnnotationConfigTools } from "./annotationConfigTools.js";
+import { createPhoenixClient } from "./client.js";
+import { resolveConfig } from "./config.js";
 import { initializeDatasetTools } from "./datasetTools.js";
 import { initializeExperimentTools } from "./experimentTools.js";
 import { initializeProjectTools } from "./projectTools.js";
 import { initializePromptTools } from "./promptTools.js";
 import { initializeReadmeResources } from "./readmeResource.js";
+import { initializeSessionTools } from "./sessionTools.js";
 import { initializeSpanTools } from "./spanTools.js";
 import { initializeSupportTools } from "./supportTools.js";
+import { initializeTraceTools } from "./traceTools.js";
 const argv = minimist(process.argv.slice(2));
-const headers = argv.apiKey
-    ? {
-        Authorization: `Bearer ${argv.apiKey}`,
-        api_key: argv.apiKey, // For hosted phoenix
-    }
-    : {};
-// Initialize Phoenix client
-const client = createClient({
-    options: {
-        baseUrl: argv.baseUrl || "http://localhost:6006",
-        headers,
+const config = resolveConfig({
+    commandLineOptions: {
+        apiKey: argv.apiKey,
+        baseUrl: argv.baseUrl,
+        project: argv.project,
     },
 });
+const client = createPhoenixClient({ config });
 // Create server instance
 const server = new McpServer({
     name: "phoenix-mcp-server",
-    version: "1.0.0",
+    version: "1.1.0",
 });
 initializePromptTools({ client, server });
 initializeExperimentTools({ client, server });
 initializeDatasetTools({ client, server });
 initializeProjectTools({ client, server });
-initializeSpanTools({ client, server });
+initializeTraceTools({ client, server, defaultProject: config.project });
+initializeSpanTools({ client, server, defaultProject: config.project });
+initializeSessionTools({ client, server, defaultProject: config.project });
+initializeAnnotationConfigTools({ client, server });
 initializeSupportTools({ server });
 async function main() {
     // Initialize readme resources first

package/build/pagination.js ADDED Viewed

@@ -0,0 +1,25 @@
+import { DEFAULT_PAGE_SIZE } from "./constants.js";
+/**
+ * Fetch items across multiple API pages until the requested `limit` is reached
+ * or no more pages remain.
+ *
+ * Each call site supplies a `fetchPage` callback that encapsulates the
+ * endpoint-specific request and response parsing. The helper handles cursor
+ * propagation, page-size clamping, and limit enforcement.
+ *
+ * Behaviour worth knowing:
+ * - Pages are requested one at a time; each request is awaited before the
+ *   next cursor is used.
+ * - The page size passed to `fetchPage` is the smaller of the remaining item
+ *   count and `DEFAULT_PAGE_SIZE`.
+ * - The loop is a `do…while`, so `fetchPage` is always called at least once,
+ *   even when `limit` is zero or negative (the page size requested is then
+ *   non-positive and the final slice decides what is returned).
+ * - Pagination stops as soon as `nextCursor` is falsy or enough items have
+ *   been collected.
+ * - The final `slice` guards against a page returning more items than were
+ *   asked for.
+ * - Errors thrown by `fetchPage` are not caught here and propagate to the
+ *   caller.
+ *
+ * @param options.fetchPage - Callback that retrieves a single page given a cursor and page size.
+ *   It must resolve to `{ data, nextCursor }`, where `data` is iterable
+ *   (it is spread into the result) and `nextCursor` is falsy on the last page.
+ * @param options.limit - Maximum total items to collect across all pages.
+ * @param options.initialCursor - Optional cursor to resume pagination from.
+ * @returns Collected items, truncated to `limit`.
+ */
+export async function fetchAllPages({ fetchPage, limit, initialCursor, }) {
+    const items = [];
+    let cursor = initialCursor;
+    do {
+        const pageSize = Math.min(limit - items.length, DEFAULT_PAGE_SIZE);
+        const page = await fetchPage(cursor, pageSize);
+        items.push(...page.data);
+        cursor = page.nextCursor;
+    } while (cursor && items.length < limit);
+    return items.slice(0, limit);
+}

package/build/projectTools.js CHANGED Viewed

@@ -1,7 +1,14 @@
 import z from "zod";
+import { MAX_LIST_LIMIT } from "./constants.js";
+import { fetchAllPages } from "./pagination.js";
+import { getResponseData } from "./responseUtils.js";
+import { jsonResponse } from "./toolResults.js";
+// ---------------------------------------------------------------------------
+// Tool descriptions
+// ---------------------------------------------------------------------------
 const LIST_PROJECTS_DESCRIPTION = `Get a list of all projects.
-Projects are containers for organizing traces, spans, and other observability data.
+Projects are containers for organizing traces, spans, and other observability data.
 Each project has a unique name and can contain traces from different applications or experiments.
 Example usage:
@@ -16,33 +23,64 @@ Expected return:
       "description": "Default project for traces"
     },
     {
-      "id": "UHJvamVjdDoy",
+      "id": "UHJvamVjdDoy",
       "name": "my-experiment",
       "description": "Project for my ML experiment"
     }
   ]`;
+const GET_PROJECT_DESCRIPTION = `Get a project by name or ID.
+Example usage:
+  Show me the project "default"
+Expected return:
+  A single project object with metadata.`;
+// ---------------------------------------------------------------------------
+// Tool registration
+// ---------------------------------------------------------------------------
+/**
+ * Register project-related MCP tools on the given server.
+ */
 export const initializeProjectTools = ({ client, server, }) => {
     server.tool("list-projects", LIST_PROJECTS_DESCRIPTION, {
-        limit: z.number().min(1).max(100).default(100).optional(),
+        limit: z.number().min(1).max(MAX_LIST_LIMIT).default(100).optional(),
         cursor: z.string().optional(),
-        includeExperimentProjects: z.boolean().default(false).optional(),
-    }, async ({ limit = 100, cursor, includeExperimentProjects = false }) => {
-        const response = await client.GET("/v1/projects", {
+        include_experiment_projects: z.boolean().default(false).optional(),
+    }, async ({ limit = 100, cursor, include_experiment_projects = false }) => {
+        const projects = await fetchAllPages({
+            limit,
+            initialCursor: cursor,
+            fetchPage: async (pageCursor, pageSize) => {
+                const response = await client.GET("/v1/projects", {
+                    params: {
+                        query: {
+                            limit: pageSize,
+                            cursor: pageCursor,
+                            include_experiment_projects,
+                        },
+                    },
+                });
+                const data = getResponseData({
+                    response,
+                    errorPrefix: "Failed to fetch projects",
+                });
+                return { data: data.data, nextCursor: data.next_cursor || undefined };
+            },
+        });
+        return jsonResponse(projects);
+    });
+    server.tool("get-project", GET_PROJECT_DESCRIPTION, {
+        project_identifier: z.string(),
+    }, async ({ project_identifier }) => {
+        const response = await client.GET("/v1/projects/{project_identifier}", {
             params: {
-                query: {
-                    limit,
-                    cursor,
-                    include_experiment_projects: includeExperimentProjects,
-                },
+                path: { project_identifier },
             },
         });
-        return {
-            content: [
-                {
-                    type: "text",
-                    text: JSON.stringify(response.data?.data, null, 2),
-                },
-            ],
-        };
+        const project = getResponseData({
+            response,
+            errorPrefix: `Failed to fetch project "${project_identifier}"`,
+        }).data;
+        return jsonResponse(project);
     });
 };

package/build/projectUtils.js ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * Resolve the project identifier for a project-scoped MCP tool.
+ *
+ * Precedence:
+ * 1. Explicit `projectIdentifier` from the tool call
+ * 2. Configured default from `PHOENIX_PROJECT` env var or `--project` CLI flag
+ *
+ * Both candidates are trimmed before use, and the fallback uses `||`, so an
+ * explicit value that is empty or whitespace-only is treated as absent and
+ * the configured default is used instead. Either argument may be
+ * `undefined` or `null` (optional chaining guards the `.trim()` calls).
+ *
+ * @param options.projectIdentifier - Identifier supplied with the tool call, if any.
+ * @param options.defaultProjectIdentifier - Configured fallback identifier, if any.
+ * @returns The first non-empty trimmed identifier, in the precedence order above.
+ * @throws When neither value yields a non-empty identifier.
+ */
+export function resolveProjectIdentifier({ projectIdentifier, defaultProjectIdentifier, }) {
+    const resolved = projectIdentifier?.trim() || defaultProjectIdentifier?.trim();
+    if (!resolved) {
+        throw new Error("projectIdentifier is required. Pass projectIdentifier or configure PHOENIX_PROJECT/--project.");
+    }
+    return resolved;
+}

package/build/promptSchemas.js CHANGED Viewed

@@ -1,42 +1,54 @@
 import z from "zod";
+import { DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, } from "./constants.js";
 export const listPromptsSchema = z.object({
     limit: z.number().min(1).max(100).default(100),
 });
 export const getLatestPromptSchema = z.object({
     prompt_identifier: z.string(),
 });
+export const getPromptSchema = z.object({
+    prompt_identifier: z.string(),
+    tag: z.string().optional(),
+    version_id: z.string().optional(),
+});
 export const getPromptByIdentifierSchema = z.object({
     prompt_identifier: z.string(),
 });
 export const getPromptVersionSchema = z.object({
     prompt_version_id: z.string(),
 });
+/**
+ * Name transformation applied to prompt names:
+ * - lowercase
+ * - spaces → underscores
+ * - strip non-alphanumeric / non-underscore characters
+ */
+const promptNameSchema = z
+    .string()
+    .transform((val) => val
+    .toLowerCase()
+    .replace(/\s+/g, "_")
+    .replace(/[^\w_]/g, ""))
+    .refine((val) => val.length > 0, {
+    message: "Name cannot be empty after transformation",
+});
 export const createPromptSchema = z.object({
-    name: z
-        .string()
-        .transform((val) => val
-        .toLowerCase()
-        .replace(/\s+/g, "_") // Replace spaces with underscores
-        .replace(/[^\w_]/g, "") // Remove anything that's not alphanumeric or underscore
-    )
-        .refine((val) => val.length > 0, {
-        message: "Name cannot be empty after transformation",
-    }),
+    name: promptNameSchema,
     description: z.string().optional(),
     template: z.string(),
     model_provider: z
         .enum(["OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE"])
         .optional()
-        .default("OPENAI"),
-    model_name: z.string().optional().default("gpt-4"),
-    temperature: z.number().optional().default(0.7),
+        .default(DEFAULT_MODEL_PROVIDER),
+    model_name: z.string().optional().default(DEFAULT_MODEL_NAME),
+    temperature: z.number().optional().default(DEFAULT_TEMPERATURE),
 });
 export const updatePromptSchema = z.object({
     prompt_identifier: z.string(),
     name: z.string().optional(),
     description: z.string().optional(),
     template: z.string().optional(),
-    metadata: z.record(z.string(), z.any()).optional(),
+    metadata: z.record(z.string(), z.unknown()).optional(),
 });
 export const deletePromptSchema = z.object({
     prompt_identifier: z.string(),