npm - @gaffer-sh/mcp - Versions diffs - 0.6.2 → 0.7.0 - Mend

@gaffer-sh/mcp 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -10,7 +10,6 @@ const REQUEST_TIMEOUT_MS = 3e4;
 const MAX_RETRIES = 3;
 const INITIAL_RETRY_DELAY_MS = 1e3;
 const RETRYABLE_STATUS_CODES = [
-	401,
 	429,
 	500,
 	502,
@@ -30,7 +29,8 @@ function sleep(ms) {
 */
 function detectTokenType(token) {
 	if (token.startsWith("gaf_")) return "user";
-	return "project";
+	if (token.startsWith("gfr_")) return "project";
+	throw new Error(`Unrecognized API key format. Expected a user API key (gaf_...) or project token (gfr_...). Got: "${token.substring(0, 4)}...". Check your GAFFER_API_KEY environment variable.`);
 }
 /**
 * Gaffer API v1 client for MCP server
@@ -46,7 +46,7 @@ var GafferApiClient = class GafferApiClient {
 	apiKey;
 	baseUrl;
 	tokenType;
-	resolvedProjectId = null;
+	resolveProjectIdPromise = null;
 	constructor(config) {
 		this.apiKey = config.apiKey;
 		this.baseUrl = config.baseUrl.replace(/\/$/, "");
@@ -74,15 +74,21 @@ var GafferApiClient = class GafferApiClient {
 	}
 	/**
 	* Resolve the project ID for the current token.
-	* For project tokens, fetches from /project on first call and caches.
+	* For project tokens, fetches from /project on first call and caches the Promise
+	* to deduplicate concurrent calls.
 	* For user tokens, requires explicit projectId.
 	*/
 	async resolveProjectId(projectId) {
 		if (projectId) return projectId;
 		if (this.isUserToken()) throw new Error("projectId is required when using a user API Key");
-		if (this.resolvedProjectId) return this.resolvedProjectId;
-		this.resolvedProjectId = (await this.request("/project")).project.id;
-		return this.resolvedProjectId;
+		if (!this.resolveProjectIdPromise) this.resolveProjectIdPromise = this.request("/project").then((response) => {
+			if (!response?.project?.id) throw new Error("Failed to resolve project ID from token: unexpected response from /project endpoint. Ensure your project token (gfr_) is valid and the project still exists.");
+			return response.project.id;
+		}).catch((error) => {
+			this.resolveProjectIdPromise = null;
+			throw error;
+		});
+		return this.resolveProjectIdPromise;
 	}
 	/**
 	* Make authenticated request to Gaffer API with retry logic
@@ -201,7 +207,9 @@ var GafferApiClient = class GafferApiClient {
 		});
 	}
 	/**
-	* Get report files for a test run
+	* Get report files for a test run.
+	* User-only: the /user/test-runs/:id/report route has no project-scoped equivalent,
+	* so project tokens cannot access raw report downloads.
 	*/
 	async getReport(testRunId) {
 		if (!this.isUserToken()) throw new Error("getReport requires a user API Key (gaf_). Project tokens (gfr_) cannot access reports via API.");
@@ -314,8 +322,120 @@ var GafferApiClient = class GafferApiClient {
 		const projectId = await this.resolveProjectId(options.projectId);
 		return this.request(`/user/projects/${projectId}/upload-sessions/${options.sessionId}`);
 	}
+	/**
+	* Search across test failures by error message, stack trace, or test name
+	*/
+	async searchFailures(options) {
+		if (!options.query) throw new Error("query is required");
+		const projectId = await this.resolveProjectId(options.projectId);
+		return this.request(`/user/projects/${projectId}/search-failures`, {
+			query: options.query,
+			...options.searchIn && { searchIn: options.searchIn },
+			...options.days && { days: options.days },
+			...options.branch && { branch: options.branch },
+			...options.limit && { limit: options.limit }
+		});
+	}
 };
+//#endregion
+//#region src/codemode/executor.ts
+/**
+* Patterns blocked from user code as a basic guard.
+* validateCode matches each entry as a plain substring, so code that merely
+* contains one of these words (for example an identifier named `processing`,
+* which contains "process") is rejected as well.
+* This is NOT a sandbox — determined users can bypass these checks via
+* string concatenation, bracket notation, or constructor access.
+* The real security boundary is the API layer (read-only, user's own token).
+*/
+const BLOCKED_PATTERNS = [
+	"globalThis",
+	"process",
+	"require(",
+	"import ",
+	"import(",
+	"eval(",
+	"new Function",
+	"Function(",
+	"Buffer",
+	"__dirname",
+	"__filename",
+	".constructor",
+	"Reflect"
+];
+/** Maximum API calls per execution; executeCode counts every codemode namespace call against it */
+const MAX_API_CALLS = 20;
+/** Execution timeout in milliseconds (3e4 = 30 seconds), raced against the user code in executeCode */
+const EXECUTION_TIMEOUT_MS = 3e4;
+/**
+* Validate code doesn't contain blocked patterns.
+* Returns the first blocked pattern found, or null if safe.
+*/
+function validateCode(code) {
+	for (const pattern of BLOCKED_PATTERNS) if (code.includes(pattern)) return pattern;
+	return null;
+}
+/**
+* Execute user-provided JavaScript code with access to the codemode namespace.
+*
+* Uses AsyncFunction constructor to run code in an async context.
+* The namespace object is injected as `codemode` — all API calls go through it.
+*
+* Security notes:
+* - Not a true sandbox (no vm2/isolated-vm) — same pattern as Cloudflare code mode
+* - Blocked patterns prevent obvious escape hatches
+* - API call counting prevents resource exhaustion
+* - Timeout prevents infinite loops
+* - The real security boundary is the API itself (read-only, user's own token)
+*/
+async function executeCode(code, namespace) {
+	const blocked = validateCode(code);
+	if (blocked) throw new Error(`Blocked pattern detected: "${blocked}". Code must not use ${blocked}.`);
+	const logs = [];
+	const start = Date.now();
+	const serialize = (a) => {
+		if (typeof a !== "object" || a === null) return String(a);
+		try {
+			return JSON.stringify(a);
+		} catch {
+			return String(a);
+		}
+	};
+	const safeConsole = {
+		log: (...args) => logs.push(args.map(serialize).join(" ")),
+		warn: (...args) => logs.push(`[warn] ${args.map(serialize).join(" ")}`),
+		error: (...args) => logs.push(`[error] ${args.map(serialize).join(" ")}`)
+	};
+	let callCount = 0;
+	const countedNamespace = {};
+	for (const [name, fn] of Object.entries(namespace)) countedNamespace[name] = async (...args) => {
+		callCount++;
+		if (callCount > MAX_API_CALLS) throw new Error(`API call limit exceeded (max ${MAX_API_CALLS} calls per execution)`);
+		return fn(...args);
+	};
+	const AsyncFunction = Object.getPrototypeOf(async () => {}).constructor;
+	const fn = new AsyncFunction("codemode", "console", code);
+	let timeoutId;
+	const resultPromise = fn(countedNamespace, safeConsole);
+	const timeoutPromise = new Promise((_, reject) => {
+		timeoutId = setTimeout(() => reject(/* @__PURE__ */ new Error(`Execution timed out after ${EXECUTION_TIMEOUT_MS}ms`)), EXECUTION_TIMEOUT_MS);
+	});
+	try {
+		return {
+			result: await Promise.race([resultPromise, timeoutPromise]),
+			logs,
+			durationMs: Date.now() - start
+		};
+	} catch (error) {
+		const durationMs = Date.now() - start;
+		const message = error instanceof Error ? error.message : String(error);
+		const enrichedError = new Error(message, { cause: error });
+		enrichedError.logs = logs;
+		enrichedError.durationMs = durationMs;
+		throw enrichedError;
+	} finally {
+		clearTimeout(timeoutId);
+	}
+}
 //#endregion
 //#region src/tools/compare-test-metrics.ts
 /**
@@ -397,7 +517,7 @@ const compareTestMetricsMetadata = {
 Useful for measuring the impact of code changes on test performance or reliability.
 Parameters:
-- projectId (required): Project ID
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - testName (required): The test name to compare (short name or full name)
 - Option 1 - Compare by commit:
   - beforeCommit: Commit SHA for "before" measurement
@@ -480,7 +600,7 @@ areas in your codebase that need attention. Files are ranked by a "risk score"
 calculated as: (100 - coverage%) × failureCount.
 Parameters:
-- projectId: The project to analyze (required)
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - days: Analysis period for test failures (default: 30)
 - coverageThreshold: Include files below this coverage % (default: 80)
@@ -554,7 +674,7 @@ const getCoverageForFileMetadata = {
 	description: `Get coverage metrics for a specific file or files matching a path pattern.
 Parameters:
-- projectId: The project to query (required)
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - filePath: File path to search for (exact or partial match)
 Returns:
@@ -693,7 +813,7 @@ const getFailureClustersMetadata = {
 	description: `Group failed tests by root cause using error message similarity.
 Parameters:
-- projectId (required): The project ID
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - testRunId (required): The test run ID to analyze
 Returns:
@@ -901,7 +1021,7 @@ Returns a signed URL that can be opened directly in a browser without requiring
 the user to log in. The URL expires after 30 minutes for security.
 Parameters:
-- projectId: The project the test run belongs to (required)
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - testRunId: The test run to view (required)
 - filename: Specific file to open (optional, defaults to index.html)
@@ -1063,7 +1183,7 @@ const getSlowestTestsMetadata = {
 	description: `Get the slowest tests in a project, sorted by P95 duration.
 Parameters:
-- projectId (required): Project ID to analyze
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - days (optional): Analysis period in days (default: 30, max: 365)
 - limit (optional): Max tests to return (default: 20, max: 100)
 - framework (optional): Filter by framework (e.g., "playwright", "vitest")
@@ -1257,7 +1377,7 @@ const getTestRunDetailsMetadata = {
 Parameters:
 - testRunId (required): The test run ID to get details for
-- projectId (required): Project ID the test run belongs to
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - status (optional): Filter by test status: "passed", "failed", or "skipped"
 - limit (optional): Max tests to return (default: 100, max: 500)
 - offset (optional): Pagination offset (default: 0)
@@ -1362,7 +1482,7 @@ Returns files sorted by coverage percentage (lowest first), filtered
 to only include files below a coverage threshold.
 Parameters:
-- projectId: The project to analyze (required)
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - maxCoverage: Include files with coverage at or below this % (default: 10)
 - limit: Maximum number of files to return (default: 20, max: 100)
@@ -1460,7 +1580,7 @@ const getUploadStatusMetadata = {
 Use this tool to answer "are my test results ready?" after pushing code.
 Parameters:
-- projectId (required): The project ID
+- projectId (optional): Project ID — required for user API keys, auto-resolved for project tokens
 - sessionId (optional): Specific upload session ID for detailed status
 - commitSha (optional): Filter by commit SHA to find uploads for a specific commit
 - branch (optional): Filter by branch name
@@ -1491,63 +1611,6 @@ Returns (detail mode):
 - coverageReports: Linked coverage report summaries (id, format)`
 };
-//#endregion
-//#region src/tools/list-projects.ts
-/**
-* Input schema for list_projects tool
-*/
-const listProjectsInputSchema = {
-	organizationId: z.string().optional().describe("Filter by organization ID (optional)"),
-	limit: z.number().int().min(1).max(100).optional().describe("Maximum number of projects to return (default: 50)")
-};
-/**
-* Output schema for list_projects tool
-*/
-const listProjectsOutputSchema = {
-	projects: z.array(z.object({
-		id: z.string(),
-		name: z.string(),
-		description: z.string().nullable().optional(),
-		organization: z.object({
-			id: z.string(),
-			name: z.string(),
-			slug: z.string()
-		})
-	})),
-	total: z.number()
-};
-/**
-* Execute list_projects tool
-*/
-async function executeListProjects(client, input) {
-	const response = await client.listProjects({
-		organizationId: input.organizationId,
-		limit: input.limit
-	});
-	return {
-		projects: response.projects.map((p) => ({
-			id: p.id,
-			name: p.name,
-			description: p.description,
-			organization: p.organization
-		})),
-		total: response.pagination.total
-	};
-}
-/**
-* Tool metadata
-*/
-const listProjectsMetadata = {
-	name: "list_projects",
-	title: "List Projects",
-	description: `List all projects you have access to.
-Returns a list of projects with their IDs, names, and organization info.
-Use this to find project IDs for other tools like get_project_health.
-Requires a user API Key (gaf_). Get one from Account Settings in the Gaffer dashboard.`
-};
 //#endregion
 //#region src/tools/list-test-runs.ts
 /**
@@ -1635,6 +1698,508 @@ Use cases:
 - "What's the status of tests on my feature branch?"`
 };
+//#endregion
+//#region src/tools/search-failures.ts
+/**
+* Input schema for search_failures tool.
+* A plain object of Zod validators keyed by field name (not a z.object);
+* FunctionRegistry.buildNamespace wraps it in z.object() before validating.
+* Only `query` is mandatory; every other field is optional.
+*/
+const searchFailuresInputSchema = {
+	projectId: z.string().optional().describe("Project ID. Required for user API keys (gaf_). Not needed for project tokens — omit and it resolves automatically."),
+	query: z.string().min(1).describe("Search query to match against failure messages, error stacks, or test names."),
+	searchIn: z.enum([
+		"errors",
+		"names",
+		"all"
+	]).optional().describe("Where to search: \"errors\" (error messages and stacks), \"names\" (test names), or \"all\" (default: \"all\")."),
+	days: z.number().int().min(1).max(365).optional().describe("Number of days to search back (default: 30)"),
+	branch: z.string().optional().describe("Filter to a specific branch"),
+	limit: z.number().int().min(1).max(100).optional().describe("Maximum number of matches to return (default: 20)")
+};
+/**
+* Output schema for search_failures tool.
+* Describes a list of matches plus the total count and the echoed query.
+* NOTE(review): the TOOLS entry for search_failures registers only metadata,
+* inputSchema and execute, and executeSearchFailures returns the client
+* response as-is — confirm whether this schema is still applied anywhere.
+*/
+const searchFailuresOutputSchema = {
+	matches: z.array(z.object({
+		testName: z.string(),
+		testRunId: z.string(),
+		branch: z.string().nullable(),
+		commitSha: z.string().nullable(),
+		errorMessage: z.string().nullable(),
+		errorStack: z.string().nullable(),
+		createdAt: z.string()
+	})),
+	total: z.number(),
+	query: z.string()
+};
+/**
+* Execute search_failures tool.
+* Thin pass-through: hands the input object unchanged to client.searchFailures
+* and returns its result without reshaping it.
+*/
+async function executeSearchFailures(client, input) {
+	return client.searchFailures(input);
+}
+/**
+* Tool metadata for search_failures.
+* `name` and `description` are what registerAllTools copies into the codemode
+* registry; the description text also ends up in generated declarations.
+*/
+const searchFailuresMetadata = {
+	name: "search_failures",
+	title: "Search Failures",
+	description: `Search across test failures by error message, stack trace, or test name.
+Use this to find specific failures across test runs — like grep for your test history.
+Examples:
+- "TypeError: Cannot read properties of undefined" → find all occurrences of this error
+- "timeout" → find timeout-related failures
+- "auth" with searchIn="names" → find failing auth tests
+Returns matching failures with test run context (branch, commit, timestamp) for investigation.`
+};
+//#endregion
+//#region src/codemode/register-tools.ts
+/**
+* Table of the tool functions exposed through the codemode registry.
+* Each row pairs a tool's metadata, Zod input schema and execute function with
+* a category and a keyword list. registerAllTools copies these fields into
+* registry.register(); FunctionRegistry.search scores queries against the
+* name, category, keywords and description.
+*/
+const TOOLS = [
+	{
+		metadata: getProjectHealthMetadata,
+		inputSchema: getProjectHealthInputSchema,
+		execute: executeGetProjectHealth,
+		category: "health",
+		keywords: [
+			"health",
+			"score",
+			"pass rate",
+			"trend",
+			"overview"
+		]
+	},
+	{
+		metadata: getTestHistoryMetadata,
+		inputSchema: getTestHistoryInputSchema,
+		execute: executeGetTestHistory,
+		category: "testing",
+		keywords: [
+			"history",
+			"pass",
+			"fail",
+			"stability",
+			"regression"
+		]
+	},
+	{
+		metadata: getFlakyTestsMetadata,
+		inputSchema: getFlakyTestsInputSchema,
+		execute: executeGetFlakyTests,
+		category: "testing",
+		keywords: [
+			"flaky",
+			"flip",
+			"inconsistent",
+			"non-deterministic"
+		]
+	},
+	{
+		metadata: listTestRunsMetadata,
+		inputSchema: listTestRunsInputSchema,
+		execute: executeListTestRuns,
+		category: "testing",
+		keywords: [
+			"runs",
+			"list",
+			"commit",
+			"branch",
+			"recent"
+		]
+	},
+	{
+		metadata: getReportMetadata,
+		inputSchema: getReportInputSchema,
+		execute: executeGetReport,
+		category: "reports",
+		keywords: [
+			"report",
+			"files",
+			"download",
+			"artifacts"
+		]
+	},
+	{
+		metadata: getSlowestTestsMetadata,
+		inputSchema: getSlowestTestsInputSchema,
+		execute: executeGetSlowestTests,
+		category: "testing",
+		keywords: [
+			"slow",
+			"performance",
+			"duration",
+			"p95",
+			"bottleneck"
+		]
+	},
+	{
+		metadata: getTestRunDetailsMetadata,
+		inputSchema: getTestRunDetailsInputSchema,
+		execute: executeGetTestRunDetails,
+		category: "testing",
+		keywords: [
+			"details",
+			"results",
+			"errors",
+			"stack traces",
+			"test cases"
+		]
+	},
+	{
+		metadata: getFailureClustersMetadata,
+		inputSchema: getFailureClustersInputSchema,
+		execute: executeGetFailureClusters,
+		category: "testing",
+		keywords: [
+			"failure",
+			"clusters",
+			"root cause",
+			"error grouping"
+		]
+	},
+	{
+		metadata: compareTestMetricsMetadata,
+		inputSchema: compareTestMetricsInputSchema,
+		execute: executeCompareTestMetrics,
+		category: "testing",
+		keywords: [
+			"compare",
+			"before",
+			"after",
+			"regression",
+			"delta"
+		]
+	},
+	{
+		metadata: getCoverageSummaryMetadata,
+		inputSchema: getCoverageSummaryInputSchema,
+		execute: executeGetCoverageSummary,
+		category: "coverage",
+		keywords: [
+			"coverage",
+			"summary",
+			"lines",
+			"branches",
+			"functions"
+		]
+	},
+	{
+		metadata: getCoverageForFileMetadata,
+		inputSchema: getCoverageForFileInputSchema,
+		execute: executeGetCoverageForFile,
+		category: "coverage",
+		keywords: [
+			"coverage",
+			"file",
+			"path",
+			"lines",
+			"branches"
+		]
+	},
+	{
+		metadata: findUncoveredFailureAreasMetadata,
+		inputSchema: findUncoveredFailureAreasInputSchema,
+		execute: executeFindUncoveredFailureAreas,
+		category: "coverage",
+		keywords: [
+			"risk",
+			"uncovered",
+			"failures",
+			"low coverage"
+		]
+	},
+	{
+		metadata: getUntestedFilesMetadata,
+		inputSchema: getUntestedFilesInputSchema,
+		execute: executeGetUntestedFiles,
+		category: "coverage",
+		keywords: [
+			"untested",
+			"zero coverage",
+			"missing tests"
+		]
+	},
+	{
+		metadata: getReportBrowserUrlMetadata,
+		inputSchema: getReportBrowserUrlInputSchema,
+		execute: executeGetReportBrowserUrl,
+		category: "reports",
+		keywords: [
+			"browser",
+			"url",
+			"view",
+			"report",
+			"signed"
+		]
+	},
+	{
+		metadata: getUploadStatusMetadata,
+		inputSchema: getUploadStatusInputSchema,
+		execute: executeGetUploadStatus,
+		category: "uploads",
+		keywords: [
+			"upload",
+			"status",
+			"processing",
+			"CI",
+			"ready"
+		]
+	},
+	{
+		metadata: searchFailuresMetadata,
+		inputSchema: searchFailuresInputSchema,
+		execute: executeSearchFailures,
+		category: "testing",
+		keywords: [
+			"search",
+			"failure",
+			"error message",
+			"grep",
+			"find"
+		]
+	}
+];
+/**
+* Register all tool functions in the codemode registry.
+*/
+function registerAllTools(registry) {
+	for (const tool of TOOLS) registry.register({
+		name: tool.metadata.name,
+		description: tool.metadata.description,
+		category: tool.category,
+		keywords: tool.keywords,
+		inputSchema: tool.inputSchema,
+		execute: tool.execute
+	});
+}
+//#endregion
+//#region src/codemode/type-gen.ts
+/**
+* Convert a Zod schema to a TypeScript type string.
+* Handles the subset of Zod types used in our tool schemas.
+*/
+function zodToTs(schema) {
+	if (schema instanceof z.ZodEffects) return zodToTs(schema.innerType());
+	if (schema instanceof z.ZodOptional) return `${zodToTs(schema.unwrap())} | undefined`;
+	if (schema instanceof z.ZodNullable) return `${zodToTs(schema.unwrap())} | null`;
+	if (schema instanceof z.ZodDefault) return zodToTs(schema.removeDefault());
+	if (schema instanceof z.ZodString) return "string";
+	if (schema instanceof z.ZodNumber) return "number";
+	if (schema instanceof z.ZodBoolean) return "boolean";
+	if (schema instanceof z.ZodEnum) return schema.options.map((v) => `'${v}'`).join(" | ");
+	if (schema instanceof z.ZodLiteral) {
+		const val = schema.value;
+		return typeof val === "string" ? `'${val}'` : String(val);
+	}
+	if (schema instanceof z.ZodArray) {
+		const inner = zodToTs(schema.element);
+		if (inner.includes("|")) return `(${inner})[]`;
+		return `${inner}[]`;
+	}
+	if (schema instanceof z.ZodObject) {
+		const shape = schema.shape;
+		const entries = Object.entries(shape);
+		if (entries.length === 0) return "{}";
+		return `{ ${entries.map(([key, fieldSchema]) => formatField(key, fieldSchema)).join("; ")} }`;
+	}
+	if (schema instanceof z.ZodRecord) return `Record<string, ${zodToTs(schema.valueSchema)}>`;
+	if (schema instanceof z.ZodUnion) return schema.options.map((o) => zodToTs(o)).join(" | ");
+	console.error(`[gaffer-mcp] zodToTs: unhandled Zod type "${schema.constructor.name}", falling back to "unknown"`);
+	return "unknown";
+}
+/**
+* Format a single field as "name?: type" (with ? for optionals, unwrapping the inner type).
+*/
+function formatField(key, schema) {
+	const isOptional = schema instanceof z.ZodOptional;
+	return `${key}${isOptional ? "?" : ""}: ${isOptional ? zodToTs(schema.unwrap()) : zodToTs(schema)}`;
+}
+/**
+* Generate a TypeScript function declaration from a function name,
+* description, and Zod input schema (object shape).
+*/
+function generateDeclaration(name, description, inputSchema) {
+	const entries = Object.entries(inputSchema);
+	if (entries.length === 0) return `/** ${description} */\n${name}(): Promise<any>`;
+	return `/** ${description} */\n${name}(input: { ${entries.map(([key, schema]) => formatField(key, schema)).join("; ")} }): Promise<any>`;
+}
+//#endregion
+//#region src/codemode/registry.ts
+/**
+* Registry of codemode functions.
+* Wraps existing tool execute functions with metadata for discovery and namespace building.
+*/
+var FunctionRegistry = class {
+	entries = /* @__PURE__ */ new Map();
+	/**
+	* Register a function in the registry
+	*/
+	register(entry) {
+		this.entries.set(entry.name, entry);
+	}
+	/**
+	* Get all registered function entries
+	*/
+	getAll() {
+		return Array.from(this.entries.values());
+	}
+	/**
+	* Get a single entry by name
+	*/
+	get(name) {
+		return this.entries.get(name);
+	}
+	/**
+	* Build the namespace object that gets injected into the executor.
+	* Each function validates input via Zod then calls the tool's execute function.
+	*/
+	buildNamespace(client) {
+		const namespace = {};
+		for (const entry of this.entries.values()) namespace[entry.name] = async (input = {}) => {
+			const result = z.object(entry.inputSchema).safeParse(input);
+			if (!result.success) {
+				const issues = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
+				throw new Error(`Invalid input for ${entry.name}: ${issues}`);
+			}
+			try {
+				return await entry.execute(client, result.data);
+			} catch (error) {
+				const message = error instanceof Error ? error.message : String(error);
+				throw new Error(`${entry.name} failed: ${message}`, { cause: error });
+			}
+		};
+		return namespace;
+	}
+	/**
+	* Generate TypeScript declarations for all registered functions.
+	* Used in the execute_code tool description so the LLM knows available functions.
+	*/
+	generateAllDeclarations() {
+		return this.getAll().map((entry) => generateDeclaration(entry.name, entry.description, entry.inputSchema)).join("\n\n");
+	}
+	/**
+	* Generate a declaration for a single function
+	*/
+	generateDeclaration(name) {
+		const entry = this.entries.get(name);
+		if (!entry) return null;
+		return generateDeclaration(entry.name, entry.description, entry.inputSchema);
+	}
+	/**
+	* Search for functions matching a query.
+	* Scores: name match (10) > category match (5) > keyword match (3) > description match (1)
+	*/
+	search(query) {
+		if (!query.trim()) return this.listAll();
+		const terms = query.toLowerCase().split(/\s+/);
+		const scored = [];
+		for (const entry of this.entries.values()) {
+			let score = 0;
+			const nameLower = entry.name.toLowerCase();
+			const categoryLower = entry.category.toLowerCase();
+			const descLower = entry.description.toLowerCase();
+			const keywordsLower = entry.keywords.map((k) => k.toLowerCase());
+			for (const term of terms) {
+				if (nameLower.includes(term)) score += 10;
+				if (categoryLower.includes(term)) score += 5;
+				if (keywordsLower.some((k) => k.includes(term))) score += 3;
+				if (descLower.includes(term)) score += 1;
+			}
+			if (score > 0) scored.push({
+				entry,
+				score
+			});
+		}
+		scored.sort((a, b) => b.score - a.score);
+		return scored.map(({ entry }) => this.toSearchResult(entry));
+	}
+	/**
+	* List all functions (used when search query is empty)
+	*/
+	listAll() {
+		return Array.from(this.entries.values()).map((entry) => this.toSearchResult(entry));
+	}
+	toSearchResult(entry) {
+		return {
+			name: entry.name,
+			description: entry.description,
+			category: entry.category,
+			declaration: generateDeclaration(entry.name, entry.description, entry.inputSchema)
+		};
+	}
+};
+//#endregion
+//#region src/codemode/search.ts
+const searchToolsInputSchema = { query: z.string().optional().describe("Search query to find relevant functions. Leave empty to list all available functions.") };
+/**
+* Execute search_tools: find functions by keyword matching
+*/
+function executeSearchTools(registry, input) {
+	return { functions: input.query ? registry.search(input.query) : registry.listAll() };
+}
+//#endregion
+//#region src/tools/list-projects.ts
+/**
+* Input schema for list_projects tool.
+* Both fields are optional; executeListProjects forwards them unchanged to
+* client.listProjects.
+*/
+const listProjectsInputSchema = {
+	organizationId: z.string().optional().describe("Filter by organization ID (optional)"),
+	limit: z.number().int().min(1).max(100).optional().describe("Maximum number of projects to return (default: 50)")
+};
+/**
+* Output schema for list_projects tool.
+* Mirrors the object built by executeListProjects: the projects trimmed to
+* id, name, description and organization, plus the total count.
+*/
+const listProjectsOutputSchema = {
+	projects: z.array(z.object({
+		id: z.string(),
+		name: z.string(),
+		description: z.string().nullable().optional(),
+		organization: z.object({
+			id: z.string(),
+			name: z.string(),
+			slug: z.string()
+		})
+	})),
+	total: z.number()
+};
+/**
+* Execute list_projects tool
+*/
+async function executeListProjects(client, input) {
+	const response = await client.listProjects({
+		organizationId: input.organizationId,
+		limit: input.limit
+	});
+	return {
+		projects: response.projects.map((p) => ({
+			id: p.id,
+			name: p.name,
+			description: p.description,
+			organization: p.organization
+		})),
+		total: response.pagination.total
+	};
+}
+/**
+* Tool metadata for list_projects (name, display title and the description
+* text shown to the model; the description states the user API key requirement)
+*/
+const listProjectsMetadata = {
+	name: "list_projects",
+	title: "List Projects",
+	description: `List all projects you have access to.
+Returns a list of projects with their IDs, names, and organization info.
+Use this to find project IDs for other tools like get_project_health.
+Requires a user API Key (gaf_). Get one from Account Settings in the Gaffer dashboard.`
+};
 //#endregion
 //#region src/index.ts
 /**
@@ -1653,48 +2218,29 @@ function logError(toolName, error) {
 */
 function handleToolError(toolName, error) {
 	logError(toolName, error);
+	const message = error instanceof Error ? error.message : "Unknown error";
+	const logs = Array.isArray(error?.logs) ? error.logs : void 0;
+	const durationMs = typeof error?.durationMs === "number" ? error.durationMs : void 0;
+	let text = `Error: ${message}`;
+	if (logs?.length) text += `\n\nCaptured logs:\n${logs.join("\n")}`;
+	if (durationMs !== void 0) text += `\n\nDuration: ${durationMs}ms`;
 	return {
 		content: [{
 			type: "text",
-			text: `Error: ${error instanceof Error ? error.message : "Unknown error"}`
+			text
 		}],
 		isError: true
 	};
 }
 /**
-* Register a tool with the MCP server using a consistent pattern.
-* Reduces boilerplate by handling error wrapping and response formatting.
-*/
-function registerTool(server, client, tool) {
-	server.registerTool(tool.metadata.name, {
-		title: tool.metadata.title,
-		description: tool.metadata.description,
-		inputSchema: tool.inputSchema,
-		outputSchema: tool.outputSchema
-	}, async (input) => {
-		try {
-			const output = await tool.execute(client, input);
-			return {
-				content: [{
-					type: "text",
-					text: JSON.stringify(output, null, 2)
-				}],
-				structuredContent: output
-			};
-		} catch (error) {
-			return handleToolError(tool.metadata.name, error);
-		}
-	});
-}
-/**
-* Gaffer MCP Server
+* Gaffer MCP Server — Code Mode
 *
-* Provides AI assistants with access to test history and health metrics.
+* Instead of individual tools, exposes 3 tools:
+* - execute_code: Run JavaScript that calls Gaffer API functions
+* - search_tools: Find available functions by keyword
+* - list_projects: List projects (user tokens only)
 *
-* Supports two authentication modes:
-* 1. User API Keys (gaf_) - Read-only access to all user's projects
-*    Set via GAFFER_API_KEY environment variable
-* 2. Project Upload Tokens (gfr_) - Legacy, single project access
+* This follows Cloudflare's "code mode" pattern for MCP servers.
 */
 async function main() {
 	if (!process.env.GAFFER_API_KEY) {
@@ -1711,169 +2257,153 @@ async function main() {
 		process.exit(1);
 	}
 	const client = GafferApiClient.fromEnv();
+	const registry = new FunctionRegistry();
+	registerAllTools(registry);
+	const namespace = registry.buildNamespace(client);
+	const declarations = registry.generateAllDeclarations();
 	const server = new McpServer({
 		name: "gaffer",
-		version: "0.1.0"
-	}, { instructions: `Gaffer provides test analytics and coverage data for your projects.
+		version: "0.7.0"
+	}, { instructions: `Gaffer provides test analytics and coverage data. This server uses **code mode** — instead of individual tools, write JavaScript that calls functions on the \`codemode\` namespace.
 ## Authentication
-${client.isUserToken() ? "You have access to multiple projects. Use `list_projects` to find project IDs, then pass `projectId` to all tools." : "Your token is scoped to a single project. Do NOT call `list_projects`. Do NOT pass `projectId` — it resolves automatically. All tools are available."}
-## Coverage Analysis Best Practices
-When helping users improve test coverage, combine coverage data with codebase exploration:
-1. **Understand code utilization first**: Before targeting files by coverage percentage, explore which code is critical:
-   - Find entry points (route definitions, event handlers, exported functions)
-   - Find heavily-imported files (files imported by many others are high-value targets)
-   - Identify critical business logic (auth, payments, data mutations)
-2. **Prioritize by impact**: Low coverage alone doesn't indicate priority. Consider:
-   - High utilization + low coverage = highest priority
-   - Large files with 0% coverage have bigger impact than small files
-   - Use find_uncovered_failure_areas for files with both low coverage AND test failures
-3. **Use path-based queries**: The get_untested_files tool may return many files of a certain type (e.g., UI components). For targeted analysis, use get_coverage_for_file with path prefixes to focus on specific areas of the codebase.
-4. **Iterate**: Get baseline → identify targets → write tests → re-check coverage after CI uploads new results.
+${client.isUserToken() ? "You have a user API key with access to multiple projects. Use `list_projects` to find project IDs, then pass `projectId` to all codemode functions." : "Your token is scoped to a single project. Do NOT pass `projectId` — it resolves automatically."}
-## Finding Invisible Files
+## How to Use
-Coverage tools can only report on files that were loaded during test execution. Some files have 0% coverage but don't appear in reports at all - these are "invisible" files that were never imported.
+1. Use \`search_tools\` to find relevant functions (or check the execute_code description for all declarations)
+2. Use \`execute_code\` to run JavaScript that calls one or more functions
+3. Results are returned as JSON — you can chain multiple calls in a single execution
-To find invisible files:
-1. Use get_coverage_for_file with a path prefix (e.g., "server/") to see what Gaffer tracks
-2. Use the local Glob tool to list all source files in that path
-3. Compare the lists - files in local but NOT in Gaffer are invisible
-4. These files need tests that actually import them
+## Example
-Example: If get_coverage_for_file("server/api") returns user.ts, auth.ts, but Glob finds user.ts, auth.ts, billing.ts - then billing.ts is invisible and needs tests that import it.
-## Agentic CI / Test Failure Diagnosis
-When helping diagnose CI failures or fix failing tests:
-1. **Check flakiness first**: Use get_flaky_tests to identify non-deterministic tests.
-   Skip flaky tests unless the user specifically wants to stabilize them.
-2. **Get failure details**: Use get_test_run_details with status='failed'
-   to see error messages and stack traces for failing tests.
-3. **Group by root cause**: Use get_failure_clusters to see which failures
-   share the same underlying error — fix the root cause, not individual tests.
-4. **Check history**: Use get_test_history to understand if the failure is new
-   (regression) or recurring (existing bug).
-5. **Verify fixes**: After code changes, use compare_test_metrics to confirm
-   the specific test now passes.
-6. **Prioritize by risk**: Use find_uncovered_failure_areas to identify
-   which failing code has the lowest test coverage — fix those first.
-## Checking Upload Status
+\`\`\`javascript
+// Get project health, then check flaky tests if any exist
+const health = await codemode.get_project_health({ projectId: "proj_abc" });
+if (health.flakyTestCount > 0) {
+  const flaky = await codemode.get_flaky_tests({ projectId: "proj_abc" });
+  return { health, flaky };
+}
+return { health };
+\`\`\`
+## Tips
+- Use \`return\` to send data back — the return value becomes the tool result
+- Use \`console.log()\` for debug output (captured and returned alongside results)
+- You can make up to 20 API calls per execution
+- All functions are async — use \`await\`` });
+	server.registerTool("execute_code", {
+		title: "Execute Code",
+		description: `Execute JavaScript code that calls Gaffer API functions via the \`codemode\` namespace.
+Write async JavaScript — all functions are available as \`codemode.<function_name>(input)\`.
+Use \`return\` to send results back. Use \`console.log()\` for debug output.
+## Available Functions
+\`\`\`typescript
+${declarations}
+\`\`\`
+## Examples
+\`\`\`javascript
+// Single call
+const health = await codemode.get_project_health({ projectId: "proj_abc" });
+return health;
+\`\`\`
+\`\`\`javascript
+// Multi-step: get flaky tests and check history for each
+const flaky = await codemode.get_flaky_tests({ projectId: "proj_abc", limit: 5 });
+const histories = [];
+for (const test of flaky.flakyTests) {
+  const history = await codemode.get_test_history({ projectId: "proj_abc", testName: test.name, limit: 5 });
+  histories.push({ test: test.name, score: test.flakinessScore, history: history.summary });
+}
+return { flaky: flaky.summary, details: histories };
+\`\`\`
+\`\`\`javascript
+// Coverage analysis
+const summary = await codemode.get_coverage_summary({ projectId: "proj_abc" });
+const lowFiles = await codemode.get_coverage_for_file({ projectId: "proj_abc", maxCoverage: 50, limit: 10 });
+return { summary, lowCoverageFiles: lowFiles };
+\`\`\`
+## Constraints
+- Max 20 API calls per execution
+- 30s timeout
+- No access to Node.js globals (process, require, etc.)`,
+		inputSchema: { code: z.string().describe("JavaScript code to execute. Use `codemode.<function>()` to call API functions. Use `return` for results.") }
+	}, async (input) => {
+		try {
+			const result = await executeCode(input.code, namespace);
+			const output = {};
+			if (result.result !== void 0) output.result = result.result;
+			if (result.logs.length > 0) output.logs = result.logs;
+			output.durationMs = result.durationMs;
+			let text;
+			try {
+				text = JSON.stringify(output, null, 2);
+			} catch {
+				text = JSON.stringify({
+					error: "Result could not be serialized to JSON (possible circular reference). Use console.log() to inspect the result, or return a simpler object.",
+					logs: result.logs.length > 0 ? result.logs : void 0,
+					durationMs: result.durationMs
+				});
+			}
+			return { content: [{
+				type: "text",
+				text
+			}] };
+		} catch (error) {
+			return handleToolError("execute_code", error);
+		}
+	});
+	server.registerTool("search_tools", {
+		title: "Search Tools",
+		description: `Search for available Gaffer API functions by keyword.
-When an agent needs to know if CI results are ready:
+Returns matching functions with their TypeScript declarations so you can use them with execute_code.
-1. Use get_upload_status with commitSha or branch to find upload sessions
-2. Check processingStatus: "completed" means results are ready, "processing" means wait
-3. Once completed, use the linked testRunIds to get test results` });
-	registerTool(server, client, {
-		metadata: getProjectHealthMetadata,
-		inputSchema: getProjectHealthInputSchema,
-		outputSchema: getProjectHealthOutputSchema,
-		execute: executeGetProjectHealth
-	});
-	registerTool(server, client, {
-		metadata: getTestHistoryMetadata,
-		inputSchema: getTestHistoryInputSchema,
-		outputSchema: getTestHistoryOutputSchema,
-		execute: executeGetTestHistory
-	});
-	registerTool(server, client, {
-		metadata: getFlakyTestsMetadata,
-		inputSchema: getFlakyTestsInputSchema,
-		outputSchema: getFlakyTestsOutputSchema,
-		execute: executeGetFlakyTests
-	});
-	registerTool(server, client, {
-		metadata: listTestRunsMetadata,
-		inputSchema: listTestRunsInputSchema,
-		outputSchema: listTestRunsOutputSchema,
-		execute: executeListTestRuns
+Examples:
+- "coverage" → coverage-related functions
+- "flaky" → flaky test detection
+- "" (empty) → list all available functions`,
+		inputSchema: searchToolsInputSchema
+	}, async (input) => {
+		try {
+			const result = executeSearchTools(registry, input);
+			return { content: [{
+				type: "text",
+				text: JSON.stringify(result, null, 2)
+			}] };
+		} catch (error) {
+			return handleToolError("search_tools", error);
+		}
 	});
-	if (client.isUserToken()) registerTool(server, client, {
-		metadata: listProjectsMetadata,
+	if (client.isUserToken()) server.registerTool(listProjectsMetadata.name, {
+		title: listProjectsMetadata.title,
+		description: listProjectsMetadata.description,
 		inputSchema: listProjectsInputSchema,
-		outputSchema: listProjectsOutputSchema,
-		execute: executeListProjects
-	});
-	registerTool(server, client, {
-		metadata: getReportMetadata,
-		inputSchema: getReportInputSchema,
-		outputSchema: getReportOutputSchema,
-		execute: executeGetReport
-	});
-	registerTool(server, client, {
-		metadata: getSlowestTestsMetadata,
-		inputSchema: getSlowestTestsInputSchema,
-		outputSchema: getSlowestTestsOutputSchema,
-		execute: executeGetSlowestTests
-	});
-	registerTool(server, client, {
-		metadata: getTestRunDetailsMetadata,
-		inputSchema: getTestRunDetailsInputSchema,
-		outputSchema: getTestRunDetailsOutputSchema,
-		execute: executeGetTestRunDetails
-	});
-	registerTool(server, client, {
-		metadata: getFailureClustersMetadata,
-		inputSchema: getFailureClustersInputSchema,
-		outputSchema: getFailureClustersOutputSchema,
-		execute: executeGetFailureClusters
-	});
-	registerTool(server, client, {
-		metadata: compareTestMetricsMetadata,
-		inputSchema: compareTestMetricsInputSchema,
-		outputSchema: compareTestMetricsOutputSchema,
-		execute: executeCompareTestMetrics
-	});
-	registerTool(server, client, {
-		metadata: getCoverageSummaryMetadata,
-		inputSchema: getCoverageSummaryInputSchema,
-		outputSchema: getCoverageSummaryOutputSchema,
-		execute: executeGetCoverageSummary
-	});
-	registerTool(server, client, {
-		metadata: getCoverageForFileMetadata,
-		inputSchema: getCoverageForFileInputSchema,
-		outputSchema: getCoverageForFileOutputSchema,
-		execute: executeGetCoverageForFile
-	});
-	registerTool(server, client, {
-		metadata: findUncoveredFailureAreasMetadata,
-		inputSchema: findUncoveredFailureAreasInputSchema,
-		outputSchema: findUncoveredFailureAreasOutputSchema,
-		execute: executeFindUncoveredFailureAreas
-	});
-	registerTool(server, client, {
-		metadata: getUntestedFilesMetadata,
-		inputSchema: getUntestedFilesInputSchema,
-		outputSchema: getUntestedFilesOutputSchema,
-		execute: executeGetUntestedFiles
-	});
-	registerTool(server, client, {
-		metadata: getReportBrowserUrlMetadata,
-		inputSchema: getReportBrowserUrlInputSchema,
-		outputSchema: getReportBrowserUrlOutputSchema,
-		execute: executeGetReportBrowserUrl
-	});
-	registerTool(server, client, {
-		metadata: getUploadStatusMetadata,
-		inputSchema: getUploadStatusInputSchema,
-		outputSchema: getUploadStatusOutputSchema,
-		execute: executeGetUploadStatus
+		outputSchema: listProjectsOutputSchema
+	}, async (input) => {
+		try {
+			const output = await executeListProjects(client, input);
+			return {
+				content: [{
+					type: "text",
+					text: JSON.stringify(output, null, 2)
+				}],
+				structuredContent: output
+			};
+		} catch (error) {
+			return handleToolError(listProjectsMetadata.name, error);
+		}
 	});
 	const transport = new StdioServerTransport();
 	await server.connect(transport);