@iris-eval/mcp-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +168 -0
  3. package/dist/config/defaults.d.ts +2 -0
  4. package/dist/config/defaults.js +40 -0
  5. package/dist/config/index.d.ts +11 -0
  6. package/dist/config/index.js +106 -0
  7. package/dist/dashboard/assets/index-BStyrSkE.js +127 -0
  8. package/dist/dashboard/assets/index-DsCtYyvh.css +1 -0
  9. package/dist/dashboard/index.html +13 -0
  10. package/dist/eval/engine.d.ts +8 -0
  11. package/dist/eval/engine.js +61 -0
  12. package/dist/eval/index.d.ts +2 -0
  13. package/dist/eval/index.js +2 -0
  14. package/dist/eval/rules/completeness.d.ts +6 -0
  15. package/dist/eval/rules/completeness.js +79 -0
  16. package/dist/eval/rules/cost.d.ts +4 -0
  17. package/dist/eval/rules/cost.js +44 -0
  18. package/dist/eval/rules/custom.d.ts +2 -0
  19. package/dist/eval/rules/custom.js +88 -0
  20. package/dist/eval/rules/index.d.ts +4 -0
  21. package/dist/eval/rules/index.js +15 -0
  22. package/dist/eval/rules/relevance.d.ts +5 -0
  23. package/dist/eval/rules/relevance.js +87 -0
  24. package/dist/eval/rules/safety.d.ts +5 -0
  25. package/dist/eval/rules/safety.js +81 -0
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.js +101 -0
  28. package/dist/middleware/auth.d.ts +3 -0
  29. package/dist/middleware/auth.js +24 -0
  30. package/dist/middleware/cors.d.ts +2 -0
  31. package/dist/middleware/cors.js +29 -0
  32. package/dist/middleware/error-handler.d.ts +3 -0
  33. package/dist/middleware/error-handler.js +19 -0
  34. package/dist/middleware/index.d.ts +4 -0
  35. package/dist/middleware/index.js +4 -0
  36. package/dist/middleware/rate-limit.d.ts +3 -0
  37. package/dist/middleware/rate-limit.js +19 -0
  38. package/dist/resources/dashboard-summary.d.ts +3 -0
  39. package/dist/resources/dashboard-summary.js +14 -0
  40. package/dist/resources/index.d.ts +3 -0
  41. package/dist/resources/index.js +6 -0
  42. package/dist/resources/trace-detail.d.ts +3 -0
  43. package/dist/resources/trace-detail.js +28 -0
  44. package/dist/server.d.ts +9 -0
  45. package/dist/server.js +14 -0
  46. package/dist/storage/index.d.ts +4 -0
  47. package/dist/storage/index.js +10 -0
  48. package/dist/storage/migrations/001-initial-schema.d.ts +3 -0
  49. package/dist/storage/migrations/001-initial-schema.js +57 -0
  50. package/dist/storage/migrations/index.d.ts +2 -0
  51. package/dist/storage/migrations/index.js +22 -0
  52. package/dist/storage/sqlite-adapter.d.ts +33 -0
  53. package/dist/storage/sqlite-adapter.js +232 -0
  54. package/dist/tools/evaluate-output.d.ts +4 -0
  55. package/dist/tools/evaluate-output.js +58 -0
  56. package/dist/tools/get-traces.d.ts +3 -0
  57. package/dist/tools/get-traces.js +53 -0
  58. package/dist/tools/index.d.ts +4 -0
  59. package/dist/tools/index.js +8 -0
  60. package/dist/tools/log-trace.d.ts +3 -0
  61. package/dist/tools/log-trace.js +80 -0
  62. package/dist/transport/http.d.ts +10 -0
  63. package/dist/transport/http.js +37 -0
  64. package/dist/transport/index.d.ts +3 -0
  65. package/dist/transport/index.js +2 -0
  66. package/dist/transport/stdio.d.ts +2 -0
  67. package/dist/transport/stdio.js +4 -0
  68. package/dist/types/config.d.ts +37 -0
  69. package/dist/types/config.js +1 -0
  70. package/dist/types/eval.d.ts +51 -0
  71. package/dist/types/eval.js +1 -0
  72. package/dist/types/index.d.ts +4 -0
  73. package/dist/types/index.js +1 -0
  74. package/dist/types/query.d.ts +64 -0
  75. package/dist/types/query.js +1 -0
  76. package/dist/types/trace.d.ts +47 -0
  77. package/dist/types/trace.js +1 -0
  78. package/dist/utils/ids.d.ts +3 -0
  79. package/dist/utils/ids.js +10 -0
  80. package/dist/utils/logger.d.ts +8 -0
  81. package/dist/utils/logger.js +14 -0
  82. package/package.json +77 -0
  83. package/server.json +69 -0
@@ -0,0 +1,53 @@
1
+ import { z } from 'zod';
2
// Input shape for the `get_traces` tool: a plain object mapping each argument
// name to a Zod validator. Every filter is optional; paging, sorting and the
// summary flag carry defaults, so the handler always receives values for them.
+ const inputSchema = {
3
+ agent_name: z.string().optional().describe('Filter by agent name'),
4
+ framework: z.string().optional().describe('Filter by framework'),
5
// `since` / `until` are validated as plain strings only; no date-format check
// is declared at this layer.
+ since: z.string().optional().describe('ISO timestamp lower bound'),
6
+ until: z.string().optional().describe('ISO timestamp upper bound'),
7
+ min_score: z.number().optional().describe('Minimum eval score filter'),
8
+ max_score: z.number().optional().describe('Maximum eval score filter'),
9
// NOTE(review): `limit` and `offset` accept any number — no integer or range
// constraint is declared here. Confirm the storage adapter tolerates negative
// or fractional values.
+ limit: z.number().default(50).describe('Results per page'),
10
+ offset: z.number().default(0).describe('Pagination offset'),
11
+ sort_by: z.enum(['timestamp', 'latency_ms', 'cost_usd']).default('timestamp').describe('Sort field'),
12
+ sort_order: z.enum(['asc', 'desc']).default('desc').describe('Sort order'),
13
// When true, the handler also calls storage.getDashboardSummary() and adds the
// result to the response under `summary`.
+ include_summary: z.boolean().default(false).describe('Include dashboard summary stats'),
14
+ };
15
/**
 * Registers the `get_traces` tool on an MCP server.
 *
 * The handler forwards the caller's filters, paging and sort options to
 * `storage.queryTraces`, optionally attaches the result of
 * `storage.getDashboardSummary()`, and replies with a single text content
 * item holding the JSON-encoded response.
 *
 * @param {object} server - Must expose `registerTool(name, config, handler)`.
 * @param {object} storage - Must expose `queryTraces(options)` and
 *   `getDashboardSummary()`.
 * @returns {void}
 */
export function registerGetTracesTool(server, storage) {
  const toolConfig = {
    title: 'Get Traces',
    description: 'Query stored traces with filters, pagination, and optional summary stats',
    inputSchema,
  };

  const handleGetTraces = async (args) => {
    const page = await storage.queryTraces({
      filter: {
        agent_name: args.agent_name,
        framework: args.framework,
        since: args.since,
        until: args.until,
        min_score: args.min_score,
        max_score: args.max_score,
      },
      limit: args.limit,
      offset: args.offset,
      sort_by: args.sort_by,
      sort_order: args.sort_order,
    });

    // Copy only the documented paging fields; anything else the adapter
    // returns is deliberately left out of the reply.
    const { traces, total, limit, offset } = page;
    const payload = { traces, total, limit, offset };

    // The summary is fetched only on request, after the page query finished.
    if (args.include_summary) {
      payload.summary = await storage.getDashboardSummary();
    }

    const textItem = { type: 'text', text: JSON.stringify(payload) };
    return { content: [textItem] };
  };

  server.registerTool('get_traces', toolConfig, handleGetTraces);
}
@@ -0,0 +1,4 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { IStorageAdapter } from '../types/query.js';
3
+ import type { EvalEngine } from '../eval/engine.js';
4
+ export declare function registerAllTools(server: McpServer, storage: IStorageAdapter, evalEngine: EvalEngine): void;
@@ -0,0 +1,8 @@
1
+ import { registerLogTraceTool } from './log-trace.js';
2
+ import { registerEvaluateOutputTool } from './evaluate-output.js';
3
+ import { registerGetTracesTool } from './get-traces.js';
4
/**
 * Registers every tool of this package on the given MCP server by calling
 * `registerLogTraceTool`, `registerEvaluateOutputTool` and
 * `registerGetTracesTool`, in that order.
 *
 * @param {object} server - MCP server passed to each registration function.
 * @param {object} storage - Storage adapter passed to each registration function.
 * @param {object} evalEngine - Evaluation engine, passed only to
 *   `registerEvaluateOutputTool`.
 * @returns {void}
 */
export function registerAllTools(server, storage, evalEngine) {
  // Each entry defers one registration; array order is the call order.
  const registrations = [
    () => registerLogTraceTool(server, storage),
    () => registerEvaluateOutputTool(server, storage, evalEngine),
    () => registerGetTracesTool(server, storage),
  ];
  for (const register of registrations) {
    register();
  }
}
@@ -0,0 +1,3 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { IStorageAdapter } from '../types/query.js';
3
+ export declare function registerLogTraceTool(server: McpServer, storage: IStorageAdapter): void;
@@ -0,0 +1,80 @@
1
+ import { z } from 'zod';
2
+ import { generateTraceId, generateSpanId } from '../utils/ids.js';
3
// Shape of one entry of the `tool_calls` input array. Only `tool_name` is
// required; `input` and `output` accept any value.
+ const ToolCallSchema = z.object({
4
+ tool_name: z.string(),
5
+ input: z.unknown().optional(),
6
+ output: z.unknown().optional(),
7
+ latency_ms: z.number().optional(),
8
+ error: z.string().optional(),
9
+ });
10
// Shape of one entry of the `spans` input array. `name` and `start_time` are
// the only fields a caller must send.
+ const SpanSchema = z.object({
11
// Optional on input: the log_trace handler generates a span id when it is missing.
+ span_id: z.string().optional(),
12
+ parent_span_id: z.string().optional(),
13
+ name: z.string(),
14
// Omitted `kind` / `status_code` fall back to 'INTERNAL' / 'UNSET'.
+ kind: z.enum(['INTERNAL', 'SERVER', 'CLIENT', 'PRODUCER', 'CONSUMER', 'LLM', 'TOOL']).default('INTERNAL'),
15
+ status_code: z.enum(['UNSET', 'OK', 'ERROR']).default('UNSET'),
16
+ status_message: z.string().optional(),
17
// Time fields are validated as plain strings; no timestamp format is enforced here.
+ start_time: z.string(),
18
+ end_time: z.string().optional(),
19
+ attributes: z.record(z.unknown()).optional(),
20
+ events: z.array(z.object({
21
+ name: z.string(),
22
+ timestamp: z.string(),
23
+ attributes: z.record(z.unknown()).optional(),
24
+ })).optional(),
25
+ });
26
// Token counters reported by the caller; each one is optional and independent
// (nothing checks that the total equals prompt + completion).
+ const TokenUsageSchema = z.object({
27
+ prompt_tokens: z.number().optional(),
28
+ completion_tokens: z.number().optional(),
29
+ total_tokens: z.number().optional(),
30
+ });
31
// Input shape for the `log_trace` tool: a plain object mapping each argument
// name to a Zod validator. `agent_name` is the only required argument.
+ const inputSchema = {
32
+ agent_name: z.string().describe('Name of the agent'),
33
+ framework: z.string().optional().describe('Agent framework name'),
34
+ input: z.string().optional().describe('Agent input text'),
35
+ output: z.string().optional().describe('Agent output text'),
36
+ tool_calls: z.array(ToolCallSchema).optional().describe('Tool calls made during execution'),
37
+ latency_ms: z.number().optional().describe('Total execution time in milliseconds'),
38
+ token_usage: TokenUsageSchema.optional().describe('Token usage breakdown'),
39
+ cost_usd: z.number().optional().describe('Total cost in USD'),
40
+ metadata: z.record(z.unknown()).optional().describe('Arbitrary metadata'),
41
+ spans: z.array(SpanSchema).optional().describe('Detailed execution spans'),
42
// When omitted, the handler stamps the trace with the current time
// (`new Date().toISOString()`).
+ timestamp: z.string().optional().describe('Trace timestamp (ISO 8601)'),
43
+ };
44
/**
 * Registers the `log_trace` tool on an MCP server.
 *
 * The handler assigns a fresh trace id, defaults the timestamp to the current
 * time, stamps every supplied span with that trace id (generating a span id
 * where none was given), stores the trace via `storage.insertTrace`, and
 * replies with the new trace id as JSON text.
 *
 * @param {object} server - Must expose `registerTool(name, config, handler)`.
 * @param {object} storage - Must expose `insertTrace(trace)`.
 * @returns {void}
 */
export function registerLogTraceTool(server, storage) {
  const toolConfig = {
    title: 'Log Trace',
    description: 'Log an agent execution trace with spans, tool calls, and metrics',
    inputSchema,
  };

  const handleLogTrace = async (args) => {
    const traceId = generateTraceId();
    const timestamp = args.timestamp ?? new Date().toISOString();

    // Keeps the caller's span fields, fills in a span id only when absent and
    // always binds the span to the trace created above.
    const bindSpanToTrace = (span) => {
      const span_id = span.span_id ?? generateSpanId();
      return { ...span, span_id, trace_id: traceId };
    };

    const trace = {
      trace_id: traceId,
      agent_name: args.agent_name,
      framework: args.framework,
      input: args.input,
      output: args.output,
      tool_calls: args.tool_calls,
      latency_ms: args.latency_ms,
      token_usage: args.token_usage,
      cost_usd: args.cost_usd,
      metadata: args.metadata,
      timestamp,
      spans: args.spans?.map((span) => bindSpanToTrace(span)),
    };
    await storage.insertTrace(trace);

    const receipt = JSON.stringify({ trace_id: traceId, status: 'stored' });
    return { content: [{ type: 'text', text: receipt }] };
  };

  server.registerTool('log_trace', toolConfig, handleLogTrace);
}
@@ -0,0 +1,10 @@
1
+ import type { Server } from 'node:http';
2
+ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
3
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
4
+ import type { IrisConfig } from '../types/config.js';
5
+ import type { Logger } from '../utils/logger.js';
6
+ export interface HttpTransportResult {
7
+ transport: StreamableHTTPServerTransport;
8
+ httpServer: Server;
9
+ }
10
+ export declare function createHttpTransport(mcpServer: McpServer, config: IrisConfig, logger: Logger): Promise<HttpTransportResult>;
@@ -0,0 +1,37 @@
1
+ import express from 'express';
2
+ import helmet from 'helmet';
3
+ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
4
+ import { createAuthMiddleware } from '../middleware/auth.js';
5
+ import { createErrorHandler } from '../middleware/error-handler.js';
6
+ import { createMcpRateLimiter } from '../middleware/rate-limit.js';
7
/**
 * Builds the Express application that serves the MCP Streamable HTTP endpoint
 * on `/mcp` and starts it on `config.transport.host:config.transport.port`.
 *
 * Middleware order matters: security headers and the JSON body parser come
 * first, `/health` is registered before the auth middleware so it stays
 * unauthenticated, and the error handler is installed last.
 *
 * @param {object} mcpServer - Not read by this function.
 *   NOTE(review): presumably the caller connects it to the returned
 *   transport — confirm at the call site.
 * @param {object} config - `security.requestSizeLimit`, `transport.port` and
 *   `transport.host` are read here; the whole object is also handed to the
 *   auth and rate-limit middleware factories.
 * @param {object} logger - Passed to the error-handling middleware factory.
 * @returns {Promise<{ transport: object, httpServer: object }>} Resolves once
 *   the HTTP server is listening.
 * @throws Rejects when the server cannot start listening (for example when
 *   the port is already in use).
 */
export async function createHttpTransport(mcpServer, config, logger) {
  // The package declares support for Node >= 18, where `crypto` is not a
  // global unless the process runs with --experimental-global-webcrypto.
  // Loading randomUUID from node:crypto keeps session-id generation working
  // on every supported runtime.
  const { randomUUID } = await import('node:crypto');

  const app = express();
  // Security headers (no CSP — API only, no HTML)
  app.use(helmet({ contentSecurityPolicy: false }));
  // Body parser with size limit
  app.use(express.json({ limit: config.security.requestSizeLimit }));
  // Health endpoint (no auth, no rate limit): registered before the auth
  // middleware below, so it is reachable without credentials.
  app.get('/health', (_req, res) => {
    res.json({ status: 'ok', server: 'iris-eval', timestamp: new Date().toISOString() });
  });
  // Authentication applies to every route registered after this point.
  app.use(createAuthMiddleware(config));

  const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: () => randomUUID() });

  // Rate limiter for MCP POST/DELETE (not GET — SSE streaming)
  const mcpLimiter = createMcpRateLimiter(config);
  app.post('/mcp', mcpLimiter, async (req, res) => {
    // The body was already consumed by express.json(), so pass the parsed value on.
    await transport.handleRequest(req, res, req.body);
  });
  app.get('/mcp', async (req, res) => {
    await transport.handleRequest(req, res);
  });
  app.delete('/mcp', mcpLimiter, async (req, res) => {
    await transport.handleRequest(req, res);
  });

  // Error handler (must be last)
  app.use(createErrorHandler(logger));

  const httpServer = await new Promise((resolve, reject) => {
    // Express 5 reports a failed listen (e.g. EADDRINUSE) by invoking this
    // callback with the error as its first argument. Ignoring that argument
    // would hand back a server that never bound its port as if it had started.
    const server = app.listen(config.transport.port, config.transport.host, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(server);
    });
  });

  return { transport, httpServer };
}
@@ -0,0 +1,3 @@
1
+ export { createStdioTransport } from './stdio.js';
2
+ export { createHttpTransport } from './http.js';
3
+ export type { HttpTransportResult } from './http.js';
@@ -0,0 +1,2 @@
1
+ export { createStdioTransport } from './stdio.js';
2
+ export { createHttpTransport } from './http.js';
@@ -0,0 +1,2 @@
1
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
2
+ export declare function createStdioTransport(): StdioServerTransport;
@@ -0,0 +1,4 @@
1
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
2
/**
 * Creates a new stdio transport for the MCP server.
 *
 * @returns {StdioServerTransport} A fresh instance on every call, constructed
 *   with no arguments.
 */
export function createStdioTransport() {
  const stdioTransport = new StdioServerTransport();
  return stdioTransport;
}
@@ -0,0 +1,37 @@
1
/** Runtime configuration consumed by the transports, logger and middleware. */
+ export interface IrisConfig {
2
// Only the SQLite backend is representable in this type.
+ storage: {
3
+ type: 'sqlite';
4
// NOTE(review): presumably the database file location (server.json documents
// an IRIS_DB_PATH variable) — confirm against the config loader.
+ path: string;
5
+ };
6
+ server: {
7
+ name: string;
8
+ version: string;
9
+ };
10
+ transport: {
11
+ type: 'stdio' | 'http';
12
// `port` and `host` are the values the HTTP transport passes to app.listen().
+ port: number;
13
+ host: string;
14
+ };
15
+ dashboard: {
16
+ enabled: boolean;
17
+ port: number;
18
+ };
19
+ eval: {
20
+ defaultThreshold: number;
21
+ };
22
+ logging: {
23
// Forwarded unchanged to pino as its `level` option by createLogger.
+ level: 'debug' | 'info' | 'warn' | 'error';
24
+ };
25
+ retention: {
26
+ days: number;
27
+ };
28
+ security: {
29
// Optional. NOTE(review): presumably checked by the auth middleware, which
// receives the whole config — verify in middleware/auth.
+ apiKey?: string;
30
+ allowedOrigins: string[];
31
+ rateLimit: {
32
+ api: number;
33
+ mcp: number;
34
+ };
35
// Used as the `limit` option of express.json() in the HTTP transport.
+ requestSizeLimit: string;
36
+ };
37
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,51 @@
1
+ export type EvalType = 'completeness' | 'relevance' | 'safety' | 'cost' | 'custom';
2
+ export interface EvalRule {
3
+ name: string;
4
+ description: string;
5
+ evalType: EvalType;
6
+ weight: number;
7
+ evaluate(context: EvalContext): EvalRuleResult;
8
+ }
9
+ export interface EvalContext {
10
+ output: string;
11
+ expected?: string;
12
+ input?: string;
13
+ toolCalls?: Array<{
14
+ tool_name: string;
15
+ input?: unknown;
16
+ output?: unknown;
17
+ }>;
18
+ tokenUsage?: {
19
+ prompt_tokens?: number;
20
+ completion_tokens?: number;
21
+ total_tokens?: number;
22
+ };
23
+ costUsd?: number;
24
+ metadata?: Record<string, unknown>;
25
+ customConfig?: Record<string, unknown>;
26
+ }
27
+ export interface EvalRuleResult {
28
+ ruleName: string;
29
+ passed: boolean;
30
+ score: number;
31
+ message: string;
32
+ }
33
+ export interface EvalResult {
34
+ id: string;
35
+ trace_id?: string;
36
+ eval_type: EvalType;
37
+ output_text: string;
38
+ expected_text?: string;
39
+ score: number;
40
+ passed: boolean;
41
+ rule_results: EvalRuleResult[];
42
+ suggestions: string[];
43
+ created_at?: string;
44
+ }
45
+ export type CustomRuleType = 'regex_match' | 'regex_no_match' | 'min_length' | 'max_length' | 'contains_keywords' | 'excludes_keywords' | 'json_schema' | 'cost_threshold';
46
+ export interface CustomRuleDefinition {
47
+ name: string;
48
+ type: CustomRuleType;
49
+ config: Record<string, unknown>;
50
+ weight?: number;
51
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,4 @@
1
+ export type { SpanKind, SpanStatus, SpanEvent, ToolCallRecord, TokenUsage, Span, Trace, } from './trace.js';
2
+ export type { EvalType, EvalRule, EvalContext, EvalRuleResult, EvalResult, CustomRuleType, CustomRuleDefinition, } from './eval.js';
3
+ export type { TraceFilter, TraceQueryOptions, TraceQueryResult, DashboardSummary, IStorageAdapter, } from './query.js';
4
+ export type { IrisConfig } from './config.js';
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,64 @@
1
+ import type { Trace, Span } from './trace.js';
2
+ import type { EvalResult } from './eval.js';
3
+ export interface TraceFilter {
4
+ agent_name?: string;
5
+ framework?: string;
6
+ since?: string;
7
+ until?: string;
8
+ min_score?: number;
9
+ max_score?: number;
10
+ has_errors?: boolean;
11
+ }
12
/**
 * Argument object accepted by `IStorageAdapter.queryTraces`.
 *
 * Every field is optional at the type level. The `get_traces` tool always
 * supplies `limit`, `offset`, `sort_by` and `sort_order`, because its input
 * schema gives them defaults (50, 0, 'timestamp', 'desc').
 */
+ export interface TraceQueryOptions {
13
+ filter?: TraceFilter;
14
+ limit?: number;
15
+ offset?: number;
16
+ sort_by?: 'timestamp' | 'latency_ms' | 'cost_usd';
17
+ sort_order?: 'asc' | 'desc';
18
+ }
19
+ export interface TraceQueryResult {
20
+ traces: Trace[];
21
+ total: number;
22
+ limit: number;
23
+ offset: number;
24
+ }
25
+ export interface DashboardSummary {
26
+ total_traces: number;
27
+ avg_latency_ms: number;
28
+ total_cost_usd: number;
29
+ error_rate: number;
30
+ eval_pass_rate: number;
31
+ traces_per_hour: Array<{
32
+ hour: string;
33
+ count: number;
34
+ }>;
35
+ top_agents: Array<{
36
+ agent_name: string;
37
+ count: number;
38
+ }>;
39
+ }
40
+ export interface IStorageAdapter {
41
+ initialize(): Promise<void>;
42
+ close(): Promise<void>;
43
+ insertTrace(trace: Trace): Promise<void>;
44
+ getTrace(traceId: string): Promise<Trace | null>;
45
+ queryTraces(options: TraceQueryOptions): Promise<TraceQueryResult>;
46
+ insertSpan(span: Span): Promise<void>;
47
+ getSpansByTraceId(traceId: string): Promise<Span[]>;
48
+ insertEvalResult(result: EvalResult): Promise<void>;
49
+ getEvalsByTraceId(traceId: string): Promise<EvalResult[]>;
50
+ queryEvalResults(options: {
51
+ eval_type?: string;
52
+ passed?: boolean;
53
+ since?: string;
54
+ until?: string;
55
+ limit?: number;
56
+ offset?: number;
57
+ }): Promise<{
58
+ results: EvalResult[];
59
+ total: number;
60
+ }>;
61
+ getDashboardSummary(sinceHours?: number): Promise<DashboardSummary>;
62
+ deleteTracesOlderThan(days: number): Promise<number>;
63
+ getDistinctValues(column: string): Promise<string[]>;
64
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,47 @@
1
+ export type SpanKind = 'INTERNAL' | 'SERVER' | 'CLIENT' | 'PRODUCER' | 'CONSUMER' | 'LLM' | 'TOOL';
2
+ export type SpanStatus = 'UNSET' | 'OK' | 'ERROR';
3
+ export interface SpanEvent {
4
+ name: string;
5
+ timestamp: string;
6
+ attributes?: Record<string, unknown>;
7
+ }
8
+ export interface ToolCallRecord {
9
+ tool_name: string;
10
+ input?: unknown;
11
+ output?: unknown;
12
+ latency_ms?: number;
13
+ error?: string;
14
+ }
15
+ export interface TokenUsage {
16
+ prompt_tokens?: number;
17
+ completion_tokens?: number;
18
+ total_tokens?: number;
19
+ }
20
+ export interface Span {
21
+ span_id: string;
22
+ trace_id: string;
23
+ parent_span_id?: string;
24
+ name: string;
25
+ kind: SpanKind;
26
+ status_code: SpanStatus;
27
+ status_message?: string;
28
+ start_time: string;
29
+ end_time?: string;
30
+ attributes?: Record<string, unknown>;
31
+ events?: SpanEvent[];
32
+ }
33
/** One logged agent execution, as handed to `IStorageAdapter.insertTrace`. */
+ export interface Trace {
34
// The log_trace tool fills this from generateTraceId(): 16 random bytes,
// hex-encoded (32 characters).
+ trace_id: string;
35
+ agent_name: string;
36
+ framework?: string;
37
+ input?: string;
38
+ output?: string;
39
+ tool_calls?: ToolCallRecord[];
40
+ latency_ms?: number;
41
+ token_usage?: TokenUsage;
42
+ cost_usd?: number;
43
+ metadata?: Record<string, unknown>;
44
// Caller-supplied string, or `new Date().toISOString()` when log_trace
// receives no timestamp.
+ timestamp: string;
45
// Not set by the log_trace tool. NOTE(review): presumably assigned by the
// storage layer — confirm in the SQLite adapter.
+ created_at?: string;
46
// When logged through log_trace, each span carries this trace's `trace_id`.
+ spans?: Span[];
47
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,3 @@
1
+ export declare function generateTraceId(): string;
2
+ export declare function generateSpanId(): string;
3
+ export declare function generateEvalId(): string;
@@ -0,0 +1,10 @@
1
+ import { randomBytes, randomUUID } from 'node:crypto';
2
/**
 * Generates a random trace identifier.
 *
 * @returns {string} 16 random bytes encoded as 32 lowercase hex characters.
 */
export function generateTraceId() {
  const traceIdBytes = randomBytes(16);
  return traceIdBytes.toString('hex');
}
5
/**
 * Generates a random span identifier.
 *
 * @returns {string} 8 random bytes encoded as 16 lowercase hex characters.
 */
export function generateSpanId() {
  const spanIdBytes = randomBytes(8);
  return spanIdBytes.toString('hex');
}
8
/**
 * Generates an identifier for an evaluation result.
 *
 * @returns {string} A random UUID produced by `randomUUID()` from node:crypto.
 */
export function generateEvalId() {
  const evalId = randomUUID();
  return evalId;
}
@@ -0,0 +1,8 @@
1
+ import type { IrisConfig } from '../types/index.js';
2
/**
 * Four-level logging facade returned by `createLogger`.
 *
 * In the pino-backed implementation, arguments after `message` are attached
 * to the log record as a `data` array; with no extra arguments the record
 * carries the message only.
 */
+ export interface Logger {
3
+ debug(message: string, ...args: unknown[]): void;
4
+ info(message: string, ...args: unknown[]): void;
5
+ warn(message: string, ...args: unknown[]): void;
6
+ error(message: string, ...args: unknown[]): void;
7
+ }
8
+ export declare function createLogger(config: Pick<IrisConfig, 'logging'>): Logger;
@@ -0,0 +1,14 @@
1
+ import pino from 'pino';
2
/**
 * Creates the application logger: a pino instance writing to file descriptor
 * 2 (stderr), wrapped in a four-method facade (`debug`, `info`, `warn`,
 * `error`).
 *
 * Arguments passed after the message are attached to the record as a `data`
 * array. `Error` values in that array are first converted with pino's
 * standard error serializer; left untouched they would be JSON-encoded as
 * `{}`, because an Error's `message` and `stack` are not enumerable.
 *
 * @param {{ logging: { level: string } }} config - Only `logging.level` is read.
 * @returns {{ debug: Function, info: Function, warn: Function, error: Function }}
 *   Each method has the signature `(msg, ...args)`.
 */
export function createLogger(config) {
  const logger = pino({
    level: config.logging.level,
    // No pino transport is configured; records go to the destination below.
    transport: undefined,
    // Write to stderr (fd 2) — stdout is reserved for the stdio MCP transport.
  }, pino.destination(2));

  // Builds the merging object pino receives ahead of the message.
  const toPayload = (args) => {
    if (args.length === 0) {
      return {};
    }
    const data = args.map((value) => (value instanceof Error ? pino.stdSerializers.err(value) : value));
    return { data };
  };

  // One forwarding method per level, so all four stay consistent.
  const forward = (level) => (msg, ...args) => logger[level](toPayload(args), msg);

  return {
    debug: forward('debug'),
    info: forward('info'),
    warn: forward('warn'),
    error: forward('error'),
  };
}
package/package.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "name": "@iris-eval/mcp-server",
3
+ "version": "0.1.0",
4
+ "description": "MCP-native agent evaluation and observability server",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "bin": {
9
+ "iris-mcp": "dist/index.js"
10
+ },
11
+ "scripts": {
12
+ "build": "tsc -p tsconfig.build.json",
13
+ "dev": "tsx src/index.ts",
14
+ "start": "node dist/index.js",
15
+ "lint": "eslint src/ tests/",
16
+ "format": "prettier --write .",
17
+ "format:check": "prettier --check .",
18
+ "typecheck": "tsc --noEmit",
19
+ "test": "vitest run",
20
+ "test:watch": "vitest",
21
+ "test:coverage": "vitest run --coverage",
22
+ "test:integration": "vitest run tests/integration/",
23
+ "clean": "rm -rf dist coverage",
24
+ "seed:demo": "tsx scripts/seed-demo-data.ts",
25
+ "demo": "tsx scripts/demo.ts"
26
+ },
27
+ "keywords": [
28
+ "mcp",
29
+ "agent",
30
+ "evaluation",
31
+ "observability",
32
+ "tracing",
33
+ "llm"
34
+ ],
35
+ "author": "",
36
+ "license": "MIT",
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "git+https://github.com/iris-eval/mcp-server.git"
40
+ },
41
+ "homepage": "https://github.com/iris-eval/mcp-server#readme",
42
+ "bugs": {
43
+ "url": "https://github.com/iris-eval/mcp-server/issues"
44
+ },
45
+ "files": [
46
+ "dist",
47
+ "LICENSE",
48
+ "README.md",
49
+ "server.json"
50
+ ],
51
+ "engines": {
52
+ "node": ">=18.0.0"
53
+ },
54
+ "dependencies": {
55
+ "@modelcontextprotocol/sdk": "^1.27.0",
56
+ "better-sqlite3": "^11.0.0",
57
+ "express": "^5.1.0",
58
+ "express-rate-limit": "^8.3.1",
59
+ "helmet": "^8.1.0",
60
+ "pino": "^10.3.1",
61
+ "safe-regex2": "^5.1.0",
62
+ "zod": "^3.25.0"
63
+ },
64
+ "devDependencies": {
65
+ "@types/better-sqlite3": "^7.6.0",
66
+ "@types/express": "^5.0.0",
67
+ "@types/node": "^22.0.0",
68
+ "@typescript-eslint/eslint-plugin": "^8.57.0",
69
+ "@typescript-eslint/parser": "^8.57.0",
70
+ "@vitest/coverage-v8": "^3.0.0",
71
+ "eslint": "^9.0.0",
72
+ "prettier": "^3.0.0",
73
+ "tsx": "^4.0.0",
74
+ "typescript": "^5.7.0",
75
+ "vitest": "^3.0.0"
76
+ }
77
+ }
package/server.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "name": "iris-eval",
3
+ "version": "0.1.0",
4
+ "description": "MCP-native agent evaluation and observability server",
5
+ "homepage": "https://github.com/iris-eval/mcp-server",
6
+ "packages": {
7
+ "npm": {
8
+ "name": "@iris-eval/mcp-server",
9
+ "registry": "https://registry.npmjs.org"
10
+ },
11
+ "oci": {
12
+ "name": "ghcr.io/iris-eval/mcp-server",
13
+ "registry": "ghcr.io"
14
+ }
15
+ },
16
+ "tools": [
17
+ {
18
+ "name": "log_trace",
19
+ "description": "Log an agent execution trace with spans, tool calls, and metrics"
20
+ },
21
+ {
22
+ "name": "evaluate_output",
23
+ "description": "Evaluate agent output quality using configurable rules"
24
+ },
25
+ {
26
+ "name": "get_traces",
27
+ "description": "Query stored traces with filters, pagination, and summary stats"
28
+ }
29
+ ],
30
+ "resources": [
31
+ {
32
+ "uri": "iris://dashboard/summary",
33
+ "description": "Dashboard summary with key metrics and trends"
34
+ },
35
+ {
36
+ "uri_template": "iris://traces/{trace_id}",
37
+ "description": "Full trace detail with spans and evaluation results"
38
+ }
39
+ ],
40
+ "env": {
41
+ "IRIS_TRANSPORT": {
42
+ "description": "Transport type: stdio or http",
43
+ "default": "stdio"
44
+ },
45
+ "IRIS_PORT": {
46
+ "description": "HTTP transport port",
47
+ "default": "3000"
48
+ },
49
+ "IRIS_DB_PATH": {
50
+ "description": "SQLite database path",
51
+ "default": "~/.iris/iris.db"
52
+ },
53
+ "IRIS_LOG_LEVEL": {
54
+ "description": "Log level: debug, info, warn, error",
55
+ "default": "info"
56
+ },
57
+ "IRIS_DASHBOARD": {
58
+ "description": "Enable web dashboard",
59
+ "default": "false"
60
+ },
61
+ "IRIS_API_KEY": {
62
+ "description": "API key for HTTP authentication (optional, recommended for production)"
63
+ },
64
+ "IRIS_ALLOWED_ORIGINS": {
65
+ "description": "Comma-separated list of allowed CORS origins",
66
+ "default": "http://localhost:*"
67
+ }
68
+ }
69
+ }