@iris-eval/mcp-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +168 -0
  3. package/dist/config/defaults.d.ts +2 -0
  4. package/dist/config/defaults.js +40 -0
  5. package/dist/config/index.d.ts +11 -0
  6. package/dist/config/index.js +106 -0
  7. package/dist/dashboard/assets/index-BStyrSkE.js +127 -0
  8. package/dist/dashboard/assets/index-DsCtYyvh.css +1 -0
  9. package/dist/dashboard/index.html +13 -0
  10. package/dist/eval/engine.d.ts +8 -0
  11. package/dist/eval/engine.js +61 -0
  12. package/dist/eval/index.d.ts +2 -0
  13. package/dist/eval/index.js +2 -0
  14. package/dist/eval/rules/completeness.d.ts +6 -0
  15. package/dist/eval/rules/completeness.js +79 -0
  16. package/dist/eval/rules/cost.d.ts +4 -0
  17. package/dist/eval/rules/cost.js +44 -0
  18. package/dist/eval/rules/custom.d.ts +2 -0
  19. package/dist/eval/rules/custom.js +88 -0
  20. package/dist/eval/rules/index.d.ts +4 -0
  21. package/dist/eval/rules/index.js +15 -0
  22. package/dist/eval/rules/relevance.d.ts +5 -0
  23. package/dist/eval/rules/relevance.js +87 -0
  24. package/dist/eval/rules/safety.d.ts +5 -0
  25. package/dist/eval/rules/safety.js +81 -0
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.js +101 -0
  28. package/dist/middleware/auth.d.ts +3 -0
  29. package/dist/middleware/auth.js +24 -0
  30. package/dist/middleware/cors.d.ts +2 -0
  31. package/dist/middleware/cors.js +29 -0
  32. package/dist/middleware/error-handler.d.ts +3 -0
  33. package/dist/middleware/error-handler.js +19 -0
  34. package/dist/middleware/index.d.ts +4 -0
  35. package/dist/middleware/index.js +4 -0
  36. package/dist/middleware/rate-limit.d.ts +3 -0
  37. package/dist/middleware/rate-limit.js +19 -0
  38. package/dist/resources/dashboard-summary.d.ts +3 -0
  39. package/dist/resources/dashboard-summary.js +14 -0
  40. package/dist/resources/index.d.ts +3 -0
  41. package/dist/resources/index.js +6 -0
  42. package/dist/resources/trace-detail.d.ts +3 -0
  43. package/dist/resources/trace-detail.js +28 -0
  44. package/dist/server.d.ts +9 -0
  45. package/dist/server.js +14 -0
  46. package/dist/storage/index.d.ts +4 -0
  47. package/dist/storage/index.js +10 -0
  48. package/dist/storage/migrations/001-initial-schema.d.ts +3 -0
  49. package/dist/storage/migrations/001-initial-schema.js +57 -0
  50. package/dist/storage/migrations/index.d.ts +2 -0
  51. package/dist/storage/migrations/index.js +22 -0
  52. package/dist/storage/sqlite-adapter.d.ts +33 -0
  53. package/dist/storage/sqlite-adapter.js +232 -0
  54. package/dist/tools/evaluate-output.d.ts +4 -0
  55. package/dist/tools/evaluate-output.js +58 -0
  56. package/dist/tools/get-traces.d.ts +3 -0
  57. package/dist/tools/get-traces.js +53 -0
  58. package/dist/tools/index.d.ts +4 -0
  59. package/dist/tools/index.js +8 -0
  60. package/dist/tools/log-trace.d.ts +3 -0
  61. package/dist/tools/log-trace.js +80 -0
  62. package/dist/transport/http.d.ts +10 -0
  63. package/dist/transport/http.js +37 -0
  64. package/dist/transport/index.d.ts +3 -0
  65. package/dist/transport/index.js +2 -0
  66. package/dist/transport/stdio.d.ts +2 -0
  67. package/dist/transport/stdio.js +4 -0
  68. package/dist/types/config.d.ts +37 -0
  69. package/dist/types/config.js +1 -0
  70. package/dist/types/eval.d.ts +51 -0
  71. package/dist/types/eval.js +1 -0
  72. package/dist/types/index.d.ts +4 -0
  73. package/dist/types/index.js +1 -0
  74. package/dist/types/query.d.ts +64 -0
  75. package/dist/types/query.js +1 -0
  76. package/dist/types/trace.d.ts +47 -0
  77. package/dist/types/trace.js +1 -0
  78. package/dist/utils/ids.d.ts +3 -0
  79. package/dist/utils/ids.js +10 -0
  80. package/dist/utils/logger.d.ts +8 -0
  81. package/dist/utils/logger.js +14 -0
  82. package/package.json +77 -0
  83. package/server.json +69 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Iris Eval Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,168 @@
1
+ # Iris — MCP-Native Agent Eval & Observability
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@iris-eval/mcp-server)](https://npmjs.com/package/@iris-eval/mcp-server)
4
+ [![CI](https://github.com/iris-eval/mcp-server/actions/workflows/ci.yml/badge.svg)](https://github.com/iris-eval/mcp-server/actions/workflows/ci.yml)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Node.js](https://img.shields.io/badge/node-%3E%3D18-brightgreen)](https://nodejs.org)
7
+
8
+ Iris is an open-source [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that provides trace logging, quality evaluation, and drift detection for AI agents. Any MCP-compatible agent framework can discover and invoke Iris tools.
9
+
10
+ <!-- TODO: Replace with actual dashboard screenshot after running seed:demo -->
11
+ <!-- ![Iris Dashboard](docs/assets/dashboard-screenshot.png) -->
12
+
13
+ ## Quickstart
14
+
15
+ ```bash
16
+ npm install -g @iris-eval/mcp-server
17
+ iris-mcp
18
+ ```
19
+
20
+ Or run directly:
21
+
22
+ ```bash
23
+ npx @iris-eval/mcp-server
24
+ ```
25
+
26
+ ### Docker
27
+
28
+ ```bash
29
+ docker run -p 3000:3000 -v iris-data:/data ghcr.io/iris-eval/mcp-server
30
+ ```
31
+
32
+ ## Configuration
33
+
34
+ Iris looks for config in this order (later overrides earlier):
35
+
36
+ 1. Built-in defaults
37
+ 2. `~/.iris/config.json`
38
+ 3. Environment variables (`IRIS_*`)
39
+ 4. CLI arguments
40
+
41
+ ### CLI Arguments
42
+
43
+ | Flag | Default | Description |
44
+ |------|---------|-------------|
45
+ | `--transport` | `stdio` | Transport type: `stdio` or `http` |
46
+ | `--port` | `3000` | HTTP transport port |
47
+ | `--db-path` | `~/.iris/iris.db` | SQLite database path |
48
+ | `--config` | `~/.iris/config.json` | Config file path |
49
+ | `--api-key` | — | API key for HTTP authentication |
50
+ | `--dashboard` | `false` | Enable web dashboard |
51
+ | `--dashboard-port` | `6920` | Dashboard port |
52
+
53
+ ### Environment Variables
54
+
55
+ | Variable | Description |
56
+ |----------|-------------|
57
+ | `IRIS_TRANSPORT` | Transport type |
58
+ | `IRIS_PORT` | HTTP port |
59
+ | `IRIS_DB_PATH` | Database path |
60
+ | `IRIS_LOG_LEVEL` | Log level: debug, info, warn, error |
61
+ | `IRIS_DASHBOARD` | Enable dashboard (true/false) |
+ | `IRIS_DASHBOARD_PORT` | Dashboard port |
62
+ | `IRIS_API_KEY` | API key for HTTP authentication |
63
+ | `IRIS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins |
64
+
65
+ ## Security
66
+
67
+ When using the HTTP transport, Iris includes production-grade security:
68
+
69
+ - **Authentication** — Set `IRIS_API_KEY` or `--api-key` to require `Authorization: Bearer <key>` on all endpoints (except `/health`). Recommended for any network-exposed deployment.
70
+ - **CORS** — Restricted to `http://localhost:*` by default. Configure with `IRIS_ALLOWED_ORIGINS`.
71
+ - **Rate limiting** — 100 requests/minute for dashboard API, 20 requests/minute for MCP endpoints. Configurable via `~/.iris/config.json`.
72
+ - **Security headers** — Helmet middleware applies CSP, X-Frame-Options, X-Content-Type-Options, and other standard headers.
73
+ - **Input validation** — All query parameters validated with Zod schemas. Malformed requests return 400.
74
+ - **Request size limits** — Body payloads limited to 1MB by default.
75
+ - **Safe regex** — User-supplied regex patterns in custom eval rules are validated against ReDoS attacks.
76
+ - **Structured logging** — JSON logs to stderr via pino. Never writes to stdout (reserved for stdio transport).
77
+
78
+ ```bash
79
+ # Production deployment example
80
+ iris-mcp --transport http --port 3000 --api-key "$(openssl rand -hex 32)" --dashboard
81
+ ```
82
+
83
+ ## MCP Tools
84
+
85
+ ### `log_trace`
86
+
87
+ Log an agent execution trace with spans, tool calls, and metrics.
88
+
89
+ **Input:**
90
+ - `agent_name` (required) — Name of the agent
91
+ - `input` — Agent input text
92
+ - `output` — Agent output text
93
+ - `tool_calls` — Array of tool call records
94
+ - `latency_ms` — Execution time in milliseconds
95
+ - `token_usage` — `{ prompt_tokens, completion_tokens, total_tokens }`
96
+ - `cost_usd` — Total cost in USD
97
+ - `metadata` — Arbitrary key-value metadata
98
+ - `spans` — Array of span objects for detailed tracing
99
+
100
+ ### `evaluate_output`
101
+
102
+ Evaluate agent output quality using configurable rules.
103
+
104
+ **Input:**
105
+ - `output` (required) — The text to evaluate
106
+ - `eval_type` — Type: `completeness`, `relevance`, `safety`, `cost`, `custom`
107
+ - `expected` — Expected output for comparison
108
+ - `trace_id` — Link evaluation to a trace
109
+ - `custom_rules` — Array of custom rule definitions
110
+
111
+ ### `get_traces`
112
+
113
+ Query stored traces with filters and pagination.
114
+
115
+ **Input:**
116
+ - `agent_name` — Filter by agent name
117
+ - `framework` — Filter by framework
118
+ - `since` — ISO timestamp lower bound
119
+ - `until` — ISO timestamp upper bound
120
+ - `min_score` / `max_score` — Score range filter
121
+ - `limit` — Results per page (default 50)
122
+ - `offset` — Pagination offset
123
+
124
+ ## MCP Resources
125
+
126
+ - `iris://dashboard/summary` — Dashboard summary statistics
127
+ - `iris://traces/{trace_id}` — Full trace detail with spans and evals
128
+
129
+ ## Claude Desktop
130
+
131
+ Add Iris to your Claude Desktop MCP config:
132
+
133
+ ```json
134
+ {
135
+ "mcpServers": {
136
+ "iris-eval": {
137
+ "command": "npx",
138
+ "args": ["@iris-eval/mcp-server"]
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ Then ask Claude to "log a trace" or "evaluate this output" — Iris tools are automatically available.
145
+
146
+ See [examples/claude-desktop/](examples/claude-desktop/) for more configuration options.
147
+
148
+ ## Web Dashboard
149
+
150
+ Start Iris with the `--dashboard` flag to enable the web UI at `http://localhost:6920`.
151
+
152
+ ## Examples
153
+
154
+ - [Claude Desktop setup](examples/claude-desktop/) — MCP config for stdio and HTTP modes
155
+ - [TypeScript](examples/typescript/basic-usage.ts) — MCP SDK client usage
156
+ - [LangChain](examples/langchain/observe-agent.py) — Agent instrumentation
157
+ - [CrewAI](examples/crewai/observe-crew.py) — Crew observability
158
+
159
+ ## Community
160
+
161
+ - [GitHub Issues](https://github.com/iris-eval/mcp-server/issues) — Bug reports and feature requests
162
+ - [GitHub Discussions](https://github.com/iris-eval/mcp-server/discussions) — Questions and ideas
163
+ - [Contributing Guide](CONTRIBUTING.md) — How to contribute
164
+ - [Roadmap](docs/roadmap.md) — What's coming next
165
+
166
+ ## License
167
+
168
+ MIT
@@ -0,0 +1,2 @@
1
+ import type { IrisConfig } from '../types/index.js';
2
+ export declare const defaultConfig: IrisConfig; // Built-in configuration values; used by loadConfig() as the base that later layers are merged onto.
@@ -0,0 +1,40 @@
1
+ import { join } from 'node:path';
2
+ import { homedir } from 'node:os';
3
// Default SQLite database location: <home directory>/.iris/iris.db
const defaultDbPath = join(homedir(), '.iris', 'iris.db');

/**
 * Built-in configuration values.
 *
 * Each top-level key is one configuration section. `security.apiKey` is
 * deliberately present but `undefined`.
 */
export const defaultConfig = {
  storage: { type: 'sqlite', path: defaultDbPath },
  server: { name: 'iris-eval', version: '0.1.0' },
  transport: { type: 'stdio', port: 3000, host: '0.0.0.0' },
  dashboard: { enabled: false, port: 6920 },
  eval: { defaultThreshold: 0.7 },
  logging: { level: 'info' },
  retention: { days: 30 },
  security: {
    apiKey: undefined,
    allowedOrigins: ['http://localhost:*'],
    rateLimit: { api: 100, mcp: 20 },
    requestSizeLimit: '1mb',
  },
};
@@ -0,0 +1,11 @@
1
+ import type { IrisConfig } from '../types/index.js';
2
+ export interface CliArgs { // Optional overrides passed to loadConfig(); fields appear to mirror the CLI flags listed in the README (TODO confirm against the CLI parser).
3
+ transport?: string; // When truthy, becomes config.transport.type.
4
+ port?: number; // When truthy, becomes config.transport.port.
5
+ config?: string; // Path of the JSON config file to read; loadConfig() falls back to <home>/.iris/config.json when unset.
6
+ dbPath?: string; // When truthy, becomes config.storage.path (storage type is set to 'sqlite').
7
+ dashboard?: boolean; // When defined (true or false), becomes config.dashboard.enabled.
8
+ dashboardPort?: number; // When truthy, becomes config.dashboard.port.
9
+ apiKey?: string; // When truthy, becomes config.security.apiKey.
10
+ }
11
+ export declare function loadConfig(cliArgs?: CliArgs): IrisConfig; // Merges defaults, config file, IRIS_* environment variables and cliArgs (later layers win) and returns the result.
@@ -0,0 +1,106 @@
1
+ import { readFileSync, mkdirSync, existsSync } from 'node:fs';
2
+ import { join, dirname } from 'node:path';
3
+ import { homedir } from 'node:os';
4
+ import { defaultConfig } from './defaults.js';
5
/**
 * Recursively merge `source` onto a copy of `target` and return the copy.
 *
 * Neither argument is mutated. Rules, per own enumerable key of `source`:
 * - `undefined` values are skipped, so they never erase an existing value;
 * - when both sides hold non-array objects, they are merged recursively;
 * - anything else (scalars, `null`, arrays, or a type mismatch) replaces the
 *   target value wholesale;
 * - `__proto__`, `constructor` and `prototype` are ignored. `JSON.parse`
 *   creates `__proto__` as an ordinary own key, and assigning it onto the
 *   result would swap the result's prototype and let a config file inject
 *   inherited properties.
 *
 * @param {object} target - Base object (e.g. the current config layer).
 * @param {object} source - Overrides to apply on top of `target`.
 * @returns {object} A new object containing the merged values.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function deepMerge(target, source) {
  const isMergeable = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);
  const result = { ...target };
  for (const [key, sourceVal] of Object.entries(source)) {
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') {
      continue;
    }
    if (sourceVal === undefined) {
      continue;
    }
    const targetVal = result[key];
    // Recurse only when both sides are plain containers; an array on either
    // side is treated as an atomic value and replaced.
    result[key] =
      isMergeable(sourceVal) && isMergeable(targetVal)
        ? deepMerge(targetVal, sourceVal)
        : sourceVal;
  }
  return result;
}
24
/**
 * Read a JSON config file and return its top-level object.
 *
 * This is best-effort and never throws: any failure yields `{}` so the caller
 * can continue with the remaining config layers. A missing file is the normal
 * case and is silent. Every other failure (unreadable file, invalid JSON, or
 * JSON whose top level is not an object) is reported on stderr so a broken
 * config is not ignored without a trace. stdout is never written to.
 *
 * @param {string} path - Path of the config file.
 * @returns {object} The parsed config object, or `{}` when unavailable.
 */
function loadConfigFile(path) {
  const warn = (reason) => {
    process.stderr.write(`[iris] Ignoring config file ${path}: ${reason}\n`);
  };

  let content;
  try {
    content = readFileSync(path, 'utf-8');
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      warn(`cannot read file (${err?.message ?? err})`);
    }
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch (err) {
    warn(`invalid JSON (${err?.message ?? err})`);
    return {};
  }

  // `null`, arrays and scalars are valid JSON but not a config object; passing
  // them on would crash or corrupt the merge step.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    warn('top-level JSON value must be an object');
    return {};
  }
  return parsed;
}
33
/**
 * Build a partial config from the `IRIS_*` environment variables.
 *
 * Only variables that are set and non-empty contribute a key, so the result
 * can be merged over lower-priority layers without erasing their values.
 *
 * Port variables (`IRIS_PORT`, `IRIS_DASHBOARD_PORT`) must be decimal integers
 * in the range 0-65535. Anything else is reported on stderr and ignored, so
 * a typo does not replace a valid port with `NaN`.
 * `IRIS_ALLOWED_ORIGINS` is a comma-separated list; entries are trimmed and
 * empty entries (e.g. from a trailing comma) are dropped.
 *
 * @returns {object} Partial config containing only the overridden sections.
 */
function loadEnvVars() {
  const env = process.env;
  const config = {};

  // Returns the port held by env[name], or undefined when unset or invalid.
  const readPort = (name) => {
    const raw = env[name];
    if (!raw) {
      return undefined;
    }
    const text = raw.trim();
    const port = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
    if (!Number.isInteger(port) || port > 65535) {
      process.stderr.write(
        `[iris] Ignoring ${name}="${raw}": expected an integer port between 0 and 65535\n`,
      );
      return undefined;
    }
    return port;
  };

  if (env.IRIS_TRANSPORT) {
    config.transport = { type: env.IRIS_TRANSPORT };
  }
  const port = readPort('IRIS_PORT');
  if (port !== undefined) {
    config.transport = { ...config.transport, port };
  }

  if (env.IRIS_DB_PATH) {
    config.storage = { type: 'sqlite', path: env.IRIS_DB_PATH };
  }

  if (env.IRIS_LOG_LEVEL) {
    config.logging = { level: env.IRIS_LOG_LEVEL };
  }

  if (env.IRIS_DASHBOARD) {
    // Only the exact string 'true' enables the dashboard.
    config.dashboard = { enabled: env.IRIS_DASHBOARD === 'true' };
  }
  const dashboardPort = readPort('IRIS_DASHBOARD_PORT');
  if (dashboardPort !== undefined) {
    config.dashboard = { ...config.dashboard, port: dashboardPort };
  }

  if (env.IRIS_API_KEY) {
    config.security = { ...config.security, apiKey: env.IRIS_API_KEY };
  }
  if (env.IRIS_ALLOWED_ORIGINS) {
    const allowedOrigins = env.IRIS_ALLOWED_ORIGINS.split(',')
      .map((origin) => origin.trim())
      .filter((origin) => origin !== '');
    // A value made only of separators carries no origins; keep the lower layers.
    if (allowedOrigins.length > 0) {
      config.security = { ...config.security, allowedOrigins };
    }
  }

  return config;
}
67
/**
 * Translate parsed CLI arguments into a partial config.
 *
 * A section is emitted only when at least one of its arguments was supplied.
 * Most arguments are tested for truthiness; `dashboard` is tested against
 * `undefined` so an explicit `false` still overrides lower-priority layers.
 *
 * @param {object} args - Parsed CLI arguments.
 * @returns {object} Partial config containing only the overridden sections.
 */
function cliArgsToConfig(args) {
  const { transport, port, dbPath, dashboard, dashboardPort, apiKey } = args;
  const overrides = {};

  if (transport || port) {
    overrides.transport = {
      ...(transport ? { type: transport } : {}),
      ...(port ? { port } : {}),
    };
  }

  if (dbPath) {
    overrides.storage = { type: 'sqlite', path: dbPath };
  }

  const hasDashboardFlag = dashboard !== undefined;
  if (hasDashboardFlag || dashboardPort) {
    overrides.dashboard = {
      ...(hasDashboardFlag ? { enabled: dashboard } : {}),
      ...(dashboardPort ? { port: dashboardPort } : {}),
    };
  }

  if (apiKey) {
    overrides.security = { apiKey };
  }

  return overrides;
}
89
/**
 * Build the effective configuration.
 *
 * Layers are merged in increasing priority: built-in defaults, the JSON config
 * file, `IRIS_*` environment variables, then CLI arguments. As side effects,
 * the `.iris` directory under the user's home directory and the directory
 * that will hold the database file are created when they do not exist.
 *
 * @param {object} [cliArgs] - Parsed CLI arguments; `cliArgs.config` selects
 *   the config file, defaulting to `config.json` inside the `.iris` directory.
 * @returns {object} The merged configuration.
 */
export function loadConfig(cliArgs) {
  const ensureDir = (dir) => {
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  };

  const irisHome = join(homedir(), '.iris');
  ensureDir(irisHome);

  // Lowest priority first; each layer overrides the ones before it.
  const layers = [
    loadConfigFile(cliArgs?.config ?? join(irisHome, 'config.json')),
    loadEnvVars(),
    cliArgs ? cliArgsToConfig(cliArgs) : {},
  ];
  const config = layers.reduce((merged, layer) => deepMerge(merged, layer), defaultConfig);

  ensureDir(dirname(config.storage.path));
  return config;
}