@iris-eval/mcp-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +168 -0
- package/dist/config/defaults.d.ts +2 -0
- package/dist/config/defaults.js +40 -0
- package/dist/config/index.d.ts +11 -0
- package/dist/config/index.js +106 -0
- package/dist/dashboard/assets/index-BStyrSkE.js +127 -0
- package/dist/dashboard/assets/index-DsCtYyvh.css +1 -0
- package/dist/dashboard/index.html +13 -0
- package/dist/eval/engine.d.ts +8 -0
- package/dist/eval/engine.js +61 -0
- package/dist/eval/index.d.ts +2 -0
- package/dist/eval/index.js +2 -0
- package/dist/eval/rules/completeness.d.ts +6 -0
- package/dist/eval/rules/completeness.js +79 -0
- package/dist/eval/rules/cost.d.ts +4 -0
- package/dist/eval/rules/cost.js +44 -0
- package/dist/eval/rules/custom.d.ts +2 -0
- package/dist/eval/rules/custom.js +88 -0
- package/dist/eval/rules/index.d.ts +4 -0
- package/dist/eval/rules/index.js +15 -0
- package/dist/eval/rules/relevance.d.ts +5 -0
- package/dist/eval/rules/relevance.js +87 -0
- package/dist/eval/rules/safety.d.ts +5 -0
- package/dist/eval/rules/safety.js +81 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +101 -0
- package/dist/middleware/auth.d.ts +3 -0
- package/dist/middleware/auth.js +24 -0
- package/dist/middleware/cors.d.ts +2 -0
- package/dist/middleware/cors.js +29 -0
- package/dist/middleware/error-handler.d.ts +3 -0
- package/dist/middleware/error-handler.js +19 -0
- package/dist/middleware/index.d.ts +4 -0
- package/dist/middleware/index.js +4 -0
- package/dist/middleware/rate-limit.d.ts +3 -0
- package/dist/middleware/rate-limit.js +19 -0
- package/dist/resources/dashboard-summary.d.ts +3 -0
- package/dist/resources/dashboard-summary.js +14 -0
- package/dist/resources/index.d.ts +3 -0
- package/dist/resources/index.js +6 -0
- package/dist/resources/trace-detail.d.ts +3 -0
- package/dist/resources/trace-detail.js +28 -0
- package/dist/server.d.ts +9 -0
- package/dist/server.js +14 -0
- package/dist/storage/index.d.ts +4 -0
- package/dist/storage/index.js +10 -0
- package/dist/storage/migrations/001-initial-schema.d.ts +3 -0
- package/dist/storage/migrations/001-initial-schema.js +57 -0
- package/dist/storage/migrations/index.d.ts +2 -0
- package/dist/storage/migrations/index.js +22 -0
- package/dist/storage/sqlite-adapter.d.ts +33 -0
- package/dist/storage/sqlite-adapter.js +232 -0
- package/dist/tools/evaluate-output.d.ts +4 -0
- package/dist/tools/evaluate-output.js +58 -0
- package/dist/tools/get-traces.d.ts +3 -0
- package/dist/tools/get-traces.js +53 -0
- package/dist/tools/index.d.ts +4 -0
- package/dist/tools/index.js +8 -0
- package/dist/tools/log-trace.d.ts +3 -0
- package/dist/tools/log-trace.js +80 -0
- package/dist/transport/http.d.ts +10 -0
- package/dist/transport/http.js +37 -0
- package/dist/transport/index.d.ts +3 -0
- package/dist/transport/index.js +2 -0
- package/dist/transport/stdio.d.ts +2 -0
- package/dist/transport/stdio.js +4 -0
- package/dist/types/config.d.ts +37 -0
- package/dist/types/config.js +1 -0
- package/dist/types/eval.d.ts +51 -0
- package/dist/types/eval.js +1 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/index.js +1 -0
- package/dist/types/query.d.ts +64 -0
- package/dist/types/query.js +1 -0
- package/dist/types/trace.d.ts +47 -0
- package/dist/types/trace.js +1 -0
- package/dist/utils/ids.d.ts +3 -0
- package/dist/utils/ids.js +10 -0
- package/dist/utils/logger.d.ts +8 -0
- package/dist/utils/logger.js +14 -0
- package/package.json +77 -0
- package/server.json +69 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Iris Eval Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Iris — MCP-Native Agent Eval & Observability
|
|
2
|
+
|
|
3
|
+
[npm package](https://npmjs.com/package/@iris-eval/mcp-server)
|
|
4
|
+
[CI status](https://github.com/iris-eval/mcp-server/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Node.js](https://nodejs.org)
|
|
7
|
+
|
|
8
|
+
Iris is an open-source [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that provides trace logging, quality evaluation, and drift detection for AI agents. Any MCP-compatible agent framework can discover and invoke Iris tools.
|
|
9
|
+
|
|
10
|
+
<!-- TODO: Replace with actual dashboard screenshot after running seed:demo -->
|
|
11
|
+
<!--  -->
|
|
12
|
+
|
|
13
|
+
## Quickstart
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install -g @iris-eval/mcp-server
|
|
17
|
+
iris-mcp
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Or run directly:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npx @iris-eval/mcp-server
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Docker
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
docker run -p 3000:3000 -v iris-data:/data ghcr.io/iris-eval/mcp-server
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Configuration
|
|
33
|
+
|
|
34
|
+
Iris looks for config in this order (later overrides earlier):
|
|
35
|
+
|
|
36
|
+
1. Built-in defaults
|
|
37
|
+
2. `~/.iris/config.json`
|
|
38
|
+
3. Environment variables (`IRIS_*`)
|
|
39
|
+
4. CLI arguments
|
|
40
|
+
|
|
41
|
+
### CLI Arguments
|
|
42
|
+
|
|
43
|
+
| Flag | Default | Description |
|
|
44
|
+
|------|---------|-------------|
|
|
45
|
+
| `--transport` | `stdio` | Transport type: `stdio` or `http` |
|
|
46
|
+
| `--port` | `3000` | HTTP transport port |
|
|
47
|
+
| `--db-path` | `~/.iris/iris.db` | SQLite database path |
|
|
48
|
+
| `--config` | `~/.iris/config.json` | Config file path |
|
|
49
|
+
| `--api-key` | — | API key for HTTP authentication |
|
|
50
|
+
| `--dashboard` | `false` | Enable web dashboard |
|
|
51
|
+
| `--dashboard-port` | `6920` | Dashboard port |
|
|
52
|
+
|
|
53
|
+
### Environment Variables
|
|
54
|
+
|
|
55
|
+
| Variable | Description |
|
|
56
|
+
|----------|-------------|
|
|
57
|
+
| `IRIS_TRANSPORT` | Transport type |
|
|
58
|
+
| `IRIS_PORT` | HTTP port |
|
|
59
|
+
| `IRIS_DB_PATH` | Database path |
|
|
60
|
+
| `IRIS_LOG_LEVEL` | Log level: debug, info, warn, error |
|
|
61
|
+
| `IRIS_DASHBOARD` | Enable dashboard (true/false) |
| `IRIS_DASHBOARD_PORT` | Dashboard port |
|
|
62
|
+
| `IRIS_API_KEY` | API key for HTTP authentication |
|
|
63
|
+
| `IRIS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins |
|
|
64
|
+
|
|
65
|
+
## Security
|
|
66
|
+
|
|
67
|
+
When using the HTTP transport, Iris includes production-grade security:
|
|
68
|
+
|
|
69
|
+
- **Authentication** — Set `IRIS_API_KEY` or `--api-key` to require `Authorization: Bearer <key>` on all endpoints (except `/health`). Recommended for any network-exposed deployment.
|
|
70
|
+
- **CORS** — Restricted to `http://localhost:*` by default. Configure with `IRIS_ALLOWED_ORIGINS`.
|
|
71
|
+
- **Rate limiting** — 100 requests/minute for dashboard API, 20 requests/minute for MCP endpoints. Configurable via `~/.iris/config.json`.
|
|
72
|
+
- **Security headers** — Helmet middleware applies CSP, X-Frame-Options, X-Content-Type-Options, and other standard headers.
|
|
73
|
+
- **Input validation** — All query parameters validated with Zod schemas. Malformed requests return 400.
|
|
74
|
+
- **Request size limits** — Body payloads limited to 1MB by default.
|
|
75
|
+
- **Safe regex** — User-supplied regex patterns in custom eval rules are validated against ReDoS attacks.
|
|
76
|
+
- **Structured logging** — JSON logs to stderr via pino. Never writes to stdout (reserved for stdio transport).
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Production deployment example
|
|
80
|
+
export IRIS_API_KEY="$(openssl rand -hex 32)"  # keep this value: clients must send it as the Bearer token
iris-mcp --transport http --port 3000 --dashboard
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## MCP Tools
|
|
84
|
+
|
|
85
|
+
### `log_trace`
|
|
86
|
+
|
|
87
|
+
Log an agent execution trace with spans, tool calls, and metrics.
|
|
88
|
+
|
|
89
|
+
**Input:**
|
|
90
|
+
- `agent_name` (required) — Name of the agent
|
|
91
|
+
- `input` — Agent input text
|
|
92
|
+
- `output` — Agent output text
|
|
93
|
+
- `tool_calls` — Array of tool call records
|
|
94
|
+
- `latency_ms` — Execution time in milliseconds
|
|
95
|
+
- `token_usage` — `{ prompt_tokens, completion_tokens, total_tokens }`
|
|
96
|
+
- `cost_usd` — Total cost in USD
|
|
97
|
+
- `metadata` — Arbitrary key-value metadata
|
|
98
|
+
- `spans` — Array of span objects for detailed tracing
|
|
99
|
+
|
|
100
|
+
### `evaluate_output`
|
|
101
|
+
|
|
102
|
+
Evaluate agent output quality using configurable rules.
|
|
103
|
+
|
|
104
|
+
**Input:**
|
|
105
|
+
- `output` (required) — The text to evaluate
|
|
106
|
+
- `eval_type` — Type: `completeness`, `relevance`, `safety`, `cost`, `custom`
|
|
107
|
+
- `expected` — Expected output for comparison
|
|
108
|
+
- `trace_id` — Link evaluation to a trace
|
|
109
|
+
- `custom_rules` — Array of custom rule definitions
|
|
110
|
+
|
|
111
|
+
### `get_traces`
|
|
112
|
+
|
|
113
|
+
Query stored traces with filters and pagination.
|
|
114
|
+
|
|
115
|
+
**Input:**
|
|
116
|
+
- `agent_name` — Filter by agent name
|
|
117
|
+
- `framework` — Filter by framework
|
|
118
|
+
- `since` — ISO timestamp lower bound
|
|
119
|
+
- `until` — ISO timestamp upper bound
|
|
120
|
+
- `min_score` / `max_score` — Score range filter
|
|
121
|
+
- `limit` — Results per page (default 50)
|
|
122
|
+
- `offset` — Pagination offset
|
|
123
|
+
|
|
124
|
+
## MCP Resources
|
|
125
|
+
|
|
126
|
+
- `iris://dashboard/summary` — Dashboard summary statistics
|
|
127
|
+
- `iris://traces/{trace_id}` — Full trace detail with spans and evals
|
|
128
|
+
|
|
129
|
+
## Claude Desktop
|
|
130
|
+
|
|
131
|
+
Add Iris to your Claude Desktop MCP config:
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{
|
|
135
|
+
"mcpServers": {
|
|
136
|
+
"iris-eval": {
|
|
137
|
+
"command": "npx",
|
|
138
|
+
"args": ["@iris-eval/mcp-server"]
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Then ask Claude to "log a trace" or "evaluate this output" — Iris tools are automatically available.
|
|
145
|
+
|
|
146
|
+
See [examples/claude-desktop/](examples/claude-desktop/) for more configuration options.
|
|
147
|
+
|
|
148
|
+
## Web Dashboard
|
|
149
|
+
|
|
150
|
+
Start Iris with the `--dashboard` flag to enable the web UI at `http://localhost:6920`.
|
|
151
|
+
|
|
152
|
+
## Examples
|
|
153
|
+
|
|
154
|
+
- [Claude Desktop setup](examples/claude-desktop/) — MCP config for stdio and HTTP modes
|
|
155
|
+
- [TypeScript](examples/typescript/basic-usage.ts) — MCP SDK client usage
|
|
156
|
+
- [LangChain](examples/langchain/observe-agent.py) — Agent instrumentation
|
|
157
|
+
- [CrewAI](examples/crewai/observe-crew.py) — Crew observability
|
|
158
|
+
|
|
159
|
+
## Community
|
|
160
|
+
|
|
161
|
+
- [GitHub Issues](https://github.com/iris-eval/mcp-server/issues) — Bug reports and feature requests
|
|
162
|
+
- [GitHub Discussions](https://github.com/iris-eval/mcp-server/discussions) — Questions and ideas
|
|
163
|
+
- [Contributing Guide](CONTRIBUTING.md) — How to contribute
|
|
164
|
+
- [Roadmap](docs/roadmap.md) — What's coming next
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
MIT
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { join } from 'node:path';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
3
|
+
// Per-user directory under the home folder; the default database file lives here.
const irisHome = join(homedir(), '.iris');

/**
 * Built-in configuration values.
 *
 * Each top-level key is one configuration section. `security.apiKey` is
 * deliberately present but `undefined`, so no API key is set out of the box.
 */
export const defaultConfig = {
  // Where data is persisted.
  storage: { type: 'sqlite', path: join(irisHome, 'iris.db') },
  // Identity reported by the server.
  server: { name: 'iris-eval', version: '0.1.0' },
  // Transport selection plus the host/port pair used for network listening.
  transport: { type: 'stdio', port: 3000, host: '0.0.0.0' },
  // Web dashboard is off unless explicitly enabled.
  dashboard: { enabled: false, port: 6920 },
  eval: { defaultThreshold: 0.7 },
  logging: { level: 'info' },
  retention: { days: 30 },
  security: {
    apiKey: undefined,
    allowedOrigins: ['http://localhost:*'],
    rateLimit: { api: 100, mcp: 20 },
    requestSizeLimit: '1mb',
  },
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { IrisConfig } from '../types/index.js';
|
|
2
|
+
/**
 * Optional overrides supplied on the command line and accepted by `loadConfig`.
 * Every field may be omitted.
 */
export interface CliArgs {
|
|
3
|
+
/** Transport type; the README lists `stdio` and `http`. */
transport?: string;
|
|
4
|
+
/** Port for the HTTP transport. */
port?: number;
|
|
5
|
+
/** Path of the JSON config file to read instead of the default location. */
config?: string;
|
|
6
|
+
/** Path of the SQLite database file. */
dbPath?: string;
|
|
7
|
+
/** Whether the web dashboard is enabled. */
dashboard?: boolean;
|
|
8
|
+
/** Port for the web dashboard. */
dashboardPort?: number;
|
|
9
|
+
/** API key for HTTP authentication. */
apiKey?: string;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Builds the effective configuration by layering, lowest to highest priority:
 * built-in defaults, the JSON config file, `IRIS_*` environment variables,
 * and the given CLI arguments.
 */
export declare function loadConfig(cliArgs?: CliArgs): IrisConfig;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { readFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
2
|
+
import { join, dirname } from 'node:path';
|
|
3
|
+
import { homedir } from 'node:os';
|
|
4
|
+
import { defaultConfig } from './defaults.js';
|
|
5
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
6
|
+
/**
 * Recursively merge `source` into a shallow copy of `target`.
 *
 * - Plain objects present on both sides are merged key by key.
 * - Any other defined source value (primitives, `null`, arrays) replaces the
 *   target value wholesale; arrays are never merged element-wise.
 * - `undefined` source values are ignored, so they never erase a setting.
 * - A `__proto__` key is skipped: `JSON.parse` produces it as an own property,
 *   and assigning it would swap the prototype of the merged object.
 *
 * Neither argument is mutated.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overrides applied on top of `target`.
 * @returns {object} A new merged object.
 */
function deepMerge(target, source) {
  const isPlainObject = (value) =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const result = { ...target };
  for (const [key, sourceVal] of Object.entries(source)) {
    if (sourceVal === undefined || key === '__proto__') {
      continue;
    }
    // Recurse only when both sides are plain objects; an array on the target
    // side must be replaced, not spread into an index-keyed object.
    result[key] =
      isPlainObject(sourceVal) && isPlainObject(result[key])
        ? deepMerge(result[key], sourceVal)
        : sourceVal;
  }
  return result;
}
|
|
24
|
+
/**
 * Read a JSON config file and return its contents as a plain object.
 *
 * Loading stays best-effort: every failure yields `{}` so startup can
 * continue. A missing file is the normal case and is silent. Any other
 * problem (unreadable file, invalid JSON, a root value that is not an object)
 * is reported on stderr, because silently dropping a config file can discard
 * settings such as the API key without the operator noticing.
 *
 * @param {string} path - Location of the config file.
 * @returns {object} Parsed config, or `{}` when nothing usable was found.
 */
function loadConfigFile(path) {
  let content;
  try {
    content = readFileSync(path, 'utf-8');
  }
  catch (err) {
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[iris] Could not read config file ${path}: ${err?.message ?? err}\n`);
    }
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(content);
  }
  catch (err) {
    process.stderr.write(`[iris] Ignoring config file ${path}: invalid JSON (${err.message})\n`);
    return {};
  }

  // Valid JSON can still be `null`, an array or a scalar; none of those can be
  // merged as a config layer.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    process.stderr.write(`[iris] Ignoring config file ${path}: top-level value must be a JSON object\n`);
    return {};
  }
  return parsed;
}
|
|
33
|
+
/**
 * Build a partial config from `IRIS_*` environment variables.
 *
 * Only variables that are set (non-empty) contribute a value, so the result
 * can be layered over lower-priority config without erasing it.
 *
 * Recognized variables: IRIS_TRANSPORT, IRIS_PORT, IRIS_DB_PATH,
 * IRIS_LOG_LEVEL, IRIS_DASHBOARD (enabled only for the exact string "true"),
 * IRIS_DASHBOARD_PORT, IRIS_API_KEY and IRIS_ALLOWED_ORIGINS (comma-separated).
 *
 * Port values that do not parse to an integer in 0..65535 are ignored with a
 * warning on stderr, instead of putting `NaN` into the config.
 *
 * @returns {object} Partial config containing only the provided settings.
 */
function loadEnvVars() {
  const env = process.env;

  // Parse a port variable in base 10; undefined means "leave the setting alone".
  const readPort = (name) => {
    const port = Number.parseInt(env[name], 10);
    if (Number.isInteger(port) && port >= 0 && port <= 65535) {
      return port;
    }
    process.stderr.write(`[iris] Ignoring ${name}="${env[name]}": expected a port number between 0 and 65535\n`);
    return undefined;
  };

  const config = {};
  if (env.IRIS_TRANSPORT) {
    config.transport = { type: env.IRIS_TRANSPORT };
  }
  if (env.IRIS_PORT) {
    const port = readPort('IRIS_PORT');
    if (port !== undefined) {
      config.transport = { ...config.transport, port };
    }
  }
  if (env.IRIS_DB_PATH) {
    config.storage = { type: 'sqlite', path: env.IRIS_DB_PATH };
  }
  if (env.IRIS_LOG_LEVEL) {
    config.logging = { level: env.IRIS_LOG_LEVEL };
  }
  if (env.IRIS_DASHBOARD) {
    config.dashboard = { enabled: env.IRIS_DASHBOARD === 'true' };
  }
  if (env.IRIS_DASHBOARD_PORT) {
    const port = readPort('IRIS_DASHBOARD_PORT');
    if (port !== undefined) {
      config.dashboard = { ...config.dashboard, port };
    }
  }
  if (env.IRIS_API_KEY) {
    config.security = { ...config.security, apiKey: env.IRIS_API_KEY };
  }
  if (env.IRIS_ALLOWED_ORIGINS) {
    // Drop blanks left by stray or trailing commas so no empty origin is configured.
    const allowedOrigins = env.IRIS_ALLOWED_ORIGINS.split(',')
      .map((origin) => origin.trim())
      .filter((origin) => origin !== '');
    if (allowedOrigins.length > 0) {
      config.security = { ...config.security, allowedOrigins };
    }
  }
  return config;
}
|
|
67
|
+
/**
 * Convert parsed CLI arguments into a partial config object.
 *
 * Only options that were actually supplied produce a key, so the result can
 * be layered over lower-priority config without erasing it.
 *
 * @param {object} args - Parsed CLI arguments (see `CliArgs`).
 * @returns {object} Partial config containing only the supplied options.
 */
function cliArgsToConfig(args) {
  // Numeric options: 0 is a legitimate port (lets the OS choose one), so only
  // a missing value or NaN counts as "not supplied". Non-numeric values keep
  // the plain truthiness rule.
  const isSupplied = (value) =>
    typeof value === 'number' ? !Number.isNaN(value) : Boolean(value);

  const config = {};
  if (args.transport) {
    config.transport = { type: args.transport };
  }
  if (isSupplied(args.port)) {
    config.transport = { ...config.transport, port: args.port };
  }
  if (args.dbPath) {
    config.storage = { type: 'sqlite', path: args.dbPath };
  }
  // An explicit `false` must be able to switch the dashboard off.
  if (args.dashboard !== undefined) {
    config.dashboard = { enabled: args.dashboard };
  }
  if (isSupplied(args.dashboardPort)) {
    config.dashboard = { ...config.dashboard, port: args.dashboardPort };
  }
  if (args.apiKey) {
    config.security = { ...config.security, apiKey: args.apiKey };
  }
  return config;
}
|
|
89
|
+
/**
 * Resolve the effective configuration.
 *
 * Layers are applied lowest to highest priority: built-in defaults, the JSON
 * config file, environment variables, then CLI arguments. As a side effect,
 * the `.iris` directory in the user's home and the directory that will hold
 * the database file are created when they do not exist yet.
 *
 * @param {object} [cliArgs] - Parsed CLI arguments; `config` selects an
 *   alternative config file path.
 * @returns {object} The merged configuration.
 */
export function loadConfig(cliArgs) {
  // Create the directory (including parents) only if nothing exists at that path.
  const ensureDir = (dir) => {
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  };

  const irisHome = join(homedir(), '.iris');
  ensureDir(irisHome);

  const configPath = cliArgs?.config ?? join(irisHome, 'config.json');

  // Ordered from lowest to highest precedence; later layers win.
  const overrides = [
    loadConfigFile(configPath),
    loadEnvVars(),
    cliArgs ? cliArgsToConfig(cliArgs) : {},
  ];
  const config = overrides.reduce((merged, layer) => deepMerge(merged, layer), defaultConfig);

  ensureDir(dirname(config.storage.path));
  return config;
}
|