@mastra/evals 0.1.0-alpha.5 → 0.1.0-alpha.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/CHANGELOG.md +379 -0
  2. package/README.md +185 -0
  3. package/dist/_tsup-dts-rollup.d.ts +723 -0
  4. package/dist/chunk-4VNS5WPM.js +37 -0
  5. package/dist/dist-56AYDN4X.js +17575 -0
  6. package/dist/index.d.ts +3 -0
  7. package/dist/index.js +87 -0
  8. package/dist/magic-string.es-5UDOWOAZ.js +1296 -0
  9. package/dist/metrics/llm/index.d.ts +10 -0
  10. package/dist/metrics/llm/index.js +2121 -0
  11. package/dist/metrics/nlp/index.d.ts +5 -0
  12. package/dist/metrics/nlp/index.js +189 -0
  13. package/package.json +16 -28
  14. package/src/attachListeners.ts +40 -0
  15. package/src/constants.ts +1 -0
  16. package/src/evaluation.test.ts +15 -18
  17. package/src/evaluation.ts +48 -11
  18. package/src/index.ts +1 -0
  19. package/src/metrics/judge/index.ts +4 -3
  20. package/src/metrics/llm/answer-relevancy/index.test.ts +44 -74
  21. package/src/metrics/llm/answer-relevancy/index.ts +8 -5
  22. package/src/metrics/llm/answer-relevancy/metricJudge.ts +4 -3
  23. package/src/metrics/llm/answer-relevancy/prompts.ts +26 -28
  24. package/src/metrics/llm/bias/index.test.ts +19 -34
  25. package/src/metrics/llm/bias/index.ts +13 -4
  26. package/src/metrics/llm/bias/metricJudge.ts +20 -4
  27. package/src/metrics/llm/bias/prompts.ts +27 -0
  28. package/src/metrics/llm/context-position/index.test.ts +98 -108
  29. package/src/metrics/llm/context-position/index.ts +13 -13
  30. package/src/metrics/llm/context-position/metricJudge.ts +2 -2
  31. package/src/metrics/llm/context-position/prompts.ts +31 -36
  32. package/src/metrics/llm/context-precision/index.test.ts +72 -100
  33. package/src/metrics/llm/context-precision/index.ts +13 -13
  34. package/src/metrics/llm/context-precision/metricJudge.ts +2 -2
  35. package/src/metrics/llm/context-relevancy/index.test.ts +28 -36
  36. package/src/metrics/llm/context-relevancy/index.ts +22 -12
  37. package/src/metrics/llm/context-relevancy/metricJudge.ts +20 -6
  38. package/src/metrics/llm/context-relevancy/prompts.ts +37 -0
  39. package/src/metrics/llm/contextual-recall/index.test.ts +30 -37
  40. package/src/metrics/llm/contextual-recall/index.ts +19 -12
  41. package/src/metrics/llm/contextual-recall/metricJudge.ts +19 -4
  42. package/src/metrics/llm/contextual-recall/prompts.ts +42 -1
  43. package/src/metrics/llm/faithfulness/index.test.ts +71 -109
  44. package/src/metrics/llm/faithfulness/index.ts +21 -14
  45. package/src/metrics/llm/faithfulness/metricJudge.ts +12 -12
  46. package/src/metrics/llm/hallucination/index.test.ts +66 -104
  47. package/src/metrics/llm/hallucination/index.ts +21 -14
  48. package/src/metrics/llm/hallucination/metricJudge.ts +13 -15
  49. package/src/metrics/llm/hallucination/prompts.ts +28 -35
  50. package/src/metrics/llm/index.ts +1 -0
  51. package/src/metrics/llm/prompt-alignment/index.test.ts +59 -74
  52. package/src/metrics/llm/prompt-alignment/index.ts +15 -6
  53. package/src/metrics/llm/prompt-alignment/metricJudge.ts +12 -16
  54. package/src/metrics/llm/summarization/index.test.ts +33 -75
  55. package/src/metrics/llm/summarization/index.ts +18 -9
  56. package/src/metrics/llm/summarization/metricJudge.ts +14 -27
  57. package/src/metrics/llm/summarization/prompts.ts +52 -14
  58. package/src/metrics/llm/toxicity/index.test.ts +22 -31
  59. package/src/metrics/llm/toxicity/index.ts +10 -7
  60. package/src/metrics/llm/toxicity/metricJudge.ts +7 -6
  61. package/src/metrics/llm/toxicity/prompts.ts +5 -12
  62. package/src/metrics/llm/types.ts +7 -0
  63. package/src/metrics/nlp/completeness/index.test.ts +20 -20
  64. package/src/metrics/nlp/completeness/index.ts +14 -6
  65. package/src/metrics/nlp/content-similarity/index.test.ts +17 -48
  66. package/src/metrics/nlp/content-similarity/index.ts +15 -8
  67. package/src/metrics/nlp/keyword-coverage/index.test.ts +31 -60
  68. package/src/metrics/nlp/keyword-coverage/index.ts +10 -9
  69. package/src/metrics/nlp/textual-difference/index.test.ts +34 -62
  70. package/src/metrics/nlp/textual-difference/index.ts +12 -6
  71. package/src/metrics/nlp/tone/index.test.ts +49 -72
  72. package/src/metrics/nlp/tone/index.ts +16 -9
  73. package/tsconfig.json +1 -10
  74. package/vitest.config.ts +11 -0
  75. package/jest.config.ts +0 -21
  76. package/src/metrics/nlp/types.ts +0 -13
package/CHANGELOG.md CHANGED
@@ -1,5 +1,384 @@
1
1
  # @mastra/evals
2
2
 
3
+ ## 0.1.0-alpha.51
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [d5fccfb]
8
+ - @mastra/core@0.2.0-alpha.109
9
+
10
+ ## 0.1.0-alpha.50
11
+
12
+ ### Patch Changes
13
+
14
+ - Updated dependencies [5ee67d3]
15
+ - Updated dependencies [95a4697]
16
+ - @mastra/core@0.2.0-alpha.108
17
+
18
+ ## 0.1.0-alpha.49
19
+
20
+ ### Patch Changes
21
+
22
+ - Updated dependencies [66a5392]
23
+ - @mastra/core@0.2.0-alpha.107
24
+
25
+ ## 0.1.0-alpha.48
26
+
27
+ ### Patch Changes
28
+
29
+ - a8a459a: Updated Evals table UI
30
+ - Updated dependencies [6f2c0f5]
31
+ - Updated dependencies [a8a459a]
32
+ - @mastra/core@0.2.0-alpha.106
33
+
34
+ ## 0.1.0-alpha.47
35
+
36
+ ### Patch Changes
37
+
38
+ - Updated dependencies [1420ae2]
39
+ - Updated dependencies [99f1847]
40
+ - @mastra/core@0.2.0-alpha.105
41
+
42
+ ## 0.1.0-alpha.46
43
+
44
+ ### Patch Changes
45
+
46
+ - 5fdc87c: Update evals storage in attachListeners
47
+ - b97ca96: Tracing into default storage
48
+ - 72d1990: Updated evals table schema
49
+ - Updated dependencies [5fdc87c]
50
+ - Updated dependencies [b97ca96]
51
+ - Updated dependencies [72d1990]
52
+ - Updated dependencies [cf6d825]
53
+ - Updated dependencies [10870bc]
54
+ - @mastra/core@0.2.0-alpha.104
55
+
56
+ ## 0.1.0-alpha.45
57
+
58
+ ### Patch Changes
59
+
60
+ - Updated dependencies [4534e77]
61
+ - @mastra/core@0.2.0-alpha.103
62
+
63
+ ## 0.1.0-alpha.44
64
+
65
+ ### Patch Changes
66
+
67
+ - Updated dependencies [a9345f9]
68
+ - @mastra/core@0.2.0-alpha.102
69
+
70
+ ## 0.1.0-alpha.43
71
+
72
+ ### Patch Changes
73
+
74
+ - 4f1d1a1: Enforce types and clean up package.json
75
+ - Updated dependencies [66a03ec]
76
+ - Updated dependencies [4f1d1a1]
77
+ - @mastra/core@0.2.0-alpha.101
78
+
79
+ ## 0.1.0-alpha.42
80
+
81
+ ### Patch Changes
82
+
83
+ - Updated dependencies [9d1796d]
84
+ - @mastra/core@0.2.0-alpha.100
85
+
86
+ ## 0.1.0-alpha.41
87
+
88
+ ### Patch Changes
89
+
90
+ - Updated dependencies [7d83b92]
91
+ - @mastra/core@0.2.0-alpha.99
92
+
93
+ ## 0.1.0-alpha.40
94
+
95
+ ### Patch Changes
96
+
97
+ - 70dabd9: Fix broken publish
98
+ - 202d404: Added instructions when generating evals
99
+ - Updated dependencies [70dabd9]
100
+ - Updated dependencies [202d404]
101
+ - @mastra/core@0.2.0-alpha.98
102
+
103
+ ## 0.1.0-alpha.39
104
+
105
+ ### Patch Changes
106
+
107
+ - 7892533: Updated test evals to use Mastra Storage
108
+ - d641d91: Fix exports for @mastra/evals
109
+ - Updated dependencies [07c069d]
110
+ - Updated dependencies [7892533]
111
+ - Updated dependencies [e6d8055]
112
+ - Updated dependencies [5950de5]
113
+ - Updated dependencies [df843d3]
114
+ - Updated dependencies [a870123]
115
+ - @mastra/core@0.2.0-alpha.97
116
+
117
+ ## 0.1.0-alpha.38
118
+
119
+ ### Patch Changes
120
+
121
+ - Updated dependencies [74b3078]
122
+ - @mastra/core@0.2.0-alpha.96
123
+
124
+ ## 0.1.0-alpha.37
125
+
126
+ ### Patch Changes
127
+
128
+ - Updated dependencies [9fb59d6]
129
+ - @mastra/core@0.2.0-alpha.95
130
+
131
+ ## 0.1.0-alpha.36
132
+
133
+ ### Minor Changes
134
+
135
+ - 8b416d9: Breaking changes
136
+
137
+ ### Patch Changes
138
+
139
+ - 9c10484: update all packages
140
+ - Updated dependencies [9c10484]
141
+ - Updated dependencies [8b416d9]
142
+ - @mastra/core@0.2.0-alpha.94
143
+
144
+ ## 0.1.0-alpha.35
145
+
146
+ ### Patch Changes
147
+
148
+ - Updated dependencies [5285356]
149
+ - @mastra/core@0.2.0-alpha.93
150
+
151
+ ## 0.1.0-alpha.34
152
+
153
+ ### Patch Changes
154
+
155
+ - Updated dependencies [4d4f6b6]
156
+ - @mastra/core@0.2.0-alpha.92
157
+
158
+ ## 0.1.0-alpha.33
159
+
160
+ ### Patch Changes
161
+
162
+ - Updated dependencies [d7d465a]
163
+ - Updated dependencies [d7d465a]
164
+ - Updated dependencies [2017553]
165
+ - Updated dependencies [a10b7a3]
166
+ - Updated dependencies [16e5b04]
167
+ - @mastra/core@0.2.0-alpha.91
168
+
169
+ ## 0.1.0-alpha.32
170
+
171
+ ### Patch Changes
172
+
173
+ - Updated dependencies [8151f44]
174
+ - Updated dependencies [e897f1c]
175
+ - Updated dependencies [3700be1]
176
+ - @mastra/core@0.2.0-alpha.90
177
+
178
+ ## 0.1.0-alpha.31
179
+
180
+ ### Patch Changes
181
+
182
+ - Updated dependencies [27275c9]
183
+ - @mastra/core@0.2.0-alpha.89
184
+
185
+ ## 0.1.0-alpha.30
186
+
187
+ ### Patch Changes
188
+
189
+ - Updated dependencies [ccbc581]
190
+ - @mastra/core@0.2.0-alpha.88
191
+
192
+ ## 0.1.0-alpha.29
193
+
194
+ ### Patch Changes
195
+
196
+ - Updated dependencies [7365b6c]
197
+ - @mastra/core@0.2.0-alpha.87
198
+
199
+ ## 0.1.0-alpha.28
200
+
201
+ ### Minor Changes
202
+
203
+ - 5916f9d: Update deps from fixed to ^
204
+
205
+ ### Patch Changes
206
+
207
+ - Updated dependencies [6fa4bd2]
208
+ - Updated dependencies [e2e76de]
209
+ - Updated dependencies [7f24c29]
210
+ - Updated dependencies [67637ba]
211
+ - Updated dependencies [04f3171]
212
+ - @mastra/core@0.2.0-alpha.86
213
+
214
+ ## 0.1.0-alpha.27
215
+
216
+ ### Patch Changes
217
+
218
+ - Updated dependencies [e9d1b47]
219
+ - @mastra/core@0.2.0-alpha.85
220
+
221
+ ## 0.1.0-alpha.26
222
+
223
+ ### Patch Changes
224
+
225
+ - Updated dependencies [2f17a5f]
226
+ - Updated dependencies [cb290ee]
227
+ - Updated dependencies [b4d7416]
228
+ - Updated dependencies [38b7f66]
229
+ - @mastra/core@0.2.0-alpha.84
230
+
231
+ ## 0.1.0-alpha.25
232
+
233
+ ### Patch Changes
234
+
235
+ - 9625602: Use mastra core split bundles in other packages
236
+ - 8769a62: Split core into separate entry files
237
+ - Updated dependencies [30322ce]
238
+ - Updated dependencies [78eec7c]
239
+ - Updated dependencies [9625602]
240
+ - Updated dependencies [8769a62]
241
+ - @mastra/core@0.2.0-alpha.83
242
+
243
+ ## 0.1.0-alpha.24
244
+
245
+ ### Patch Changes
246
+
247
+ - Updated dependencies [73d112c]
248
+ - @mastra/core@0.1.27-alpha.82
249
+
250
+ ## 0.1.0-alpha.23
251
+
252
+ ### Patch Changes
253
+
254
+ - Updated dependencies [9fb3039]
255
+ - @mastra/core@0.1.27-alpha.81
256
+
257
+ ## 0.1.0-alpha.22
258
+
259
+ ### Patch Changes
260
+
261
+ - cb2e997: Bundle evals package with tsup
262
+
263
+ ## 0.1.0-alpha.21
264
+
265
+ ### Patch Changes
266
+
267
+ - Updated dependencies [327ece7]
268
+ - @mastra/core@0.1.27-alpha.80
269
+
270
+ ## 0.1.0-alpha.20
271
+
272
+ ### Patch Changes
273
+
274
+ - Updated dependencies [21fe536]
275
+ - @mastra/core@0.1.27-alpha.79
276
+
277
+ ## 0.1.0-alpha.19
278
+
279
+ ### Patch Changes
280
+
281
+ - Updated dependencies [685108a]
282
+ - Updated dependencies [685108a]
283
+ - @mastra/core@0.1.27-alpha.78
284
+
285
+ ## 0.1.0-alpha.18
286
+
287
+ ### Patch Changes
288
+
289
+ - Updated dependencies [8105fae]
290
+ - @mastra/core@0.1.27-alpha.77
291
+
292
+ ## 0.1.0-alpha.17
293
+
294
+ ### Patch Changes
295
+
296
+ - Updated dependencies [ae7bf94]
297
+ - Updated dependencies [ae7bf94]
298
+ - @mastra/core@0.1.27-alpha.76
299
+
300
+ ## 0.1.0-alpha.16
301
+
302
+ ### Patch Changes
303
+
304
+ - Updated dependencies [23dcb23]
305
+ - @mastra/core@0.1.27-alpha.75
306
+
307
+ ## 0.1.0-alpha.15
308
+
309
+ ### Patch Changes
310
+
311
+ - Updated dependencies [7b87567]
312
+ - @mastra/core@0.1.27-alpha.74
313
+
314
+ ## 0.1.0-alpha.14
315
+
316
+ ### Patch Changes
317
+
318
+ - Updated dependencies [3427b95]
319
+ - @mastra/core@0.1.27-alpha.73
320
+
321
+ ## 0.1.0-alpha.13
322
+
323
+ ### Patch Changes
324
+
325
+ - 06b2c0a: Update summarization prompt and fix eval input
326
+ - Updated dependencies [e4d4ede]
327
+ - Updated dependencies [06b2c0a]
328
+ - @mastra/core@0.1.27-alpha.72
329
+
330
+ ## 0.1.0-alpha.12
331
+
332
+ ### Patch Changes
333
+
334
+ - Updated dependencies [d9c8dd0]
335
+ - @mastra/core@0.1.27-alpha.71
336
+
337
+ ## 0.1.0-alpha.11
338
+
339
+ ### Patch Changes
340
+
341
+ - bdaf834: publish packages
342
+
343
+ ## 0.1.0-alpha.10
344
+
345
+ ### Patch Changes
346
+
347
+ - Updated dependencies [dd6d87f]
348
+ - Updated dependencies [04434b6]
349
+ - @mastra/core@0.1.27-alpha.70
350
+
351
+ ## 0.1.0-alpha.9
352
+
353
+ ### Patch Changes
354
+
355
+ - 1944807: Unified logger and major step in better logs
356
+ - 9ade36e: Changed measure for evals, added endpoints, attached metrics to agent, added ui for evals in playground, and updated docs
357
+ - Updated dependencies [1944807]
358
+ - Updated dependencies [9ade36e]
359
+ - @mastra/core@0.1.27-alpha.69
360
+
361
+ ## 0.1.0-alpha.8
362
+
363
+ ### Patch Changes
364
+
365
+ - Updated dependencies [0be7181]
366
+ - Updated dependencies [0be7181]
367
+ - @mastra/core@0.1.27-alpha.68
368
+
369
+ ## 0.1.0-alpha.7
370
+
371
+ ### Patch Changes
372
+
373
+ - Updated dependencies [c8ff2f5]
374
+ - @mastra/core@0.1.27-alpha.67
375
+
376
+ ## 0.1.0-alpha.6
377
+
378
+ ### Patch Changes
379
+
380
+ - aea3c13: Fix evals export for llm and nlp
381
+
3
382
  ## 0.1.0-alpha.5
4
383
 
5
384
  ### Minor Changes
package/README.md ADDED
@@ -0,0 +1,185 @@
1
+ # @mastra/evals
2
+
3
+ A comprehensive evaluation framework for assessing AI model outputs across multiple dimensions.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @mastra/evals
9
+ ```
10
+
11
+ ## Overview
12
+
13
+ `@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes both LLM-based and NLP-based metrics, enabling both automated and model-assisted evaluation of AI responses.
14
+
15
+ ## Features
16
+
17
+ ### LLM-Based Metrics
18
+
19
+ 1. **Answer Relevancy**
20
+
21
+ - Evaluates how well an answer addresses the input question
22
+ - Considers uncertainty weighting for more nuanced scoring
23
+ - Returns detailed reasoning for scores
24
+
25
+ 2. **Bias Detection**
26
+
27
+ - Identifies potential biases in model outputs
28
+ - Analyzes opinions and statements for bias indicators
29
+ - Provides explanations for detected biases
30
+ - Configurable scoring scale
31
+
32
+ 3. **Context Precision & Relevancy**
33
+
34
+ - Assesses how well responses use provided context
35
+ - Evaluates accuracy of context usage
36
+ - Measures relevance of context to the response
37
+ - Analyzes context positioning in responses
38
+
39
+ 4. **Faithfulness**
40
+
41
+ - Verifies that responses are faithful to provided context
42
+ - Detects hallucinations or fabricated information
43
+ - Evaluates claims against provided context
44
+ - Provides detailed analysis of faithfulness breaches
45
+
46
+ 5. **Prompt Alignment**
47
+
48
+ - Measures how well responses follow given instructions
49
+ - Evaluates adherence to multiple instruction criteria
50
+ - Provides per-instruction scoring
51
+ - Supports custom instruction sets
52
+
53
+ 6. **Toxicity**
54
+ - Detects toxic or harmful content in responses
55
+ - Provides detailed reasoning for toxicity verdicts
56
+ - Configurable scoring thresholds
57
+ - Considers both input and output context
58
+
59
+ ### NLP-Based Metrics
60
+
61
+ 1. **Completeness**
62
+
63
+ - Analyzes structural completeness of responses
64
+ - Identifies missing elements from input requirements
65
+ - Provides detailed element coverage analysis
66
+ - Tracks input-output element ratios
67
+
68
+ 2. **Content Similarity**
69
+
70
+ - Measures text similarity between inputs and outputs
71
+ - Configurable for case and whitespace sensitivity
72
+ - Returns normalized similarity scores
73
+ - Uses string comparison algorithms for accuracy
74
+
75
+ 3. **Keyword Coverage**
76
+ - Tracks presence of key terms from input in output
77
+ - Provides detailed keyword matching statistics
78
+ - Calculates coverage ratios
79
+ - Useful for ensuring comprehensive responses
80
+
81
+ ## Usage
82
+
83
+ ### Basic Example
84
+
85
+ ```typescript
86
+ import { ContentSimilarityMetric, ToxicityMetric } from '@mastra/evals';
87
+
88
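+ // Initialize metrics (`openai` is a model provider factory that must be imported separately, e.g. from '@ai-sdk/openai')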
89
+ const similarityMetric = new ContentSimilarityMetric({
90
+ ignoreCase: true,
91
+ ignoreWhitespace: true,
92
+ });
93
+
94
+ const toxicityMetric = new ToxicityMetric({
95
+ model: openai('gpt-4'),
96
+ scale: 1, // Optional: adjust scoring scale
97
+ });
98
+
99
+ // Evaluate outputs
100
+ const input = 'What is the capital of France?';
101
+ const output = 'Paris is the capital of France.';
102
+
103
+ const similarityResult = await similarityMetric.measure(input, output);
104
+ const toxicityResult = await toxicityMetric.measure(input, output);
105
+
106
+ console.log('Similarity Score:', similarityResult.score);
107
+ console.log('Toxicity Score:', toxicityResult.score);
108
+ ```
109
+
110
+ ### Context-Aware Evaluation
111
+
112
+ ```typescript
113
+ import { FaithfulnessMetric } from '@mastra/evals';
114
+
115
+ // Initialize with context
116
+ const faithfulnessMetric = new FaithfulnessMetric({
117
+ model: openai('gpt-4'),
118
+ context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
119
+ scale: 1,
120
+ });
121
+
122
+ // Evaluate response against context
123
+ const result = await faithfulnessMetric.measure(
124
+ 'Tell me about Paris',
125
+ 'Paris is the capital of France with 2.2 million residents',
126
+ );
127
+
128
+ console.log('Faithfulness Score:', result.score);
129
+ console.log('Reasoning:', result.reason);
130
+ ```
131
+
132
+ ## Metric Results
133
+
134
+ Each metric returns a standardized result object containing:
135
+
136
+ - `score`: Normalized score (typically 0-1)
137
+ - `info`: Detailed information about the evaluation
138
+ - Additional metric-specific data (e.g., matched keywords, missing elements)
139
+
140
+ Some metrics also provide:
141
+
142
+ - `reason`: Detailed explanation of the score
143
+ - `verdicts`: Individual judgments that contributed to the final score
144
+
145
+ ## Telemetry and Logging
146
+
147
+ The package includes built-in telemetry and logging capabilities:
148
+
149
+ - Automatic evaluation tracking through Mastra Storage
150
+ - Integration with OpenTelemetry for performance monitoring
151
+ - Detailed evaluation traces for debugging
152
+
153
+ ```typescript
154
+ import { attachListeners } from '@mastra/evals';
155
+
156
+ // Enable basic evaluation tracking
157
+ await attachListeners();
158
+
159
+ // Store evals in Mastra Storage (if storage is enabled)
160
+ await attachListeners(mastra);
161
+ // Note: When using in-memory storage, evaluations are isolated to the test process.
162
+ // When using file storage, evaluations are persisted and can be queried later.
163
+ ```
164
+
165
+ ## Environment Variables
166
+
167
+ Required for LLM-based metrics:
168
+
169
+ - `OPENAI_API_KEY`: For OpenAI model access
170
+ - Additional provider keys as needed (Cohere, Anthropic, etc.)
171
+
172
+ ## Package Exports
173
+
174
+ ```typescript
175
+ // Main package exports
176
+ import { evaluate } from '@mastra/evals';
177
+ // NLP-specific metrics
178
+ import { ContentSimilarityMetric } from '@mastra/evals/nlp';
179
+ ```
180
+
181
+ ## Related Packages
182
+
183
+ - `@mastra/core`: Core framework functionality
184
+ - `@mastra/engine`: LLM execution engine
185
+ - `@mastra/mcp`: Model Context Protocol integration