datadog-mcp 5.8.1 → 5.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -321
- package/dist/index.js +107 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,30 +8,88 @@
|
|
|
8
8
|
|
|
9
9
|
> **DISCLAIMER**: This is a community-maintained project and is not officially affiliated with, endorsed by, or supported by Datadog, Inc. This MCP server utilizes the Datadog API but is developed independently.
|
|
10
10
|
|
|
11
|
-
MCP server providing AI assistants with full Datadog observability access. Features grep-like log search, APM trace filtering with duration/status/error queries, smart sampling modes for token efficiency, and cross-correlation between logs, traces, and metrics.
|
|
11
|
+
MCP server providing AI assistants with full Datadog observability access. Features grep-like log search, APM trace filtering with duration/status/error queries, smart sampling modes for token efficiency, and cross-correlation between logs, traces, and metrics. Supports both `stdio` (local) and `http` (remote/Kubernetes) transports.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
Minimal Claude Desktop / VS Code / Cursor config — just the two required keys:
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"mcpServers": {
|
|
20
|
+
"datadog": {
|
|
21
|
+
"command": "npx",
|
|
22
|
+
"args": ["-y", "datadog-mcp"],
|
|
23
|
+
"env": {
|
|
24
|
+
"DD_API_KEY": "your-api-key",
|
|
25
|
+
"DD_APP_KEY": "your-app-key"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
With optional tuning (EU site, custom default limits, longer log windows):
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"mcpServers": {
|
|
37
|
+
"datadog": {
|
|
38
|
+
"command": "npx",
|
|
39
|
+
"args": ["-y", "datadog-mcp"],
|
|
40
|
+
"env": {
|
|
41
|
+
"DD_API_KEY": "your-api-key",
|
|
42
|
+
"DD_APP_KEY": "your-app-key",
|
|
43
|
+
"DD_SITE": "datadoghq.eu",
|
|
44
|
+
"MCP_DEFAULT_LIMIT": "50",
|
|
45
|
+
"MCP_DEFAULT_LOG_LINES": "200",
|
|
46
|
+
"MCP_DEFAULT_METRIC_POINTS": "1000",
|
|
47
|
+
"MCP_DEFAULT_TIME_RANGE": "24"
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
To run as an HTTP server (e.g. inside a container or Kubernetes pod), add transport variables to the same `env` block:
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
"env": {
|
|
58
|
+
"DD_API_KEY": "your-api-key",
|
|
59
|
+
"DD_APP_KEY": "your-app-key",
|
|
60
|
+
"MCP_TRANSPORT": "http",
|
|
61
|
+
"MCP_PORT": "3000",
|
|
62
|
+
"MCP_HOST": "0.0.0.0"
|
|
63
|
+
}
|
|
64
|
+
```
|
|
12
65
|
|
|
13
66
|
## Configuration
|
|
14
67
|
|
|
15
|
-
### Required
|
|
68
|
+
### Required environment variables
|
|
16
69
|
|
|
17
70
|
```bash
|
|
18
71
|
DD_API_KEY=your-api-key
|
|
19
72
|
DD_APP_KEY=your-app-key
|
|
20
73
|
```
|
|
21
74
|
|
|
22
|
-
### Optional
|
|
75
|
+
### Optional environment variables
|
|
23
76
|
|
|
24
77
|
```bash
|
|
25
78
|
DD_SITE=datadoghq.com # Default. Use datadoghq.eu for EU, etc.
|
|
26
79
|
|
|
27
|
-
# Limit defaults (fallbacks when AI doesn't specify)
|
|
80
|
+
# Limit defaults (fallbacks when the AI doesn't specify)
|
|
28
81
|
MCP_DEFAULT_LIMIT=50 # General tools default limit
|
|
29
82
|
MCP_DEFAULT_LOG_LINES=200 # Logs tool default limit
|
|
30
83
|
MCP_DEFAULT_METRIC_POINTS=1000 # Metrics timeseries data points
|
|
31
84
|
MCP_DEFAULT_TIME_RANGE=24 # Default time range in hours
|
|
85
|
+
|
|
86
|
+
# Transport (alternative to CLI flags — useful in Kubernetes)
|
|
87
|
+
MCP_TRANSPORT=stdio # stdio | http
|
|
88
|
+
MCP_PORT=3000 # HTTP port
|
|
89
|
+
MCP_HOST=0.0.0.0 # HTTP host
|
|
32
90
|
```
|
|
33
91
|
|
|
34
|
-
### Optional
|
|
92
|
+
### Optional flags
|
|
35
93
|
|
|
36
94
|
```bash
|
|
37
95
|
--site=datadoghq.com # Datadog site (overrides DD_SITE)
|
|
@@ -42,25 +100,16 @@ MCP_DEFAULT_TIME_RANGE=24 # Default time range in hours
|
|
|
42
100
|
--disable-tools=synthetics,rum,security # Comma-separated list of tools to disable
|
|
43
101
|
```
|
|
44
102
|
|
|
45
|
-
##
|
|
103
|
+
## Transports
|
|
46
104
|
|
|
47
|
-
|
|
105
|
+
| Transport | When to use | Endpoints |
|
|
106
|
+
|-----------|-------------|-----------|
|
|
107
|
+
| `stdio` (default) | Local MCP clients — Claude Desktop, Cursor, VS Code | n/a (process stdin/stdout) |
|
|
108
|
+
| `http` | Remote / container / Kubernetes | `POST /mcp` · `GET /mcp` (SSE) · `DELETE /mcp` · `GET /health` |
|
|
48
109
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
"datadog": {
|
|
53
|
-
"command": "npx",
|
|
54
|
-
"args": ["-y", "datadog-mcp"],
|
|
55
|
-
"env": {
|
|
56
|
-
"DD_API_KEY": "your-api-key",
|
|
57
|
-
"DD_APP_KEY": "your-app-key",
|
|
58
|
-
"DD_SITE": "datadoghq.com"
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
```
|
|
110
|
+
Select with `--transport=http` or `MCP_TRANSPORT=http`.
|
|
111
|
+
|
|
112
|
+
## Deployment
|
|
64
113
|
|
|
65
114
|
### Docker
|
|
66
115
|
|
|
@@ -88,7 +137,7 @@ MCP_DEFAULT_TIME_RANGE=24 # Default time range in hours
|
|
|
88
137
|
|
|
89
138
|
### Kubernetes
|
|
90
139
|
|
|
91
|
-
|
|
140
|
+
Use environment variables — not container args — for transport configuration:
|
|
92
141
|
|
|
93
142
|
```yaml
|
|
94
143
|
env:
|
|
@@ -106,15 +155,6 @@ env:
|
|
|
106
155
|
|
|
107
156
|
> **Note:** Kubernetes `args:` replaces the entire Dockerfile CMD, causing Node.js to receive the flags instead of your application. Environment variables avoid this issue.
|
|
108
157
|
|
|
109
|
-
### HTTP Transport
|
|
110
|
-
|
|
111
|
-
When running with `--transport=http`:
|
|
112
|
-
|
|
113
|
-
- `POST /mcp` — MCP protocol endpoint
|
|
114
|
-
- `GET /mcp` — SSE stream for responses
|
|
115
|
-
- `DELETE /mcp` — Close session
|
|
116
|
-
- `GET /health` — Health check
|
|
117
|
-
|
|
118
158
|
## Tools
|
|
119
159
|
|
|
120
160
|
| Tool | Action | Category | Description | Required Scopes |
|
|
@@ -124,7 +164,7 @@ When running with `--transport=http`:
|
|
|
124
164
|
| `monitors` | search | Alerting | Search monitors by query | `monitors_read` |
|
|
125
165
|
| `monitors` | create | Alerting | Create a new monitor; `config` is validated against a typed schema covering documented options (notifyNoData, renotifyInterval, thresholds, …) — unknown keys surface in `warnings`. Pass `dry_run: true` to validate without creating (uses `/api/v1/monitor/validate`, allowed in read-only mode). | `monitors_write` |
|
|
126
166
|
| `monitors` | update | Alerting | Update an existing monitor; same validated schema as `create`; partial configs accepted; validation errors short-circuit before any HTTP call as `EINVALID_MONITOR_CONFIG:` | `monitors_write` |
|
|
127
|
-
| `monitors` | preview | Alerting | Render a monitor template (inline `message` or by `monitor_id`/`id`) with optional `context` of variables and conditionals. Returns `{rendered, variablesUsed, variablesMissing, conditionalsResolved}`. Supports Datadog Mustache subset: variable substitution + six documented conditionals (`is_alert`, `is_warning`, `is_no_data`, `is_recovery`, `is_alert_to_warning`, `is_warning_to_alert`); `{{#each}}`/partials throw `EUNSUPPORTED_TEMPLATE_SYNTAX`. Read-only. | `monitors_read` |
|
|
167
|
+
| `monitors` | preview | Alerting | Render a monitor template (inline `message` or by `monitor_id`/`id`) with optional `context` of variables and conditionals. Returns `{rendered, variablesUsed, variablesMissing, conditionalsResolved, tagConditionalsResolved}`. Supports Datadog Mustache subset: variable substitution + six documented conditionals (`is_alert`, `is_warning`, `is_no_data`, `is_recovery`, `is_alert_to_warning`, `is_warning_to_alert`) + tag conditionals `{{#is_match "tag" "val"}}`/`{{#is_exact_match "tag" "val"}}` (and `^` negations); `{{#each}}`/partials throw `EUNSUPPORTED_TEMPLATE_SYNTAX`. Read-only. | `monitors_read` |
|
|
128
168
|
| `monitors` | test_notification | Alerting | **Known limitation**: returns `ENOT_SUPPORTED` — Datadog has no public REST endpoint for triggering a test notification. Documentation pointer in response. | n/a |
|
|
129
169
|
| `monitors` | delete | Alerting | Delete a monitor | `monitors_write` |
|
|
130
170
|
| `monitors` | mute | Alerting | Mute a monitor | `monitors_write` |
|
|
@@ -147,7 +187,7 @@ When running with `--transport=http`:
|
|
|
147
187
|
| `logs_archives` | list, get | Logs Config | Inspect log archives (S3 / GCS / Azure destinations); per-provider credential fields are forwarded unchanged | `logs_read_archives` |
|
|
148
188
|
| `logs_archives` | create, update, delete, reorder | Logs Config | Manage archive destinations; `destination.type` validated against `s3 | gcs | azure_storage` before SDK call | `logs_write_archives` |
|
|
149
189
|
| `logs_archives` | get_order | Logs Config | Read archive evaluation order | `logs_read_archives` |
|
|
150
|
-
| `metrics` | query | Metrics | Query timeseries data. Response `meta`
|
|
190
|
+
| `metrics` | query | Metrics | Query timeseries data. Response `meta` includes `rollupRequested` (parsed from `rollup(method, seconds)`, with `methodInferred` flag), `rollupEffective` (interval derived from returned pointlist intervals + deduped `intervalsObserved` for multi-series), and `rollupOverridden: boolean` so callers can detect when Datadog silently downsampled. | `metrics_read`, `timeseries_query` |
|
|
151
191
|
| `metrics` | search | Metrics | Search for metrics by name | `metrics_read` |
|
|
152
192
|
| `metrics` | list | Metrics | List active metrics | `metrics_read` |
|
|
153
193
|
| `metrics` | metadata | Metrics | Get metric metadata | `metrics_read` |
|
|
@@ -169,8 +209,8 @@ When running with `--transport=http`:
|
|
|
169
209
|
| `incidents` | create | Incidents | Create an incident | `incident_write` |
|
|
170
210
|
| `incidents` | update | Incidents | Update an incident | `incident_write` |
|
|
171
211
|
| `incidents` | delete | Incidents | Delete an incident | `incident_write` |
|
|
172
|
-
| `slos` | list | SLOs | List SLOs | `slos_read` |
|
|
173
|
-
| `slos` | get | SLOs | Get SLO by ID | `slos_read` |
|
|
212
|
+
| `slos` | list | SLOs | List SLOs. Each item exposes `query`, `monitorIds`, `monitorTags`, `groups`, and a UI `url` so round-trips (get → edit → update) preserve definition fields. | `slos_read` |
|
|
213
|
+
| `slos` | get | SLOs | Get SLO by ID (same projection as `list`). | `slos_read` |
|
|
174
214
|
| `slos` | create | SLOs | Create an SLO | `slos_write` |
|
|
175
215
|
| `slos` | update | SLOs | Update an SLO | `slos_write` |
|
|
176
216
|
| `slos` | delete | SLOs | Delete an SLO | `slos_write` |
|
|
@@ -223,11 +263,9 @@ When running with `--transport=http`:
|
|
|
223
263
|
| `usage` | ingested_spans | Billing | Ingested spans usage | `usage_read` |
|
|
224
264
|
| `auth` | validate | Auth | Test API and App key validity | — |
|
|
225
265
|
|
|
226
|
-
##
|
|
227
|
-
|
|
228
|
-
### Limit Control
|
|
266
|
+
## Limit Control
|
|
229
267
|
|
|
230
|
-
AI assistants have full control over query limits. The environment variables set
|
|
268
|
+
AI assistants have full control over query limits. The `MCP_DEFAULT_*` environment variables only set the fallback used when the AI doesn't specify a limit — they do NOT cap what the AI can request.
|
|
231
269
|
|
|
232
270
|
| Tool | Default | Parameter | Description |
|
|
233
271
|
|------|---------|-----------|-------------|
|
|
@@ -235,292 +273,22 @@ AI assistants have full control over query limits. The environment variables set
|
|
|
235
273
|
| Metrics (timeseries) | 1000 | `pointLimit` | Data points per series (controls resolution) |
|
|
236
274
|
| General tools | 50 | `limit` | Results to return |
|
|
237
275
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
```json
|
|
241
|
-
{
|
|
242
|
-
"mcpServers": {
|
|
243
|
-
"datadog": {
|
|
244
|
-
"command": "npx",
|
|
245
|
-
"args": ["-y", "datadog-mcp"],
|
|
246
|
-
"env": {
|
|
247
|
-
"DD_API_KEY": "your-api-key",
|
|
248
|
-
"DD_APP_KEY": "your-app-key",
|
|
249
|
-
"MCP_DEFAULT_LIMIT": "50", // General fallback for most tools
|
|
250
|
-
"MCP_DEFAULT_LOG_LINES": "200", // Logs search only
|
|
251
|
-
"MCP_DEFAULT_METRIC_POINTS": "1000", // Metrics query timeseries only
|
|
252
|
-
"MCP_DEFAULT_TIME_RANGE": "24" // Default time range in hours
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
```
|
|
258
|
-
|
|
259
|
-
### Compact Mode (Logs)
|
|
260
|
-
|
|
261
|
-
Use `compact: true` when searching logs to reduce token usage. Strips custom attributes and keeps only essential fields:
|
|
262
|
-
|
|
263
|
-
```
|
|
264
|
-
logs({ action: "search", status: "error", compact: true })
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
Returns: `id`, `timestamp`, `service`, `status`, `message` (truncated), `traceId`, `spanId`, `error`
|
|
268
|
-
|
|
269
|
-
### Sampling Modes (Logs)
|
|
270
|
-
|
|
271
|
-
Control how logs are sampled with the `sample` parameter:
|
|
272
|
-
|
|
273
|
-
| Mode | Description | Use Case |
|
|
274
|
-
|------|-------------|----------|
|
|
275
|
-
| `first` | Chronological order (default) | Timeline analysis, specific events |
|
|
276
|
-
| `spread` | Evenly distributed across time range | See patterns over time |
|
|
277
|
-
| `diverse` | Deduplicated by message pattern | Error investigation (distinct error types) |
|
|
278
|
-
|
|
279
|
-
Example - find distinct error patterns:
|
|
280
|
-
```
|
|
281
|
-
logs({ action: "search", status: "error", sample: "diverse", limit: 25 })
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
The `diverse` mode normalizes messages (strips UUIDs, timestamps, IPs, numbers) to identify unique error patterns instead of returning duplicates.
|
|
285
|
-
|
|
286
|
-
## Events Aggregation
|
|
287
|
-
|
|
288
|
-
### Top Monitors Report (Monitor-Specific)
|
|
289
|
-
|
|
290
|
-
**Use `monitors` tool for monitor alerts with real monitor names:**
|
|
291
|
-
|
|
292
|
-
```
|
|
293
|
-
monitors({ action: "top", from: "7d", limit: 10 })
|
|
294
|
-
```
|
|
295
|
-
|
|
296
|
-
Returns monitors with **real names** (including {{template.vars}}) from monitors API:
|
|
297
|
-
```json
|
|
298
|
-
{
|
|
299
|
-
"top": [
|
|
300
|
-
{
|
|
301
|
-
"rank": 1,
|
|
302
|
-
"monitor_id": 67860480,
|
|
303
|
-
"name": "High number of ready messages on {{queue.name}}",
|
|
304
|
-
"message": "Queue {{queue.name}} has {{value}} ready messages",
|
|
305
|
-
"total_count": 50,
|
|
306
|
-
"by_context": [
|
|
307
|
-
{"context": "queue:email-notifications", "count": 30},
|
|
308
|
-
{"context": "queue:payment-processing", "count": 20}
|
|
309
|
-
]
|
|
310
|
-
},
|
|
311
|
-
{
|
|
312
|
-
"rank": 2,
|
|
313
|
-
"monitor_id": 134611486,
|
|
314
|
-
"name": "Nginx some requests on errors (HTTP 5XX) on {{ingress.name}}",
|
|
315
|
-
"message": "Nginx request on ingress {{ingress.name}} contains some errors (HTTP 5XX)",
|
|
316
|
-
"total_count": 42,
|
|
317
|
-
"by_context": [
|
|
318
|
-
{"context": "ingress:api-gateway", "count": 29},
|
|
319
|
-
{"context": "ingress:admin-panel", "count": 13}
|
|
320
|
-
]
|
|
321
|
-
}
|
|
322
|
-
]
|
|
323
|
-
}
|
|
324
|
-
```
|
|
325
|
-
|
|
326
|
-
### Top Events Report (Generic)
|
|
327
|
-
|
|
328
|
-
**Use `events` tool for any event type** (deployments, configs, custom events):
|
|
329
|
-
|
|
330
|
-
```
|
|
331
|
-
events({ action: "top", from: "7d", limit: 10, groupBy: ["service"] })
|
|
332
|
-
```
|
|
333
|
-
|
|
334
|
-
Returns event groups by custom fields:
|
|
335
|
-
```json
|
|
336
|
-
{
|
|
337
|
-
"top": [
|
|
338
|
-
{
|
|
339
|
-
"rank": 1,
|
|
340
|
-
"service": "api-server",
|
|
341
|
-
"message": "Deployment completed",
|
|
342
|
-
"total_count": 30,
|
|
343
|
-
"by_context": [
|
|
344
|
-
{"context": "env:prod", "count": 20},
|
|
345
|
-
{"context": "env:staging", "count": 10}
|
|
346
|
-
]
|
|
347
|
-
}
|
|
348
|
-
]
|
|
349
|
-
}
|
|
350
|
-
```
|
|
351
|
-
|
|
352
|
-
**Key Differences:**
|
|
353
|
-
- `monitors top`: Fetches real monitor names from monitors API (slower, monitor-specific)
|
|
354
|
-
- `events top`: Fast generic grouping, returns event message text (any event type)
|
|
355
|
-
|
|
356
|
-
Context tags are auto-extracted: `queue:`, `service:`, `ingress:`, `pod_name:`, `kube_namespace:`, `kube_container_name:`
|
|
357
|
-
|
|
358
|
-
### Tag Discovery
|
|
359
|
-
|
|
360
|
-
Discover available tag prefixes in your alert data:
|
|
361
|
-
|
|
362
|
-
```
|
|
363
|
-
events({ action: "discover", from: "7d", tags: ["source:alert"] })
|
|
364
|
-
```
|
|
365
|
-
|
|
366
|
-
Returns: `{tagPrefixes: ["queue", "service", "ingress", "pod_name", "monitor", "priority"], sampleSize: 150}`
|
|
367
|
-
|
|
368
|
-
### Custom Aggregation
|
|
369
|
-
|
|
370
|
-
For custom grouping patterns, use `aggregate`:
|
|
371
|
-
|
|
372
|
-
```
|
|
373
|
-
events({
|
|
374
|
-
action: "aggregate",
|
|
375
|
-
from: "7d",
|
|
376
|
-
tags: ["source:alert"],
|
|
377
|
-
groupBy: ["monitor_name", "priority"]
|
|
378
|
-
})
|
|
379
|
-
```
|
|
380
|
-
|
|
381
|
-
Supported groupBy fields: `monitor_name`, `priority`, `alert_type`, `source`, `status`, `host`, or any tag prefix
|
|
382
|
-
|
|
383
|
-
The aggregation uses v2 API with cursor pagination to stream through events efficiently (up to 10k events).
|
|
384
|
-
|
|
385
|
-
## Alert Trends (Timeseries)
|
|
386
|
-
|
|
387
|
-
Visualize alert patterns over time with time-bucketed aggregation:
|
|
388
|
-
|
|
389
|
-
```
|
|
390
|
-
events({ action: "timeseries", from: "7d", interval: "1d" })
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
Returns hourly/daily alert counts grouped by monitor:
|
|
394
|
-
```json
|
|
395
|
-
{
|
|
396
|
-
"timeseries": [
|
|
397
|
-
{ "timestamp": "2024-01-15T00:00:00Z", "counts": { "High CPU": 5, "Low Disk": 2 }, "total": 7 },
|
|
398
|
-
{ "timestamp": "2024-01-16T00:00:00Z", "counts": { "High CPU": 3 }, "total": 3 }
|
|
399
|
-
]
|
|
400
|
-
}
|
|
401
|
-
```
|
|
402
|
-
|
|
403
|
-
| Interval | Use Case |
|
|
404
|
-
|----------|----------|
|
|
405
|
-
| `1h` | Recent incident analysis (default) |
|
|
406
|
-
| `4h` | Daily patterns |
|
|
407
|
-
| `1d` | Weekly trends |
|
|
408
|
-
|
|
409
|
-
Combine with `groupBy` to see trends per monitor, source, or priority.
|
|
410
|
-
|
|
411
|
-
## Incident Deduplication
|
|
412
|
-
|
|
413
|
-
Consolidate noisy alert floods into logical incidents:
|
|
414
|
-
|
|
415
|
-
```
|
|
416
|
-
events({ action: "incidents", from: "24h", dedupeWindow: "5m" })
|
|
417
|
-
```
|
|
418
|
-
|
|
419
|
-
Groups repeated triggers within the dedupe window and pairs with recovery events:
|
|
420
|
-
```json
|
|
421
|
-
{
|
|
422
|
-
"incidents": [
|
|
423
|
-
{
|
|
424
|
-
"monitorName": "High CPU Usage",
|
|
425
|
-
"firstTrigger": "2024-01-15T10:00:00Z",
|
|
426
|
-
"lastTrigger": "2024-01-15T10:15:00Z",
|
|
427
|
-
"triggerCount": 4,
|
|
428
|
-
"recovered": true,
|
|
429
|
-
"recoveredAt": "2024-01-15T10:30:00Z",
|
|
430
|
-
"duration": "30m"
|
|
431
|
-
}
|
|
432
|
-
],
|
|
433
|
-
"meta": { "totalIncidents": 15, "recoveredCount": 12, "activeCount": 3 }
|
|
434
|
-
}
|
|
435
|
-
```
|
|
436
|
-
|
|
437
|
-
| Dedupe Window | Use Case |
|
|
438
|
-
|---------------|----------|
|
|
439
|
-
| `5m` | Flapping detection (default) |
|
|
440
|
-
| `15m` | Alert storm consolidation |
|
|
441
|
-
| `1h` | Incident grouping |
|
|
442
|
-
|
|
443
|
-
## Monitor Enrichment
|
|
444
|
-
|
|
445
|
-
Add monitor metadata to search results for deeper context:
|
|
446
|
-
|
|
447
|
-
```
|
|
448
|
-
events({ action: "search", tags: ["source:alert"], from: "1h", enrich: true })
|
|
449
|
-
```
|
|
450
|
-
|
|
451
|
-
Returns events with monitor details (type, thresholds, tags):
|
|
452
|
-
```json
|
|
453
|
-
{
|
|
454
|
-
"events": [{
|
|
455
|
-
"id": "...",
|
|
456
|
-
"title": "[Triggered on {host:prod-1}] High CPU Usage",
|
|
457
|
-
"monitorMetadata": {
|
|
458
|
-
"id": 12345,
|
|
459
|
-
"type": "metric alert",
|
|
460
|
-
"message": "CPU is above threshold",
|
|
461
|
-
"tags": ["team:platform", "env:prod"],
|
|
462
|
-
"options": { "thresholds": { "critical": 90 } }
|
|
463
|
-
}
|
|
464
|
-
}]
|
|
465
|
-
}
|
|
466
|
-
```
|
|
467
|
-
|
|
468
|
-
Note: Enrichment adds latency (fetches monitor list). Use for detailed investigation, not bulk analysis.
|
|
469
|
-
|
|
470
|
-
## Cross-Correlation
|
|
471
|
-
|
|
472
|
-
### Logs → Traces → Metrics
|
|
473
|
-
|
|
474
|
-
1. **Find errors in logs**: `logs({ action: "search", status: "error", sample: "diverse" })`
|
|
475
|
-
2. **Extract trace_id** from log attributes (`dd.trace_id`)
|
|
476
|
-
3. **Get full trace**: `traces({ action: "search", query: "trace_id:<id>" })`
|
|
477
|
-
4. **Query APM metrics** (avg): `metrics({ action: "query", query: "avg:trace.express.request.duration{service:my-service}" })`
|
|
478
|
-
5. **Query APM latency percentiles** (p95): `metrics({ action: "query", query: "p95:trace.express.request{service:my-service}" })` — note: use root metric without `.duration` suffix for percentiles
|
|
479
|
-
|
|
480
|
-
## Deep Links
|
|
481
|
-
|
|
482
|
-
All query responses include a `datadog_url` field that links directly to the Datadog UI, allowing AI assistants to provide evidence links back to the source data.
|
|
483
|
-
|
|
484
|
-
### Example Response
|
|
485
|
-
|
|
486
|
-
```json
|
|
487
|
-
{
|
|
488
|
-
"logs": [...],
|
|
489
|
-
"meta": {
|
|
490
|
-
"count": 25,
|
|
491
|
-
"query": "service:api status:error",
|
|
492
|
-
"from": "2024-01-15T10:00:00Z",
|
|
493
|
-
"to": "2024-01-15T11:00:00Z",
|
|
494
|
-
"datadog_url": "https://app.datadoghq.com/logs?query=service%3Aapi%20status%3Aerror&from_ts=1705312800000&to_ts=1705316400000"
|
|
495
|
-
}
|
|
496
|
-
}
|
|
497
|
-
```
|
|
498
|
-
|
|
499
|
-
### Supported Tools
|
|
276
|
+
Tool-level token reduction features (`compact: true` on logs, `sample: "diverse" | "spread" | "first"`, field projections, diagnostics) are surfaced in each tool's MCP description and chosen by the AI at call time.
|
|
500
277
|
|
|
501
|
-
|
|
502
|
-
|------|----------|
|
|
503
|
-
| `logs` | Logs Explorer with query and time range |
|
|
504
|
-
| `metrics` | Metrics Explorer with query and time range |
|
|
505
|
-
| `traces` | APM Traces with query and time range |
|
|
506
|
-
| `events` | Event Explorer with query and time range |
|
|
507
|
-
| `monitors` | Monitor detail page (get) or Manage Monitors (list/search) |
|
|
508
|
-
| `rum` | RUM Explorer or Session Replay |
|
|
278
|
+
## Notable behaviors
|
|
509
279
|
|
|
510
|
-
|
|
280
|
+
A handful of patterns worth knowing about — the AI can discover the rest from tool descriptions.
|
|
511
281
|
|
|
512
|
-
|
|
282
|
+
- **Renotifies vs real fires.** `monitors top` and `events search` with `source:alert` count every renotify Datadog emits (one every `renotify_interval` while a monitor is Alert). To get actual state transitions, use `monitors history` (defaults to `transitionType: ["alert","alert recovery"]`) or pass `transitionType` to `events search`.
|
|
283
|
+
- **DST-safe time buckets.** `events histogram` buckets by `hour_of_day` / `day_of_week` / `day_of_month` in any IANA `timezone` via `Intl.DateTimeFormat`. Cursor-paginates the underlying search; cap controlled by `MCP_MAX_EVENTS_HISTOGRAM` (default 5000) with `bucketCountIncomplete` + `nextCursor` on overflow.
|
|
284
|
+
- **Validate before create.** `monitors create` with `dry_run: true` calls `/api/v1/monitor/validate` instead of persisting. Allowed in `--read-only` mode.
|
|
285
|
+
- **Monitor template preview.** `monitors preview` renders a notification against a `context` payload — variable substitution + Datadog's six documented conditionals (`is_alert`, `is_warning`, `is_no_data`, `is_recovery`, `is_alert_to_warning`, `is_warning_to_alert`) + the tag conditionals `{{#is_match "tag" "val"}}` (substring) and `{{#is_exact_match "tag" "val"}}` (exact), with `^` negations and OR'd multiple comparison values (resolved against `context.variables`; case-sensitive). `{{#each}}` and partials throw `EUNSUPPORTED_TEMPLATE_SYNTAX`.
|
|
286
|
+
- **SLO round-trip.** `slos get` projects `query`, `monitorIds`, `monitorTags`, `groups`, and a UI `url` so you can edit and feed back into `slos update` without dropping definition fields.
|
|
287
|
+
- **Cross-correlation.** `logs(sample:"diverse")` → pull `dd.trace_id` → `traces(query:"trace_id:<id>")` → `metrics(query:"p95:trace.express.request{service:...}")` (root metric without `.duration` for percentiles).
|
|
513
288
|
|
|
514
|
-
|
|
515
|
-
|------|---------|
|
|
516
|
-
| `datadoghq.com` (default) | `https://app.datadoghq.com` |
|
|
517
|
-
| `datadoghq.eu` | `https://app.datadoghq.eu` |
|
|
518
|
-
| `us3.datadoghq.com` | `https://us3.datadoghq.com` |
|
|
519
|
-
| `us5.datadoghq.com` | `https://us5.datadoghq.com` |
|
|
520
|
-
| `ap1.datadoghq.com` | `https://ap1.datadoghq.com` |
|
|
521
|
-
| `ddog-gov.com` | `https://app.ddog-gov.com` |
|
|
289
|
+
## Deep links
|
|
522
290
|
|
|
523
|
-
|
|
291
|
+
Every query response includes a `datadog_url` field built for your configured `DD_SITE` — `datadoghq.com` (default), `.eu`, `us3` / `us5` / `ap1.datadoghq.com`, or `ddog-gov.com`. Supported on `logs`, `metrics`, `traces`, `events`, `monitors`, `rum`, `slos`.
|
|
524
292
|
|
|
525
293
|
## Contributing
|
|
526
294
|
|
package/dist/index.js
CHANGED
|
@@ -304458,6 +304458,28 @@ function lookupVariable(path, variables) {
|
|
|
304458
304458
|
return String(cursor);
|
|
304459
304459
|
}
|
|
304460
304460
|
var TAG_REGEX = /\{\{\s*([#^/>])?\s*([^}]*?)\s*\}\}/g;
|
|
304461
|
+
var TAG_CONDITIONAL_HEAD = /^(is_match|is_exact_match)\b/;
|
|
304462
|
+
var QUOTED_ARG_REGEX = /"([^"]*)"/g;
|
|
304463
|
+
function parseTagConditionalHead(body, prefix) {
|
|
304464
|
+
const headMatch = TAG_CONDITIONAL_HEAD.exec(body);
|
|
304465
|
+
if (!headMatch?.[1]) {
|
|
304466
|
+
return null;
|
|
304467
|
+
}
|
|
304468
|
+
const conditional = headMatch[1];
|
|
304469
|
+
const args = [];
|
|
304470
|
+
QUOTED_ARG_REGEX.lastIndex = 0;
|
|
304471
|
+
let arg;
|
|
304472
|
+
while ((arg = QUOTED_ARG_REGEX.exec(body)) !== null) {
|
|
304473
|
+
args.push(arg[1] ?? "");
|
|
304474
|
+
}
|
|
304475
|
+
const [variable, ...comparisons] = args;
|
|
304476
|
+
if (variable === void 0 || variable === "" || comparisons.length === 0) {
|
|
304477
|
+
throw unsupportedSyntaxError(
|
|
304478
|
+
`${conditional} requires a quoted tag and at least one quoted comparison value (found {{${prefix}${body}}})`
|
|
304479
|
+
);
|
|
304480
|
+
}
|
|
304481
|
+
return { conditional, variable, comparisons };
|
|
304482
|
+
}
|
|
304461
304483
|
function parseBlocks(template) {
|
|
304462
304484
|
const stack = [{ children: [] }];
|
|
304463
304485
|
let cursor = 0;
|
|
@@ -304477,16 +304499,32 @@ function parseBlocks(template) {
|
|
|
304477
304499
|
throw unsupportedSyntaxError(`partials are not supported (found {{> ${name}}})`);
|
|
304478
304500
|
}
|
|
304479
304501
|
if (prefix === "#" || prefix === "^") {
|
|
304480
|
-
|
|
304502
|
+
const tagHead = parseTagConditionalHead(name, prefix);
|
|
304503
|
+
if (tagHead) {
|
|
304504
|
+
stack.push({
|
|
304505
|
+
children: [],
|
|
304506
|
+
closer: {
|
|
304507
|
+
kind: "tag",
|
|
304508
|
+
conditional: tagHead.conditional,
|
|
304509
|
+
negated: prefix === "^",
|
|
304510
|
+
variable: tagHead.variable,
|
|
304511
|
+
comparisons: tagHead.comparisons
|
|
304512
|
+
}
|
|
304513
|
+
});
|
|
304514
|
+
} else if (name.startsWith("each") || /\s/.test(name)) {
|
|
304481
304515
|
throw unsupportedSyntaxError(`loops are not supported (found {{${prefix}${name}}})`);
|
|
304482
|
-
}
|
|
304483
|
-
if (!SUPPORTED_SET.has(name)) {
|
|
304516
|
+
} else if (!SUPPORTED_SET.has(name)) {
|
|
304484
304517
|
throw unsupportedSyntaxError(`unknown conditional '${name}' in {{${prefix}${name}}}`);
|
|
304518
|
+
} else {
|
|
304519
|
+
stack.push({
|
|
304520
|
+
children: [],
|
|
304521
|
+
closer: {
|
|
304522
|
+
kind: "boolean",
|
|
304523
|
+
conditional: name,
|
|
304524
|
+
negated: prefix === "^"
|
|
304525
|
+
}
|
|
304526
|
+
});
|
|
304485
304527
|
}
|
|
304486
|
-
stack.push({
|
|
304487
|
-
children: [],
|
|
304488
|
-
closer: { conditional: name, negated: prefix === "^" }
|
|
304489
|
-
});
|
|
304490
304528
|
} else if (prefix === "/") {
|
|
304491
304529
|
const frame = stack.pop();
|
|
304492
304530
|
if (!frame || !frame.closer) {
|
|
@@ -304501,12 +304539,23 @@ function parseBlocks(template) {
|
|
|
304501
304539
|
if (!parent) {
|
|
304502
304540
|
throw unsupportedSyntaxError("block stack underflow while closing tag");
|
|
304503
304541
|
}
|
|
304504
|
-
|
|
304505
|
-
|
|
304506
|
-
|
|
304507
|
-
|
|
304508
|
-
|
|
304509
|
-
|
|
304542
|
+
if (frame.closer.kind === "tag") {
|
|
304543
|
+
parent.children.push({
|
|
304544
|
+
kind: "tagBlock",
|
|
304545
|
+
conditional: frame.closer.conditional,
|
|
304546
|
+
negated: frame.closer.negated,
|
|
304547
|
+
variable: frame.closer.variable,
|
|
304548
|
+
comparisons: frame.closer.comparisons,
|
|
304549
|
+
children: frame.children
|
|
304550
|
+
});
|
|
304551
|
+
} else {
|
|
304552
|
+
parent.children.push({
|
|
304553
|
+
kind: "block",
|
|
304554
|
+
conditional: frame.closer.conditional,
|
|
304555
|
+
negated: frame.closer.negated,
|
|
304556
|
+
children: frame.children
|
|
304557
|
+
});
|
|
304558
|
+
}
|
|
304510
304559
|
} else {
|
|
304511
304560
|
const top = stack[stack.length - 1];
|
|
304512
304561
|
if (top) {
|
|
@@ -304531,20 +304580,46 @@ function parseBlocks(template) {
|
|
|
304531
304580
|
}
|
|
304532
304581
|
return root.children;
|
|
304533
304582
|
}
|
|
304534
|
-
function
|
|
304583
|
+
function evaluateTagConditional(name, variable, comparisons, variables) {
|
|
304584
|
+
const resolvedValue = lookupVariable(variable, variables);
|
|
304585
|
+
const value = resolvedValue ?? "";
|
|
304586
|
+
if (name === "is_exact_match") {
|
|
304587
|
+
return comparisons.some((comparison) => value === comparison);
|
|
304588
|
+
}
|
|
304589
|
+
return comparisons.some((comparison) => value.includes(comparison));
|
|
304590
|
+
}
|
|
304591
|
+
function renderBlocks(tokens, sink) {
|
|
304535
304592
|
let out = "";
|
|
304536
304593
|
for (const token of tokens) {
|
|
304537
304594
|
if (token.kind === "literal") {
|
|
304538
304595
|
out += token.text;
|
|
304539
304596
|
continue;
|
|
304540
304597
|
}
|
|
304541
|
-
|
|
304542
|
-
|
|
304543
|
-
|
|
304598
|
+
let include;
|
|
304599
|
+
if (token.kind === "tagBlock") {
|
|
304600
|
+
const matched = evaluateTagConditional(
|
|
304601
|
+
token.conditional,
|
|
304602
|
+
token.variable,
|
|
304603
|
+
token.comparisons,
|
|
304604
|
+
sink.variables
|
|
304605
|
+
);
|
|
304606
|
+
sink.tagResolved.push({
|
|
304607
|
+
name: token.conditional,
|
|
304608
|
+
negated: token.negated,
|
|
304609
|
+
variable: token.variable,
|
|
304610
|
+
comparisons: token.comparisons,
|
|
304611
|
+
matched
|
|
304612
|
+
});
|
|
304613
|
+
include = token.negated ? !matched : matched;
|
|
304614
|
+
} else {
|
|
304615
|
+
const flag = sink.conditionals[token.conditional] ?? false;
|
|
304616
|
+
sink.resolved[token.conditional] = flag;
|
|
304617
|
+
include = token.negated ? !flag : flag;
|
|
304618
|
+
}
|
|
304544
304619
|
if (include) {
|
|
304545
|
-
out += renderBlocks(token.children,
|
|
304620
|
+
out += renderBlocks(token.children, sink);
|
|
304546
304621
|
} else {
|
|
304547
|
-
renderBlocks(token.children,
|
|
304622
|
+
renderBlocks(token.children, sink);
|
|
304548
304623
|
}
|
|
304549
304624
|
}
|
|
304550
304625
|
return out;
|
|
@@ -304574,13 +304649,20 @@ function renderMonitorTemplate(template, context) {
|
|
|
304574
304649
|
const conditionals = context.conditionals ?? {};
|
|
304575
304650
|
const tree = parseBlocks(template);
|
|
304576
304651
|
const conditionalsResolved = {};
|
|
304577
|
-
const
|
|
304652
|
+
const tagConditionalsResolved = [];
|
|
304653
|
+
const afterConditionals = renderBlocks(tree, {
|
|
304654
|
+
conditionals,
|
|
304655
|
+
variables,
|
|
304656
|
+
resolved: conditionalsResolved,
|
|
304657
|
+
tagResolved: tagConditionalsResolved
|
|
304658
|
+
});
|
|
304578
304659
|
const { rendered, used, missing } = substituteVariables(afterConditionals, variables);
|
|
304579
304660
|
return {
|
|
304580
304661
|
rendered,
|
|
304581
304662
|
variablesUsed: used,
|
|
304582
304663
|
variablesMissing: missing,
|
|
304583
|
-
conditionalsResolved
|
|
304664
|
+
conditionalsResolved,
|
|
304665
|
+
tagConditionalsResolved
|
|
304584
304666
|
};
|
|
304585
304667
|
}
|
|
304586
304668
|
|
|
@@ -305324,6 +305406,10 @@ preview: Render a Datadog monitor message template against a context (read-only
|
|
|
305324
305406
|
- Inputs: either inline 'message' OR 'monitor_id' (or existing 'id'); plus optional 'context' { variables, conditionals }.
|
|
305325
305407
|
- Supported syntax: {{variable.name}} substitution and conditional blocks {{#name}}...{{/name}} / {{^name}}...{{/name}}
|
|
305326
305408
|
where name is one of: ${SUPPORTED_CONDITIONALS.join(", ")}.
|
|
305409
|
+
- Also supports Datadog tag conditionals {{#is_match "tag" "val" ...}} (substring) and
|
|
305410
|
+
{{#is_exact_match "tag" "val" ...}} (exact), plus their {{^...}} negations. The tag is resolved
|
|
305411
|
+
against context.variables; multiple comparison values are OR'd; comparison is case-sensitive.
|
|
305412
|
+
Evaluated tag conditionals are reported in 'tagConditionalsResolved'.
|
|
305327
305413
|
- Missing variables render as {{undefined:name}} markers and are reported in 'variablesMissing'.
|
|
305328
305414
|
- Loops ({{#each ...}}) and partials ({{> ...}}) return EUNSUPPORTED_TEMPLATE_SYNTAX.
|
|
305329
305415
|
- Allowed under --read-only (no mutation; at most a getMonitor load).
|