datadog-mcp 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -127,7 +127,8 @@ When running with `--transport=http`:
127
127
  | `monitors` | delete | Alerting | Delete a monitor | `monitors_write` |
128
128
  | `monitors` | mute | Alerting | Mute a monitor | `monitors_write` |
129
129
  | `monitors` | unmute | Alerting | Unmute a monitor | `monitors_write` |
130
- | `monitors` | top | Alerting | Top N monitors by alert frequency with real monitor names and context breakdown. Groups without context tags are included as "no_context" | `monitors_read` |
130
+ | `monitors` | top | Alerting | Top N monitors by alert frequency with real monitor names and context breakdown. **WARNING:** `total_count` includes renotifies/re-evaluations (Datadog emits a renotify event every `renotify_interval` minutes while Alert). For real fires use `action=history`. | `monitors_read` |
131
+ | `monitors` | history | Alerting | Count and list real state transitions for one monitor over a time window. Filters by `transitionType` (default `["alert","alert recovery"]` — fires+recoveries, excludes renotifies) and optional `group`. Returns `{transitions: [...], count, meta}` where `count` is the number of real transitions (e.g. for one always-Alert burn-rate monitor over 7d: 98 raw events vs **38 real transitions**). | `monitors_read`, `events_read` |
131
132
  | `dashboards` | list | Visualization | List all dashboards | `dashboards_read` |
132
133
  | `dashboards` | get | Visualization | Get dashboard by ID | `dashboards_read` |
133
134
  | `dashboards` | create | Visualization | Create a new dashboard | `dashboards_write` |
@@ -154,7 +155,7 @@ When running with `--transport=http`:
154
155
  | `events` | list | Events | List events | `events_read` |
155
156
  | `events` | get | Events | Get event by ID | `events_read` |
156
157
  | `events` | create | Events | Create an event | `events_read` |
157
- | `events` | search | Events | Search events with v2 API and cursor pagination | `events_read` |
158
+ | `events` | search | Events | Search events with v2 API and cursor pagination. Optional `transitionType` filter (e.g. `["alert","alert recovery"]`) restricts to monitor state-transition events — without it, `source:alert` includes renotifies. For monitor-specific fires use `monitors action=history`. | `events_read` |
158
159
  | `events` | aggregate | Events | Client-side aggregation by monitor_name, source, etc. | `events_read` |
159
160
  | `events` | top | Events | Top N event groups by count with generic groupBy support (deployments, configs, alerts, etc.). Groups without context tags are included as "no_context" | `events_read` |
160
161
  | `events` | timeseries | Events | Time-bucketed alert trends (hourly/daily counts) | `events_read` |
package/dist/index.js CHANGED
@@ -302945,6 +302945,20 @@ var InputSchema = {
302945
302945
  ),
302946
302946
  maxEvents: external_exports.number().min(1).max(5e3).optional().describe(
302947
302947
  "Maximum events to fetch for grouping in top action (default: 5000, max: 5000). Higher = more accurate but slower"
302948
+ ),
302949
+ // Monitor transition filter (additive — see requirement 5.2 / monitors action=history)
302950
+ transitionType: external_exports.array(
302951
+ external_exports.enum([
302952
+ "alert",
302953
+ "alert recovery",
302954
+ "warning",
302955
+ "warning recovery",
302956
+ "no data",
302957
+ "no data recovery",
302958
+ "renotify"
302959
+ ])
302960
+ ).optional().describe(
302961
+ 'Filter events by monitor state transition type. When set, restricts results to events with @monitor.transition.transition_type matching any value. Use ["alert","alert recovery"] to count real fires/recoveries and skip renotifies. Empty array is treated as undefined (no filter). For a fires-only count by monitor ID, prefer monitors action=history.'
302948
302962
  )
302949
302963
  };
302950
302964
  function extractMonitorInfo(title) {
@@ -303172,6 +303186,9 @@ async function createEventV1(api, params) {
303172
303186
  }
303173
303187
  };
303174
303188
  }
303189
/**
 * Render a single search term so it can be placed inside an event query.
 *
 * A value made only of ASCII letters, digits, `_`, `.` and `-` is returned
 * unchanged. Anything else (a value containing spaces, the empty string, ...)
 * is wrapped in double quotes. Backslashes and double quotes inside the value
 * are backslash-escaped first so the value cannot close the quoted term early.
 *
 * @param {string} value - Raw term (non-strings are coerced with String()).
 * @returns {string} The bare or quoted term.
 */
function quoteIfNeeded(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_.-]+$/.test(text)) {
    return text;
  }
  // Escape backslashes before quotes so the escapes themselves stay intact.
  const escaped = text.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  return `"${escaped}"`;
}
303175
303192
  function buildEventQuery(params) {
303176
303193
  const parts = [];
303177
303194
  if (params.query) {
@@ -303189,6 +303206,10 @@ function buildEventQuery(params) {
303189
303206
  if (params.priority) {
303190
303207
  parts.push(`priority:${params.priority}`);
303191
303208
  }
303209
+ if (params.transitionType && params.transitionType.length > 0) {
303210
+ const inner = params.transitionType.map(quoteIfNeeded).join(" OR ");
303211
+ parts.push(`@monitor.transition.transition_type:(${inner})`);
303212
+ }
303192
303213
  return parts.length > 0 ? parts.join(" ") : "*";
303193
303214
  }
303194
303215
  async function searchEventsV2(api, params, limits, site) {
@@ -303204,7 +303225,8 @@ async function searchEventsV2(api, params, limits, site) {
303204
303225
  query: params.query,
303205
303226
  sources: params.sources,
303206
303227
  tags: params.tags,
303207
- priority: params.priority
303228
+ priority: params.priority,
303229
+ transitionType: params.transitionType
303208
303230
  });
303209
303231
  const effectiveLimit = params.limit ?? limits.defaultLimit;
303210
303232
  const body = {
@@ -303247,7 +303269,8 @@ async function aggregateEventsV2(api, params, limits, site) {
303247
303269
  const fullQuery = buildEventQuery({
303248
303270
  query: params.query,
303249
303271
  sources: params.sources,
303250
- tags: params.tags
303272
+ tags: params.tags,
303273
+ transitionType: params.transitionType
303251
303274
  });
303252
303275
  const groupByFields = params.groupBy ?? ["monitor_name"];
303253
303276
  const maxEventsToAggregate = 1e4;
@@ -303329,7 +303352,8 @@ async function topEventsV2(api, params, limits, site) {
303329
303352
  to: params.to,
303330
303353
  sources: params.sources,
303331
303354
  tags: effectiveTags,
303332
- limit: params.maxEvents ?? 5e3
303355
+ limit: params.maxEvents ?? 5e3,
303356
+ transitionType: params.transitionType
303333
303357
  },
303334
303358
  limits,
303335
303359
  site
@@ -303418,7 +303442,8 @@ async function timeseriesEventsV2(api, params, limits, site) {
303418
303442
  const fullQuery = buildEventQuery({
303419
303443
  query: params.query ?? "source:alert",
303420
303444
  sources: params.sources,
303421
- tags: params.tags
303445
+ tags: params.tags,
303446
+ transitionType: params.transitionType
303422
303447
  });
303423
303448
  const intervalMs = parseIntervalToMs(params.interval);
303424
303449
  const groupByFields = params.groupBy ?? ["monitor_name"];
@@ -303503,7 +303528,8 @@ async function incidentsEventsV2(api, params, limits, site) {
303503
303528
  const fullQuery = buildEventQuery({
303504
303529
  query: params.query ?? "source:alert",
303505
303530
  sources: params.sources,
303506
- tags: params.tags
303531
+ tags: params.tags,
303532
+ transitionType: params.transitionType
303507
303533
  });
303508
303534
  const dedupeWindowNs = parseDurationToNs(params.dedupeWindow ?? "5m");
303509
303535
  const dedupeWindowMs = dedupeWindowNs ? Math.floor(dedupeWindowNs / 1e6) : 3e5;
@@ -303691,10 +303717,17 @@ function registerEventsTool(server, apiV1, apiV2, monitorsApi, limits, readOnly
303691
303717
  `Track Datadog events. Actions: list, get, create, search, aggregate, top, timeseries, incidents, discover.
303692
303718
  For monitor alerts, use tags: ["source:alert"].
303693
303719
 
303720
+ IMPORTANT \u2014 re-evaluation vs transition:
303721
+ - source:alert events INCLUDE renotifies and re-evaluations (every Datadog re-evaluation of an alerting monitor emits an event). A "how many times did monitor X fire" question answered with source:alert alone over-counts.
303722
+ - To restrict to real state transitions, pass transitionType (e.g. ["alert","alert recovery"]). This appends @monitor.transition.transition_type:(...) to the query and matches the design's live investigation.
303723
+ - For a fires-only numeric count rooted in a single monitor ID, prefer the higher-level primitive monitors action=history \u2014 it returns {transitions, count, meta} with the same filter applied for you.
303724
+
303725
+ transitionType: Optional array of monitor transition types (alert, alert recovery, warning, warning recovery, no data, no data recovery, renotify). Empty array is treated as undefined.
303694
303726
  top: Generic event grouping by any fields (groupBy parameter). Returns groups ranked by count with optional context breakdown.
303695
303727
  - Example: {groupBy: ["service"], message: "...", service: "api", total_count: 50, by_context: [{context: "queue:X", count: 30}]}
303696
303728
  - Use for deployments, configs, custom events, or monitor alerts
303697
303729
  - Returns "message" field (event title), NOT monitor name (use monitors tool for real names)
303730
+ - total_count includes renotifies when source:alert is used without transitionType \u2014 see monitors action=history for fires-only counts
303698
303731
  discover: Returns available tag prefixes from events.
303699
303732
  aggregate: Custom groupBy, returns pipe-delimited keys.
303700
303733
  search: Full event details.
@@ -303720,7 +303753,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303720
303753
  dedupeWindow,
303721
303754
  enrich,
303722
303755
  contextTags,
303723
- maxEvents
303756
+ maxEvents,
303757
+ transitionType
303724
303758
  }) => {
303725
303759
  try {
303726
303760
  checkReadOnly(action, readOnly);
@@ -303769,7 +303803,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303769
303803
  tags,
303770
303804
  priority,
303771
303805
  limit,
303772
- cursor
303806
+ cursor,
303807
+ transitionType
303773
303808
  },
303774
303809
  limits,
303775
303810
  site
@@ -303791,7 +303826,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303791
303826
  sources,
303792
303827
  tags,
303793
303828
  groupBy,
303794
- limit
303829
+ limit,
303830
+ transitionType
303795
303831
  },
303796
303832
  limits,
303797
303833
  site
@@ -303810,7 +303846,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303810
303846
  limit,
303811
303847
  groupBy,
303812
303848
  contextTags,
303813
- maxEvents
303849
+ maxEvents,
303850
+ transitionType
303814
303851
  },
303815
303852
  limits,
303816
303853
  site
@@ -303843,7 +303880,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303843
303880
  tags,
303844
303881
  groupBy,
303845
303882
  interval,
303846
- limit
303883
+ limit,
303884
+ transitionType
303847
303885
  },
303848
303886
  limits,
303849
303887
  site
@@ -303860,7 +303898,8 @@ incidents: Deduplicate alerts with dedupeWindow.`,
303860
303898
  sources,
303861
303899
  tags,
303862
303900
  dedupeWindow,
303863
- limit
303901
+ limit,
303902
+ transitionType
303864
303903
  },
303865
303904
  limits,
303866
303905
  site
@@ -303886,7 +303925,8 @@ var ActionSchema2 = external_exports.enum([
303886
303925
  "delete",
303887
303926
  "mute",
303888
303927
  "unmute",
303889
- "top"
303928
+ "top",
303929
+ "history"
303890
303930
  ]);
303891
303931
  var InputSchema2 = {
303892
303932
  action: ActionSchema2.describe("Action to perform"),
@@ -303907,7 +303947,24 @@ var InputSchema2 = {
303907
303947
  contextTags: external_exports.array(external_exports.string()).optional().describe(
303908
303948
  "Tag prefixes for context breakdown in top action (default: queue, service, ingress, pod_name, kube_namespace, kube_container_name)"
303909
303949
  ),
303910
- maxEvents: external_exports.number().min(1).max(5e3).optional().describe("Maximum events to fetch for top action (default: 5000, max: 5000)")
303950
+ maxEvents: external_exports.number().min(1).max(5e3).optional().describe("Maximum events to fetch for top action (default: 5000, max: 5000)"),
303951
+ // History action parameters
303952
+ transitionType: external_exports.array(
303953
+ external_exports.enum([
303954
+ "alert",
303955
+ "alert recovery",
303956
+ "warning",
303957
+ "warning recovery",
303958
+ "no data",
303959
+ "no data recovery",
303960
+ "renotify"
303961
+ ])
303962
+ ).optional().describe(
303963
+ 'For history action: filter by monitor state transition types. Default: ["alert","alert recovery"] (real fires + recoveries, excludes renotifies). Pass ["alert"] for fires only, or include "renotify" for full chronological audit.'
303964
+ ),
303965
+ group: external_exports.string().optional().describe(
303966
+ 'For history action: filter transitions to a specific multi-alert monitor group (e.g., "pod_name:foo,kube_namespace:bar"). Optional; omit for all groups.'
303967
+ )
303911
303968
  };
303912
303969
  var MonitorThresholdsSchema = external_exports.object({
303913
303970
  critical: external_exports.number().optional(),
@@ -304032,6 +304089,153 @@ function formatMonitorDetail(m, site = "datadoghq.com") {
304032
304089
  }
304033
304090
  return detail;
304034
304091
  }
304092
// Transition types used by historyMonitor when the caller passes no
// transitionType (or an empty array). Frozen so this shared default cannot be
// mutated in place; historyMonitor copies it with spread before use.
var DEFAULT_HISTORY_TRANSITION_TYPES = Object.freeze([
  "alert",
  "alert recovery"
]);
304096
/**
 * Render a single search term for the monitor history query.
 *
 * A value made only of ASCII letters, digits, `_`, `.` and `-` is returned
 * unchanged. Anything else is wrapped in double quotes, with backslashes and
 * double quotes inside the value backslash-escaped first (the same escaping
 * buildMonitorHistoryQuery applies to the group filter).
 *
 * @param {string} value - Raw term (non-strings are coerced with String()).
 * @returns {string} The bare or quoted term.
 */
function quoteIfNeeded2(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_.-]+$/.test(text)) {
    return text;
  }
  // Escape backslashes before quotes so the escapes themselves stay intact.
  const escaped = text.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  return `"${escaped}"`;
}
304099
/**
 * Assemble the event search query for one monitor's history.
 *
 * The query always contains `source:alert` and `@monitor.id:<monitorId>`.
 * A non-empty `transitionType` adds an OR-group on
 * `@monitor.transition.transition_type`; a non-empty `group` adds a quoted
 * `@monitor.groups` clause with backslashes and double quotes escaped.
 *
 * @param {{monitorId: number, transitionType?: string[], group?: string}} params
 * @returns {string} Clauses joined with single spaces.
 */
function buildMonitorHistoryQuery(params) {
  const clauses = ["source:alert", `@monitor.id:${params.monitorId}`];

  const requestedTypes = params.transitionType;
  if (requestedTypes && requestedTypes.length > 0) {
    const alternatives = requestedTypes.map(quoteIfNeeded2).join(" OR ");
    clauses.push(`@monitor.transition.transition_type:(${alternatives})`);
  }

  const groupFilter = params.group;
  if (groupFilter && groupFilter.length > 0) {
    // Backslashes first, then quotes, so the added escapes are not re-escaped.
    const safeGroup = groupFilter.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
    clauses.push(`@monitor.groups:"${safeGroup}"`);
  }

  return clauses.join(" ");
}
304112
/**
 * Tell whether a value is one of the monitor state names this module accepts:
 * "Alert", "Warn", "OK" or "No Data" (exact, case-sensitive match).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isMonitorState(value) {
  switch (value) {
    case "Alert":
    case "Warn":
    case "OK":
    case "No Data":
      return true;
    default:
      return false;
  }
}
304115
/**
 * Tell whether a value is one of the seven transition type names this module
 * accepts (exact, case-sensitive match).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isTransitionType(value) {
  const accepted = [
    "alert",
    "alert recovery",
    "warning",
    "warning recovery",
    "no data",
    "no data recovery",
    "renotify"
  ];
  return accepted.includes(value);
}
304118
/**
 * Pick an ISO-8601 timestamp for an event from its two attribute layers.
 *
 * Lookup order:
 *   1. `outer.timestamp` as a Date, or as a string the Date constructor can parse.
 *   2. `inner.timestamp` as a finite number, passed to `new Date()` (epoch
 *      milliseconds).
 *   3. `inner.timestamp` as a string: a purely numeric string is read as an
 *      integer number of epoch milliseconds, any other string is handed to the
 *      Date constructor.
 *
 * A candidate that yields an invalid Date is never serialised, because
 * `toISOString()` throws RangeError on an invalid Date. An unusable outer
 * value falls through to the inner one.
 *
 * @param {{timestamp?: Date|string}} outer - Outer event attributes.
 * @param {{timestamp?: number|string}} inner - Nested event attributes.
 * @returns {string} ISO timestamp, or "" when no usable timestamp exists.
 */
function extractTimestamp(outer, inner) {
  // Serialise a Date only when it is valid; "" signals "no usable value".
  const toIso = (date) => (Number.isNaN(date.getTime()) ? "" : date.toISOString());

  const outerTs = outer.timestamp;
  if (outerTs instanceof Date) {
    const iso = toIso(outerTs);
    if (iso !== "") return iso;
  } else if (typeof outerTs === "string" && outerTs.length > 0) {
    const iso = toIso(new Date(outerTs));
    if (iso !== "") return iso;
  }

  const innerTs = inner.timestamp;
  if (typeof innerTs === "number" && Number.isFinite(innerTs)) {
    // Finite numbers can still be outside the representable Date range.
    return toIso(new Date(innerTs));
  }
  if (typeof innerTs === "string") {
    const trimmed = innerTs.trim();
    if (trimmed.length === 0) return "";
    if (/^[+-]?\d+(\.\d+)?$/.test(trimmed)) {
      return toIso(new Date(Number.parseInt(trimmed, 10)));
    }
    // Not numeric: treat as a date string instead of letting parseInt read
    // only its leading digits (e.g. "2024-..." would otherwise become 2024 ms).
    return toIso(new Date(trimmed));
  }
  return "";
}
304139
/**
 * Convert one raw event into a flat monitor-transition record.
 *
 * Reads `event.attributes.attributes.monitor` and its `transition` (falling
 * back to `monitor.additionalProperties.transition`). Returns null when the
 * monitor or transition is missing, or when the source state, destination
 * state or transition type is not one of the accepted values.
 *
 * @param {object} event - Raw event with `id` and nested `attributes`.
 * @returns {{timestamp: string, monitorId: number, monitorName: string,
 *   group: string|null, fromState: string, toState: string,
 *   transitionType: string, eventId: string}|null}
 */
function formatMonitorTransition(event) {
  const outer = event.attributes ?? {};
  const inner = outer.attributes ?? {};

  const { monitor } = inner;
  if (!monitor) return null;

  const transition = monitor.transition ?? monitor.additionalProperties?.transition;
  if (!transition) return null;

  const fromState = transition.source_state;
  const toState = transition.destination_state;
  const transitionType = transition.transition_type;
  if (!isMonitorState(fromState) || !isMonitorState(toState) || !isTransitionType(transitionType)) {
    return null;
  }

  // Multi-alert groups are flattened to one comma-separated string.
  let group = null;
  const rawGroups = monitor.groups;
  if (Array.isArray(rawGroups) && rawGroups.length > 0) {
    group = rawGroups.map((entry) => String(entry)).join(",");
  }

  // Non-numeric ids collapse to 0, which also feeds the fallback name below.
  const monitorId = typeof monitor.id === "number" ? monitor.id : 0;
  const hasName = typeof monitor.name === "string" && monitor.name.length > 0;
  const monitorName = hasName ? monitor.name : `Monitor ${monitorId}`;

  return {
    timestamp: extractTimestamp(outer, inner),
    monitorId,
    monitorName,
    group,
    fromState,
    toState,
    transitionType,
    eventId: String(event.id ?? "")
  };
}
304171
/**
 * List the state transitions of one monitor over a time window.
 *
 * Builds a `source:alert` query scoped to the monitor id, the requested
 * transition types (DEFAULT_HISTORY_TRANSITION_TYPES when none are given) and
 * an optional group. It then pages through `eventsApi.searchEvents` in
 * timestamp order and keeps every event that formatMonitorTransition accepts.
 *
 * Paging stops when a page is empty, when no follow-up cursor is returned,
 * after 100 pages, or after 10 000 events have been examined.
 *
 * @param {object} eventsApi - Client exposing `searchEvents({ body })`.
 * @param {number} monitorId - Monitor whose history is requested.
 * @param {{from?: string, to?: string, transitionType?: string[], group?: string}} params
 * @param {{defaultTimeRangeHours: number}} limits - Supplies the default window start.
 * @param {string} site - Passed through to buildEventsUrl.
 * @returns {Promise<{transitions: object[], count: number, meta: object}>}
 *   `count` equals `transitions.length`. `meta.totalFetched` is the number of
 *   events examined. `meta.truncated` is true only when a cap stopped paging
 *   while more results were still available (events left unread in the last
 *   page, or a follow-up cursor that was never requested).
 */
async function historyMonitor(eventsApi, monitorId, params, limits, site) {
  const defaultFrom = hoursAgo(limits.defaultTimeRangeHours);
  const defaultTo = now();
  const [validFrom, validTo] = ensureValidTimeRange(
    parseTime(params.from, defaultFrom),
    parseTime(params.to, defaultTo)
  );
  // validFrom/validTo are multiplied by 1e3 here, i.e. treated as epoch seconds.
  const fromTime = new Date(validFrom * 1e3).toISOString();
  const toTime = new Date(validTo * 1e3).toISOString();
  const effectiveTransitionTypes = params.transitionType && params.transitionType.length > 0 ? params.transitionType : [...DEFAULT_HISTORY_TRANSITION_TYPES];
  const query = buildMonitorHistoryQuery({
    monitorId,
    transitionType: effectiveTransitionTypes,
    group: params.group
  });
  const transitions = [];
  const maxEventsToProcess = 1e4;
  const maxPages = 100;
  let eventCount = 0;
  let pageCount = 0;
  const body = {
    filter: {
      query,
      from: fromTime,
      to: toTime
    },
    sort: "timestamp",
    page: { limit: 1e3 }
  };
  let cursor;
  let stoppedMidPage = false;
  while (pageCount < maxPages && eventCount < maxEventsToProcess) {
    const pageBody = { ...body, page: { ...body.page, cursor } };
    const response = await eventsApi.searchEvents({ body: pageBody });
    const events = response.data ?? [];
    if (events.length === 0) {
      // Nothing more to read; clear the cursor so this is not seen as truncation.
      cursor = void 0;
      break;
    }
    let processedInPage = 0;
    for (const event of events) {
      const transition = formatMonitorTransition(event);
      if (transition !== null) {
        transitions.push(transition);
      }
      eventCount++;
      processedInPage++;
      if (eventCount >= maxEventsToProcess) break;
    }
    cursor = response.meta?.page?.after;
    if (processedInPage < events.length) {
      // The event cap was reached with unread events left in this page.
      stoppedMidPage = true;
      break;
    }
    if (!cursor) break;
    pageCount++;
  }
  // A cursor still set here means a cap (pages or events) ended the loop
  // before the result set was exhausted.
  const truncated = stoppedMidPage || Boolean(cursor);
  const resolvedGroup = params.group && params.group.length > 0 ? params.group : null;
  const count = transitions.length;
  const meta = {
    monitorId,
    query,
    from: fromTime,
    to: toTime,
    transitionTypes: effectiveTransitionTypes,
    group: resolvedGroup,
    count,
    totalFetched: eventCount,
    truncated,
    datadog_url: buildEventsUrl(query, validFrom, validTo, site)
  };
  return {
    transitions,
    count,
    meta
  };
}
304035
304239
  async function listMonitors(api, params, limits, site) {
304036
304240
  const effectiveLimit = params.limit ?? limits.defaultLimit;
304037
304241
  const response = await api.listMonitors({
@@ -304339,7 +304543,7 @@ async function topMonitors(eventsApi, monitorsApi, params, limits, site) {
304339
304543
  function registerMonitorsTool(server, api, eventsApi, limits, readOnly = false, site = "datadoghq.com") {
304340
304544
  server.tool(
304341
304545
  "monitors",
304342
- `Manage Datadog monitors. Actions: list, get, search, create, update, delete, mute, unmute, top.
304546
+ `Manage Datadog monitors. Actions: list, get, search, create, update, delete, mute, unmute, top, history.
304343
304547
  Filters: name, tags, groupStates (alert/warn/ok/no data).
304344
304548
  get/create/update return the full options object so callers can safely read-then-patch.
304345
304549
 
@@ -304361,8 +304565,25 @@ top: Ranked monitors by alert frequency with real monitor names and context brea
304361
304565
  - Returns: {rank, monitor_id, name (with {{template.vars}}), message (template), total_count, by_context}
304362
304566
  - Perfect for weekly/daily alert reports
304363
304567
  - Gets real monitor names from monitors API (not event titles)
304364
-
304365
- For generic event grouping (deployments, configs), use events tool instead.`,
304568
+ - WARNING: total_count is the raw alert-event count and INCLUDES renotifies/re-evaluations.
304569
+ For monitors stuck in Alert state, Datadog emits a renotify event every renotify_interval
304570
+ minutes, which inflates this count well beyond the number of real fires. When the question
304571
+ is "how many times did this monitor actually fire", use action=history instead.
304572
+
304573
+ history: Count and list real state transitions for one monitor over a time window.
304574
+ - Inputs: id (required, monitor ID), from/to (optional time range), transitionType (optional
304575
+ filter, defaults to ["alert","alert recovery"]), group (optional multi-alert group filter).
304576
+ - Returns: {transitions: [{timestamp, monitorId, monitorName, group, fromState, toState,
304577
+ transitionType, eventId}], count, meta}
304578
+ - count = transitions.length \u2014 the number of REAL state changes (fires + recoveries by
304579
+ default), NOT the renotify-inflated count returned by action=top or events action=search.
304580
+ - Backed by Datadog v2 events search with a hardcoded source:alert + @monitor.transition.
304581
+ transition_type filter that excludes renotifies by default. To include renotifies, pass
304582
+ transitionType including "renotify".
304583
+
304584
+ For generic event grouping (deployments, configs), use events tool instead. Note that the
304585
+ events tool's action=search with source:alert ALSO includes renotifies; use its
304586
+ transitionType filter (or this action=history) for fires-only counts.`,
304366
304587
  InputSchema2,
304367
304588
  async ({
304368
304589
  action,
@@ -304377,7 +304598,9 @@ For generic event grouping (deployments, configs), use events tool instead.`,
304377
304598
  from,
304378
304599
  to,
304379
304600
  contextTags,
304380
- maxEvents
304601
+ maxEvents,
304602
+ transitionType,
304603
+ group
304381
304604
  }) => {
304382
304605
  try {
304383
304606
  checkReadOnly(action, readOnly);
@@ -304432,6 +304655,22 @@ For generic event grouping (deployments, configs), use events tool instead.`,
304432
304655
  site
304433
304656
  )
304434
304657
  );
304658
+ case "history": {
304659
+ const monitorIdString = requireParam(id, "id", "history");
304660
+ const monitorId = Number.parseInt(monitorIdString, 10);
304661
+ if (Number.isNaN(monitorId)) {
304662
+ throw new Error(`Invalid monitor ID: ${monitorIdString}`);
304663
+ }
304664
+ return toolResult(
304665
+ await historyMonitor(
304666
+ eventsApi,
304667
+ monitorId,
304668
+ { from, to, transitionType, group },
304669
+ limits,
304670
+ site
304671
+ )
304672
+ );
304673
+ }
304435
304674
  default:
304436
304675
  throw new Error(`Unknown action: ${action}`);
304437
304676
  }