datadog-mcp 3.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -252,23 +252,57 @@ The `diverse` mode normalizes messages (strips UUIDs, timestamps, IPs, numbers)
252
252
 
253
253
  ## Events Aggregation
254
254
 
255
- Find the noisiest monitors with a single query:
255
+ ### Top Monitors Report (Best for Weekly/Daily Meteo)
256
+
257
+ Get the top alerting monitors, with an automatic breakdown of each monitor's alerts by context tag (queue, service, ingress, pod, etc.):
256
258
 
257
259
  ```
258
260
  events({ action: "top", from: "7d", limit: 10 })
259
261
  ```
260
262
 
261
- Returns:
263
+ Returns a nested structure that is well suited to reports:
262
264
  ```json
263
265
  {
264
266
  "top": [
265
- { "rank": 1, "name": "Error budget (SLI)", "alertCount": 44, "lastAlert": "..." },
266
- { "rank": 2, "name": "High number of ready messages", "alertCount": 38, "lastAlert": "..." }
267
+ {
268
+ "rank": 1,
269
+ "name": "High number of ready messages",
270
+ "monitor_id": 67860480,
271
+ "total_count": 50,
272
+ "by_context": [
273
+ {"context": "queue:state-status_tasks", "count": 30},
274
+ {"context": "queue:updated_order_service", "count": 20}
275
+ ]
276
+ },
277
+ {
278
+ "rank": 2,
279
+ "name": "Nginx 5XX errors",
280
+ "monitor_id": 134611486,
281
+ "total_count": 42,
282
+ "by_context": [
283
+ {"context": "ingress:trusk-api", "count": 29},
284
+ {"context": "ingress:backoffice", "count": 13}
285
+ ]
286
+ }
267
287
  ]
268
288
  }
269
289
  ```
270
290
 
271
- For more control, use `aggregate` with custom groupBy:
291
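+ Context tags are auto-extracted from each event's tags using these default prefixes: `queue:`, `service:`, `ingress:`, `pod_name:`, `kube_namespace:`, `kube_container_name:`. Pass `contextTags` (prefixes without the trailing colon, e.g. `["queue", "service"]`) to use a different set. Only the first matching tag of each event is counted, and monitors with no matching tag are omitted from the report.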
292
+
293
+ ### Tag Discovery
294
+
295
+ Discover available tag prefixes in your alert data:
296
+
297
+ ```
298
+ events({ action: "discover", from: "7d", tags: ["source:alert"] })
299
+ ```
300
+
301
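+ Returns: `{tagPrefixes: ["ingress", "monitor", "pod_name", "priority", "queue", "service"], sampleSize: 150, meta: {from: "...", to: "..."}}`. Prefixes are sorted alphabetically and are collected from a sample of at most 200 events.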
302
+
303
+ ### Custom Aggregation
304
+
305
+ For custom grouping patterns, use `aggregate`:
272
306
 
273
307
  ```
274
308
  events({
@@ -279,7 +313,7 @@ events({
279
313
  })
280
314
  ```
281
315
 
282
- Supported groupBy fields: `monitor_name`, `priority`, `alert_type`, `source`, `status`, `host`
316
+ Supported groupBy fields: `monitor_name`, `priority`, `alert_type`, `source`, `status`, `host`, or any tag prefix
283
317
 
284
318
  The aggregation uses v2 API with cursor pagination to stream through events efficiently (up to 10k events).
285
319
 
package/dist/index.js CHANGED
@@ -1761,7 +1761,8 @@ var ActionSchema6 = z7.enum([
1761
1761
  "aggregate",
1762
1762
  "top",
1763
1763
  "timeseries",
1764
- "incidents"
1764
+ "incidents",
1765
+ "discover"
1765
1766
  ]);
1766
1767
  var InputSchema6 = {
1767
1768
  action: ActionSchema6.describe("Action to perform"),
@@ -1783,7 +1784,14 @@ var InputSchema6 = {
1783
1784
  // Phase 2: Incidents deduplication
1784
1785
  dedupeWindow: z7.string().optional().describe("Deduplication window for incidents: 5m, 15m, 1h (default: 5m)"),
1785
1786
  // Phase 3: Monitor enrichment
1786
- enrich: z7.boolean().optional().describe("Enrich events with monitor metadata (slower, adds monitor details)")
1787
+ enrich: z7.boolean().optional().describe("Enrich events with monitor metadata (slower, adds monitor details)"),
1788
+ // Context tag extraction for top action
1789
+ contextTags: z7.array(z7.string()).optional().describe(
1790
+ "Tag prefixes for context breakdown in top action (default: queue, service, ingress, pod_name, kube_namespace, kube_container_name)"
1791
+ ),
1792
+ maxEvents: z7.number().min(1).max(5e3).optional().describe(
1793
+ "Maximum events to fetch for grouping in top action (default: 5000, max: 5000). Higher = more accurate but slower"
1794
+ )
1787
1795
  };
1788
1796
  function extractMonitorInfo(title) {
1789
1797
  const priorityMatch = title.match(/^\[P(\d+)\]\s*/);
@@ -1906,6 +1914,48 @@ function formatEventV2(e) {
1906
1914
  } : void 0
1907
1915
  };
1908
1916
  }
1917
/**
 * Return the first tag whose prefix (the text before its first ":") is one of
 * the given context prefixes.
 *
 * Tags are scanned in the order they are supplied, so the winner is decided by
 * tag order, not by any ordering of `prefixes`.
 *
 * @param {Iterable<string> | null | undefined} tags - Tags in "prefix:value" form.
 * @param {Set<string>} prefixes - Tag prefixes that count as context.
 * @returns {string | null} The full matching tag, or null when nothing matches.
 */
function findFirstContextTag(tags, prefixes) {
  // An event without a tag list simply has no context; do not throw on it.
  if (tags == null) {
    return null;
  }
  for (const tag of tags) {
    // Skip malformed entries instead of failing on `.indexOf`.
    if (typeof tag !== "string") {
      continue;
    }
    const colonIndex = tag.indexOf(":");
    // `> 0` rejects tags without a colon as well as tags with an empty prefix.
    if (colonIndex > 0 && prefixes.has(tag.slice(0, colonIndex))) {
      return tag;
    }
  }
  return null;
}
1929
/**
 * Discover which tag prefixes occur in a sample of events.
 *
 * Runs one search through `searchEventsV2` with the caller's filters and
 * collects the text before the first ":" of every tag on the returned events.
 *
 * @param {object} api - Events API client, forwarded to `searchEventsV2`.
 * @param {object} params - Search filters; any `limit` is overridden with 200.
 * @param {object} limits - Limits configuration, forwarded to `searchEventsV2`.
 * @param {string} site - Datadog site, forwarded to `searchEventsV2`.
 * @returns {Promise<{tagPrefixes: string[], sampleSize: number, meta: {from: *, to: *}}>}
 *   Alphabetically sorted unique prefixes, the number of events inspected, and
 *   the time range reported by the search.
 */
async function discoverTagsV2(api, params, limits, site) {
  // A fixed-size sample is used; the spread comes first so 200 always wins.
  const result = await searchEventsV2(api, { ...params, limit: 200 }, limits, site);
  const prefixSet = new Set();
  for (const event of result.events) {
    // Tolerate events that carry no tag list.
    for (const tag of event.tags ?? []) {
      if (typeof tag !== "string") {
        continue;
      }
      const colonIndex = tag.indexOf(":");
      // `> 0` skips tags without a colon and tags with an empty prefix.
      if (colonIndex > 0) {
        prefixSet.add(tag.slice(0, colonIndex));
      }
    }
  }
  return {
    tagPrefixes: Array.from(prefixSet).sort((a, b) => a.localeCompare(b)),
    sampleSize: result.events.length,
    meta: {
      from: result.meta.from,
      to: result.meta.to
    }
  };
}
1909
1959
  async function listEventsV1(api, params, limits) {
1910
1960
  const effectiveLimit = params.limit ?? limits.defaultLimit;
1911
1961
  const defaultFrom = hoursAgo(limits.defaultTimeRangeHours);
@@ -2095,34 +2145,79 @@ async function aggregateEventsV2(api, params, limits, site) {
2095
2145
  };
2096
2146
  }
2097
2147
  async function topEventsV2(api, params, limits, site) {
2148
+ if (params.contextTags !== void 0) {
2149
+ if (!Array.isArray(params.contextTags)) {
2150
+ throw new Error("contextTags must be an array");
2151
+ }
2152
+ if (params.contextTags.some((tag) => typeof tag !== "string" || tag.trim() === "")) {
2153
+ throw new Error("contextTags must be an array of non-empty strings");
2154
+ }
2155
+ }
2098
2156
  const effectiveQuery = params.query ?? "source:alert";
2099
2157
  const effectiveTags = params.tags ?? ["source:alert"];
2100
- const result = await aggregateEventsV2(
2158
+ const result = await searchEventsV2(
2101
2159
  api,
2102
2160
  {
2103
- ...params,
2104
2161
  query: effectiveQuery,
2162
+ from: params.from,
2163
+ to: params.to,
2164
+ sources: params.sources,
2105
2165
  tags: effectiveTags,
2106
- groupBy: params.groupBy ?? ["monitor_name"],
2107
- limit: params.limit ?? 10
2166
+ limit: params.maxEvents ?? 5e3
2108
2167
  },
2109
2168
  limits,
2110
2169
  site
2111
2170
  );
2112
- return {
2113
- top: result.buckets.map((bucket, index) => ({
2114
- rank: index + 1,
2115
- name: bucket.key,
2116
- monitorId: bucket.sample.monitorId,
2117
- alertCount: bucket.count,
2118
- lastAlert: bucket.sample.timestamp,
2119
- sample: {
2120
- title: bucket.sample.title,
2121
- source: bucket.sample.source,
2122
- alertType: bucket.sample.alertType
2171
+ const monitorGroups = /* @__PURE__ */ new Map();
2172
+ for (const event of result.events) {
2173
+ const monitorName = event.monitorInfo?.name ?? event.title;
2174
+ const monitorId = event.monitorId ?? 0;
2175
+ const key = `${monitorId}|${monitorName}`;
2176
+ let monitorGroup = monitorGroups.get(key);
2177
+ if (!monitorGroup) {
2178
+ monitorGroup = { name: monitorName, monitorId, events: [] };
2179
+ monitorGroups.set(key, monitorGroup);
2180
+ }
2181
+ monitorGroup.events.push(event);
2182
+ }
2183
+ const contextPrefixes = new Set(
2184
+ params.contextTags ?? [
2185
+ "queue",
2186
+ "service",
2187
+ "ingress",
2188
+ "pod_name",
2189
+ "kube_namespace",
2190
+ "kube_container_name"
2191
+ ]
2192
+ );
2193
+ const monitors = Array.from(monitorGroups.values()).map((monitor) => {
2194
+ const contextGroups = /* @__PURE__ */ new Map();
2195
+ for (const event of monitor.events) {
2196
+ const contextTag = findFirstContextTag(event.tags, contextPrefixes);
2197
+ if (contextTag) {
2198
+ contextGroups.set(contextTag, (contextGroups.get(contextTag) || 0) + 1);
2123
2199
  }
2124
- })),
2125
- meta: result.meta
2200
+ }
2201
+ return {
2202
+ name: monitor.name,
2203
+ monitor_id: monitor.monitorId,
2204
+ total_count: monitor.events.length,
2205
+ by_context: Array.from(contextGroups.entries()).map(([context, count]) => ({ context, count })).sort((a, b) => b.count - a.count)
2206
+ // Sort by count desc
2207
+ };
2208
+ }).filter((monitor) => monitor.by_context.length > 0);
2209
+ const topMonitors = monitors.sort((a, b) => b.total_count - a.total_count).slice(0, params.limit ?? 10).map((m, i) => ({ rank: i + 1, ...m }));
2210
+ return {
2211
+ top: topMonitors,
2212
+ meta: {
2213
+ query: effectiveQuery,
2214
+ from: result.meta.from,
2215
+ to: result.meta.to,
2216
+ totalMonitors: monitorGroups.size,
2217
+ totalEvents: result.events.length,
2218
+ contextPrefixes,
2219
+ datadog_url: result.meta.datadog_url
2220
+ }
2126
2221
  };
2127
2222
  }
2128
2223
  function parseIntervalToMs(interval) {
@@ -2411,16 +2506,15 @@ async function enrichWithMonitorMetadata(events, monitorsApi) {
2411
2506
  function registerEventsTool(server, apiV1, apiV2, monitorsApi, limits, readOnly = false, site = "datadoghq.com") {
2412
2507
  server.tool(
2413
2508
  "events",
2414
- `Track Datadog events. Actions: list, get, create, search, aggregate, top, timeseries, incidents.
2415
- IMPORTANT: For monitor alert history, use tags: ["source:alert"] to find all triggered monitors.
2416
- Filters: query (text search), sources, tags, priority, time range.
2417
- Use for: monitor alerts, deployments, incidents, change tracking.
2509
+ `Track Datadog events. Actions: list, get, create, search, aggregate, top, timeseries, incidents, discover.
2510
+ For monitor alerts, use tags: ["source:alert"].
2418
2511
 
2419
- Use action:"top" with from:"7d" to find the noisiest monitors.
2420
- Use action:"aggregate" with groupBy:["monitor_name"] for alert counts per monitor.
2421
- Use action:"timeseries" with interval:"1h" to see alert trends over time.
2422
- Use action:"incidents" with dedupeWindow:"5m" to deduplicate alerts into incidents.
2423
- Use enrich:true with search to get monitor metadata (slower).`,
2512
+ top: Returns monitors with context breakdown. Example: {name, monitor_id, total_count, by_context: [{context: "queue:X", count: 30}]}
2513
+ discover: Returns available tag prefixes from events.
2514
+ aggregate: Custom groupBy, returns pipe-delimited keys.
2515
+ search: Full event details.
2516
+ timeseries: Time-bucketed trends with interval.
2517
+ incidents: Deduplicate alerts with dedupeWindow.`,
2424
2518
  InputSchema6,
2425
2519
  async ({
2426
2520
  action,
@@ -2439,7 +2533,8 @@ Use enrich:true with search to get monitor metadata (slower).`,
2439
2533
  cursor,
2440
2534
  interval,
2441
2535
  dedupeWindow,
2442
- enrich
2536
+ enrich,
2537
+ contextTags
2443
2538
  }) => {
2444
2539
  try {
2445
2540
  checkReadOnly(action, readOnly);
@@ -2526,8 +2621,23 @@ Use enrich:true with search to get monitor metadata (slower).`,
2526
2621
  to,
2527
2622
  sources,
2528
2623
  tags,
2529
- groupBy,
2530
- limit
2624
+ limit,
2625
+ contextTags
2626
+ },
2627
+ limits,
2628
+ site
2629
+ )
2630
+ );
2631
+ case "discover":
2632
+ return toolResult(
2633
+ await discoverTagsV2(
2634
+ apiV2,
2635
+ {
2636
+ query,
2637
+ from,
2638
+ to,
2639
+ sources,
2640
+ tags
2531
2641
  },
2532
2642
  limits,
2533
2643
  site