holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (82)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +15 -4
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +295 -52
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +110 -102
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/transformers/__init__.py +23 -0
  17. holmes/core/transformers/base.py +62 -0
  18. holmes/core/transformers/llm_summarize.py +174 -0
  19. holmes/core/transformers/registry.py +122 -0
  20. holmes/core/transformers/transformer.py +31 -0
  21. holmes/main.py +5 -0
  22. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  23. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  24. holmes/plugins/toolsets/aks.yaml +64 -0
  25. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  32. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  36. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  37. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  38. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  39. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  40. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  41. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +344 -205
  42. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +189 -17
  43. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +95 -30
  44. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
  45. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +20 -20
  46. holmes/plugins/toolsets/git.py +21 -21
  47. holmes/plugins/toolsets/grafana/common.py +2 -2
  48. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  49. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
  50. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
  51. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
  52. holmes/plugins/toolsets/internet/internet.py +3 -3
  53. holmes/plugins/toolsets/internet/notion.py +3 -3
  54. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  55. holmes/plugins/toolsets/kafka.py +18 -18
  56. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  57. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  58. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  59. holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
  60. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  61. holmes/plugins/toolsets/newrelic.py +5 -5
  62. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  63. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  64. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  65. holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
  66. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
  67. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  68. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  69. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  70. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  71. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  72. holmes/plugins/toolsets/utils.py +88 -0
  73. holmes/utils/config_utils.py +91 -0
  74. holmes/utils/env.py +7 -0
  75. holmes/utils/holmes_status.py +2 -1
  76. holmes/utils/sentry_helper.py +41 -0
  77. holmes/utils/stream.py +9 -0
  78. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +10 -14
  79. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +82 -72
  80. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
  81. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
  82. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
  import subprocess
- from holmes.core.tools import StructuredToolResult, ToolResultStatus
+ from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


  def execute_bash_command(cmd: str, timeout: int, params: dict) -> StructuredToolResult:
@@ -18,11 +18,11 @@ def execute_bash_command(cmd: str, timeout: int, params: dict) -> StructuredTool
  stdout = process.stdout.strip() if process.stdout else ""
  result_data = f"{cmd}\n" f"{stdout}"

- status = ToolResultStatus.ERROR
+ status = StructuredToolResultStatus.ERROR
  if process.returncode == 0 and stdout:
- status = ToolResultStatus.SUCCESS
+ status = StructuredToolResultStatus.SUCCESS
  elif not stdout:
- status = ToolResultStatus.NO_DATA
+ status = StructuredToolResultStatus.NO_DATA

  return StructuredToolResult(
  status=status,
@@ -33,20 +33,20 @@ def execute_bash_command(cmd: str, timeout: int, params: dict) -> StructuredTool
  )
  except subprocess.TimeoutExpired:
  return StructuredToolResult(
- status=ToolResultStatus.ERROR,
+ status=StructuredToolResultStatus.ERROR,
  error=f"Error: Command '{cmd}' timed out after {timeout} seconds.",
  params=params,
  )
  except FileNotFoundError:
  # This might occur if /bin/bash is not found, or if shell=False and command is not found
  return StructuredToolResult(
- status=ToolResultStatus.ERROR,
+ status=StructuredToolResultStatus.ERROR,
  error="Error: Bash executable or command not found. Ensure bash is installed and the command is valid.",
  params=params,
  )
  except Exception as e:
  return StructuredToolResult(
- status=ToolResultStatus.ERROR,
+ status=StructuredToolResultStatus.ERROR,
  error=f"Error executing command '{cmd}': {str(e)}",
  params=params,
  )
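The change running through this hunk (and several of the hunks below) is a rename of the status enum exported by `holmes.core.tools`: `ToolResultStatus` becomes `StructuredToolResultStatus`, while the member names (`SUCCESS`, `ERROR`, `NO_DATA`) appear unchanged. A minimal sketch of downstream tool code updated for the new name (illustrative only; `wrap_output` is a hypothetical helper, not part of the package):

```python
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


def wrap_output(cmd: str, stdout: str, params: dict) -> StructuredToolResult:
    # Mirrors the status selection in execute_bash_command above.
    status = (
        StructuredToolResultStatus.SUCCESS if stdout else StructuredToolResultStatus.NO_DATA
    )
    return StructuredToolResult(status=status, data=f"{cmd}\n{stdout}", params=params)
```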
@@ -3,7 +3,7 @@ from typing import Any, Optional, Tuple, Set
  from holmes.core.tools import (
  CallablePrerequisite,
  StructuredToolResult,
- ToolResultStatus,
+ StructuredToolResultStatus,
  ToolsetTag,
  )
  from holmes.plugins.toolsets.consts import (
@@ -74,7 +74,7 @@ class CoralogixLogsToolset(BasePodLoggingToolset):
  def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
  if not self.coralogix_config:
  return StructuredToolResult(
- status=ToolResultStatus.ERROR,
+ status=StructuredToolResultStatus.ERROR,
  error=f"The {self.name} toolset is not configured",
  params=params.model_dump(),
  )
@@ -102,7 +102,9 @@ class CoralogixLogsToolset(BasePodLoggingToolset):

  return StructuredToolResult(
  status=(
- ToolResultStatus.ERROR if logs_data.error else ToolResultStatus.SUCCESS
+ StructuredToolResultStatus.ERROR
+ if logs_data.error
+ else StructuredToolResultStatus.SUCCESS
  ),
  error=logs_data.error,
  data=data,
@@ -1,6 +1,9 @@
  import json
  import logging
- from typing import Any, Optional, Dict, Union
+ import re
+ from datetime import datetime, timedelta, timezone
+ from typing import Any, Optional, Dict, Union, Tuple
+ from urllib.parse import urlparse, urlunparse
  import requests # type: ignore
  from pydantic import AnyUrl, BaseModel
  from requests.structures import CaseInsensitiveDict # type: ignore
@@ -16,6 +19,75 @@ MAX_RETRY_COUNT_ON_RATE_LIMIT = 5

  RATE_LIMIT_REMAINING_SECONDS_HEADER = "X-RateLimit-Reset"

+ # Cache for OpenAPI spec
+ _openapi_spec_cache: Dict[str, Any] = {}
+
+ # Relative time pattern (m = minutes, mo = months)
+ RELATIVE_TIME_PATTERN = re.compile(r"^-?(\d+)([hdwsy]|min|m|mo)$|^now$", re.IGNORECASE)
+
+
+ def convert_api_url_to_app_url(api_url: Union[str, AnyUrl]) -> str:
+ """
+ Convert a Datadog API URL to the corresponding web app URL.
+
+ Handles various URL formats:
+ - https://api.datadoghq.com -> https://app.datadoghq.com
+ - https://api.datadoghq.eu -> https://app.datadoghq.eu
+ - https://api.us5.datadoghq.com -> https://app.us5.datadoghq.com
+ - Also handles URLs with paths like https://api.datadoghq.com/api/v1
+
+ Args:
+ api_url: The API URL to convert
+
+ Returns:
+ The web app URL without trailing slash
+ """
+ url_str = str(api_url)
+ parsed = urlparse(url_str)
+
+ # Replace 'api.' subdomain with 'app.' in the hostname
+ # This handles cases like:
+ # - api.datadoghq.com -> app.datadoghq.com
+ # - api.datadoghq.eu -> app.datadoghq.eu
+ # - api.us5.datadoghq.com -> app.us5.datadoghq.com
+ if parsed.hostname and parsed.hostname.startswith("api."):
+ new_hostname = "app." + parsed.hostname[4:]
+ # Reconstruct the netloc with the new hostname
+ if parsed.port:
+ new_netloc = f"{new_hostname}:{parsed.port}"
+ else:
+ new_netloc = new_hostname
+ else:
+ # If it doesn't start with 'api.', keep the hostname as is
+ # This handles edge cases where the URL might not follow the pattern
+ new_netloc = parsed.netloc
+
+ # Remove any /api path segments if present
+ # Some configurations might include /api/v1 or similar in the base URL
+ new_path = parsed.path
+ if new_path.startswith("/api/"):
+ new_path = new_path[4:] # Remove '/api' prefix
+ elif new_path == "/api":
+ new_path = "/"
+
+ # Reconstruct the URL with the app subdomain
+ app_url = urlunparse(
+ (
+ parsed.scheme,
+ new_netloc,
+ new_path,
+ "", # params
+ "", # query
+ "", # fragment
+ )
+ )
+
+ # Remove trailing slash
+ if app_url.endswith("/"):
+ app_url = app_url[:-1]
+
+ return app_url
+

  class DatadogBaseConfig(BaseModel):
  """Base configuration for all Datadog toolsets"""
@@ -166,15 +238,9 @@ def execute_datadog_http_request(
  timeout: int,
  method: str = "POST",
  ) -> Any:
- # Log the request details
- logging.info("Datadog API Request:")
- logging.info(f" Method: {method}")
- logging.info(f" URL: {url}")
- logging.info(f" Headers: {json.dumps(sanitize_headers(headers), indent=2)}")
- logging.info(
- f" {'Params' if method == 'GET' else 'Payload'}: {json.dumps(payload_or_params, indent=2)}"
+ logging.debug(
+ f"Datadog API Request: Method: {method} URL: {url} Headers: {json.dumps(sanitize_headers(headers), indent=2)} {'Params' if method == 'GET' else 'Payload'}: {json.dumps(payload_or_params, indent=2)} Timeout: {timeout}s"
  )
- logging.info(f" Timeout: {timeout}s")

  if method == "GET":
  response = requests.get(
@@ -186,24 +252,12 @@
  )

  # Log the response details
- logging.info("Datadog API Response:")
- logging.info(f" Status Code: {response.status_code}")
- logging.info(f" Response Headers: {dict(sanitize_headers(response.headers))}")
+ logging.debug(
+ f"Datadog API Response: Status Code: {response.status_code} Response Headers: {dict(sanitize_headers(response.headers))}"
+ )

  if response.status_code == 200:
  response_data = response.json()
- # Log response size but not full content (could be large)
- if isinstance(response_data, dict):
- logging.info(f" Response Keys: {list(response_data.keys())}")
- if "data" in response_data:
- data_len = (
- len(response_data["data"])
- if isinstance(response_data["data"], list)
- else 1
- )
- logging.info(f" Data Items Count: {data_len}")
- else:
- logging.info(f" Response Type: {type(response_data).__name__}")
  return response_data

  else:
@@ -214,3 +268,415 @@
  response_text=response.text,
  response_headers=response.headers,
  )
+
+
+ def fetch_openapi_spec(
+ site_api_url: Optional[str] = None, version: str = "both"
+ ) -> Optional[Dict[str, Any]]:
+ """Fetch and cache the Datadog OpenAPI specification.
+
+ Args:
+ site_api_url: Base URL for Datadog API (not used, kept for compatibility)
+ version: Which version to fetch ('v1', 'v2', or 'both')
+
+ Returns:
+ OpenAPI spec as dictionary (combined if 'both'), or None if fetch fails
+ """
+ global _openapi_spec_cache
+
+ # Use version as cache key
+ cache_key = f"openapi_{version}"
+
+ # Check cache first
+ if cache_key in _openapi_spec_cache:
+ return _openapi_spec_cache[cache_key]
+
+ try:
+ import yaml
+
+ # GitHub raw URLs for Datadog's official OpenAPI specs
+ spec_urls = {
+ "v1": "https://raw.githubusercontent.com/DataDog/datadog-api-client-python/master/.generator/schemas/v1/openapi.yaml",
+ "v2": "https://raw.githubusercontent.com/DataDog/datadog-api-client-python/master/.generator/schemas/v2/openapi.yaml",
+ }
+
+ combined_spec: Dict[str, Any] = {
+ "openapi": "3.0.0",
+ "paths": {},
+ "components": {},
+ }
+
+ versions_to_fetch = []
+ if version == "both":
+ versions_to_fetch = ["v1", "v2"]
+ elif version in spec_urls:
+ versions_to_fetch = [version]
+ else:
+ logging.error(f"Invalid version: {version}")
+ return None
+
+ for ver in versions_to_fetch:
+ try:
+ logging.debug(f"Fetching Datadog OpenAPI spec for {ver}...")
+ response = requests.get(spec_urls[ver], timeout=30)
+ if response.status_code == 200:
+ # Parse YAML to dict
+ spec = yaml.safe_load(response.text)
+
+ if version == "both":
+ # Merge specs
+ if "paths" in spec:
+ # Prefix v1 paths with /api/v1 and v2 with /api/v2
+ for path, methods in spec.get("paths", {}).items():
+ prefixed_path = (
+ f"/api/{ver}{path}"
+ if not path.startswith("/api/")
+ else path
+ )
+ paths_dict = combined_spec.get("paths", {})
+ if isinstance(paths_dict, dict):
+ paths_dict[prefixed_path] = methods
+
+ # Merge components
+ if "components" in spec:
+ for comp_type, components in spec.get(
+ "components", {}
+ ).items():
+ components_dict = combined_spec.get("components", {})
+ if isinstance(components_dict, dict):
+ if comp_type not in components_dict:
+ components_dict[comp_type] = {}
+ components_dict[comp_type].update(components)
+ else:
+ combined_spec = spec
+
+ logging.info(f"Successfully fetched OpenAPI spec for {ver}")
+ else:
+ logging.warning(
+ f"Failed to fetch spec for {ver}: HTTP {response.status_code}"
+ )
+ except Exception as e:
+ logging.error(f"Failed to fetch spec for {ver}: {e}")
+ if version != "both":
+ return None
+
+ if combined_spec["paths"]:
+ _openapi_spec_cache[cache_key] = combined_spec
+ logging.info(
+ f"Cached OpenAPI spec with {len(combined_spec['paths'])} endpoints"
+ )
+ return combined_spec
+ else:
+ logging.warning("No endpoints found in OpenAPI spec")
+ return None
+
+ except Exception as e:
+ logging.error(f"Error fetching OpenAPI spec: {e}")
+ return None
+
+
+ def get_endpoint_requirements(
+ spec: Dict[str, Any], endpoint: str, method: str
+ ) -> Optional[Dict[str, Any]]:
+ """Extract parameter requirements for a specific endpoint from OpenAPI spec.
+
+ Args:
+ spec: OpenAPI specification
+ endpoint: API endpoint path
+ method: HTTP method
+
+ Returns:
+ Dictionary with parameter requirements, or None if not found
+ """
+ if not spec or "paths" not in spec:
+ return None
+
+ # Normalize endpoint path
+ endpoint = endpoint.strip("/")
+ if not endpoint.startswith("/"):
+ endpoint = "/" + endpoint
+
+ # Find the endpoint in the spec
+ paths = spec.get("paths", {})
+ if endpoint not in paths:
+ # Try to find a matching pattern (e.g., /api/v2/logs/events/search)
+ for path_pattern in paths.keys():
+ if (
+ path_pattern == endpoint
+ or path_pattern.replace("{", "").replace("}", "") in endpoint
+ ):
+ endpoint = path_pattern
+ break
+ else:
+ return None
+
+ # Get method requirements
+ endpoint_spec = paths.get(endpoint, {})
+ method_spec = endpoint_spec.get(method.lower(), {})
+
+ if not method_spec:
+ return None
+
+ requirements = {
+ "description": method_spec.get("description", ""),
+ "parameters": [],
+ "requestBody": None,
+ }
+
+ # Extract parameters
+ for param in method_spec.get("parameters", []):
+ param_info = {
+ "name": param.get("name"),
+ "in": param.get("in"), # query, path, header
+ "required": param.get("required", False),
+ "description": param.get("description", ""),
+ "schema": param.get("schema", {}),
+ }
+ requirements["parameters"].append(param_info)
+
+ # Extract request body schema
+ if "requestBody" in method_spec:
+ body = method_spec["requestBody"]
+ content = body.get("content", {})
+ json_content = content.get("application/json", {})
+ requirements["requestBody"] = {
+ "required": body.get("required", False),
+ "schema": json_content.get("schema", {}),
+ "description": body.get("description", ""),
+ }
+
+ return requirements
+
+
+ def convert_relative_time(time_str: str) -> Tuple[str, str]:
+ """Convert relative time strings to RFC3339 format.
+
+ Args:
+ time_str: Time string (e.g., '-24h', 'now', '-7d', '2024-01-01T00:00:00Z')
+
+ Returns:
+ Tuple of (converted_time, format_type) where format_type is 'relative', 'rfc3339', or 'unix'
+ """
+ # Check if already in RFC3339 format
+ try:
+ # Try parsing as RFC3339
+ if "T" in time_str and (
+ time_str.endswith("Z") or "+" in time_str or "-" in time_str[-6:]
+ ):
+ datetime.fromisoformat(time_str.replace("Z", "+00:00"))
+ return time_str, "rfc3339"
+ except (ValueError, AttributeError):
+ pass
+
+ # Check if Unix timestamp
+ try:
+ timestamp = float(time_str)
+ if 1000000000 < timestamp < 2000000000: # Reasonable Unix timestamp range
+ return time_str, "unix"
+ except (ValueError, TypeError):
+ pass
+
+ # Check for relative time
+ match = RELATIVE_TIME_PATTERN.match(time_str.strip())
+ if not match:
+ # Return as-is if not recognized
+ return time_str, "unknown"
+
+ now = datetime.now(timezone.utc)
+
+ if time_str.lower() == "now":
+ return now.isoformat().replace("+00:00", "Z"), "relative"
+
+ # Parse relative time
+ groups = match.groups()
+ if groups[0] is None:
+ return time_str, "unknown"
+
+ amount = int(groups[0])
+ unit = groups[1].lower()
+
+ # Convert to timedelta
+ if unit == "s":
+ delta = timedelta(seconds=amount)
+ elif unit == "min":
+ delta = timedelta(minutes=amount)
+ elif unit == "m":
+ delta = timedelta(minutes=amount) # m = minutes
+ elif unit == "h":
+ delta = timedelta(hours=amount)
+ elif unit == "d":
+ delta = timedelta(days=amount)
+ elif unit == "w":
+ delta = timedelta(weeks=amount)
+ elif unit == "mo":
+ delta = timedelta(days=amount * 30) # mo = months (approximate)
+ elif unit == "y":
+ delta = timedelta(days=amount * 365) # Approximate
+ else:
+ return time_str, "unknown"
+
+ # Apply delta (subtract if negative relative time)
+ if time_str.startswith("-"):
+ result_time = now - delta
+ else:
+ result_time = now + delta
+
+ return result_time.isoformat().replace("+00:00", "Z"), "relative"
+
+
+ def preprocess_time_fields(payload: Dict[str, Any], endpoint: str) -> Dict[str, Any]:
+ """Preprocess time fields in payload, converting relative times to appropriate format.
+
+ Args:
+ payload: Request payload
+ endpoint: API endpoint
+
+ Returns:
+ Modified payload with converted time fields
+ """
+ # Deep copy to avoid modifying original
+ import copy
+
+ processed = copy.deepcopy(payload)
+
+ # Common time field paths to check
+ time_fields = [
+ ["filter", "from"],
+ ["filter", "to"],
+ ["from"],
+ ["to"],
+ ["start"],
+ ["end"],
+ ["start_time"],
+ ["end_time"],
+ ]
+
+ def get_nested(d, path):
+ """Get nested dictionary value."""
+ for key in path:
+ if isinstance(d, dict) and key in d:
+ d = d[key]
+ else:
+ return None
+ return d
+
+ def set_nested(d, path, value):
+ """Set nested dictionary value."""
+ for key in path[:-1]:
+ if key not in d:
+ d[key] = {}
+ d = d[key]
+ d[path[-1]] = value
+
+ conversions = []
+
+ for field_path in time_fields:
+ value = get_nested(processed, field_path)
+ if value and isinstance(value, str):
+ converted, format_type = convert_relative_time(value)
+ if format_type == "relative":
+ set_nested(processed, field_path, converted)
+ conversions.append(
+ f"{'.'.join(field_path)}: '{value}' -> '{converted}'"
+ )
+
+ if conversions:
+ logging.info(f"Converted relative time fields: {', '.join(conversions)}")
+
+ return processed
+
+
+ def enhance_error_message(
+ error: DataDogRequestError, endpoint: str, method: str, site_api_url: str
+ ) -> str:
+ """Enhance error message with OpenAPI spec details and format examples.
+
+ Args:
+ error: Original DataDog request error
+ endpoint: API endpoint
+ method: HTTP method
+ site_api_url: Base API URL
+
+ Returns:
+ Enhanced error message
+ """
+ base_msg = f"HTTP error: {error.status_code} - {error.response_text}"
+
+ # For 400 errors, try to provide more context
+ if error.status_code == 400:
+ enhanced_parts = [base_msg]
+
+ # Try to parse error details
+ try:
+ error_body = json.loads(error.response_text)
+ if "errors" in error_body:
+ enhanced_parts.append(f"\nErrors: {error_body['errors']}")
+
+ # Check for specific field validation errors
+ for err in error_body.get("errors", []):
+ if "input_validation_error" in str(err):
+ enhanced_parts.append("\n⚠️ Input validation error detected.")
+
+ # Add time format help
+ if any(
+ field in str(err).lower()
+ for field in ["from", "to", "time", "date"]
+ ):
+ enhanced_parts.append(
+ "\nTime format requirements:\n"
+ " - v1 API: Unix timestamps (e.g., 1704067200)\n"
+ " - v2 API: RFC3339 format (e.g., '2024-01-01T00:00:00Z')\n"
+ " - NOT supported: Relative times like '-24h', 'now', '-7d'"
+ )
+ except (json.JSONDecodeError, TypeError):
+ pass
+
+ # Try to fetch OpenAPI spec for more details
+ spec = fetch_openapi_spec(version="both")
+ if spec:
+ requirements = get_endpoint_requirements(spec, endpoint, method)
+ if requirements:
+ enhanced_parts.append(f"\nEndpoint: {method} {endpoint}")
+ if requirements["description"]:
+ enhanced_parts.append(f"Description: {requirements['description']}")
+
+ # Add parameter requirements
+ if requirements["parameters"]:
+ enhanced_parts.append("\nRequired parameters:")
+ for param in requirements["parameters"]:
+ if param["required"]:
+ enhanced_parts.append(
+ f" - {param['name']} ({param['in']}): {param['description']}"
+ )
+
+ # Add request body schema hints
+ if (
+ requirements["requestBody"]
+ and requirements["requestBody"]["required"]
+ ):
+ enhanced_parts.append("\nRequest body is required")
+ if requirements["requestBody"]["description"]:
+ enhanced_parts.append(
+ f"Body: {requirements['requestBody']['description']}"
+ )
+
+ # Add example for common endpoints
+ if "/logs/events/search" in endpoint:
+ enhanced_parts.append(
+ "\nExample request body for logs search:\n"
+ "```json\n"
+ "{\n"
+ ' "filter": {\n'
+ ' "from": "2024-01-01T00:00:00Z",\n'
+ ' "to": "2024-01-02T00:00:00Z",\n'
+ ' "query": "*"\n'
+ " },\n"
+ ' "sort": "-timestamp",\n'
+ ' "page": {"limit": 50}\n'
+ "}\n"
+ "```"
+ )
+
+ return "\n".join(enhanced_parts)
+
+ return base_msg
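A short sketch of how the new time helpers compose, assuming they are importable from the same `datadog_api` module shown above (the converted values depend on the current UTC time, so the printed output is only indicative):

```python
from holmes.plugins.toolsets.datadog.datadog_api import (
    convert_relative_time,
    preprocess_time_fields,
)

# "-24h" matches RELATIVE_TIME_PATTERN, so it is rewritten to an RFC3339 timestamp.
converted, kind = convert_relative_time("-24h")
print(kind)  # "relative"

# preprocess_time_fields walks well-known paths such as filter.from / filter.to
# and substitutes any relative values before the request is sent.
payload = {"filter": {"from": "-24h", "to": "now", "query": "*"}}
processed = preprocess_time_fields(payload, endpoint="/api/v2/logs/events/search")
print(processed["filter"]["from"])  # RFC3339 string, e.g. "2024-01-01T00:00:00Z"
```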
@@ -14,21 +14,32 @@ Before running logs queries:

  ### CRITICAL: Pod Name Resolution Workflow

- **When user provides an exact pod name** (e.g., `my-workload-5f9d8b7c4d-x2km9`):
- - FIRST query Datadog directly with that pod name using appropriate tags
+ **IMPORTANT WILDCARD USAGE:**
+ - **ALWAYS use wildcards** when searching for pods unless you have the COMPLETE pod name with all suffixes
+ - Kubernetes pod names include deployment hash + replica ID (e.g., `nginx-ingress-7b9899-x2km9`, `frontend-5f4d3b2a1-abc123`)
+ - When user says "nginx pod" or "frontend pod", search for `nginx-*` or `frontend-*` NOT just `nginx` or `frontend`
+ - Datadog supports wildcards: `*` matches any characters (e.g., `nginx-*`, `*ingress*`, `*-x2km9`)
+ - For partial matches, use wildcards on both sides: `*keyword*` to find logs from any pod containing "keyword"
+
+ **When user provides what looks like a complete pod name** (e.g., `my-workload-5f9d8b7c4d-x2km9`):
+ - Query Datadog directly with that exact pod name
  - Do NOT try to verify if the pod exists in Kubernetes first
  - This allows querying historical pods that have been deleted/replaced

- **When user provides a generic workload name** (e.g., "my-workload", "nginx", "telemetry-processor"):
- - First use `kubectl_find_resource` to find actual pod names
- - Example: `kubectl_find_resource` with "my-workload" finds pods like "my-workload-8f8cdfxyz-c7zdr"
- - Then use those specific pod names in Datadog queries
- - Alternative: Use deployment-level tags when appropriate
+ **When user provides a simple/generic name** (e.g., "nginx", "redis", "payment-service", "auth"):
+ - **DEFAULT ACTION: Use wildcards** - Query with `pod-name-*` pattern
+ - For historical queries (yesterday, last week): ALWAYS use wildcards directly in Datadog
+ - For current issues: Optionally use `kubectl_find_resource` to find exact pod names, but wildcards often work better
+ - Examples:
+ - User says "nginx pod" → Query Datadog with `nginx-*`
+ - User says "redis instance" → Query Datadog with `redis-*`
+ - User says "payment service" → Query Datadog with `payment-*`

- **Why this matters:**
+ **Why wildcards are critical:**
  - Pod names in Datadog are the actual Kubernetes pod names (with random suffixes)
- - Historical pods that no longer exist in the cluster can still have logs in Datadog
- - Deployment/service names alone are NOT pod names (they need the suffix)
+ - Users typically refer to pods by their deployment/service name without suffixes
+ - Without wildcards, queries for "nginx" will find NOTHING when actual pods are named "nginx-7b9899-x2km9"
+ - Historical pods that no longer exist can only be found via Datadog with proper wildcard usage

  ### Time Parameters
  - Use RFC3339 format: `2023-03-01T10:30:00Z`
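Taken together with the request-body example embedded in `enhance_error_message` above, the wildcard and RFC3339 guidance translates into a logs-search payload along these lines (a sketch only; `pod_name` is assumed here to be the facet carrying the Kubernetes pod name):

```python
# Sketch only: wildcard pod query plus RFC3339 time bounds.
logs_search_body = {
    "filter": {
        "from": "2023-03-01T10:30:00Z",
        "to": "2023-03-01T11:30:00Z",
        "query": "pod_name:nginx-*",  # wildcard instead of the bare deployment name
    },
    "sort": "-timestamp",
    "page": {"limit": 50},
}
```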