awslabs.cloudwatch-mcp-server 0.0.11__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs/cloudwatch_mcp_server/__init__.py +2 -1
- awslabs/cloudwatch_mcp_server/cloudwatch_alarms/models.py +1 -1
- awslabs/cloudwatch_mcp_server/cloudwatch_alarms/tools.py +2 -2
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/cloudformation_template_generator.py +162 -0
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/constants.py +30 -0
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/metric_analyzer.py +192 -0
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/metric_data_decomposer.py +218 -0
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/models.py +129 -3
- awslabs/cloudwatch_mcp_server/cloudwatch_metrics/tools.py +377 -33
- {awslabs_cloudwatch_mcp_server-0.0.11.dist-info → awslabs_cloudwatch_mcp_server-0.0.14.dist-info}/METADATA +7 -3
- awslabs_cloudwatch_mcp_server-0.0.14.dist-info/RECORD +21 -0
- {awslabs_cloudwatch_mcp_server-0.0.11.dist-info → awslabs_cloudwatch_mcp_server-0.0.14.dist-info}/WHEEL +1 -1
- awslabs_cloudwatch_mcp_server-0.0.11.dist-info/RECORD +0 -17
- {awslabs_cloudwatch_mcp_server-0.0.11.dist-info → awslabs_cloudwatch_mcp_server-0.0.14.dist-info}/entry_points.txt +0 -0
- {awslabs_cloudwatch_mcp_server-0.0.11.dist-info → awslabs_cloudwatch_mcp_server-0.0.14.dist-info}/licenses/LICENSE +0 -0
- {awslabs_cloudwatch_mcp_server-0.0.11.dist-info → awslabs_cloudwatch_mcp_server-0.0.14.dist-info}/licenses/NOTICE +0 -0
|
@@ -14,10 +14,57 @@
|
|
|
14
14
|
|
|
15
15
|
"""Data models for CloudWatch Metrics MCP tools."""
|
|
16
16
|
|
|
17
|
+
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.constants import (
|
|
18
|
+
DAYS_PER_WEEK,
|
|
19
|
+
HOURS_PER_DAY,
|
|
20
|
+
MINUTES_PER_HOUR,
|
|
21
|
+
SECONDS_PER_MINUTE,
|
|
22
|
+
)
|
|
17
23
|
from datetime import datetime
|
|
18
24
|
from enum import Enum
|
|
19
|
-
from pydantic import BaseModel, Field
|
|
20
|
-
from typing import Any, Dict, List
|
|
25
|
+
from pydantic import BaseModel, Field, field_validator, model_serializer, model_validator
|
|
26
|
+
from typing import Any, ClassVar, Dict, List, Optional, Union
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Trend(str, Enum):
    """Trend direction based on statistical significance."""

    # The ``str`` mix-in makes every member compare equal to its string value,
    # so a member can be used wherever the plain string is expected.
    POSITIVE = 'positive'
    NEGATIVE = 'negative'
    # No trend reported. NOTE(review): presumably "not statistically
    # significant" -- confirm against the code that produces Trend values.
    NONE = 'none'
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Seasonality rounding threshold constant
|
|
38
|
+
SEASONALITY_ROUNDING_THRESHOLD = 0.1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Seasonality(Enum):
    """Seasonality detection results with period in seconds."""

    # Each member's value is the length of one seasonal cycle in seconds;
    # NONE (0) stands for "no seasonality".
    NONE = 0
    FIFTEEN_MINUTES = 15 * SECONDS_PER_MINUTE
    ONE_HOUR = MINUTES_PER_HOUR * SECONDS_PER_MINUTE
    SIX_HOURS = 6 * ONE_HOUR
    ONE_DAY = HOURS_PER_DAY * ONE_HOUR
    ONE_WEEK = DAYS_PER_WEEK * ONE_DAY

    @classmethod
    def from_seconds(cls, seconds: Union[float, int]) -> 'Seasonality':
        """Map a period in seconds onto the nearest known seasonality.

        Args:
            seconds: Candidate seasonal period; truncated to an int first.

        Returns:
            The member whose period is nearest to ``seconds`` when the gap is
            strictly smaller than ``SEASONALITY_ROUNDING_THRESHOLD`` times that
            member's own period, otherwise ``Seasonality.NONE``.
        """
        whole_seconds = int(seconds)

        def distance(member: 'Seasonality') -> int:
            return abs(member.value - whole_seconds)

        nearest = min(cls, key=distance)
        # The tolerance scales with the matched period. For NONE it is 0, so
        # the strict comparison below always falls through to NONE.
        tolerance = nearest.value * SEASONALITY_ROUNDING_THRESHOLD
        if distance(nearest) < tolerance:
            return nearest
        return cls.NONE
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class DecompositionResult(BaseModel):
    """Result of metric data decomposition into seasonal and trend components."""

    # Seasonal component, expressed as one of the Seasonality members.
    seasonality: Seasonality
    # Trend component, expressed as one of the Trend members.
    trend: Trend
|
|
21
68
|
|
|
22
69
|
|
|
23
70
|
class SortOrder(str, Enum):
|
|
@@ -107,8 +154,32 @@ class MetricMetadata(BaseModel):
|
|
|
107
154
|
class AlarmRecommendationThreshold(BaseModel):
    """Represents an alarm threshold configuration."""

    # Free-text explanation of the threshold; defaults to an empty string, so
    # it may be omitted when constructing a threshold.
    justification: str = Field(default='', description='Justification for the threshold value')
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class StaticAlarmThreshold(AlarmRecommendationThreshold):
    """Represents an alarm static threshold configuration."""

    # Required field (no default): the fixed value of the threshold.
    staticValue: float = Field(..., description='The static threshold value')
|
|
111
|
-
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class AnomalyDetectionAlarmThreshold(AlarmRecommendationThreshold):
    """Represents an anomaly detection alarm threshold configuration."""

    # Declared as ClassVar so pydantic treats it as a constant, not a field.
    DEFAULT_SENSITIVITY: ClassVar[float] = 2.0

    sensitivity: float = Field(
        default=DEFAULT_SENSITIVITY,
        description='The sensitivity of the Anomaly Detection bands.',
    )

    @field_validator('sensitivity')
    @classmethod
    def validate_sensitivity(cls, v):
        """Accept a sensitivity only when it lies in the interval (0, 100].

        Raises:
            ValueError: If the value is not greater than 0 or exceeds 100.
        """
        # Very small or very large sensitivities degrade Anomaly Detection.
        # The chained comparison is kept on purpose: it is False for NaN, so
        # NaN is rejected as well.
        if 0 < v <= 100:
            return v
        raise ValueError('Sensitivity must be above 0 and less than or equal to 100')
|
|
112
183
|
|
|
113
184
|
|
|
114
185
|
class AlarmRecommendationDimension(BaseModel):
|
|
@@ -148,3 +219,58 @@ class AlarmRecommendation(BaseModel):
|
|
|
148
219
|
default_factory=list, description='List of dimensions for the alarm'
|
|
149
220
|
)
|
|
150
221
|
intent: str = Field(..., description='The intent or purpose of the alarm')
|
|
222
|
+
cloudformation_template: Optional[Dict[str, Any]] = Field(
|
|
223
|
+
default=None,
|
|
224
|
+
description='CloudFormation template (only for anomaly detection alarms)',
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
@model_serializer
|
|
228
|
+
def serialize_model(self):
|
|
229
|
+
"""Serialize alarm recommendation to dict format."""
|
|
230
|
+
data = {
|
|
231
|
+
'alarmDescription': self.alarmDescription,
|
|
232
|
+
'threshold': self.threshold,
|
|
233
|
+
'period': self.period,
|
|
234
|
+
'comparisonOperator': self.comparisonOperator,
|
|
235
|
+
'statistic': self.statistic,
|
|
236
|
+
'evaluationPeriods': self.evaluationPeriods,
|
|
237
|
+
'datapointsToAlarm': self.datapointsToAlarm,
|
|
238
|
+
'treatMissingData': self.treatMissingData,
|
|
239
|
+
'dimensions': self.dimensions,
|
|
240
|
+
'intent': self.intent,
|
|
241
|
+
}
|
|
242
|
+
if self.cloudformation_template is not None:
|
|
243
|
+
data['cloudformation_template'] = self.cloudformation_template
|
|
244
|
+
return data
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class MetricData(BaseModel):
    """Represents CloudWatch Metric (time series) data."""

    period_seconds: int = Field(
        ..., description='The aggregation period in seconds of the requested metric data'
    )
    # timestamps[i] pairs with values[i]; the validator below enforces that
    # both lists have the same length.
    timestamps: List[int] = Field(default_factory=list, description='List of metric timestamps')
    values: List[float] = Field(default_factory=list, description='List of metric values')

    @model_validator(mode='after')
    def validate_metric_data(self):
        """Check cross-field invariants once all fields have been populated.

        Returns:
            The validated instance.

        Raises:
            ValueError: If ``timestamps`` and ``values`` differ in length, or
                if ``period_seconds`` is zero or negative.
        """
        if len(self.timestamps) != len(self.values):
            raise ValueError('Timestamps and values must have the same length')
        if self.period_seconds <= 0:
            # A period of exactly 0 is rejected too, so the message must say
            # "> 0" rather than ">= 0".
            raise ValueError('Timeseries must have a period > 0')
        return self
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class AlarmRecommendationResult(BaseModel):
    """Result wrapper for alarm recommendations with a success/failure message to guide the calling LLM."""

    # Defaults to an empty list, so a result carrying only a message is valid.
    recommendations: List[AlarmRecommendation] = Field(
        default_factory=list,
        description='A list of alarm recommendations that match the provided dimensions.',
    )
    # Required: every result must explain its outcome.
    message: str = Field(
        ...,
        description='Message describing the success/failure of generating alarm recommendation.',
    )
|
|
@@ -18,19 +18,32 @@ import boto3
|
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
20
|
from awslabs.cloudwatch_mcp_server import MCP_SERVER_VERSION
|
|
21
|
+
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.cloudformation_template_generator import (
|
|
22
|
+
CloudFormationTemplateGenerator,
|
|
23
|
+
)
|
|
24
|
+
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.constants import (
|
|
25
|
+
COMPARISON_OPERATOR_ANOMALY,
|
|
26
|
+
DEFAULT_ANALYSIS_PERIOD_MINUTES,
|
|
27
|
+
)
|
|
28
|
+
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.metric_analyzer import MetricAnalyzer
|
|
29
|
+
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.metric_data_decomposer import Seasonality
|
|
21
30
|
from awslabs.cloudwatch_mcp_server.cloudwatch_metrics.models import (
|
|
22
31
|
AlarmRecommendation,
|
|
23
32
|
AlarmRecommendationDimension,
|
|
33
|
+
AlarmRecommendationResult,
|
|
24
34
|
AlarmRecommendationThreshold,
|
|
35
|
+
AnomalyDetectionAlarmThreshold,
|
|
25
36
|
Dimension,
|
|
26
37
|
GetMetricDataResponse,
|
|
38
|
+
MetricData,
|
|
27
39
|
MetricDataPoint,
|
|
28
40
|
MetricDataResult,
|
|
29
41
|
MetricMetadata,
|
|
30
42
|
MetricMetadataIndexKey,
|
|
43
|
+
StaticAlarmThreshold,
|
|
31
44
|
)
|
|
32
45
|
from botocore.config import Config
|
|
33
|
-
from datetime import datetime
|
|
46
|
+
from datetime import datetime, timedelta, timezone
|
|
34
47
|
from loguru import logger
|
|
35
48
|
from mcp.server.fastmcp import Context
|
|
36
49
|
from pathlib import Path
|
|
@@ -48,6 +61,8 @@ class CloudWatchMetricsTools:
|
|
|
48
61
|
self._load_and_index_metadata()
|
|
49
62
|
)
|
|
50
63
|
logger.info(f'Loaded {len(self.metric_metadata_index)} metric metadata entries')
|
|
64
|
+
self.cloudformation_generator = CloudFormationTemplateGenerator()
|
|
65
|
+
self.metric_analyzer = MetricAnalyzer()
|
|
51
66
|
|
|
52
67
|
def _get_cloudwatch_client(self, region: str):
|
|
53
68
|
"""Create a CloudWatch client for the specified region."""
|
|
@@ -136,6 +151,9 @@ class CloudWatchMetricsTools:
|
|
|
136
151
|
# Register get_metric_metadata tool
|
|
137
152
|
mcp.tool(name='get_metric_metadata')(self.get_metric_metadata)
|
|
138
153
|
|
|
154
|
+
# Register analyze_metric tool
|
|
155
|
+
mcp.tool(name='analyze_metric')(self.analyze_metric)
|
|
156
|
+
|
|
139
157
|
# Register get_recommended_metric_alarms tool
|
|
140
158
|
mcp.tool(name='get_recommended_metric_alarms')(self.get_recommended_metric_alarms)
|
|
141
159
|
|
|
@@ -144,7 +162,10 @@ class CloudWatchMetricsTools:
|
|
|
144
162
|
ctx: Context,
|
|
145
163
|
namespace: str,
|
|
146
164
|
metric_name: str,
|
|
147
|
-
start_time:
|
|
165
|
+
start_time: Annotated[
|
|
166
|
+
Union[str, datetime],
|
|
167
|
+
Field(description='The start time for the metric data query (ISO format or datetime)'),
|
|
168
|
+
],
|
|
148
169
|
dimensions: List[Dimension] = [],
|
|
149
170
|
end_time: Annotated[
|
|
150
171
|
Union[str, datetime] | None,
|
|
@@ -372,10 +393,17 @@ class CloudWatchMetricsTools:
|
|
|
372
393
|
start_time = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
|
|
373
394
|
|
|
374
395
|
if end_time is None:
|
|
375
|
-
end_time = datetime.
|
|
396
|
+
end_time = datetime.now(timezone.utc)
|
|
376
397
|
elif isinstance(end_time, str):
|
|
377
398
|
end_time = datetime.fromisoformat(end_time.replace('Z', '+00:00'))
|
|
378
399
|
|
|
400
|
+
# Ensure both datetimes have timezone info for correct datetime arithmetic afterwards.
|
|
401
|
+
# This avoids issues when datetime is passed as naive values (without timezone)
|
|
402
|
+
if start_time.tzinfo is None:
|
|
403
|
+
start_time = start_time.replace(tzinfo=timezone.utc)
|
|
404
|
+
if end_time.tzinfo is None:
|
|
405
|
+
end_time = end_time.replace(tzinfo=timezone.utc)
|
|
406
|
+
|
|
379
407
|
# Calculate period based on time window and target datapoints
|
|
380
408
|
time_window_seconds = int((end_time - start_time).total_seconds())
|
|
381
409
|
calculated_period = max(60, int(time_window_seconds / target_datapoints))
|
|
@@ -669,7 +697,22 @@ class CloudWatchMetricsTools:
|
|
|
669
697
|
description='AWS region for consistency. Note: This function uses local metadata and does not make AWS API calls. Defaults to us-east-1.'
|
|
670
698
|
),
|
|
671
699
|
] = 'us-east-1',
|
|
672
|
-
|
|
700
|
+
statistic: Annotated[
|
|
701
|
+
Literal[
|
|
702
|
+
'AVG',
|
|
703
|
+
'COUNT',
|
|
704
|
+
'MAX',
|
|
705
|
+
'MIN',
|
|
706
|
+
'SUM',
|
|
707
|
+
'Average',
|
|
708
|
+
'Sum',
|
|
709
|
+
'Maximum',
|
|
710
|
+
'Minimum',
|
|
711
|
+
'SampleCount',
|
|
712
|
+
],
|
|
713
|
+
Field(description='The statistic to use for alarm recommendations'),
|
|
714
|
+
] = 'AVG',
|
|
715
|
+
) -> AlarmRecommendationResult:
|
|
673
716
|
"""Gets recommended alarms for a CloudWatch metric.
|
|
674
717
|
|
|
675
718
|
This tool retrieves alarm recommendations for a specific CloudWatch metric
|
|
@@ -685,11 +728,19 @@ class CloudWatchMetricsTools:
|
|
|
685
728
|
metric_name: The name of the metric (e.g., "CPUUtilization", "Duration")
|
|
686
729
|
dimensions: List of dimensions with name and value pairs
|
|
687
730
|
region: AWS region to query. Defaults to 'us-east-1'.
|
|
731
|
+
statistic: The statistic to use for alarm recommendations. Must match the metric's data type:
|
|
732
|
+
- Aggregate count metrics (RequestCount, Errors, Faults, Throttles, CacheHits, Connections, EventsProcessed): Use 'Sum'
|
|
733
|
+
- Event occurrence metrics (Invocations, CacheMisses): Use 'SampleCount'
|
|
734
|
+
- Utilization metrics (CPUUtilization, MemoryUtilization, DiskUtilization, NetworkUtilization): Use 'Average'
|
|
735
|
+
- Latency/Time metrics (Duration, Latency, ResponseTime, ProcessingTime, Delay, ExecutionTime, WaitTime): Use 'Average'
|
|
736
|
+
- Size metrics (PayloadSize, MessageSize, RequestSize, BodySize): Use 'Average'
|
|
737
|
+
If uncertain about the correct statistic for a custom metric, ask the user
|
|
738
|
+
to confirm the metric type before generating recommendations. Using the wrong statistic
|
|
739
|
+
(e.g., 'Average' on Invocations) will produce ineffective alarm thresholds
|
|
688
740
|
|
|
689
741
|
Returns:
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
are found or available.
|
|
742
|
+
AlarmRecommendationResult: A result containing alarm recommendations and optional message.
|
|
743
|
+
Empty recommendations list if no recommendations are found.
|
|
693
744
|
|
|
694
745
|
Example:
|
|
695
746
|
recommendations = await get_recommended_metric_alarms(
|
|
@@ -714,12 +765,12 @@ class CloudWatchMetricsTools:
|
|
|
714
765
|
|
|
715
766
|
if not metadata or 'alarmRecommendations' not in metadata:
|
|
716
767
|
logger.info(f'No alarm recommendations found for {namespace}/{metric_name}')
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
768
|
+
alarm_recommendations = []
|
|
769
|
+
else:
|
|
770
|
+
alarm_recommendations = metadata['alarmRecommendations']
|
|
771
|
+
logger.info(
|
|
772
|
+
f'Found {len(alarm_recommendations)} alarm recommendations for {namespace}/{metric_name}'
|
|
773
|
+
)
|
|
723
774
|
|
|
724
775
|
# Filter recommendations based on provided dimensions
|
|
725
776
|
matching_recommendations = []
|
|
@@ -735,11 +786,58 @@ class CloudWatchMetricsTools:
|
|
|
735
786
|
logger.warning(f'Error parsing alarm recommendation: {e}')
|
|
736
787
|
continue
|
|
737
788
|
|
|
738
|
-
|
|
739
|
-
|
|
789
|
+
if len(matching_recommendations) > 0:
|
|
790
|
+
logger.info(
|
|
791
|
+
f'Found {len(matching_recommendations)} matching alarm recommendations'
|
|
792
|
+
)
|
|
793
|
+
return AlarmRecommendationResult(
|
|
794
|
+
recommendations=matching_recommendations,
|
|
795
|
+
message=f'Found {len(matching_recommendations)} matching alarm recommendations',
|
|
796
|
+
)
|
|
797
|
+
|
|
798
|
+
# Generate additional recommendations based on metric analysis
|
|
799
|
+
additional_recommendations = []
|
|
800
|
+
logger.info('No predefined recommendations found - performing metric analysis')
|
|
801
|
+
analysis_result = await self.analyze_metric(
|
|
802
|
+
ctx,
|
|
803
|
+
namespace,
|
|
804
|
+
metric_name,
|
|
805
|
+
dimensions,
|
|
806
|
+
region,
|
|
807
|
+
statistic,
|
|
740
808
|
)
|
|
741
|
-
return matching_recommendations
|
|
742
809
|
|
|
810
|
+
# Generate additional recommendations based on seasonality
|
|
811
|
+
seasonality_value = analysis_result.get('seasonality_seconds', 0)
|
|
812
|
+
seasonality = Seasonality.from_seconds(seasonality_value)
|
|
813
|
+
|
|
814
|
+
if seasonality != Seasonality.NONE:
|
|
815
|
+
anomaly_detector_data = self._create_anomaly_detector_data(
|
|
816
|
+
metric_name=metric_name,
|
|
817
|
+
namespace=namespace,
|
|
818
|
+
dimensions=dimensions,
|
|
819
|
+
seasonality=seasonality,
|
|
820
|
+
)
|
|
821
|
+
alarm_rec = self._parse_alarm_recommendation(anomaly_detector_data)
|
|
822
|
+
additional_recommendations.append(alarm_rec)
|
|
823
|
+
logger.info(
|
|
824
|
+
f'Recommended anomaly detection alarm due to seasonality: {seasonality.name}'
|
|
825
|
+
)
|
|
826
|
+
|
|
827
|
+
if len(additional_recommendations) > 0:
|
|
828
|
+
message = f'Generated {len(additional_recommendations)} alarm recommendation(s) for {namespace}/{metric_name} based on metric analysis'
|
|
829
|
+
logger.info(message)
|
|
830
|
+
return AlarmRecommendationResult(
|
|
831
|
+
recommendations=additional_recommendations,
|
|
832
|
+
message=message,
|
|
833
|
+
)
|
|
834
|
+
|
|
835
|
+
message = f'No alarm recommendations available for {namespace}/{metric_name} with the provided dimensions'
|
|
836
|
+
logger.info(message)
|
|
837
|
+
return AlarmRecommendationResult(
|
|
838
|
+
recommendations=[],
|
|
839
|
+
message=message,
|
|
840
|
+
)
|
|
743
841
|
except Exception as e:
|
|
744
842
|
logger.error(f'Error in get_recommended_metric_alarms: {str(e)}')
|
|
745
843
|
await ctx.error(f'Error getting alarm recommendations: {str(e)}')
|
|
@@ -781,6 +879,30 @@ class CloudWatchMetricsTools:
|
|
|
781
879
|
|
|
782
880
|
return True
|
|
783
881
|
|
|
882
|
+
def _create_alarm_threshold(
    self, threshold_data: Dict[str, Any]
) -> AlarmRecommendationThreshold:
    """Build the threshold model that matches the raw threshold data.

    Args:
        threshold_data: Raw alarm threshold data

    Returns:
        AlarmRecommendationThreshold: An AnomalyDetectionAlarmThreshold when the
        data carries a 'sensitivity' key, otherwise a StaticAlarmThreshold whose
        value defaults to 0.0.
    """
    justification = threshold_data.get('justification', '')

    # The presence of 'sensitivity' is what marks an anomaly detection threshold.
    if 'sensitivity' not in threshold_data:
        return StaticAlarmThreshold(
            staticValue=threshold_data.get('staticValue', 0.0),
            justification=justification,
        )

    return AnomalyDetectionAlarmThreshold(
        sensitivity=threshold_data['sensitivity'],
        justification=justification,
    )
|
|
905
|
+
|
|
784
906
|
def _parse_alarm_recommendation(self, alarm_data: Dict[str, Any]) -> AlarmRecommendation:
|
|
785
907
|
"""Parse alarm recommendation data into AlarmRecommendation object.
|
|
786
908
|
|
|
@@ -792,12 +914,105 @@ class CloudWatchMetricsTools:
|
|
|
792
914
|
"""
|
|
793
915
|
# Parse threshold
|
|
794
916
|
threshold_data = alarm_data.get('threshold', {})
|
|
795
|
-
threshold =
|
|
796
|
-
|
|
797
|
-
|
|
917
|
+
threshold = self._create_alarm_threshold(threshold_data)
|
|
918
|
+
|
|
919
|
+
# Generate CloudFormation template only for anomaly detection alarms
|
|
920
|
+
cfn_template = self.cloudformation_generator.generate_metric_alarm_template(alarm_data)
|
|
921
|
+
|
|
922
|
+
# Build alarm recommendation kwargs
|
|
923
|
+
alarm_kwargs = {
|
|
924
|
+
'alarmDescription': alarm_data.get('alarmDescription', ''),
|
|
925
|
+
'metricName': alarm_data.get('metricName', ''),
|
|
926
|
+
'namespace': alarm_data.get('namespace', ''),
|
|
927
|
+
'threshold': threshold,
|
|
928
|
+
'period': alarm_data.get('period', 300),
|
|
929
|
+
'comparisonOperator': alarm_data.get('comparisonOperator', ''),
|
|
930
|
+
'statistic': alarm_data.get('statistic', ''),
|
|
931
|
+
'evaluationPeriods': alarm_data.get('evaluationPeriods', 1),
|
|
932
|
+
'datapointsToAlarm': alarm_data.get('datapointsToAlarm', 1),
|
|
933
|
+
'treatMissingData': alarm_data.get('treatMissingData', 'missing'),
|
|
934
|
+
'dimensions': self._parse_metric_dimensions(alarm_data),
|
|
935
|
+
'intent': alarm_data.get('intent', ''),
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
# Only include cloudformation_template if it was successfully generated
|
|
939
|
+
if cfn_template:
|
|
940
|
+
alarm_kwargs['cloudformation_template'] = cfn_template
|
|
941
|
+
|
|
942
|
+
return AlarmRecommendation(**alarm_kwargs)
|
|
943
|
+
|
|
944
|
+
def _create_anomaly_detector_data(
    self,
    metric_name: str,
    namespace: str,
    dimensions: List[Dimension],
    seasonality: Seasonality,
) -> Dict[str, Any]:
    """Assemble the raw alarm dictionary for an anomaly detection alarm.

    The result uses the same keys that _parse_alarm_recommendation reads.

    Args:
        metric_name: The metric name
        namespace: The metric namespace
        dimensions: List of metric dimensions
        seasonality: Detected seasonality

    Returns:
        Dict[str, Any]: Anomaly detector formatted data
    """
    metric_dimensions = [{'Name': dim.name, 'Value': dim.value} for dim in dimensions]
    # The 'sensitivity' key is what makes the threshold parse as anomaly detection.
    threshold = {
        'sensitivity': AnomalyDetectionAlarmThreshold.DEFAULT_SENSITIVITY,
        'justification': f'Metric has a seasonality of {seasonality.name} making it suitable for Anomaly Detection.',
    }

    alarm_data: Dict[str, Any] = {
        'alarmDescription': f'Anomaly detection alarm for {namespace}/{metric_name} (seasonality {seasonality.name})',
        'statistic': 'Average',
        'dimensions': metric_dimensions,
        'threshold': threshold,
        'comparisonOperator': COMPARISON_OPERATOR_ANOMALY,
        'evaluationPeriods': 2,
        'datapointsToAlarm': 2,
        'period': 300,
        'treatMissingData': 'missing',
        'intent': f'Detect anomalies in {metric_name} based on {seasonality.name} seasonal length',
        'metricName': metric_name,
        'namespace': namespace,
    }
    return alarm_data
|
|
980
|
+
|
|
981
|
+
def _create_anomaly_detector_recommendation(
    self,
    metric_name: str,
    namespace: str,
    dimensions: List[Dimension],
    seasonality: Seasonality,
) -> AlarmRecommendation:
    """Build an anomaly detection alarm recommendation for a metric.

    Formats the raw alarm data with _create_anomaly_detector_data and passes
    it through _parse_alarm_recommendation.

    Args:
        metric_name: The metric name
        namespace: The metric namespace
        dimensions: List of metric dimensions
        seasonality: Detected seasonality

    Returns:
        AlarmRecommendation: Anomaly detector alarm recommendation
    """
    return self._parse_alarm_recommendation(
        self._create_anomaly_detector_data(
            metric_name=metric_name,
            namespace=namespace,
            dimensions=dimensions,
            seasonality=seasonality,
        )
    )
|
|
1006
|
+
|
|
1007
|
+
def _parse_metric_dimensions(self, alarm_data: Dict[str, Any]) -> List[str]:
|
|
1008
|
+
"""Parse metric dimensions from the alarm data.
|
|
799
1009
|
|
|
800
|
-
|
|
1010
|
+
Args:
|
|
1011
|
+
alarm_data: Raw alarm recommendation data
|
|
1012
|
+
|
|
1013
|
+
Returns:
|
|
1014
|
+
List[AlarmRecommendationDimension]: Dimensions parsed from the alarm data
|
|
1015
|
+
"""
|
|
801
1016
|
dimensions = []
|
|
802
1017
|
for dim_data in alarm_data.get('dimensions', []):
|
|
803
1018
|
alarm_dim = AlarmRecommendationDimension(
|
|
@@ -806,16 +1021,145 @@ class CloudWatchMetricsTools:
|
|
|
806
1021
|
)
|
|
807
1022
|
dimensions.append(alarm_dim)
|
|
808
1023
|
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
1024
|
+
return dimensions
|
|
1025
|
+
|
|
1026
|
+
def _parse_metric_data_response(
    self, response: GetMetricDataResponse, period_seconds: int
) -> MetricData:
    """Convert a GetMetricData response into a MetricData time series.

    Only the first entry of ``response.metricDataResults`` is read. Its
    datapoints are turned into (epoch-millisecond timestamp, value) pairs and
    sorted in ascending order.

    Args:
        response: Response object holding the metric data results.
        period_seconds: Aggregation period to record on the returned series.

    Returns:
        MetricData: The sorted series, or an empty series when the response
        has no results or the first result has no datapoints.
    """
    results = response.metricDataResults
    if not (results and results[0].datapoints):
        return MetricData(period_seconds=period_seconds, timestamps=[], values=[])

    # Sorting the pairs keeps each value attached to its timestamp.
    pairs = sorted(
        (int(point.timestamp.timestamp() * 1000), point.value)
        for point in results[0].datapoints
    )
    return MetricData(
        period_seconds=period_seconds,
        timestamps=[stamp for stamp, _ in pairs],
        values=[value for _, value in pairs],
    )
|
|
1045
|
+
|
|
1046
|
+
async def analyze_metric(
    self,
    ctx: Context,
    namespace: str = Field(
        ..., description="The namespace of the metric (e.g., 'AWS/EC2', 'AWS/Lambda')"
    ),
    metric_name: str = Field(
        ..., description="The name of the metric (e.g., 'CPUUtilization', 'Duration')"
    ),
    dimensions: List[Dimension] = Field(
        default_factory=list,
        description='List of dimensions that identify the metric, each with name and value',
    ),
    region: Annotated[
        str,
        Field(description='AWS region to query. Defaults to us-east-1.'),
    ] = 'us-east-1',
    statistic: Annotated[
        Literal[
            'AVG',
            'COUNT',
            'MAX',
            'MIN',
            'SUM',
            'Average',
            'Sum',
            'Maximum',
            'Minimum',
            'SampleCount',
        ],
        Field(description='The statistic to use for the metric analysis'),
    ] = 'AVG',
) -> Dict[str, Any]:
    """Analyzes CloudWatch metric data to determine seasonality, trend, data density and statistical properties.

    This tool provides RAW DATA ONLY about historical metric data and performs analysis including:
    - Seasonality detection
    - Trend analysis
    - Data density and publishing period
    - Advanced statistical measures (min/max/median, std dev, noise)

    Usage: Use this tool to get objective metric analysis data.

    Args:
        ctx: The MCP context object for error handling and logging.
        namespace: The metric namespace (e.g., "AWS/EC2", "AWS/Lambda")
        metric_name: The name of the metric (e.g., "CPUUtilization", "Duration")
        dimensions: List of dimensions with name and value pairs
        statistic: The statistic to use for metric analysis. For guidance on choosing the correct statistic, refer to the get_recommended_metric_alarms tool.
        region: AWS region to query. Defaults to 'us-east-1'.

    Returns:
        Dict[str, Any]: Analysis results including:
            - message: Status message indicating success or reason for empty result
            - seasonality_seconds: Detected seasonality period in seconds
            - trend: Trend direction (INCREASING, DECREASING, or NONE)
            - statistics: Statistical measures (std_deviation, variance, etc.)
            - data_quality: Data density and publishing period information

    Example:
        analysis = await analyze_metric(
            ctx,
            namespace="AWS/EC2",
            metric_name="CPUUtilization",
            dimensions=[
                Dimension(name="InstanceId", value="i-1234567890abcdef0")
            ]
        )
        print(f"Status: {analysis['message']}")
        print(f"Seasonality: {analysis['seasonality_seconds']} seconds")
        print(f"Trend: {analysis['trend']}")
    """
    # NOTE(review): this method is registered as the 'analyze_metric' MCP tool,
    # so the docstring above presumably doubles as the tool description and is
    # kept verbatim. It lists trend values INCREASING/DECREASING, while the
    # Trend enum in cloudwatch_metrics.models defines positive/negative/none --
    # confirm which values MetricAnalyzer actually reports.
    try:
        analysis_period_minutes = DEFAULT_ANALYSIS_PERIOD_MINUTES

        logger.info(f'Analyzing metric: {namespace}/{metric_name} in region {region}')

        # The analysis window ends now (UTC) and covers the default period.
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(minutes=analysis_period_minutes)
        start_iso = start_time.isoformat()
        end_iso = end_time.isoformat()

        # target_datapoints equals the window length in minutes, i.e. one
        # datapoint is requested per minute of the window.
        metric_data_response = await self.get_metric_data(
            ctx=ctx,
            namespace=namespace,
            metric_name=metric_name,
            dimensions=dimensions,
            start_time=start_iso,
            end_time=end_iso,
            statistic=statistic,
            region=region,
            target_datapoints=analysis_period_minutes,
        )

        # Recompute the period for the same window and target so the parsed
        # series records the period used for the query.
        _, _, period_seconds = self._prepare_time_parameters(
            start_time, end_time, analysis_period_minutes
        )
        metric_data = self._parse_metric_data_response(metric_data_response, period_seconds)
        analysis_result = self.metric_analyzer.analyze_metric_data(metric_data)

        # Attach the query parameters so the result is self-describing.
        metric_info = {
            'namespace': namespace,
            'metric_name': metric_name,
            'statistic': statistic,
            'dimensions': [{'name': d.name, 'value': d.value} for d in dimensions],
            'analysis_period_minutes': analysis_period_minutes,
            'time_range': {
                'start': start_iso,
                'end': end_iso,
            },
        }
        analysis_result.update(metric_info=metric_info)

        return analysis_result
    except Exception as e:
        # Report the failure to the log and to the MCP client, then re-raise.
        logger.error(f'Error in analyze_metric: {str(e)}')
        await ctx.error(f'Error encountered when analyzing metric: {str(e)}')
        raise
|