awslabs.cloudwatch-applicationsignals-mcp-server 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. awslabs/__init__.py +17 -0
  2. awslabs/cloudwatch_applicationsignals_mcp_server/__init__.py +17 -0
  3. awslabs/cloudwatch_applicationsignals_mcp_server/audit_presentation_utils.py +288 -0
  4. awslabs/cloudwatch_applicationsignals_mcp_server/audit_utils.py +912 -0
  5. awslabs/cloudwatch_applicationsignals_mcp_server/aws_clients.py +120 -0
  6. awslabs/cloudwatch_applicationsignals_mcp_server/canary_utils.py +910 -0
  7. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-dotnet-enablement.md +435 -0
  8. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-java-enablement.md +321 -0
  9. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-nodejs-enablement.md +420 -0
  10. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-python-enablement.md +598 -0
  11. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-dotnet-enablement.md +264 -0
  12. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-java-enablement.md +193 -0
  13. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-nodejs-enablement.md +198 -0
  14. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-python-enablement.md +236 -0
  15. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-dotnet-enablement.md +166 -0
  16. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-java-enablement.md +166 -0
  17. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-nodejs-enablement.md +166 -0
  18. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-python-enablement.md +169 -0
  19. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-dotnet-enablement.md +336 -0
  20. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-java-enablement.md +336 -0
  21. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-nodejs-enablement.md +336 -0
  22. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-python-enablement.md +336 -0
  23. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_tools.py +147 -0
  24. awslabs/cloudwatch_applicationsignals_mcp_server/server.py +1505 -0
  25. awslabs/cloudwatch_applicationsignals_mcp_server/service_audit_utils.py +231 -0
  26. awslabs/cloudwatch_applicationsignals_mcp_server/service_tools.py +659 -0
  27. awslabs/cloudwatch_applicationsignals_mcp_server/sli_report_client.py +333 -0
  28. awslabs/cloudwatch_applicationsignals_mcp_server/slo_tools.py +386 -0
  29. awslabs/cloudwatch_applicationsignals_mcp_server/trace_tools.py +784 -0
  30. awslabs/cloudwatch_applicationsignals_mcp_server/utils.py +172 -0
  31. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/METADATA +808 -0
  32. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/RECORD +36 -0
  33. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/WHEEL +4 -0
  34. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/entry_points.txt +2 -0
  35. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/LICENSE +174 -0
  36. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/NOTICE +2 -0
@@ -0,0 +1,333 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Retrieve service SLI status based on configured Application Signals SLOs."""
16
+
17
+ import logging
18
+ from .aws_clients import applicationsignals_client, cloudwatch_client
19
+ from botocore.exceptions import ClientError
20
+ from dataclasses import dataclass, field
21
+ from datetime import datetime, timedelta, timezone
22
+ from typing import Any, Dict, List, Optional
23
+
24
+
25
+ # Initialize module logger
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
@dataclass
class AWSConfig:
    """Configuration for the AWS region, reporting window, and target service.

    Attributes:
        region (str): AWS region identifier (default: us-east-1)
        period_in_hours (int): Reporting window in hours, clamped to 1-24
        service_name (str): Name of the service to monitor
        key_attributes (Dict[str, str]): Key attributes that identify the service
    """

    region: str
    period_in_hours: int
    service_name: str
    key_attributes: Dict[str, str] = field(default_factory=dict)

    def __init__(
        self,
        region: str = 'us-east-1',
        period_in_hours: int = 24,
        service_name: str = 'UnknownService',
        key_attributes: Optional[Dict[str, str]] = None,
    ):
        """Initialize AWSConfig with region, period, and service name.

        Args:
            region: AWS region identifier (default: us-east-1)
            period_in_hours: Reporting window in hours (default: 24). Values
                above 24 are lowered to 24 and values below 1 are raised to 1.
            service_name: Name of the service to monitor (default: UnknownService)
            key_attributes: Optional key attributes used verbatim in place of
                the generated defaults. An empty dict is honored as given.
        """
        self.region = region
        # Clamp to a 1-24 hour window: the value is later multiplied into a
        # metric period in seconds, so zero or negative hours would produce an
        # invalid period.
        self.period_in_hours = max(1, min(period_in_hours, 24))
        self.service_name = service_name
        if key_attributes is not None:
            self.key_attributes = key_attributes
        else:
            # Default identification: the service name, a fixed 'Service' type,
            # and the region reused as the environment value.
            self.key_attributes = {
                'Name': self.service_name,
                'Type': 'Service',
                'Environment': self.region,
            }
71
+
72
+
73
@dataclass
class SLOSummary:
    """Summary record for a single Service Level Objective (SLO).

    Attributes:
        name (str): SLO name
        arn (str): Amazon Resource Name of the SLO
        key_attributes (Dict[str, str]): Attributes identifying the owning service
        operation_name (str): Operation the SLO monitors
        created_time (datetime): Creation timestamp of the SLO
    """

    # Field order is part of the positional constructor signature.
    name: str
    arn: str
    key_attributes: Dict[str, str]
    operation_name: str
    created_time: datetime
90
+
91
+
92
@dataclass
class MetricDataResult:
    """Container for one CloudWatch metric query result.

    Attributes:
        timestamps (List[datetime]): Timestamps of the returned data points
        values (List[float]): Metric values for the returned data points
    """

    # Field order is part of the positional constructor signature.
    timestamps: List[datetime]
    values: List[float]
103
+
104
+
105
class SLIReport:
    """SLI report holding the reporting window, overall status, and SLO counts.

    All data is exposed through read-only properties. The list of breached SLO
    names is copied on construction and on every read, so neither the caller's
    original list nor a list obtained from the property can alter the report.
    """

    def __init__(
        self,
        start_time: datetime,
        end_time: datetime,
        sli_status: str,
        total_slo_count: int,
        ok_slo_count: int,
        breached_slo_count: int,
        breached_slo_names: List[str],
    ):
        """Initialize SLIReport with metrics and status information.

        Args:
            start_time: Start time of the reporting period
            end_time: End time of the reporting period
            sli_status: Overall SLI status (OK/CRITICAL)
            total_slo_count: Total number of SLOs monitored
            ok_slo_count: Number of SLOs meeting their objectives
            breached_slo_count: Number of SLOs failing to meet their objectives
            breached_slo_names: Names of SLOs that failed to meet their objectives
        """
        self._start_time = start_time
        self._end_time = end_time
        self._sli_status = sli_status
        self._total_slo_count = total_slo_count
        self._ok_slo_count = ok_slo_count
        self._breached_slo_count = breached_slo_count
        # Defensive copy: keeps the report stable if the caller later mutates
        # the list it passed in.
        self._breached_slo_names = list(breached_slo_names)

    def __repr__(self) -> str:
        """Return a debugging representation listing every report field."""
        return (
            f'{type(self).__name__}('
            f'start_time={self._start_time!r}, '
            f'end_time={self._end_time!r}, '
            f'sli_status={self._sli_status!r}, '
            f'total_slo_count={self._total_slo_count!r}, '
            f'ok_slo_count={self._ok_slo_count!r}, '
            f'breached_slo_count={self._breached_slo_count!r}, '
            f'breached_slo_names={self._breached_slo_names!r})'
        )

    # Property getters for all attributes
    @property
    def start_time(self) -> datetime:
        """Start time of the reporting period."""
        return self._start_time

    @property
    def end_time(self) -> datetime:
        """End time of the reporting period."""
        return self._end_time

    @property
    def sli_status(self) -> str:
        """Overall SLI status (OK/CRITICAL)."""
        return self._sli_status

    @property
    def total_slo_count(self) -> int:
        """Total number of SLOs monitored."""
        return self._total_slo_count

    @property
    def ok_slo_count(self) -> int:
        """Number of SLOs meeting their objectives."""
        return self._ok_slo_count

    @property
    def breached_slo_count(self) -> int:
        """Number of SLOs failing to meet their objectives."""
        return self._breached_slo_count

    @property
    def breached_slo_names(self) -> List[str]:
        """Names of SLOs that failed to meet their objectives (a fresh copy)."""
        return self._breached_slo_names.copy()
176
+
177
+
178
class SLIReportClient:
    """Client for generating SLI reports using AWS Application Signals and CloudWatch.

    Lists the SLOs configured for a service, queries the ``BreachedCount``
    metric for each one, and summarizes the outcome as an ``SLIReport``.
    """

    # Upper bound on metric queries sent in one GetMetricData request.
    _MAX_QUERIES_PER_REQUEST = 500

    def __init__(self, config: AWSConfig):
        """Initialize SLIReportClient with AWS configuration.

        Args:
            config: AWSConfig instance containing region, period, and service settings
        """
        self.config = config
        logger.info(
            f'Initializing SLIReportClient for service: {config.service_name}, region: {config.region}'
        )

        # The module-level shared clients are used as-is; config.region is
        # only logged here and does not create a region-specific client.
        self.signals_client = applicationsignals_client
        self.cloudwatch_client = cloudwatch_client
        logger.debug('Using shared AWS clients')

    def get_slo_summaries(self) -> List[SLOSummary]:
        """Fetch every SLO summary for the configured service.

        Follows ``NextToken`` so SLOs beyond the first response page are
        included.

        Returns:
            One SLOSummary per SLO returned by Application Signals.

        Raises:
            ClientError: Re-raised after logging when the AWS call fails.
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug(f'Fetching SLO summaries for {self.config.service_name}')

        request: Dict[str, Any] = {
            'KeyAttributes': self.config.key_attributes,
            'MetricSourceTypes': ['ServiceOperation'],
            'IncludeLinkedAccounts': True,
        }
        raw_summaries: List[Dict[str, Any]] = []

        try:
            while True:
                response = self.signals_client.list_service_level_objectives(**request)
                raw_summaries.extend(response.get('SloSummaries', []))
                next_token = response.get('NextToken')
                # Stop on the last page, and also if the same token comes back
                # twice, so a misbehaving endpoint cannot loop forever.
                if not next_token or next_token == request.get('NextToken'):
                    break
                request['NextToken'] = next_token
            logger.info(f'Retrieved {len(raw_summaries)} SLO summaries')
        except ClientError as e:
            error_msg = e.response.get('Error', {}).get('Message', 'Unknown error')
            error_code = e.response.get('Error', {}).get('Code', 'Unknown')
            logger.error(f'AWS ClientError getting SLO summaries: {error_code} - {error_msg}')
            raise
        except Exception as e:
            logger.error(f'Unexpected error getting SLO summaries: {str(e)}', exc_info=True)
            raise

        return [
            SLOSummary(
                name=slo['Name'],
                arn=slo['Arn'],
                key_attributes=slo.get('KeyAttributes', {}),
                operation_name=slo.get('OperationName', 'N/A'),
                created_time=slo.get('CreatedTime', datetime.now(timezone.utc)),
            )
            for slo in raw_summaries
        ]

    def create_metric_queries(self, slo_summaries: List[SLOSummary]) -> List[Dict[str, Any]]:
        """Create one CloudWatch metric query per SLO.

        Each query asks for the Maximum of ``BreachedCount`` over a single
        period spanning the configured window. Query ids are ``slo<index>``,
        following the order of ``slo_summaries``.
        """
        period_seconds = self.config.period_in_hours * 60 * 60
        return [
            {
                'Id': f'slo{i}',
                'MetricStat': {
                    'Metric': {
                        'Namespace': 'AWS/ApplicationSignals',
                        'MetricName': 'BreachedCount',
                        'Dimensions': [{'Name': 'SloName', 'Value': slo.name}],
                    },
                    'Period': period_seconds,
                    'Stat': 'Maximum',
                },
                'ReturnData': True,
            }
            for i, slo in enumerate(slo_summaries)
        ]

    def get_metric_data(
        self, queries: List[Dict[str, Any]], start_time: datetime, end_time: datetime
    ) -> List[MetricDataResult]:
        """Retrieve metric data from CloudWatch for the given queries.

        Queries are sent in batches of at most ``_MAX_QUERIES_PER_REQUEST`` and
        the per-batch results are concatenated in request order.

        Args:
            queries: Metric data queries, e.g. from ``create_metric_queries``
            start_time: Start of the query window
            end_time: End of the query window

        Returns:
            One MetricDataResult per result entry returned by CloudWatch.

        Raises:
            ClientError: Re-raised after logging when the AWS call fails.
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug(f'Fetching metric data with {len(queries)} queries')

        batch_size = self._MAX_QUERIES_PER_REQUEST
        raw_results: List[Dict[str, Any]] = []

        try:
            # max(..., 1) keeps a single request for an empty query list, so
            # the service's own validation response surfaces as it did before.
            for offset in range(0, max(len(queries), 1), batch_size):
                response = self.cloudwatch_client.get_metric_data(
                    MetricDataQueries=queries[offset : offset + batch_size],  # type: ignore
                    StartTime=start_time,
                    EndTime=end_time,
                )
                raw_results.extend(response.get('MetricDataResults', []))
            logger.debug(f'Retrieved {len(raw_results)} metric results')
        except ClientError as e:
            error_msg = e.response.get('Error', {}).get('Message', 'Unknown error')
            error_code = e.response.get('Error', {}).get('Code', 'Unknown')
            logger.error(f'AWS ClientError getting metric data: {error_code} - {error_msg}')
            raise
        except Exception as e:
            logger.error(f'Unexpected error getting metric data: {str(e)}', exc_info=True)
            raise

        return [
            MetricDataResult(
                timestamps=result.get('Timestamps', []), values=result.get('Values', [])
            )
            for result in raw_results
        ]

    def get_sli_status(self, num_breaching: int) -> str:
        """Return 'CRITICAL' when at least one SLO is breaching, otherwise 'OK'."""
        return 'CRITICAL' if num_breaching > 0 else 'OK'

    def generate_sli_report(self) -> SLIReport:
        """Generate an SLI report for the configured service.

        Lists the service's SLOs, fetches ``BreachedCount`` for each over the
        last ``period_in_hours`` hours, and classifies every SLO as breaching
        or healthy. An SLO with no metric values is counted as healthy.

        Returns:
            SLIReport with the window, overall status, counts, and the names
            of breaching SLOs. With no SLOs the report is 'OK' with zero counts.
        """
        logger.info(f'Generating SLI report for {self.config.service_name}')
        # Use an explicit UTC timestamp: a naive local time would be read as
        # UTC when sent to AWS, shifting the window on non-UTC hosts.
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=self.config.period_in_hours)
        logger.debug(f'Report time range: {start_time} to {end_time}')

        slo_summaries = self.get_slo_summaries()

        # If no SLOs found, return empty report
        if not slo_summaries:
            logger.warning(f'No SLOs found for service {self.config.service_name}')
            return SLIReport(
                start_time=start_time,
                end_time=end_time,
                sli_status='OK',  # No SLOs means nothing can be breached
                total_slo_count=0,
                ok_slo_count=0,
                breached_slo_count=0,
                breached_slo_names=[],
            )

        metric_queries = self.create_metric_queries(slo_summaries)
        metric_results = self.get_metric_data(metric_queries, start_time, end_time)

        if len(metric_results) != len(slo_summaries):
            # Results are matched to SLOs by position, so a count mismatch
            # means some SLOs cannot be classified.
            logger.warning(
                f'Expected {len(slo_summaries)} metric results but received {len(metric_results)}'
            )

        healthy_slos = []
        breaching_slos = []

        for slo, result in zip(slo_summaries, metric_results):
            # Only the first returned value is inspected.
            # NOTE(review): confirm this is the data point of interest when
            # CloudWatch returns more than one value for the window.
            if result.values and result.values[0] > 0:
                breaching_slos.append(slo.name)
            else:
                healthy_slos.append(slo.name)

        logger.debug(
            f'SLI report generated - Total SLOs: {len(slo_summaries)}, Breaching: {len(breaching_slos)}, Healthy: {len(healthy_slos)}'
        )
        return SLIReport(
            start_time=start_time,
            end_time=end_time,
            sli_status=self.get_sli_status(len(breaching_slos)),
            total_slo_count=len(slo_summaries),
            ok_slo_count=len(healthy_slos),
            breached_slo_count=len(breaching_slos),
            breached_slo_names=breaching_slos,
        )