awslabs.cloudwatch-applicationsignals-mcp-server 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs/__init__.py +17 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/__init__.py +17 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/audit_presentation_utils.py +288 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/audit_utils.py +912 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/aws_clients.py +120 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/canary_utils.py +910 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-dotnet-enablement.md +435 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-java-enablement.md +321 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-nodejs-enablement.md +420 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-python-enablement.md +598 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-dotnet-enablement.md +264 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-java-enablement.md +193 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-nodejs-enablement.md +198 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-python-enablement.md +236 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-dotnet-enablement.md +166 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-java-enablement.md +166 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-nodejs-enablement.md +166 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-python-enablement.md +169 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-dotnet-enablement.md +336 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-java-enablement.md +336 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-nodejs-enablement.md +336 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-python-enablement.md +336 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/enablement_tools.py +147 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/server.py +1505 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/service_audit_utils.py +231 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/service_tools.py +659 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/sli_report_client.py +333 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/slo_tools.py +386 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/trace_tools.py +784 -0
- awslabs/cloudwatch_applicationsignals_mcp_server/utils.py +172 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/METADATA +808 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/RECORD +36 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/WHEEL +4 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/entry_points.txt +2 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/LICENSE +174 -0
- awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/NOTICE +2 -0
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""CloudWatch Application Signals MCP Server - Service-related tools."""
|
|
16
|
+
|
|
17
|
+
from .aws_clients import applicationsignals_client, cloudwatch_client
|
|
18
|
+
from botocore.exceptions import ClientError
|
|
19
|
+
from datetime import datetime, timedelta, timezone
|
|
20
|
+
from loguru import logger
|
|
21
|
+
from pydantic import Field
|
|
22
|
+
from time import perf_counter as timer
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def list_monitored_services() -> str:
    """OPTIONAL TOOL for service discovery - audit_services() can automatically discover services using wildcard patterns.

    **IMPORTANT: For service auditing and operation analysis, use audit_services() as the PRIMARY tool instead.**

    **WHEN TO USE THIS TOOL:**
    - Getting a detailed overview of all monitored services in your environment
    - Discovering specific service names and environments for manual audit target construction
    - Understanding the complete service inventory before targeted analysis
    - When you need detailed service attributes beyond what wildcard expansion provides

    **RECOMMENDED WORKFLOW FOR SERVICE AND OPERATION AUDITING:**
    1. **Use audit_services() FIRST** with wildcard patterns for comprehensive service discovery AND analysis
    2. **Only use this tool** if you need basic service inventory without performance analysis
    3. **audit_services() is more comprehensive** - it discovers services AND provides performance insights

    **AUTOMATIC SERVICE DISCOVERY IN AUDIT:**
    The `audit_services()` tool automatically discovers services when you use wildcard patterns:
    - `[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*"}}}]` - Audits all services
    - `[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*payment*"}}}]` - Audits services with "payment" in the name

    **What this tool provides:**
    - Basic service inventory (names, types, environments)
    - Service count and categorization
    - Key attributes for manual target construction

    **What this tool does NOT provide:**
    - Service performance analysis
    - Operation discovery and analysis
    - Root cause analysis
    - Actionable recommendations

    **For comprehensive service auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*"}}}]',
        auditors='all',
    )
    ```

    Returns a formatted list showing:
    - Service name and type
    - Key attributes (Name, Environment, Platform, etc.)
    - Total count of services

    **NOTE**: For operation auditing, use audit_services() as the primary tool instead of get_service_detail() or list_service_operations().
    """
    # NOTE(review): the docstring above reads like the tool description surfaced to
    # MCP clients (registration happens outside this file) - text kept verbatim.
    #
    # Implementation summary:
    #   * queries Application Signals for services seen in the last 24 hours,
    #     following NextToken so inventories larger than one page are complete;
    #   * never raises: AWS and unexpected failures are returned as strings
    #     prefixed with 'AWS Error: ' / 'Error: '.
    start_time_perf = timer()
    logger.debug('Starting list_monitored_services request')

    try:
        # Calculate time range (last 24 hours)
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=24)

        logger.debug(f'Querying services for time range: {start_time} to {end_time}')

        # ListServices is paginated; reading only the first page would silently
        # truncate both the listing and the reported total.
        services = []
        request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
        while True:
            response = applicationsignals_client.list_services(**request)
            services.extend(response.get('ServiceSummaries', []))
            next_token = response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token
        logger.debug(f'Retrieved {len(services)} services from Application Signals')

        if not services:
            logger.warning('No services found in Application Signals')
            return 'No services found in Application Signals.'

        # Collect fragments and join once instead of repeated string concatenation.
        parts = [f'Application Signals Services ({len(services)} total):\n\n']

        for service in services:
            # Service name and type live inside KeyAttributes
            key_attrs = service.get('KeyAttributes', {})
            service_name = key_attrs.get('Name', 'Unknown')
            service_type = key_attrs.get('Type', 'Unknown')

            parts.append(f'• Service: {service_name}\n')
            parts.append(f' Type: {service_type}\n')

            if key_attrs:
                parts.append(' Key Attributes:\n')
                parts.extend(f' {key}: {value}\n' for key, value in key_attrs.items())

            parts.append('\n')

        elapsed_time = timer() - start_time_perf
        logger.debug(f'list_monitored_services completed in {elapsed_time:.3f}s')
        return ''.join(parts)

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in list_monitored_services: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru has no `exc_info` keyword: extra kwargs trigger str.format() on the
        # message (which raises if the exception text contains braces) and no
        # traceback is recorded. logger.exception() attaches the active traceback.
        logger.exception(f'Unexpected error in list_monitored_services: {e}')
        return f'Error: {str(e)}'
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def get_service_detail(
    service_name: str = Field(
        ..., description='Name of the service to get details for (case-sensitive)'
    ),
) -> str:
    """Get detailed information about a specific Application Signals service.

    **IMPORTANT: For operation auditing, use audit_services() as the PRIMARY tool instead.**

    **RECOMMENDED WORKFLOW FOR OPERATION AUDITING:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery and analysis
    2. **Only use this tool** for basic service metadata and configuration details
    3. **This tool does NOT provide operation names** - it only shows service-level metrics

    **What this tool provides:**
    - Service metadata and configuration
    - Platform information (EKS, Lambda, etc.)
    - Service-level metrics (Latency, Error, Fault aggregates)
    - Log groups associated with the service
    - Key attributes (Type, Environment, Platform)

    **What this tool does NOT provide:**
    - Operation names (GET, POST, etc.)
    - Operation-specific metrics
    - Operation-level performance data

    **For operation auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"your-service"}}}]',
        auditors='all',
    )
    ```

    This tool is useful for understanding service deployment details and basic configuration,
    but audit_services() is the primary tool for operation discovery and performance analysis.
    """
    # NOTE(review): the docstring above reads like the tool description surfaced to
    # MCP clients (registration happens outside this file) - text kept verbatim.
    #
    # Implementation summary:
    #   * resolves `service_name` (exact, case-sensitive match on KeyAttributes.Name)
    #     against services seen in the last 24 hours, paging through ListServices
    #     until the first match is found;
    #   * calls GetService with the matched KeyAttributes and renders key
    #     attributes, attribute maps, metric references and log group references;
    #   * never raises: failures are returned as 'AWS Error: ...' / 'Error: ...'.
    start_time_perf = timer()
    logger.debug(f'Starting get_service_detail request for service: {service_name}')

    try:
        # Calculate time range (last 24 hours)
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=24)

        # Page through the service list until the requested name shows up. Looking at
        # the first page only would report "not found" for services beyond it.
        target_service = None
        request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
        while target_service is None:
            services_response = applicationsignals_client.list_services(**request)
            target_service = next(
                (
                    candidate
                    for candidate in services_response.get('ServiceSummaries', [])
                    if candidate.get('KeyAttributes', {}).get('Name') == service_name
                ),
                None,
            )
            next_token = services_response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals."

        # Get detailed service information
        logger.debug(f'Getting detailed information for service: {service_name}')
        service_response = applicationsignals_client.get_service(
            StartTime=start_time, EndTime=end_time, KeyAttributes=target_service['KeyAttributes']
        )

        service_details = service_response['Service']

        # Collect fragments and join once instead of repeated string concatenation.
        parts = [f'Service Details: {service_name}\n\n']

        # Key Attributes
        key_attrs = service_details.get('KeyAttributes', {})
        if key_attrs:
            parts.append('Key Attributes:\n')
            parts.extend(f' {key}: {value}\n' for key, value in key_attrs.items())
            parts.append('\n')

        # Attribute Maps (Platform, Application, Telemetry info)
        attr_maps = service_details.get('AttributeMaps', [])
        if attr_maps:
            parts.append('Additional Attributes:\n')
            for attr_map in attr_maps:
                parts.extend(f' {key}: {value}\n' for key, value in attr_map.items())
            parts.append('\n')

        # Metric References
        metric_refs = service_details.get('MetricReferences', [])
        if metric_refs:
            parts.append(f'Metric References ({len(metric_refs)} total):\n')
            for metric in metric_refs:
                parts.append(f' • {metric.get("Namespace", "")}/{metric.get("MetricName", "")}\n')
                parts.append(f' Type: {metric.get("MetricType", "")}\n')
                dimensions = metric.get('Dimensions', [])
                if dimensions:
                    dim_strs = [f'{d["Name"]}={d["Value"]}' for d in dimensions]
                    parts.append(' Dimensions: ' + ', '.join(dim_strs) + '\n')
                parts.append('\n')

        # Log Group References
        log_refs = service_details.get('LogGroupReferences', [])
        if log_refs:
            parts.append(f'Log Group References ({len(log_refs)} total):\n')
            parts.extend(f' • {log_ref.get("Identifier", "Unknown")}\n' for log_ref in log_refs)
            parts.append('\n')

        elapsed_time = timer() - start_time_perf
        logger.debug(f"get_service_detail completed for '{service_name}' in {elapsed_time:.3f}s")
        return ''.join(parts)

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in get_service_detail: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru has no `exc_info` keyword: extra kwargs trigger str.format() on the
        # message (which raises if the exception text contains braces) and no
        # traceback is recorded. logger.exception() attaches the active traceback.
        logger.exception(f"Unexpected error in get_service_detail for '{service_name}': {e}")
        return f'Error: {str(e)}'
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
async def query_service_metrics(
    service_name: str = Field(
        ..., description='Name of the service to get metrics for (case-sensitive)'
    ),
    metric_name: str = Field(
        ...,
        description='Specific metric name (e.g., Latency, Error, Fault). Leave empty to list available metrics',
    ),
    statistic: str = Field(
        default='Average',
        description='Standard statistic type (Average, Sum, Maximum, Minimum, SampleCount)',
    ),
    extended_statistic: str = Field(
        default='p99', description='Extended statistic (p99, p95, p90, p50, etc)'
    ),
    hours: int = Field(
        default=1, description='Number of hours to look back (default 1, max 168 for 1 week)'
    ),
) -> str:
    """Get CloudWatch metrics for a specific Application Signals service.

    Use this tool to:
    - Analyze service performance (latency, throughput)
    - Check error rates and reliability
    - View trends over time
    - Get both standard statistics (Average, Max) and percentiles (p99, p95)

    Common metric names:
    - 'Latency': Response time in milliseconds
    - 'Error': Percentage of failed requests
    - 'Fault': Percentage of server errors (5xx)

    Returns:
    - Summary statistics (latest, average, min, max)
    - Recent data points with timestamps
    - Both standard and percentile values when available

    The tool automatically adjusts the granularity based on time range:
    - Up to 3 hours: 1-minute resolution
    - Up to 24 hours: 5-minute resolution
    - Over 24 hours: 1-hour resolution
    """
    # NOTE(review): the docstring above reads like the tool description surfaced to
    # MCP clients (registration happens outside this file) - text kept verbatim.
    #
    # Implementation summary:
    #   * `hours` is clamped to 1..168, the range advertised by the parameter
    #     description;
    #   * the service is resolved by exact name via paginated ListServices, its
    #     MetricReferences come from GetService, and the datapoints from
    #     CloudWatch GetMetricStatistics;
    #   * an empty `metric_name` returns the list of available metrics instead;
    #   * never raises: failures are returned as 'AWS Error: ...' / 'Error: ...'.
    start_time_perf = timer()
    logger.info(
        f'Starting query_service_metrics request - service: {service_name}, metric: {metric_name}, hours: {hours}'
    )

    try:
        # Enforce the documented 1..168 hour window; a zero/negative value would
        # otherwise produce a start time at or after the end time.
        requested_hours = hours
        hours = max(1, min(hours, 168))
        if hours != requested_hours:
            logger.warning(
                f'Requested look-back of {requested_hours} hour(s) is outside 1-168; using {hours}'
            )

        # Calculate time range
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=hours)

        # Page through the service list until the requested name shows up. Looking at
        # the first page only would report "not found" for services beyond it.
        target_service = None
        request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
        while target_service is None:
            services_response = applicationsignals_client.list_services(**request)
            target_service = next(
                (
                    candidate
                    for candidate in services_response.get('ServiceSummaries', [])
                    if candidate.get('KeyAttributes', {}).get('Name') == service_name
                ),
                None,
            )
            next_token = services_response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals."

        # Get detailed service info for metric references
        service_response = applicationsignals_client.get_service(
            StartTime=start_time, EndTime=end_time, KeyAttributes=target_service['KeyAttributes']
        )

        metric_refs = service_response['Service'].get('MetricReferences', [])

        if not metric_refs:
            logger.warning(f"No metrics found for service '{service_name}'")
            return f"No metrics found for service '{service_name}'."

        # If no specific metric requested, show available metrics
        if not metric_name:
            listing = [f"Available metrics for service '{service_name}':\n\n"]
            for metric in metric_refs:
                listing.append(f'• {metric.get("MetricName", "Unknown")}\n')
                listing.append(f' Namespace: {metric.get("Namespace", "Unknown")}\n')
                listing.append(f' Type: {metric.get("MetricType", "Unknown")}\n')
                listing.append('\n')
            return ''.join(listing)

        # Find the specific metric (first reference with a matching name)
        target_metric = next(
            (metric for metric in metric_refs if metric.get('MetricName') == metric_name),
            None,
        )

        if not target_metric:
            available = [m.get('MetricName', 'Unknown') for m in metric_refs]
            return f"Metric '{metric_name}' not found for service '{service_name}'. Available: {', '.join(available)}"

        # Calculate appropriate period based on time range
        if hours <= 3:
            period = 60  # 1 minute
        elif hours <= 24:
            period = 300  # 5 minutes
        else:
            period = 3600  # 1 hour

        # Get both standard and extended statistics in a single call
        response = cloudwatch_client.get_metric_statistics(
            Namespace=target_metric['Namespace'],
            MetricName=target_metric['MetricName'],
            Dimensions=target_metric.get('Dimensions', []),
            StartTime=start_time,
            EndTime=end_time,
            Period=period,
            Statistics=[statistic],  # type: ignore
            ExtendedStatistics=[extended_statistic],
        )

        datapoints = response.get('Datapoints', [])

        if not datapoints:
            logger.warning(
                f"No data points found for metric '{metric_name}' on service '{service_name}' in the last {hours} hour(s)"
            )
            return f"No data points found for metric '{metric_name}' on service '{service_name}' in the last {hours} hour(s)."

        # Sort by timestamp so "latest" and "recent" refer to the end of the list
        datapoints.sort(key=lambda x: x.get('Timestamp', datetime.min))  # type: ignore

        # Build response
        parts = [
            f'Metrics for {service_name} - {metric_name}\n',
            f'Time Range: Last {hours} hour(s)\n',
            f'Period: {period} seconds\n\n',
        ]

        # Values are collected in timestamp order, skipping datapoints that do not
        # carry the statistic.
        standard_values = [dp.get(statistic) for dp in datapoints if dp.get(statistic) is not None]
        extended_values = [
            dp.get(extended_statistic)
            for dp in datapoints
            if dp.get(extended_statistic) is not None
        ]

        parts.append('Summary:\n')

        # "Latest" is the last *available* value: reading it from datapoints[-1]
        # would yield None (and break the :.2f format) whenever the newest
        # datapoint lacks this statistic.
        for label, values in ((statistic, standard_values), (extended_statistic, extended_values)):
            if not values:
                continue
            parts.append(f'{label} Statistics:\n')
            parts.append(f'• Latest: {values[-1]:.2f}\n')
            parts.append(f'• Average: {sum(values) / len(values):.2f}\n')  # type: ignore
            parts.append(f'• Maximum: {max(values):.2f}\n')  # type: ignore
            parts.append(f'• Minimum: {min(values):.2f}\n\n')  # type: ignore

        parts.append(f'• Data Points: {len(datapoints)}\n\n')

        # Show recent values (last 10) with both metrics
        parts.append('Recent Values:\n')
        for dp in datapoints[-10:]:
            timestamp = dp.get('Timestamp', datetime.min).strftime('%m/%d %H:%M')  # type: ignore
            unit = dp.get('Unit', '')

            values_str = []
            if dp.get(statistic) is not None:
                values_str.append(f'{statistic}: {dp[statistic]:.2f}')
            if dp.get(extended_statistic) is not None:
                values_str.append(f'{extended_statistic}: {dp[extended_statistic]:.2f}')

            parts.append(f'• {timestamp}: {", ".join(values_str)} {unit}\n')

        elapsed_time = timer() - start_time_perf
        logger.info(
            f"query_service_metrics completed for '{service_name}/{metric_name}' in {elapsed_time:.3f}s"
        )
        return ''.join(parts)

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in query_service_metrics: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru has no `exc_info` keyword: extra kwargs trigger str.format() on the
        # message (which raises if the exception text contains braces) and no
        # traceback is recorded. logger.exception() attaches the active traceback.
        logger.exception(
            f"Unexpected error in query_service_metrics for '{service_name}/{metric_name}': {e}"
        )
        return f'Error: {str(e)}'
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _http_method_of_operation(operation_name: str) -> str:
    """Return the upper-cased first whitespace-delimited token of an operation name.

    Used to bucket operations by leading verb (e.g. 'GET /owners' -> 'GET').
    Returns an empty string when the name has no tokens.
    """
    tokens = operation_name.split(maxsplit=1)
    return tokens[0].upper() if tokens else ''


def _format_operation_group(heading: str, group: list) -> str:
    """Render one group of operations, or an empty string when the group is empty.

    Each operation contributes its name and, when it has MetricReferences, the
    distinct metric types in sorted order so the output is deterministic.
    """
    if not group:
        return ''

    parts = [f'{heading} ({len(group)}):\n']
    for operation in group:
        parts.append(f' • {operation.get("Name", "Unknown")}\n')

        # Show available metrics for this operation
        metric_refs = operation.get('MetricReferences', [])
        if metric_refs:
            metric_types = {ref.get('MetricType', 'Unknown') for ref in metric_refs}
            parts.append(f' Available Metrics: {", ".join(sorted(metric_types))}\n')
    parts.append('\n')
    return ''.join(parts)


async def list_service_operations(
    service_name: str = Field(
        ..., description='Name of the service to list operations for (case-sensitive)'
    ),
    hours: int = Field(
        default=24,
        description='Number of hours to look back for operation discovery (default 24, max 24 for Application Signals operation discovery)',
    ),
) -> str:
    """OPERATION DISCOVERY TOOL - For operation inventory only. Use audit_services() as PRIMARY tool for operation auditing.

    **IMPORTANT: For operation auditing and performance analysis, use audit_services() as the PRIMARY tool instead.**

    **CRITICAL LIMITATION: This tool only discovers operations that have been ACTIVELY INVOKED in the specified time window.**
    - **Maximum time window: 24 hours** (Application Signals limitation for operation discovery)
    - **No results = No operation invocations** in the time window (operations exist but weren't called)
    - **Empty results do NOT mean operations don't exist** - they may just be inactive
    - **For comprehensive operation analysis regardless of recent activity, use audit_services() instead**

    **RECOMMENDED WORKFLOW FOR OPERATION AUDITING:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery AND performance analysis
    2. **Only use this tool** if you need a simple operation inventory of RECENTLY ACTIVE operations
    3. **audit_services() is more comprehensive** - it discovers operations AND provides performance insights even for inactive operations

    **What this tool provides:**
    - Basic operation inventory (names and available metric types) for RECENTLY INVOKED operations only
    - Operation count and categorization (GET, POST, etc.) for active operations
    - Time range for discovery (max 24 hours)

    **What this tool does NOT provide:**
    - Operations that exist but weren't invoked in the time window
    - Operation performance analysis
    - Latency, error rate, or fault analysis
    - Root cause analysis
    - Actionable recommendations

    **For comprehensive operation auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"your-service"}}}]',
        auditors='all',
    )
    ```

    **OPERATION DISCOVERY USE CASES (when audit_services is not sufficient):**

    1. **Active operation inventory**: When you only need recently invoked operation names without performance data
    2. **Traffic pattern analysis**: To see which operations are currently being used
    3. **Quick active operation count**: To understand current operation activity of a service

    **RECOMMENDED WORKFLOW:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery and analysis
    2. **Only use this tool** for basic inventory of recently active operations if audit_services() provides too much detail

    This tool provides basic operation discovery for ACTIVE operations only, but audit_services() is the primary tool for
    comprehensive operation auditing, performance analysis, and operation insights regardless of recent activity.
    """
    # NOTE(review): the docstring above reads like the tool description surfaced to
    # MCP clients (registration happens outside this file) - text kept verbatim.
    #
    # Implementation summary:
    #   * `hours` is clamped to 1..24;
    #   * the service is resolved by exact name via paginated ListServices, then
    #     all pages of ListServiceOperations are collected;
    #   * operations are bucketed by the leading verb of their name (GET / POST /
    #     other) and rendered with their available metric types;
    #   * never raises: failures are returned as 'AWS Error: ...' / 'Error: ...'.
    start_time_perf = timer()
    logger.debug(f'Starting list_service_operations request for service: {service_name}')

    try:
        # Calculate time range - enforce 24 hour maximum for Application Signals operation
        # discovery, and at least 1 hour so the window is never empty or inverted.
        end_time = datetime.now(timezone.utc)
        hours = max(1, min(hours, 24))
        start_time = end_time - timedelta(hours=hours)

        # Page through the service list until the requested name shows up. Looking at
        # the first page only would report "not found" for services beyond it.
        target_service = None
        request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
        while target_service is None:
            services_response = applicationsignals_client.list_services(**request)
            target_service = next(
                (
                    candidate
                    for candidate in services_response.get('ServiceSummaries', [])
                    if candidate.get('KeyAttributes', {}).get('Name') == service_name
                ),
                None,
            )
            next_token = services_response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals. Use list_monitored_services() to see available services."

        # Get operations for the service using ListServiceOperations API (all pages)
        logger.debug(f'Getting operations for service: {service_name}')
        operations = []
        operations_request = {
            'StartTime': start_time,
            'EndTime': end_time,
            'KeyAttributes': target_service['KeyAttributes'],
            'MaxResults': 100,
        }
        while True:
            operations_response = applicationsignals_client.list_service_operations(
                **operations_request
            )
            operations.extend(operations_response.get('ServiceOperations', []))
            next_token = operations_response.get('NextToken')
            if not next_token:
                break
            operations_request['NextToken'] = next_token
        logger.debug(f'Retrieved {len(operations)} operations for service: {service_name}')

        if not operations:
            logger.warning(
                f"No operations found for service '{service_name}' in the last {hours} hours"
            )
            return (
                f"No operations found for service '{service_name}' in the last {hours} hours.\n\n"
                f'⚠️ IMPORTANT: This means NO OPERATION INVOCATIONS occurred in the time window.\n'
                f' • Operations may exist but were not actively called\n'
                f' • Maximum discovery window is 24 hours for Application Signals\n'
                f' • For comprehensive operation analysis regardless of recent activity, use audit_services()\n'
                f' • Empty results ≠ no operations exist, just no recent invocations'
            )

        # Group operations by leading verb. A substring test such as
        # `'GET' in name.upper()` would file 'POST /widgets' or 'DELETE /targets'
        # under GET, so only the first token of the name is compared.
        get_operations = []
        post_operations = []
        other_operations = []
        buckets = {'GET': get_operations, 'POST': post_operations}

        for operation in operations:
            method = _http_method_of_operation(operation.get('Name', 'Unknown'))
            buckets.get(method, other_operations).append(operation)

        parts = [
            f'Operations for Service: {service_name}\n',
            f'Time Range: Last {hours} hour(s)\n',
            f'Total Operations: {len(operations)}\n\n',
            # GET first, then POST, then everything else
            _format_operation_group('🔍 GET Operations', get_operations),
            _format_operation_group('📝 POST Operations', post_operations),
            _format_operation_group('🔧 Other Operations', other_operations),
            # Summary for audit planning
            '📊 Operation Discovery Summary:\n',
            f'• Total Operations: {len(operations)}\n',
            f'• GET Operations: {len(get_operations)}\n',
            f'• POST Operations: {len(post_operations)}\n',
            f'• Other Operations: {len(other_operations)}\n\n',
            '💡 Next Steps:\n',
            '• Use audit_service_operations() with specific operation targets for detailed analysis\n',
            '• Focus on GET operations for latency auditing\n',
            '• Check operations with Latency metrics for performance analysis\n',
        ]

        elapsed_time = timer() - start_time_perf
        logger.debug(
            f"list_service_operations completed for '{service_name}' in {elapsed_time:.3f}s"
        )
        return ''.join(parts)

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in list_service_operations: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru has no `exc_info` keyword: extra kwargs trigger str.format() on the
        # message (which raises if the exception text contains braces) and no
        # traceback is recorded. logger.exception() attaches the active traceback.
        logger.exception(f"Unexpected error in list_service_operations for '{service_name}': {e}")
        return f'Error: {str(e)}'
|