awslabs.cloudwatch-applicationsignals-mcp-server 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. awslabs/__init__.py +17 -0
  2. awslabs/cloudwatch_applicationsignals_mcp_server/__init__.py +17 -0
  3. awslabs/cloudwatch_applicationsignals_mcp_server/audit_presentation_utils.py +288 -0
  4. awslabs/cloudwatch_applicationsignals_mcp_server/audit_utils.py +912 -0
  5. awslabs/cloudwatch_applicationsignals_mcp_server/aws_clients.py +120 -0
  6. awslabs/cloudwatch_applicationsignals_mcp_server/canary_utils.py +910 -0
  7. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-dotnet-enablement.md +435 -0
  8. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-java-enablement.md +321 -0
  9. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-nodejs-enablement.md +420 -0
  10. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-python-enablement.md +598 -0
  11. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-dotnet-enablement.md +264 -0
  12. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-java-enablement.md +193 -0
  13. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-nodejs-enablement.md +198 -0
  14. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-python-enablement.md +236 -0
  15. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-dotnet-enablement.md +166 -0
  16. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-java-enablement.md +166 -0
  17. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-nodejs-enablement.md +166 -0
  18. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-python-enablement.md +169 -0
  19. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-dotnet-enablement.md +336 -0
  20. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-java-enablement.md +336 -0
  21. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-nodejs-enablement.md +336 -0
  22. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-python-enablement.md +336 -0
  23. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_tools.py +147 -0
  24. awslabs/cloudwatch_applicationsignals_mcp_server/server.py +1505 -0
  25. awslabs/cloudwatch_applicationsignals_mcp_server/service_audit_utils.py +231 -0
  26. awslabs/cloudwatch_applicationsignals_mcp_server/service_tools.py +659 -0
  27. awslabs/cloudwatch_applicationsignals_mcp_server/sli_report_client.py +333 -0
  28. awslabs/cloudwatch_applicationsignals_mcp_server/slo_tools.py +386 -0
  29. awslabs/cloudwatch_applicationsignals_mcp_server/trace_tools.py +784 -0
  30. awslabs/cloudwatch_applicationsignals_mcp_server/utils.py +172 -0
  31. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/METADATA +808 -0
  32. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/RECORD +36 -0
  33. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/WHEEL +4 -0
  34. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/entry_points.txt +2 -0
  35. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/LICENSE +174 -0
  36. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/NOTICE +2 -0
@@ -0,0 +1,659 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """CloudWatch Application Signals MCP Server - Service-related tools."""
16
+
17
+ from .aws_clients import applicationsignals_client, cloudwatch_client
18
+ from botocore.exceptions import ClientError
19
+ from datetime import datetime, timedelta, timezone
20
+ from loguru import logger
21
+ from pydantic import Field
22
+ from time import perf_counter as timer
23
+
24
+
25
async def list_monitored_services() -> str:
    """OPTIONAL TOOL for service discovery - audit_services() can automatically discover services using wildcard patterns.

    **IMPORTANT: For service auditing and operation analysis, use audit_services() as the PRIMARY tool instead.**

    **WHEN TO USE THIS TOOL:**
    - Getting a detailed overview of all monitored services in your environment
    - Discovering specific service names and environments for manual audit target construction
    - Understanding the complete service inventory before targeted analysis
    - When you need detailed service attributes beyond what wildcard expansion provides

    **RECOMMENDED WORKFLOW FOR SERVICE AND OPERATION AUDITING:**
    1. **Use audit_services() FIRST** with wildcard patterns for comprehensive service discovery AND analysis
    2. **Only use this tool** if you need basic service inventory without performance analysis
    3. **audit_services() is more comprehensive** - it discovers services AND provides performance insights

    **AUTOMATIC SERVICE DISCOVERY IN AUDIT:**
    The `audit_services()` tool automatically discovers services when you use wildcard patterns:
    - `[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*"}}}]` - Audits all services
    - `[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*payment*"}}}]` - Audits services with "payment" in the name

    **What this tool provides:**
    - Basic service inventory (names, types, environments)
    - Service count and categorization
    - Key attributes for manual target construction

    **What this tool does NOT provide:**
    - Service performance analysis
    - Operation discovery and analysis
    - Root cause analysis
    - Actionable recommendations

    **For comprehensive service auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*"}}}]',
        auditors='all',
    )
    ```

    Returns a formatted list showing:
    - Service name and type
    - Key attributes (Name, Environment, Platform, etc.)
    - Total count of services

    **NOTE**: For operation auditing, use audit_services() as the primary tool instead of get_service_detail() or list_service_operations().
    """
    # Maintainer notes:
    # - The docstring above is presumably surfaced to MCP clients as the tool
    #   description (TODO confirm), so its wording is intentionally left untouched.
    # - This function never raises: AWS and unexpected failures are logged and
    #   reported to the caller as 'AWS Error: ...' / 'Error: ...' strings.
    start_time_perf = timer()
    logger.debug('Starting list_monitored_services request')

    try:
        # Calculate time range (last 24 hours)
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=24)

        # Get all services. A single call returns at most MaxResults entries, so
        # follow NextToken until the listing is exhausted; otherwise the reported
        # inventory and total are silently truncated.
        logger.debug(f'Querying services for time range: {start_time} to {end_time}')
        services = []
        next_token = None
        while True:
            request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
            if next_token:
                request['NextToken'] = next_token
            response = applicationsignals_client.list_services(**request)
            services.extend(response.get('ServiceSummaries', []))
            next_token = response.get('NextToken')
            if not next_token:
                break
        logger.debug(f'Retrieved {len(services)} services from Application Signals')

        if not services:
            logger.warning('No services found in Application Signals')
            return 'No services found in Application Signals.'

        # Collect fragments and join once instead of repeated string concatenation.
        parts = [f'Application Signals Services ({len(services)} total):\n\n']

        for service in services:
            # Extract service name from KeyAttributes
            key_attrs = service.get('KeyAttributes', {})
            service_name = key_attrs.get('Name', 'Unknown')
            service_type = key_attrs.get('Type', 'Unknown')

            parts.append(f'• Service: {service_name}\n')
            parts.append(f' Type: {service_type}\n')

            # Add key attributes
            if key_attrs:
                parts.append(' Key Attributes:\n')
                parts.extend(f' {key}: {value}\n' for key, value in key_attrs.items())

            parts.append('\n')

        result = ''.join(parts)

        elapsed_time = timer() - start_time_perf
        logger.debug(f'list_monitored_services completed in {elapsed_time:.3f}s')
        return result

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in list_monitored_services: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru does not interpret the stdlib `exc_info=True` flag;
        # logger.exception() records the active traceback at ERROR level.
        logger.exception(f'Unexpected error in list_monitored_services: {str(e)}')
        return f'Error: {str(e)}'
123
+
124
+
125
async def get_service_detail(
    service_name: str = Field(
        ..., description='Name of the service to get details for (case-sensitive)'
    ),
) -> str:
    """Get detailed information about a specific Application Signals service.

    **IMPORTANT: For operation auditing, use audit_services() as the PRIMARY tool instead.**

    **RECOMMENDED WORKFLOW FOR OPERATION AUDITING:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery and analysis
    2. **Only use this tool** for basic service metadata and configuration details
    3. **This tool does NOT provide operation names** - it only shows service-level metrics

    **What this tool provides:**
    - Service metadata and configuration
    - Platform information (EKS, Lambda, etc.)
    - Service-level metrics (Latency, Error, Fault aggregates)
    - Log groups associated with the service
    - Key attributes (Type, Environment, Platform)

    **What this tool does NOT provide:**
    - Operation names (GET, POST, etc.)
    - Operation-specific metrics
    - Operation-level performance data

    **For operation auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"your-service"}}}]',
        auditors='all',
    )
    ```

    This tool is useful for understanding service deployment details and basic configuration,
    but audit_services() is the primary tool for operation discovery and performance analysis.
    """
    # Maintainer notes:
    # - The docstring above is presumably surfaced to MCP clients as the tool
    #   description (TODO confirm), so its wording is intentionally left untouched.
    # - Flow: find the service by exact Name among listed services (last 24 hours),
    #   fetch it with get_service(), then render key attributes, attribute maps,
    #   metric references and log group references as text.
    # - This function never raises: failures are logged and returned as
    #   'AWS Error: ...' / 'Error: ...' strings.
    start_time_perf = timer()
    logger.debug(f'Starting get_service_detail request for service: {service_name}')

    try:
        # Calculate time range (last 24 hours)
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=24)

        # Find the service with matching name. Walk every page of list_services
        # (stopping at the first match) so a service beyond the first MaxResults
        # entries is not wrongly reported as missing.
        target_service = None
        next_token = None
        while target_service is None:
            request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
            if next_token:
                request['NextToken'] = next_token
            services_response = applicationsignals_client.list_services(**request)
            for service in services_response.get('ServiceSummaries', []):
                if service.get('KeyAttributes', {}).get('Name') == service_name:
                    target_service = service
                    break
            next_token = services_response.get('NextToken')
            if not next_token:
                break

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals."

        # Get detailed service information
        logger.debug(f'Getting detailed information for service: {service_name}')
        service_response = applicationsignals_client.get_service(
            StartTime=start_time, EndTime=end_time, KeyAttributes=target_service['KeyAttributes']
        )

        service_details = service_response['Service']

        # Build detailed response (fragments are joined once at the end)
        parts = [f'Service Details: {service_name}\n\n']

        # Key Attributes
        key_attrs = service_details.get('KeyAttributes', {})
        if key_attrs:
            parts.append('Key Attributes:\n')
            parts.extend(f' {key}: {value}\n' for key, value in key_attrs.items())
            parts.append('\n')

        # Attribute Maps (Platform, Application, Telemetry info)
        attr_maps = service_details.get('AttributeMaps', [])
        if attr_maps:
            parts.append('Additional Attributes:\n')
            for attr_map in attr_maps:
                parts.extend(f' {key}: {value}\n' for key, value in attr_map.items())
            parts.append('\n')

        # Metric References
        metric_refs = service_details.get('MetricReferences', [])
        if metric_refs:
            parts.append(f'Metric References ({len(metric_refs)} total):\n')
            for metric in metric_refs:
                parts.append(
                    f' • {metric.get("Namespace", "")}/{metric.get("MetricName", "")}\n'
                )
                parts.append(f' Type: {metric.get("MetricType", "")}\n')
                dimensions = metric.get('Dimensions', [])
                if dimensions:
                    # .get() keeps one incomplete dimension entry from failing the
                    # whole request with a KeyError.
                    dim_strs = [
                        f'{d.get("Name", "Unknown")}={d.get("Value", "Unknown")}'
                        for d in dimensions
                    ]
                    parts.append(' Dimensions: ' + ', '.join(dim_strs) + '\n')
                parts.append('\n')

        # Log Group References
        log_refs = service_details.get('LogGroupReferences', [])
        if log_refs:
            parts.append(f'Log Group References ({len(log_refs)} total):\n')
            parts.extend(f' • {log_ref.get("Identifier", "Unknown")}\n' for log_ref in log_refs)
            parts.append('\n')

        result = ''.join(parts)

        elapsed_time = timer() - start_time_perf
        logger.debug(f"get_service_detail completed for '{service_name}' in {elapsed_time:.3f}s")
        return result

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in get_service_detail: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru does not interpret the stdlib `exc_info=True` flag;
        # logger.exception() records the active traceback at ERROR level.
        logger.exception(
            f"Unexpected error in get_service_detail for '{service_name}': {str(e)}"
        )
        return f'Error: {str(e)}'
253
+
254
+
255
def _extended_statistic_value(datapoint, extended_statistic):
    """Return the value of ``extended_statistic`` (e.g. 'p99') for one datapoint, or None."""
    # GetMetricStatistics nests percentile values under 'ExtendedStatistics'.
    nested = datapoint.get('ExtendedStatistics') or {}
    value = nested.get(extended_statistic)
    if value is None:
        # Backward compatibility: the previous implementation read a flat key.
        value = datapoint.get(extended_statistic)
    return value


async def query_service_metrics(
    service_name: str = Field(
        ..., description='Name of the service to get metrics for (case-sensitive)'
    ),
    metric_name: str = Field(
        ...,
        description='Specific metric name (e.g., Latency, Error, Fault). Leave empty to list available metrics',
    ),
    statistic: str = Field(
        default='Average',
        description='Standard statistic type (Average, Sum, Maximum, Minimum, SampleCount)',
    ),
    extended_statistic: str = Field(
        default='p99', description='Extended statistic (p99, p95, p90, p50, etc)'
    ),
    hours: int = Field(
        default=1, description='Number of hours to look back (default 1, max 168 for 1 week)'
    ),
) -> str:
    """Get CloudWatch metrics for a specific Application Signals service.

    Use this tool to:
    - Analyze service performance (latency, throughput)
    - Check error rates and reliability
    - View trends over time
    - Get both standard statistics (Average, Max) and percentiles (p99, p95)

    Common metric names:
    - 'Latency': Response time in milliseconds
    - 'Error': Percentage of failed requests
    - 'Fault': Percentage of server errors (5xx)

    Returns:
    - Summary statistics (latest, average, min, max)
    - Recent data points with timestamps
    - Both standard and percentile values when available

    The tool automatically adjusts the granularity based on time range:
    - Up to 3 hours: 1-minute resolution
    - Up to 24 hours: 5-minute resolution
    - Over 24 hours: 1-hour resolution
    """
    # Maintainer notes:
    # - The docstring above is presumably surfaced to MCP clients as the tool
    #   description (TODO confirm), so its wording is intentionally left untouched.
    # - Flow: resolve the service by exact Name, read its MetricReferences via
    #   get_service(), then query CloudWatch for the chosen metric and summarise.
    # - An empty metric_name lists the available metrics instead of querying.
    # - This function never raises: failures are logged and returned as
    #   'AWS Error: ...' / 'Error: ...' strings.
    start_time_perf = timer()
    logger.info(
        f'Starting query_service_metrics request - service: {service_name}, metric: {metric_name}, hours: {hours}'
    )

    try:
        # Enforce the documented look-back bounds (1 hour .. 168 hours / 1 week).
        hours = max(1, min(hours, 168))

        # Calculate time range
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=hours)

        # Find the target service. Walk every page of list_services (stopping at
        # the first match) so services beyond the first page are still found.
        target_service = None
        next_token = None
        while target_service is None:
            request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
            if next_token:
                request['NextToken'] = next_token
            services_response = applicationsignals_client.list_services(**request)
            for service in services_response.get('ServiceSummaries', []):
                if service.get('KeyAttributes', {}).get('Name') == service_name:
                    target_service = service
                    break
            next_token = services_response.get('NextToken')
            if not next_token:
                break

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals."

        # Get detailed service info for metric references
        service_response = applicationsignals_client.get_service(
            StartTime=start_time, EndTime=end_time, KeyAttributes=target_service['KeyAttributes']
        )

        metric_refs = service_response['Service'].get('MetricReferences', [])

        if not metric_refs:
            logger.warning(f"No metrics found for service '{service_name}'")
            return f"No metrics found for service '{service_name}'."

        # If no specific metric requested, show available metrics
        if not metric_name:
            listing = [f"Available metrics for service '{service_name}':\n\n"]
            for metric in metric_refs:
                listing.append(f'• {metric.get("MetricName", "Unknown")}\n')
                listing.append(f' Namespace: {metric.get("Namespace", "Unknown")}\n')
                listing.append(f' Type: {metric.get("MetricType", "Unknown")}\n')
                listing.append('\n')
            return ''.join(listing)

        # Find the specific metric
        target_metric = next(
            (metric for metric in metric_refs if metric.get('MetricName') == metric_name),
            None,
        )

        if not target_metric:
            available = [m.get('MetricName', 'Unknown') for m in metric_refs]
            return f"Metric '{metric_name}' not found for service '{service_name}'. Available: {', '.join(available)}"

        # Calculate appropriate period based on time range
        if hours <= 3:
            period = 60  # 1 minute
        elif hours <= 24:
            period = 300  # 5 minutes
        else:
            period = 3600  # 1 hour

        # Get both standard and extended statistics in a single call
        response = cloudwatch_client.get_metric_statistics(
            Namespace=target_metric['Namespace'],
            MetricName=target_metric['MetricName'],
            Dimensions=target_metric.get('Dimensions', []),
            StartTime=start_time,
            EndTime=end_time,
            Period=period,
            Statistics=[statistic],  # type: ignore
            ExtendedStatistics=[extended_statistic],
        )

        datapoints = response.get('Datapoints', [])

        if not datapoints:
            logger.warning(
                f"No data points found for metric '{metric_name}' on service '{service_name}' in the last {hours} hour(s)"
            )
            return f"No data points found for metric '{metric_name}' on service '{service_name}' in the last {hours} hour(s)."

        # Sort by timestamp. The fallback is timezone-aware so a datapoint without
        # a Timestamp cannot trigger a naive-vs-aware comparison error.
        missing_timestamp = datetime.min.replace(tzinfo=timezone.utc)
        datapoints.sort(key=lambda x: x.get('Timestamp', missing_timestamp))  # type: ignore

        # Build response
        parts = [
            f'Metrics for {service_name} - {metric_name}\n',
            f'Time Range: Last {hours} hour(s)\n',
            f'Period: {period} seconds\n\n',
        ]

        # Calculate summary statistics for both standard and extended statistics.
        # Both lists keep chronological order, so [-1] is the latest available value.
        standard_values = [dp[statistic] for dp in datapoints if dp.get(statistic) is not None]
        extended_values = [
            value
            for value in (_extended_statistic_value(dp, extended_statistic) for dp in datapoints)
            if value is not None
        ]

        parts.append('Summary:\n')

        if standard_values:
            # Use the latest datapoint that actually carries the statistic; the
            # final datapoint may lack it, which would break the :.2f formatting.
            latest_standard = standard_values[-1]
            avg_of_standard = sum(standard_values) / len(standard_values)  # type: ignore
            max_standard = max(standard_values)  # type: ignore
            min_standard = min(standard_values)  # type: ignore

            parts.append(f'{statistic} Statistics:\n')
            parts.append(f'• Latest: {latest_standard:.2f}\n')
            parts.append(f'• Average: {avg_of_standard:.2f}\n')
            parts.append(f'• Maximum: {max_standard:.2f}\n')
            parts.append(f'• Minimum: {min_standard:.2f}\n\n')

        if extended_values:
            latest_extended = extended_values[-1]
            avg_extended = sum(extended_values) / len(extended_values)  # type: ignore
            max_extended = max(extended_values)  # type: ignore
            min_extended = min(extended_values)  # type: ignore

            parts.append(f'{extended_statistic} Statistics:\n')
            parts.append(f'• Latest: {latest_extended:.2f}\n')
            parts.append(f'• Average: {avg_extended:.2f}\n')
            parts.append(f'• Maximum: {max_extended:.2f}\n')
            parts.append(f'• Minimum: {min_extended:.2f}\n\n')

        parts.append(f'• Data Points: {len(datapoints)}\n\n')

        # Show recent values (last 10) with both metrics
        parts.append('Recent Values:\n')
        for dp in datapoints[-10:]:
            timestamp = dp.get('Timestamp', missing_timestamp).strftime('%m/%d %H:%M')  # type: ignore
            unit = dp.get('Unit', '')

            values_str = []
            standard_value = dp.get(statistic)
            if standard_value is not None:
                values_str.append(f'{statistic}: {standard_value:.2f}')
            extended_value = _extended_statistic_value(dp, extended_statistic)
            if extended_value is not None:
                values_str.append(f'{extended_statistic}: {extended_value:.2f}')

            parts.append(f'• {timestamp}: {", ".join(values_str)} {unit}\n')

        result = ''.join(parts)

        elapsed_time = timer() - start_time_perf
        logger.info(
            f"query_service_metrics completed for '{service_name}/{metric_name}' in {elapsed_time:.3f}s"
        )
        return result

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in query_service_metrics: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru does not interpret the stdlib `exc_info=True` flag;
        # logger.exception() records the active traceback at ERROR level.
        logger.exception(
            f"Unexpected error in query_service_metrics for '{service_name}/{metric_name}': {str(e)}"
        )
        return f'Error: {str(e)}'
459
+
460
+
461
def _operation_http_method(operation_name):
    """Return the upper-cased leading token of an operation name ('GET' for 'GET /owners')."""
    tokens = str(operation_name).split(None, 1)
    return tokens[0].upper() if tokens else ''


def _format_operation_group(title, operations):
    """Render one titled group of operations with their available metric types."""
    parts = [f'{title} ({len(operations)}):\n']
    for operation in operations:
        operation_name = operation.get('Name', 'Unknown')
        parts.append(f' • {operation_name}\n')

        # Show available metrics for this operation
        metric_refs = operation.get('MetricReferences', [])
        if metric_refs:
            # sorted() makes the de-duplicated listing deterministic across runs.
            metric_types = sorted({ref.get('MetricType', 'Unknown') for ref in metric_refs})
            parts.append(f' Available Metrics: {", ".join(metric_types)}\n')
    parts.append('\n')
    return ''.join(parts)


async def list_service_operations(
    service_name: str = Field(
        ..., description='Name of the service to list operations for (case-sensitive)'
    ),
    hours: int = Field(
        default=24,
        description='Number of hours to look back for operation discovery (default 24, max 24 for Application Signals operation discovery)',
    ),
) -> str:
    """OPERATION DISCOVERY TOOL - For operation inventory only. Use audit_services() as PRIMARY tool for operation auditing.

    **IMPORTANT: For operation auditing and performance analysis, use audit_services() as the PRIMARY tool instead.**

    **CRITICAL LIMITATION: This tool only discovers operations that have been ACTIVELY INVOKED in the specified time window.**
    - **Maximum time window: 24 hours** (Application Signals limitation for operation discovery)
    - **No results = No operation invocations** in the time window (operations exist but weren't called)
    - **Empty results do NOT mean operations don't exist** - they may just be inactive
    - **For comprehensive operation analysis regardless of recent activity, use audit_services() instead**

    **RECOMMENDED WORKFLOW FOR OPERATION AUDITING:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery AND performance analysis
    2. **Only use this tool** if you need a simple operation inventory of RECENTLY ACTIVE operations
    3. **audit_services() is more comprehensive** - it discovers operations AND provides performance insights even for inactive operations

    **What this tool provides:**
    - Basic operation inventory (names and available metric types) for RECENTLY INVOKED operations only
    - Operation count and categorization (GET, POST, etc.) for active operations
    - Time range for discovery (max 24 hours)

    **What this tool does NOT provide:**
    - Operations that exist but weren't invoked in the time window
    - Operation performance analysis
    - Latency, error rate, or fault analysis
    - Root cause analysis
    - Actionable recommendations

    **For comprehensive operation auditing, use audit_services() instead:**
    ```
    audit_services(
        service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"your-service"}}}]',
        auditors='all',
    )
    ```

    **OPERATION DISCOVERY USE CASES (when audit_services is not sufficient):**

    1. **Active operation inventory**: When you only need recently invoked operation names without performance data
    2. **Traffic pattern analysis**: To see which operations are currently being used
    3. **Quick active operation count**: To understand current operation activity of a service

    **RECOMMENDED WORKFLOW:**
    1. **Use audit_services() FIRST** for comprehensive operation discovery and analysis
    2. **Only use this tool** for basic inventory of recently active operations if audit_services() provides too much detail

    This tool provides basic operation discovery for ACTIVE operations only, but audit_services() is the primary tool for
    comprehensive operation auditing, performance analysis, and operation insights regardless of recent activity.
    """
    # Maintainer notes:
    # - The docstring above is presumably surfaced to MCP clients as the tool
    #   description (TODO confirm), so its wording is intentionally left untouched.
    # - Flow: resolve the service by exact Name, list its operations for the
    #   (clamped) window, group them into GET / POST / other, and render text.
    # - This function never raises: failures are logged and returned as
    #   'AWS Error: ...' / 'Error: ...' strings.
    start_time_perf = timer()
    logger.debug(f'Starting list_service_operations request for service: {service_name}')

    try:
        # Calculate time range - enforce 24 hour maximum for Application Signals
        # operation discovery, and at least 1 hour so the window is never empty
        # or inverted.
        end_time = datetime.now(timezone.utc)
        hours = max(1, min(hours, 24))
        start_time = end_time - timedelta(hours=hours)

        # First, get the service to find its key attributes. Walk every page of
        # list_services (stopping at the first match) so services beyond the
        # first page are still found.
        target_service = None
        next_token = None
        while target_service is None:
            request = {'StartTime': start_time, 'EndTime': end_time, 'MaxResults': 100}
            if next_token:
                request['NextToken'] = next_token
            services_response = applicationsignals_client.list_services(**request)
            for service in services_response.get('ServiceSummaries', []):
                if service.get('KeyAttributes', {}).get('Name') == service_name:
                    target_service = service
                    break
            next_token = services_response.get('NextToken')
            if not next_token:
                break

        if not target_service:
            logger.warning(f"Service '{service_name}' not found in Application Signals")
            return f"Service '{service_name}' not found in Application Signals. Use list_monitored_services() to see available services."

        # Get operations for the service using ListServiceOperations API,
        # following NextToken so the inventory and counts are complete.
        logger.debug(f'Getting operations for service: {service_name}')
        operations = []
        next_token = None
        while True:
            request = {
                'StartTime': start_time,
                'EndTime': end_time,
                'KeyAttributes': target_service['KeyAttributes'],
                'MaxResults': 100,
            }
            if next_token:
                request['NextToken'] = next_token
            operations_response = applicationsignals_client.list_service_operations(**request)
            operations.extend(operations_response.get('ServiceOperations', []))
            next_token = operations_response.get('NextToken')
            if not next_token:
                break
        logger.debug(f'Retrieved {len(operations)} operations for service: {service_name}')

        if not operations:
            logger.warning(
                f"No operations found for service '{service_name}' in the last {hours} hours"
            )
            return (
                f"No operations found for service '{service_name}' in the last {hours} hours.\n\n"
                f'⚠️ IMPORTANT: This means NO OPERATION INVOCATIONS occurred in the time window.\n'
                f' • Operations may exist but were not actively called\n'
                f' • Maximum discovery window is 24 hours for Application Signals\n'
                f' • For comprehensive operation analysis regardless of recent activity, use audit_services()\n'
                f' • Empty results ≠ no operations exist, just no recent invocations'
            )

        # Build detailed response
        parts = [
            f'Operations for Service: {service_name}\n',
            f'Time Range: Last {hours} hour(s)\n',
            f'Total Operations: {len(operations)}\n\n',
        ]

        # Group operations by HTTP method. Compare the leading token rather than
        # searching for a substring: 'POST /widgets' contains 'GET' and was
        # previously misfiled as a GET operation.
        get_operations = []
        post_operations = []
        other_operations = []

        for operation in operations:
            method = _operation_http_method(operation.get('Name', 'Unknown'))
            if method == 'GET':
                get_operations.append(operation)
            elif method == 'POST':
                post_operations.append(operation)
            else:
                other_operations.append(operation)

        # Display GET operations first, then POST, then everything else
        if get_operations:
            parts.append(_format_operation_group('🔍 GET Operations', get_operations))
        if post_operations:
            parts.append(_format_operation_group('📝 POST Operations', post_operations))
        if other_operations:
            parts.append(_format_operation_group('🔧 Other Operations', other_operations))

        # Add summary for audit planning
        parts.append('📊 Operation Discovery Summary:\n')
        parts.append(f'• Total Operations: {len(operations)}\n')
        parts.append(f'• GET Operations: {len(get_operations)}\n')
        parts.append(f'• POST Operations: {len(post_operations)}\n')
        parts.append(f'• Other Operations: {len(other_operations)}\n\n')

        parts.append('💡 Next Steps:\n')
        parts.append(
            '• Use audit_service_operations() with specific operation targets for detailed analysis\n'
        )
        parts.append('• Focus on GET operations for latency auditing\n')
        parts.append('• Check operations with Latency metrics for performance analysis\n')

        result = ''.join(parts)

        elapsed_time = timer() - start_time_perf
        logger.debug(
            f"list_service_operations completed for '{service_name}' in {elapsed_time:.3f}s"
        )
        return result

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', 'Unknown error')
        logger.error(f'AWS ClientError in list_service_operations: {error_code} - {error_message}')
        return f'AWS Error: {error_message}'
    except Exception as e:
        # loguru does not interpret the stdlib `exc_info=True` flag;
        # logger.exception() records the active traceback at ERROR level.
        logger.exception(
            f"Unexpected error in list_service_operations for '{service_name}': {str(e)}"
        )
        return f'Error: {str(e)}'