awslabs.cloudwatch-applicationsignals-mcp-server 0.1.21 (py3-none-any.whl)

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. awslabs/__init__.py +17 -0
  2. awslabs/cloudwatch_applicationsignals_mcp_server/__init__.py +17 -0
  3. awslabs/cloudwatch_applicationsignals_mcp_server/audit_presentation_utils.py +288 -0
  4. awslabs/cloudwatch_applicationsignals_mcp_server/audit_utils.py +912 -0
  5. awslabs/cloudwatch_applicationsignals_mcp_server/aws_clients.py +120 -0
  6. awslabs/cloudwatch_applicationsignals_mcp_server/canary_utils.py +910 -0
  7. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-dotnet-enablement.md +435 -0
  8. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-java-enablement.md +321 -0
  9. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-nodejs-enablement.md +420 -0
  10. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ec2/ec2-python-enablement.md +598 -0
  11. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-dotnet-enablement.md +264 -0
  12. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-java-enablement.md +193 -0
  13. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-nodejs-enablement.md +198 -0
  14. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/ecs/ecs-python-enablement.md +236 -0
  15. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-dotnet-enablement.md +166 -0
  16. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-java-enablement.md +166 -0
  17. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-nodejs-enablement.md +166 -0
  18. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/eks/eks-python-enablement.md +169 -0
  19. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-dotnet-enablement.md +336 -0
  20. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-java-enablement.md +336 -0
  21. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-nodejs-enablement.md +336 -0
  22. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_guides/templates/lambda/lambda-python-enablement.md +336 -0
  23. awslabs/cloudwatch_applicationsignals_mcp_server/enablement_tools.py +147 -0
  24. awslabs/cloudwatch_applicationsignals_mcp_server/server.py +1505 -0
  25. awslabs/cloudwatch_applicationsignals_mcp_server/service_audit_utils.py +231 -0
  26. awslabs/cloudwatch_applicationsignals_mcp_server/service_tools.py +659 -0
  27. awslabs/cloudwatch_applicationsignals_mcp_server/sli_report_client.py +333 -0
  28. awslabs/cloudwatch_applicationsignals_mcp_server/slo_tools.py +386 -0
  29. awslabs/cloudwatch_applicationsignals_mcp_server/trace_tools.py +784 -0
  30. awslabs/cloudwatch_applicationsignals_mcp_server/utils.py +172 -0
  31. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/METADATA +808 -0
  32. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/RECORD +36 -0
  33. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/WHEEL +4 -0
  34. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/entry_points.txt +2 -0
  35. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/LICENSE +174 -0
  36. awslabs_cloudwatch_applicationsignals_mcp_server-0.1.21.dist-info/licenses/NOTICE +2 -0
+++ awslabs/cloudwatch_applicationsignals_mcp_server/audit_utils.py
@@ -0,0 +1,912 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Shared utilities for audit tools."""
+
+ import json
+ import os
+ import tempfile
+ from datetime import datetime, timezone
+ from loguru import logger
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+
+ # Constants
+ DEFAULT_BATCH_SIZE = 5
+ FUZZY_MATCH_THRESHOLD = 30  # Minimum similarity score for fuzzy matching
+ HIGH_CONFIDENCE_MATCH_THRESHOLD = 85  # Score above which a fuzzy match is treated as near-exact
+
+
+ async def execute_audit_api(input_obj: Dict[str, Any], region: str, banner: str) -> str:
+     """Execute the Application Signals audit API call with the given input object."""
+     from .aws_clients import applicationsignals_client
+
+     # File log path
+     desired_log_path = os.environ.get('AUDITOR_LOG_PATH', tempfile.gettempdir())
+     try:
+         if desired_log_path.endswith(os.sep) or os.path.isdir(desired_log_path):
+             os.makedirs(desired_log_path, exist_ok=True)
+             log_path = os.path.join(desired_log_path, 'aws_api.log')
+         else:
+             os.makedirs(os.path.dirname(desired_log_path) or '.', exist_ok=True)
+             log_path = desired_log_path
+     except Exception:
+         temp_dir = tempfile.gettempdir()
+         os.makedirs(temp_dir, exist_ok=True)
+         log_path = os.path.join(temp_dir, 'aws_api.log')
+
+     # Process targets in batches if needed
+     targets = input_obj.get('AuditTargets', [])
+     batch_size = DEFAULT_BATCH_SIZE
+     target_batches = []
+
+     if len(targets) > batch_size:
+         logger.info(f'Processing {len(targets)} targets in batches of {batch_size}')
+         for i in range(0, len(targets), batch_size):
+             batch = targets[i : i + batch_size]
+             target_batches.append(batch)
+     else:
+         target_batches.append(targets)
+
+     all_batch_results = []
+
+     for batch_idx, batch_targets in enumerate(target_batches, 1):
+         logger.info(
+             f'Processing batch {batch_idx}/{len(target_batches)} with {len(batch_targets)} targets'
+         )
+
+         # Build API input for this batch
+         batch_input_obj = {
+             'StartTime': datetime.fromtimestamp(input_obj['StartTime'], tz=timezone.utc),
+             'EndTime': datetime.fromtimestamp(input_obj['EndTime'], tz=timezone.utc),
+             'AuditTargets': batch_targets,
+         }
+         if 'Auditors' in input_obj:
+             batch_input_obj['Auditors'] = input_obj['Auditors']
+
+         # Log API invocation details
+         api_pretty_input = json.dumps(
+             {
+                 'StartTime': input_obj['StartTime'],
+                 'EndTime': input_obj['EndTime'],
+                 'AuditTargets': batch_targets,
+                 'Auditors': input_obj.get('Auditors', []),
+             },
+             indent=2,
+         )
+
+         # Also log the actual batch_input_obj that will be sent to AWS API
+         batch_input_for_logging = {
+             'StartTime': batch_input_obj['StartTime'].isoformat(),
+             'EndTime': batch_input_obj['EndTime'].isoformat(),
+             'AuditTargets': batch_input_obj['AuditTargets'],
+         }
+         if 'Auditors' in batch_input_obj:
+             batch_input_for_logging['Auditors'] = batch_input_obj['Auditors']
+
+         batch_payload_json = json.dumps(batch_input_for_logging, indent=2)
+
+         logger.info('═' * 80)
+         logger.info(
+             f'BATCH {batch_idx}/{len(target_batches)} - {datetime.now(timezone.utc).isoformat()}'
+         )
+         logger.info(banner.strip())
+         logger.info('---- API INVOCATION ----')
+         logger.info('applicationsignals_client.list_audit_findings()')
+         logger.info('---- API PARAMETERS (JSON) ----')
+         logger.info(api_pretty_input)
+         logger.info('---- ACTUAL AWS API PAYLOAD ----')
+         logger.info(batch_payload_json)
+         logger.info('---- END PARAMETERS ----')
+
+         # Write detailed payload to log file
+         try:
+             with open(log_path, 'a') as f:
+                 f.write('═' * 80 + '\n')
+                 f.write(
+                     f'BATCH {batch_idx}/{len(target_batches)} - {datetime.now(timezone.utc).isoformat()}\n'
+                 )
+                 f.write(banner.strip() + '\n')
+                 f.write('---- API INVOCATION ----\n')
+                 f.write('applicationsignals_client.list_audit_findings()\n')
+                 f.write('---- API PARAMETERS (JSON) ----\n')
+                 f.write(api_pretty_input + '\n')
+                 f.write('---- ACTUAL AWS API PAYLOAD ----\n')
+                 f.write(batch_payload_json + '\n')
+                 f.write('---- END PARAMETERS ----\n\n')
+         except Exception as log_error:
+             logger.warning(f'Failed to write audit log to {log_path}: {log_error}')
+
+         # Call the Application Signals API for this batch
+         try:
+             response = applicationsignals_client.list_audit_findings(**batch_input_obj)  # type: ignore[attr-defined]
+
+             # Format and log output for this batch
+             observation_text = json.dumps(response, indent=2, default=str)
+             all_batch_results.append(response)
+
+             if not response.get('AuditFindings'):
+                 try:
+                     with open(log_path, 'a') as f:
+                         f.write(f'📭 Batch {batch_idx}: No findings returned.\n')
+                         f.write('---- END RESPONSE ----\n\n')
+                 except Exception as log_error:
+                     logger.warning(f'Failed to write audit log to {log_path}: {log_error}')
+                 logger.info(f'📭 Batch {batch_idx}: No findings returned.\n---- END RESPONSE ----')
+             else:
+                 try:
+                     with open(log_path, 'a') as f:
+                         f.write(f'---- BATCH {batch_idx} API RESPONSE (JSON) ----\n')
+                         f.write(observation_text + '\n')
+                         f.write('---- END RESPONSE ----\n\n')
+                 except Exception as log_error:
+                     logger.warning(f'Failed to write audit log to {log_path}: {log_error}')
+                 logger.info(
+                     f'---- BATCH {batch_idx} API RESPONSE (JSON) ----\n'
+                     + observation_text
+                     + '\n---- END RESPONSE ----'
+                 )
+
+         except Exception as e:
+             error_msg = str(e)
+             try:
+                 with open(log_path, 'a') as f:
+                     f.write(f'---- BATCH {batch_idx} API ERROR ----\n')
+                     f.write(error_msg + '\n')
+                     f.write('---- END ERROR ----\n\n')
+             except Exception as log_error:
+                 logger.warning(f'Failed to write audit log to {log_path}: {log_error}')
+             logger.error(
+                 f'---- BATCH {batch_idx} API ERROR ----\n' + error_msg + '\n---- END ERROR ----'
+             )
+
+             batch_error_result = {
+                 'error': f'API call failed: {error_msg}',
+                 'targets': batch_targets,
+             }
+             all_batch_results.append(batch_error_result)
+             continue
+
+     # Aggregate results from all batches
+     if not all_batch_results:
+         return banner + 'Result: No findings from any batch.'
+
+     # Aggregate the findings from all successful batches
+     aggregated_findings = []
+     failed_batches = 0
+
+     for batch_idx, batch_result in enumerate(all_batch_results):
+         if isinstance(batch_result, dict):
+             if 'error' in batch_result:
+                 failed_batches += 1
+                 continue
+
+             batch_findings = batch_result.get('AuditFindings', [])
+             aggregated_findings.extend(batch_findings)
+
+     # Create final aggregated response
+     final_result = {
+         'AuditFindings': aggregated_findings,
+     }
+
+     # Add any error information if there were failed batches
+     if failed_batches > 0:
+         error_details = []
+         for batch_result in all_batch_results:
+             if isinstance(batch_result, dict) and 'error' in batch_result:
+                 error_details.append(
+                     {
+                         'error': batch_result['error'],
+                         'targets': batch_result['targets'],
+                     }
+                 )
+         final_result['ListAuditFindingsErrors'] = error_details
+
+     final_observation_text = json.dumps(final_result, indent=2, default=str)
+     return banner + final_observation_text
+
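Usage sketch: a minimal way to drive execute_audit_api, inferred from what the function reads above. The service name, environment, region, banner, and time window are placeholders, and real AWS credentials plus a configured Application Signals client are assumed.

    import asyncio
    import time

    now = int(time.time())
    input_obj = {
        'StartTime': now - 3600,  # unix seconds; converted to aware datetimes per batch
        'EndTime': now,
        'AuditTargets': [
            {
                'Type': 'service',
                'Data': {'Service': {'Type': 'Service', 'Name': 'checkout', 'Environment': 'eks:prod'}},
            },
        ],
        'Auditors': ['slo', 'trace'],  # optional; omit to let the service choose auditors
    }

    # Returns the banner followed by the aggregated AuditFindings JSON.
    report = asyncio.run(execute_audit_api(input_obj, region='us-east-1', banner='[audit]\n'))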
+
+ def _create_service_target(
+     service_name: str, environment: str, aws_account_id: Optional[str] = None
+ ) -> Dict[str, Any]:
+     """Create a standardized service target configuration."""
+     service_config = {
+         'Type': 'Service',
+         'Name': service_name,
+         'Environment': environment,
+     }
+     if aws_account_id:
+         service_config['AwsAccountId'] = aws_account_id
+
+     return {
+         'Type': 'service',
+         'Data': {'Service': service_config},
+     }
+
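For reference, the structure this helper returns (names illustrative). Note the lowercase outer 'service' (the audit target type) versus the capitalized inner 'Service' (the entity type):

    >>> _create_service_target('checkout', 'eks:prod', aws_account_id='123456789012')
    {'Type': 'service',
     'Data': {'Service': {'Type': 'Service',
                          'Name': 'checkout',
                          'Environment': 'eks:prod',
                          'AwsAccountId': '123456789012'}}}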
+
+ def _filter_instrumented_services(all_services: List[Any]) -> List[Dict[str, Any]]:
+     """Filter out uninstrumented and AWS-native services.
+
+     Args:
+         all_services: List of service summaries from the list_services API
+     Returns:
+         List of services that are instrumented
+     """
+     instrumented_services = []
+
+     for service in all_services:
+         service_attrs = service.get('KeyAttributes', {})
+         service_name = service_attrs.get('Name', '')
+         service_type = service_attrs.get('Type', '')
+         environment = service_attrs.get('Environment', '')
+
+         # Filter out services without proper names or that are not actual services
+         if not service_name or service_name == 'Unknown' or service_type != 'Service':
+             logger.debug(
+                 f"Skipping service: Name='{service_name}', Type='{service_type}', Environment='{environment}'"
+             )
+             continue
+
+         # Check InstrumentationType in AttributeMaps to filter out UNINSTRUMENTED and AWS_NATIVE services
+         attribute_maps = service.get('AttributeMaps', [])
+         is_instrumented = True
+
+         for attr_map in attribute_maps:
+             if isinstance(attr_map, dict) and 'InstrumentationType' in attr_map:
+                 instrumentation_type = attr_map['InstrumentationType']
+                 if (
+                     instrumentation_type == 'UNINSTRUMENTED'
+                     or instrumentation_type == 'AWS_NATIVE'
+                 ):
+                     is_instrumented = False
+                     logger.debug(
+                         f"Filtering out uninstrumented service: Name='{service_name}', InstrumentationType='{instrumentation_type}'"
+                     )
+                     break
+
+         if is_instrumented:
+             instrumented_services.append(service)
+             logger.debug(
+                 f"Including instrumented service: Name='{service_name}', Environment='{environment}'"
+             )
+
+     logger.info(
+         f'Filtered services: {len(instrumented_services)} instrumented out of {len(all_services)} total services'
+     )
+     return instrumented_services
+
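A small illustration of the filtering rules with hand-built summaries. The 'INSTRUMENTED' value is illustrative; the function keeps anything whose InstrumentationType is not one of the two excluded values:

    summaries = [
        {'KeyAttributes': {'Type': 'Service', 'Name': 'orders', 'Environment': 'eks:prod'},
         'AttributeMaps': [{'InstrumentationType': 'INSTRUMENTED'}]},       # kept
        {'KeyAttributes': {'Type': 'Service', 'Name': 'orders-queue', 'Environment': 'generic:default'},
         'AttributeMaps': [{'InstrumentationType': 'AWS_NATIVE'}]},         # dropped: AWS-native
        {'KeyAttributes': {'Type': 'Resource', 'Name': 'orders-table'}},    # dropped: not Type 'Service'
    ]

    kept = _filter_instrumented_services(summaries)
    assert [s['KeyAttributes']['Name'] for s in kept] == ['orders']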
+
+ def _fetch_instrumented_services_with_pagination(
+     unix_start: int,
+     unix_end: int,
+     next_token: Optional[str] = None,
+     max_results: int = 5,
+     applicationsignals_client=None,
+ ) -> Tuple[List[Dict[str, Any]], Optional[str], List[str], Dict[str, int]]:
+     """Common pagination logic for fetching instrumented services.
+
+     Args:
+         unix_start: Start time as unix timestamp
+         unix_end: End time as unix timestamp
+         next_token: Token for pagination from previous list_services call
+         max_results: Maximum number of services to return per batch
+         applicationsignals_client: AWS Application Signals client
+     Returns:
+         Tuple of (instrumented_services, next_token, all_service_names, filtering_stats)
+         filtering_stats contains: {'total_services': int, 'instrumented_services': int, 'filtered_out': int}
+     """
+     if applicationsignals_client is None:
+         from .aws_clients import applicationsignals_client
+
+     all_service_names = []
+     filtering_stats = {'total_services': 0, 'instrumented_services': 0, 'filtered_out': 0}
+
+     # Initialize variables for the loop
+     current_next_token = next_token
+     total_services_viewed = 0
+     total_filtered_out = 0
+     instrumented_services = []
+     returned_next_token = None
+
+     # Loop until we find instrumented services or run out of pages
+     while True:
+         # Build list_services parameters
+         list_services_params = {
+             'StartTime': datetime.fromtimestamp(unix_start, tz=timezone.utc),
+             'EndTime': datetime.fromtimestamp(unix_end, tz=timezone.utc),
+             'MaxResults': max_results,
+         }
+
+         # Add NextToken if provided for pagination
+         if current_next_token:
+             list_services_params['NextToken'] = current_next_token
+
+         logger.info(f'Fetching batch (viewed so far: {total_services_viewed} services)')
+
+         services_response = applicationsignals_client.list_services(**list_services_params)
+         services_batch = services_response.get('ServiceSummaries', [])
+         returned_next_token = services_response.get('NextToken')
+
+         # Collect all service names from this batch (no filtering)
+         for service in services_batch:
+             service_attrs = service.get('KeyAttributes', {})
+             service_name = service_attrs.get('Name', '')
+             all_service_names.append(service_name)
+
+         # Update total services viewed
+         total_services_viewed += len(services_batch)
+
+         logger.debug(
+             f'Retrieved {len(services_batch)} services in this batch, NextToken: {returned_next_token is not None}'
+         )
+
+         # Filter out uninstrumented services using the helper function
+         instrumented_services = _filter_instrumented_services(services_batch)
+
+         # Update totals
+         batch_filtered_out = len(services_batch) - len(instrumented_services)
+         total_filtered_out += batch_filtered_out
+
+         logger.info(
+             f'Fetch instrumented services batch results: {len(services_batch)} total, {len(instrumented_services)} instrumented, {batch_filtered_out} filtered out'
+         )
+         logger.info(
+             f'Fetch instrumented services cumulative: {total_services_viewed} total viewed, {total_filtered_out} filtered out'
+         )
+
+         # Check if we found instrumented services - if so, exit the loop immediately
+         if len(instrumented_services) > 0:
+             logger.info(
+                 f'Found {len(instrumented_services)} instrumented services, proceeding with expansion'
+             )
+             break
+         elif not returned_next_token:
+             logger.warning(
+                 f'No instrumented services found after viewing {total_services_viewed} total services across all pages'
+             )
+             break
+         else:
+             logger.info(
+                 'No instrumented services in this batch, continuing to next page (next_token available)'
+             )
+             current_next_token = returned_next_token
+
+     # Update filtering stats with final totals
+     filtering_stats['total_services'] = total_services_viewed
+     filtering_stats['instrumented_services'] = len(instrumented_services)
+     filtering_stats['filtered_out'] = total_filtered_out
+
+     return (instrumented_services, returned_next_token, all_service_names, filtering_stats)
+
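A sketch of how a caller might drive this paginator (placeholder time window; an AWS client is assumed). The helper itself keeps paging until a page yields at least one instrumented service, so each call returns either a non-empty batch or an exhausted listing:

    import time

    now = int(time.time())
    token = None
    while True:
        services, token, seen_names, stats = _fetch_instrumented_services_with_pagination(
            unix_start=now - 3600, unix_end=now, next_token=token, max_results=5
        )
        # ... consume this batch of instrumented service summaries ...
        if token is None:  # listing exhausted
            break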
+
+ def parse_auditors(
+     auditors_value: Union[str, None, Any], default_auditors: List[str]
+ ) -> List[str]:
+     """Parse and validate auditors parameter."""
+     # Handle Pydantic Field objects that may be passed instead of actual values
+     if hasattr(auditors_value, 'default') and hasattr(auditors_value, 'description'):
+         # This is a Pydantic Field object, use its default value
+         auditors_value = getattr(auditors_value, 'default', None)
+
+     if auditors_value is None:
+         user_prompt_text = os.environ.get('MCP_USER_PROMPT', '') or ''
+         wants_root_cause = 'root cause' in user_prompt_text.lower()
+         raw_a = default_auditors if not wants_root_cause else []
+     elif str(auditors_value).lower() == 'all':
+         raw_a = []  # Empty list means use all auditors
+     else:
+         raw_a = [a.strip() for a in str(auditors_value).split(',') if a.strip()]
+
+     # Validate auditors
+     if len(raw_a) == 0:
+         return []  # Empty list means use all auditors
+     else:
+         allowed = {
+             'slo',
+             'operation_metric',
+             'trace',
+             'log',
+             'dependency_metric',
+             'top_contributor',
+             'service_quota',
+         }
+         invalid = [a for a in raw_a if a not in allowed]
+         if invalid:
+             raise ValueError(
+                 f'Invalid auditor(s): {", ".join(invalid)}. Allowed: {", ".join(sorted(allowed))}'
+             )
+         return raw_a
+
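The observable behavior, assuming an illustrative default list:

    DEFAULTS = ['slo', 'trace']  # illustrative; the real caller supplies its own defaults

    parse_auditors('slo, log', DEFAULTS)  # -> ['slo', 'log']
    parse_auditors('all', DEFAULTS)       # -> [] (empty list means "use all auditors")
    parse_auditors(None, DEFAULTS)        # -> DEFAULTS, unless MCP_USER_PROMPT mentions "root cause"
    parse_auditors('bogus', DEFAULTS)     # raises ValueError naming the invalid auditor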
+
+ def expand_service_wildcard_patterns(
+     targets: List[dict],
+     unix_start: int,
+     unix_end: int,
+     next_token: Optional[str] = None,
+     max_results: int = 5,
+     applicationsignals_client=None,
+ ) -> Tuple[List[dict], Optional[str], List[str], Dict[str, int]]:
+     """Expand wildcard patterns for service targets with pagination support.
+
+     Args:
+         targets: List of target dictionaries
+         unix_start: Start time as unix timestamp
+         unix_end: End time as unix timestamp
+         next_token: Token for pagination from previous list_services call
+         max_results: Maximum number of services to return
+         applicationsignals_client: AWS Application Signals client
+
+     Returns:
+         Tuple of (expanded_targets, next_token, all_service_names, filtering_stats)
+         filtering_stats contains: {'total_services': int, 'instrumented_services': int, 'filtered_out': int}
+     """
+     from .utils import calculate_name_similarity
+
+     if applicationsignals_client is None:
+         from .aws_clients import applicationsignals_client
+
+     expanded_targets = []
+     service_patterns = []
+     service_fuzzy_matches = []
+     all_service_names = []
+     filtering_stats = {'total_services': 0, 'instrumented_services': 0, 'filtered_out': 0}
+
+     logger.debug(
+         f'expand_service_wildcard_patterns: Processing {len(targets)} targets with max_results={max_results}'
+     )
+     logger.debug(f'Received next_token: {next_token is not None}')
+
+     # First pass: identify patterns and collect non-wildcard targets
+     for i, target in enumerate(targets):
+         logger.debug(f'Target {i}: {target}')
+
+         if not isinstance(target, dict):
+             expanded_targets.append(target)
+             continue
+
+         target_type = target.get('Type', '').lower()
+         logger.debug(f'Target {i} type: {target_type}')
+
+         if target_type == 'service':
+             # Check multiple possible locations for service name
+             service_name = None
+
+             # Check Data.Service.Name (full format)
+             service_data = target.get('Data', {})
+             if isinstance(service_data, dict):
+                 service_info = service_data.get('Service', {})
+                 if isinstance(service_info, dict):
+                     service_name = service_info.get('Name', '')
+
+             # Check shorthand Service field
+             if not service_name:
+                 service_name = target.get('Service', '')
+
+             logger.debug(f"Target {i} service name: '{service_name}'")
+
+             if isinstance(service_name, str) and service_name:
+                 if '*' in service_name:
+                     logger.debug(f"Target {i} identified as wildcard pattern: '{service_name}'")
+                     service_patterns.append((target, service_name))
+                 else:
+                     # Check if this might be a fuzzy match candidate
+                     service_fuzzy_matches.append((target, service_name))
+             else:
+                 logger.debug(f'Target {i} has no valid service name, passing through')
+                 expanded_targets.append(target)
+         else:
+             # Non-service targets pass through unchanged
+             logger.debug(f'Target {i} is not a service target, passing through')
+             expanded_targets.append(target)
+
+     # Expand service patterns and fuzzy matches with pagination
+     if service_patterns or service_fuzzy_matches:
+         logger.debug(
+             f'Expanding {len(service_patterns)} service wildcard patterns and {len(service_fuzzy_matches)} fuzzy matches with pagination'
+         )
+         try:
+             # Use the common pagination function
+             instrumented_services, returned_next_token, all_service_names, filtering_stats = (
+                 _fetch_instrumented_services_with_pagination(
+                     unix_start, unix_end, next_token, max_results, applicationsignals_client
+                 )
+             )
+
+             # Handle wildcard patterns
+             for original_target, pattern in service_patterns:
+                 search_term = pattern.strip('*').lower() if pattern != '*' else ''
+                 matches_found = 0
+
+                 for service in instrumented_services:
+                     service_attrs = service.get('KeyAttributes', {})
+                     service_name = service_attrs.get('Name', '')
+                     environment = service_attrs.get('Environment', '')
+
+                     # Apply search filter
+                     if search_term == '' or search_term in service_name.lower():
+                         expanded_targets.append(_create_service_target(service_name, environment))
+                         matches_found += 1
+                         logger.debug(
+                             f"Added instrumented service: Name='{service_name}', Environment='{environment}'"
+                         )
+
+                 logger.debug(
+                     f"Service pattern '{pattern}' expanded to {matches_found} instrumented targets in this batch"
+                 )
+
+             # Handle fuzzy matches for inexact service names
+             for original_target, inexact_name in service_fuzzy_matches:
+                 best_matches = []
+
+                 # Calculate similarity scores for all instrumented services
+                 for service in instrumented_services:
+                     service_attrs = service.get('KeyAttributes', {})
+                     service_name = service_attrs.get('Name', '')
+                     if not service_name:
+                         continue
+
+                     score = calculate_name_similarity(inexact_name, service_name, 'service')
+
+                     if score >= FUZZY_MATCH_THRESHOLD:  # Minimum threshold for consideration
+                         best_matches.append(
+                             (service_name, service_attrs.get('Environment'), score)
+                         )
+
+                 # Sort by score and take the best matches
+                 best_matches.sort(key=lambda x: x[2], reverse=True)
+
+                 if best_matches:
+                     # If we have a very high score match, use only that
+                     if best_matches[0][2] >= HIGH_CONFIDENCE_MATCH_THRESHOLD:
+                         matched_services = [best_matches[0]]
+                     else:
+                         # Otherwise, take top 3 matches above threshold
+                         matched_services = best_matches[:3]
+
+                     logger.info(
+                         f"Fuzzy matching service '{inexact_name}' found {len(matched_services)} instrumented candidates in this batch:"
+                     )
+                     for service_name, environment, score in matched_services:
+                         logger.info(f"  - '{service_name}' in '{environment}' (score: {score})")
+                         expanded_targets.append(_create_service_target(service_name, environment))
+                 else:
+                     logger.warning(
+                         f"No fuzzy matches found for service name '{inexact_name}' (no candidates above threshold) in this batch"
+                     )
+                     # Keep the original target - let the API handle the error
+                     expanded_targets.append(original_target)
+
+             return (expanded_targets, returned_next_token, all_service_names, filtering_stats)
+
+         except Exception as e:
+             logger.warning(f'Failed to expand service patterns and fuzzy matches: {e}')
+             # When expansion fails, we need to return an error rather than passing wildcards to validation
+             # This prevents the validation phase from seeing wildcard patterns
+             if service_patterns or service_fuzzy_matches:
+                 pattern_names = [pattern for _, pattern in service_patterns] + [
+                     name for _, name in service_fuzzy_matches
+                 ]
+                 raise ValueError(
+                     f'Failed to expand service wildcard patterns {pattern_names}. '
+                     f'This may be due to AWS API access issues or missing services. '
+                     f'Error: {str(e)}'
+                 )
+
+     return expanded_targets, None, all_service_names, filtering_stats
+
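Usage sketch (placeholder names and time window; credentials and the Application Signals client are assumed):

    import time

    now = int(time.time())
    targets = [{'Type': 'service', 'Service': 'pay*'}]  # shorthand; Data.Service.Name also works

    expanded, token, seen_names, stats = expand_service_wildcard_patterns(
        targets, unix_start=now - 3600, unix_end=now, max_results=5
    )
    # `expanded` holds one concrete service target per instrumented service whose
    # name contains 'pay'; a non-None `token` means more pages remain.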
+
+ def expand_slo_wildcard_patterns(
+     targets: List[dict],
+     next_token: Optional[str] = None,
+     max_results: int = 5,
+     applicationsignals_client=None,
+ ) -> Tuple[List[dict], Optional[str], List[str]]:
+     """Expand wildcard patterns for SLO targets with pagination support.
+
+     Args:
+         targets: List of target dictionaries
+         next_token: Token for pagination from previous list_service_level_objectives call
+         max_results: Maximum number of SLOs to return
+         applicationsignals_client: AWS Application Signals client
+
+     Returns:
+         Tuple of (expanded_targets, next_token, slo_names_in_batch)
+     """
+     if applicationsignals_client is None:
+         from .aws_clients import applicationsignals_client
+
+     expanded_targets = []
+     wildcard_patterns = []
+     slo_names_in_batch = []
+
+     for target in targets:
+         if isinstance(target, dict):
+             ttype = target.get('Type', '').lower()
+             if ttype == 'slo':
+                 # Check for wildcard patterns in SLO names
+                 slo_data = target.get('Data', {}).get('Slo', {})
+
+                 # BUG FIX: Handle the case where Slo is a string instead of a dict
+                 if isinstance(slo_data, str):
+                     # Malformed input - Slo should be a dict with an SloName key
+                     raise ValueError(
+                         f"Invalid SLO target format. Expected {{'Type':'slo','Data':{{'Slo':{{'SloName':'name'}}}}}} "
+                         f"but got {{'Slo':'{slo_data}'}}. The 'Slo' field must be a dictionary with an 'SloName' key."
+                     )
+                 elif isinstance(slo_data, dict):
+                     slo_name = slo_data.get('SloName', '')
+                 else:
+                     # Handle other unexpected types
+                     raise ValueError(
+                         f"Invalid SLO target format. The 'Slo' field must be a dictionary with an 'SloName' key, "
+                         f'but got {type(slo_data).__name__}: {slo_data}'
+                     )
+
+                 if '*' in slo_name:
+                     wildcard_patterns.append((target, slo_name))
+                 else:
+                     expanded_targets.append(target)
+             else:
+                 expanded_targets.append(target)
+         else:
+             expanded_targets.append(target)
+
+     # Expand wildcard patterns for SLOs
+     if wildcard_patterns:
+         logger.debug(f'Expanding {len(wildcard_patterns)} SLO wildcard patterns')
+         try:
+             list_slos_params = {
+                 'MaxResults': max_results,
+                 'IncludeLinkedAccounts': True,
+             }
+
+             if next_token:
+                 list_slos_params['NextToken'] = next_token
+
+             slos_response = applicationsignals_client.list_service_level_objectives(
+                 **list_slos_params
+             )
+             slos_batch = slos_response.get('SloSummaries', [])
+             returned_next_token = slos_response.get('NextToken')
+
+             # Collect all SLO names from this batch
+             for slo in slos_batch:
+                 slo_name = slo.get('Name', '')
+                 slo_names_in_batch.append(slo_name)
+
+             # Handle wildcard patterns
+             for original_target, pattern in wildcard_patterns:
+                 search_term = pattern.strip('*').lower() if pattern != '*' else ''
+                 matches_found = 0
+
+                 for slo in slos_batch:
+                     slo_name = slo.get('Name', '')
+                     if search_term == '' or search_term in slo_name.lower():
+                         expanded_targets.append(
+                             {
+                                 'Type': 'slo',
+                                 'Data': {
+                                     'Slo': {'SloName': slo_name, 'SloArn': slo.get('Arn', '')}
+                                 },
+                             }
+                         )
+                         matches_found += 1
+
+                 logger.debug(f"SLO pattern '{pattern}' expanded to {matches_found} targets")
+             return expanded_targets, returned_next_token, slo_names_in_batch
+         except Exception as e:
+             logger.warning(f'Failed to expand SLO patterns: {e}')
+             raise ValueError(f'Failed to expand SLO wildcard patterns. {str(e)}')
+
+     return expanded_targets, None, slo_names_in_batch
+
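Accepted and rejected target shapes, per the validation above (names illustrative):

    ok = {'Type': 'slo', 'Data': {'Slo': {'SloName': 'checkout-availability'}}}
    expand_slo_wildcard_patterns([ok])  # no wildcard: passes through as ([ok], None, [])

    bad = {'Type': 'slo', 'Data': {'Slo': 'checkout-availability'}}  # string instead of dict
    # expand_slo_wildcard_patterns([bad])  # raises ValueError: "Invalid SLO target format..."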
+
+ def expand_service_operation_wildcard_patterns(
+     targets: List[dict],
+     unix_start: int,
+     unix_end: int,
+     next_token: Optional[str] = None,
+     max_results: int = 5,
+     applicationsignals_client=None,
+ ) -> Tuple[List[dict], Optional[str], List[str], Dict[str, int]]:
+     """Expand wildcard patterns for service operation targets with pagination support.
+
+     Args:
+         targets: List of target dictionaries
+         unix_start: Start time as unix timestamp
+         unix_end: End time as unix timestamp
+         next_token: Token for pagination from previous list_services call
+         max_results: Maximum number of services to return
+         applicationsignals_client: AWS Application Signals client
+
+     Returns:
+         Tuple of (expanded_targets, next_token, all_service_names, filtering_stats)
+         filtering_stats contains: {'total_services': int, 'instrumented_services': int, 'filtered_out': int}
+     """
+     if applicationsignals_client is None:
+         from .aws_clients import applicationsignals_client
+
+     expanded_targets = []
+     wildcard_patterns = []
+     all_service_names = []
+     filtering_stats = {'total_services': 0, 'instrumented_services': 0, 'filtered_out': 0}
+
+     for target in targets:
+         if isinstance(target, dict):
+             ttype = target.get('Type', '').lower()
+             if ttype == 'service_operation':
+                 # Check for wildcard patterns in service names OR operation names
+                 service_op_data = target.get('Data', {}).get('ServiceOperation', {})
+                 service_data = service_op_data.get('Service', {})
+                 service_name = service_data.get('Name', '')
+                 operation = service_op_data.get('Operation', '')
+
+                 # Check if either service name or operation has wildcards
+                 if '*' in service_name or '*' in operation:
+                     wildcard_patterns.append((target, service_name, operation))
+                 else:
+                     expanded_targets.append(target)
+             else:
+                 expanded_targets.append(target)
+         else:
+             expanded_targets.append(target)
+
+     # Expand wildcard patterns for service operations
+     if wildcard_patterns:
+         logger.debug(
+             f'Expanding {len(wildcard_patterns)} service operation wildcard patterns with pagination'
+         )
+         try:
+             # Use the common pagination function
+             instrumented_services, returned_next_token, all_service_names, filtering_stats = (
+                 _fetch_instrumented_services_with_pagination(
+                     unix_start, unix_end, next_token, max_results, applicationsignals_client
+                 )
+             )
+
+             for original_target, service_pattern, operation_pattern in wildcard_patterns:
+                 service_search_term = (
+                     service_pattern.strip('*').lower() if service_pattern != '*' else ''
+                 )
+                 operation_search_term = (
+                     operation_pattern.strip('*').lower() if operation_pattern != '*' else ''
+                 )
+                 matches_found = 0
+
+                 # Get the original metric type from the pattern
+                 service_op_data = original_target.get('Data', {}).get('ServiceOperation', {})
+                 metric_type = service_op_data.get('MetricType', 'Latency')
+
+                 # Find matching services from instrumented services only
+                 matching_services = []
+                 for service in instrumented_services:
+                     service_attrs = service.get('KeyAttributes', {})
+                     service_name = service_attrs.get('Name', '')
+
+                     # Check if service matches the pattern
+                     if '*' not in service_pattern:
+                         # Exact service name match
+                         if service_name == service_pattern:
+                             matching_services.append(service)
+                     else:
+                         # Wildcard service name match
+                         if (
+                             service_search_term == ''
+                             or service_search_term in service_name.lower()
+                         ):
+                             matching_services.append(service)
+
+                 logger.debug(
+                     f"Found {len(matching_services)} instrumented services matching pattern '{service_pattern}'"
+                 )
+
+                 # For each matching service, get operations and expand operation patterns
+                 for service in matching_services:
+                     service_attrs = service.get('KeyAttributes', {})
+                     service_name = service_attrs.get('Name', '')
+                     environment = service_attrs.get('Environment', '')
+
+                     try:
+                         # Get operations for this service
+                         operations_response = applicationsignals_client.list_service_operations(
+                             StartTime=datetime.fromtimestamp(unix_start, tz=timezone.utc),
+                             EndTime=datetime.fromtimestamp(unix_end, tz=timezone.utc),
+                             KeyAttributes=service_attrs,
+                             MaxResults=100,
+                         )
+
+                         operations = operations_response.get('Operations', [])
+                         logger.debug(
+                             f"Found {len(operations)} operations for service '{service_name}'"
+                         )
+
+                         # Filter operations based on operation pattern
+                         for operation in operations:
+                             operation_name = operation.get('Name', '')
+
+                             # Check if operation matches the pattern
+                             operation_matches = False
+                             if '*' not in operation_pattern:
+                                 # Exact operation name match
+                                 operation_matches = operation_name == operation_pattern
+                             else:
+                                 # Wildcard operation name match
+                                 if operation_search_term == '':
+                                     # Match all operations
+                                     operation_matches = True
+                                 else:
+                                     # Check if operation contains the search term
+                                     operation_matches = (
+                                         operation_search_term in operation_name.lower()
+                                     )
+
+                             if operation_matches:
+                                 # Check if this operation has the required metric type
+                                 metric_refs = operation.get('MetricReferences', [])
+                                 has_metric_type = any(
+                                     ref.get('MetricType', '') == metric_type
+                                     or (
+                                         metric_type == 'Availability'
+                                         and ref.get('MetricType', '') == 'Fault'
+                                     )
+                                     for ref in metric_refs
+                                 )
+
+                                 if has_metric_type:
+                                     service_target = _create_service_target(
+                                         service_name, environment
+                                     )
+                                     expanded_targets.append(
+                                         {
+                                             'Type': 'service_operation',
+                                             'Data': {
+                                                 'ServiceOperation': {
+                                                     'Service': service_target['Data']['Service'],
+                                                     'Operation': operation_name,
+                                                     'MetricType': metric_type,
+                                                 }
+                                             },
+                                         }
+                                     )
+                                     matches_found += 1
+                                     logger.debug(
+                                         f'Added operation: {service_name} -> {operation_name} ({metric_type})'
+                                     )
+                                 else:
+                                     logger.debug(
+                                         f'Skipping operation {operation_name} - no {metric_type} metric available'
+                                     )
+
+                     except Exception as e:
+                         logger.warning(
+                             f"Failed to get operations for service '{service_name}': {e}"
+                         )
+                         continue
+
+                 logger.debug(
+                     f"Service operation pattern '{service_pattern}' + '{operation_pattern}' expanded to {matches_found} targets"
+                 )
+
+             return (
+                 expanded_targets,
+                 returned_next_token,
+                 all_service_names,
+                 filtering_stats,
+             )
+
+         except Exception as e:
+             logger.warning(f'Failed to expand service operation patterns: {e}')
+             raise ValueError(f'Failed to expand service operation wildcard patterns. {str(e)}')
+
+     return expanded_targets, None, all_service_names, filtering_stats
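Finally, a sketch of a service-operation expansion (placeholder names and window; AWS access assumed). The operation wildcard is matched against list_service_operations results and kept only where the requested metric type is available:

    import time

    now = int(time.time())
    target = {
        'Type': 'service_operation',
        'Data': {
            'ServiceOperation': {
                'Service': {'Type': 'Service', 'Name': 'orders', 'Environment': 'eks:prod'},
                'Operation': 'GET *',     # wildcard on the operation name
                'MetricType': 'Latency',  # defaulted to 'Latency' when omitted
            }
        },
    }

    expanded, token, seen_names, stats = expand_service_operation_wildcard_patterns(
        [target], unix_start=now - 3600, unix_end=now
    )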