awslabs.cloudwatch-appsignals-mcp-server 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -402,6 +402,26 @@ async def query_sampled_traces(
             return obj.isoformat()
         return obj
 
+    # Helper function to extract fault message from root causes for deduplication
+    def get_fault_message(trace_data):
+        """Extract fault message from a trace for deduplication.
+
+        Only checks FaultRootCauses (5xx server errors) since this is the primary
+        use case for root cause investigation. Traces without fault messages are
+        not deduplicated.
+        """
+        # Only check FaultRootCauses for deduplication
+        root_causes = trace_data.get('FaultRootCauses', [])
+        if root_causes:
+            for cause in root_causes:
+                services = cause.get('Services', [])
+                for service in services:
+                    exceptions = service.get('Exceptions', [])
+                    if exceptions and exceptions[0].get('Message'):
+                        return exceptions[0].get('Message')
+        return None
+
+    # Build trace summaries (original format)
     trace_summaries = []
     for trace in traces:
         # Create a simplified trace data structure to reduce size
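The new `get_fault_message` helper walks the nested `FaultRootCauses` → `Services` → `Exceptions` structure of a trace summary and returns the first exception message it finds. A hypothetical fragment in the shape the helper expects, for illustration:

```
# Hypothetical trace summary fragment, shaped the way get_fault_message
# traverses it; all values here are invented for illustration.
summary = {
    'Id': '1-00000000-example',
    'HasFault': True,
    'FaultRootCauses': [
        {
            'Services': [
                {
                    'Name': 'payment-service',
                    'Exceptions': [
                        {'Name': 'ConnectionError', 'Message': 'Connection refused to db:5432'},
                    ],
                }
            ]
        }
    ],
}

# get_fault_message(summary) would return the first exception message:
# 'Connection refused to db:5432'
```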
@@ -417,17 +437,11 @@ async def query_sampled_traces(
 
         # Only include root causes if they exist (to save space)
         if trace.get('ErrorRootCauses'):
-            trace_data['ErrorRootCauses'] = trace.get('ErrorRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['ErrorRootCauses'] = trace.get('ErrorRootCauses', [])[:3]
         if trace.get('FaultRootCauses'):
-            trace_data['FaultRootCauses'] = trace.get('FaultRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['FaultRootCauses'] = trace.get('FaultRootCauses', [])[:3]
         if trace.get('ResponseTimeRootCauses'):
-            trace_data['ResponseTimeRootCauses'] = trace.get('ResponseTimeRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['ResponseTimeRootCauses'] = trace.get('ResponseTimeRootCauses', [])[:3]
 
         # Include limited annotations for key operations
         annotations = trace.get('Annotations', {})
@@ -447,15 +461,50 @@ async def query_sampled_traces(
         # Convert any datetime objects to ISO format strings
         for key, value in trace_data.items():
             trace_data[key] = convert_datetime(value)
+
         trace_summaries.append(trace_data)
 
+    # Deduplicate trace summaries by fault message
+    seen_faults = {}
+    deduped_trace_summaries = []
+
+    for trace_summary in trace_summaries:
+        # Check if this trace has an error
+        has_issues = (
+            trace_summary.get('HasError')
+            or trace_summary.get('HasFault')
+            or trace_summary.get('HasThrottle')
+        )
+
+        if not has_issues:
+            # Always include healthy traces
+            deduped_trace_summaries.append(trace_summary)
+            continue
+
+        # Extract fault message for deduplication (only checks FaultRootCauses)
+        fault_msg = get_fault_message(trace_summary)
+
+        if fault_msg and fault_msg in seen_faults:
+            # Skip this trace - we already have one with the same fault message
+            seen_faults[fault_msg]['count'] += 1
+            logger.debug(
+                f'Skipping duplicate trace {trace_summary.get("Id")} - fault message already seen: {fault_msg[:100]}...'
+            )
+            continue
+        else:
+            # First time seeing this fault (or no fault message) - include it
+            deduped_trace_summaries.append(trace_summary)
+            if fault_msg:
+                seen_faults[fault_msg] = {'count': 1}
+
     # Check transaction search status
     is_tx_search_enabled, tx_destination, tx_status = check_transaction_search_enabled(region)
 
+    # Build response with original format but deduplicated traces
     result_data = {
-        'TraceSummaries': trace_summaries,
-        'TraceCount': len(trace_summaries),
-        'Message': f'Retrieved {len(trace_summaries)} traces (limited to prevent size issues)',
+        'TraceSummaries': deduped_trace_summaries,
+        'TraceCount': len(deduped_trace_summaries),
+        'Message': f'Retrieved {len(deduped_trace_summaries)} unique traces from {len(trace_summaries)} total (deduplicated by fault message)',
         'SamplingNote': "⚠️ This data is from X-Ray's 5% sampling. Results may not show all errors or issues.",
         'TransactionSearchStatus': {
             'enabled': is_tx_search_enabled,
@@ -467,9 +516,18 @@ async def query_sampled_traces(
         },
     }
 
+    # Add dedup stats if we actually deduped anything
+    if len(deduped_trace_summaries) < len(trace_summaries):
+        duplicates_removed = len(trace_summaries) - len(deduped_trace_summaries)
+        result_data['DeduplicationStats'] = {
+            'OriginalTraceCount': len(trace_summaries),
+            'DuplicatesRemoved': duplicates_removed,
+            'UniqueFaultMessages': len(seen_faults),
+        }
+
     elapsed_time = timer() - start_time_perf
     logger.info(
-        f'query_sampled_traces completed in {elapsed_time:.3f}s - retrieved {len(trace_summaries)} traces'
+        f'query_sampled_traces completed in {elapsed_time:.3f}s - retrieved {len(deduped_trace_summaries)} unique traces from {len(trace_summaries)} total'
     )
     return json.dumps(result_data, indent=2)
 
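Taken together, the change is a single deduplication pass keyed on the first fault exception message: healthy traces always pass through, and faulted traces pass through once per distinct message. A condensed, standalone sketch of the same logic (the trace summaries below are invented for illustration):

```
def get_fault_message(trace_data):
    # Same traversal as the helper added in the diff above.
    for cause in trace_data.get('FaultRootCauses', []):
        for service in cause.get('Services', []):
            exceptions = service.get('Exceptions', [])
            if exceptions and exceptions[0].get('Message'):
                return exceptions[0].get('Message')
    return None


def dedupe_by_fault(trace_summaries):
    """Keep healthy traces, plus the first trace per distinct fault message."""
    seen_faults, deduped = {}, []
    for ts in trace_summaries:
        has_issues = ts.get('HasError') or ts.get('HasFault') or ts.get('HasThrottle')
        fault_msg = get_fault_message(ts) if has_issues else None
        if fault_msg and fault_msg in seen_faults:
            seen_faults[fault_msg]['count'] += 1  # duplicate fault: count it, drop it
            continue
        deduped.append(ts)  # healthy, first-of-its-fault, or fault without a message
        if fault_msg:
            seen_faults[fault_msg] = {'count': 1}
    return deduped, seen_faults


# Two faulted traces sharing one message collapse to one; the healthy trace is kept.
fault = {'Services': [{'Exceptions': [{'Message': 'boom'}]}]}
traces = [
    {'Id': 'a', 'HasFault': True, 'FaultRootCauses': [fault]},
    {'Id': 'b', 'HasFault': True, 'FaultRootCauses': [fault]},
    {'Id': 'c'},
]
deduped, seen = dedupe_by_fault(traces)
assert [t['Id'] for t in deduped] == ['a', 'c'] and seen['boom']['count'] == 2
```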
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.cloudwatch-appsignals-mcp-server
-Version: 0.1.9
+Version: 0.1.11
 Summary: An AWS Labs Model Context Protocol (MCP) server for AWS Application Signals
 Project-URL: Homepage, https://awslabs.github.io/mcp/
 Project-URL: Documentation, https://awslabs.github.io/mcp/servers/cloudwatch-appsignals-mcp-server/
@@ -181,7 +181,35 @@ FILTER attributes.aws.local.service = "payment-service" and attributes.aws.local
 - `duration > 5` - Find slow requests (over 5 seconds)
 - `annotation[aws.local.operation]="GET /api/orders"` - Filter by specific operation
 
-#### 12. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
+#### 12. **`analyze_canary_failures`** - Comprehensive Canary Failure Analysis
+**Deep dive into CloudWatch Synthetics canary failures with root cause identification**
+
+- Comprehensive canary failure analysis with deep dive into issues
+- Analyze historical patterns and specific incident details
+- Get comprehensive artifact analysis including logs, screenshots, and HAR files
+- Receive actionable recommendations based on AWS debugging methodology
+- Correlate canary failures with Application Signals telemetry data
+- Identify performance degradation and availability issues across service dependencies
+
+**Key Features:**
+- **Failure Pattern Analysis**: Identifies recurring failure modes and temporal patterns
+- **Artifact Deep Dive**: Analyzes canary logs, screenshots, and network traces for root causes
+- **Service Correlation**: Links canary failures to upstream/downstream service issues using Application Signals
+- **Performance Insights**: Detects latency spikes, fault rates, and connection issues
+- **Actionable Remediation**: Provides specific steps based on AWS operational best practices
+- **IAM Analysis**: Validates IAM roles and permissions for common canary access issues
+- **Backend Service Integration**: Correlates canary failures with backend service errors and exceptions
+
+**Common Use Cases:**
+- Incident Response: Rapid diagnosis of canary failures during outages
+- Performance Investigation: Understanding latency and availability degradation
+- Dependency Analysis: Identifying which services are causing canary failures
+- Historical Trending: Analyzing failure patterns over time for proactive improvements
+- Root Cause Analysis: Deep dive into specific failure scenarios with full context
+- Infrastructure Issues: Diagnose S3 access, VPC connectivity, and browser target problems
+- Backend Service Debugging: Identify application code issues affecting canary success
+
+#### 13. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
 **Use `audit_services()` as the PRIMARY tool for service auditing**
 
 - Basic report showing summary counts (total, healthy, breached, insufficient data)
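The README example that follows shows the new tool in conversational use; any MCP client can also call it by name. Below is a minimal sketch using the official `mcp` Python SDK over stdio, assuming the package's console script follows the usual awslabs naming convention and that the tool accepts the `canary_name` argument shown in Example 6:

```
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Assumes the package is installed so its console script is on PATH,
# e.g. `pip install awslabs.cloudwatch-appsignals-mcp-server`.
server = StdioServerParameters(command='awslabs.cloudwatch-appsignals-mcp-server')


async def main() -> None:
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Call the tool by name; canary_name follows Example 6 below.
            result = await session.call_tool(
                'analyze_canary_failures', {'canary_name': 'pc-visit-vet'}
            )
            print(result.content[0].text)


asyncio.run(main())
```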
@@ -526,6 +554,68 @@ Found 8 services being monitored:
 3. Review payment-api-service timeout configurations
 ```
 
+### Example 6: Canary Failure Analysis and Root Cause Investigation
+```
+User: "My Pet Clinic canaries are failing. Can you help me diagnose the issues?"
+Assistant: I'll analyze your Pet Clinic canaries to identify the root causes of failures.
+
+[Uses analyze_canary_failures for comprehensive canary analysis]
+
+analyze_canary_failures(canary_name="pc-visit-vet")
+analyze_canary_failures(canary_name="pc-add-visit")
+analyze_canary_failures(canary_name="webapp-erorrpagecanary")
+
+🔍 CANARY FAILURE ANALYSIS RESULTS:
+
+🔴 CRITICAL ISSUES IDENTIFIED:
+
+**pc-visit-vet canary:**
+• Root Cause: S3 bucket access issue
+• Error Pattern: Exit status 127, "No such file or directory"
+• Failure Count: 5 consecutive failures
+• IAM Analysis: ✅ Role exists but S3 bucket ARN patterns incorrect in policies
+
+**pc-add-visit canary:**
+• Root Cause: Selector timeout + backend service errors
+• Error Pattern: 30000ms timeout waiting for UI element + MissingFormatArgumentException
+• Backend Issue: Format specifier '% o' error in BedrockRuntimeV1Service.invokeTitanModel()
+• Performance: 34 second average response time, 0% success rate
+
+**webapp-erorrpagecanary:**
+• Root Cause: Browser target close during selector wait
+• Error Pattern: "Target closed" waiting for `#jsError` selector
+• Failure Count: 5 consecutive failures with 60000ms connection timeouts
+
+🔍 BACKEND SERVICE CORRELATION:
+• MissingFormatArgumentException detected in Pet Clinic backend
+• Location: org.springframework.samples.petclinic.customers.aws.BedrockRuntimeV1Service.invokeTitanModel (line 75)
+• Impact: Affects multiple canaries testing Pet Clinic functionality
+• 20% fault rate on GET /api/customer/diagnose/owners/{ownerId}/pets/{petId}
+
+🛠️ RECOMMENDED ACTIONS:
+
+**Immediate (Critical):**
+1. Fix S3 bucket ARN patterns in pc-visit-vet IAM policy
+2. Fix format string bug in BedrockRuntimeV1Service: change '% o' to '%s' or correct format
+3. Add VPC permissions to canary IAM roles if Lambda runs in VPC
+
+**Infrastructure (High Priority):**
+4. Investigate browser target stability issues (webapp-erorrpagecanary)
+5. Review canary timeout configurations - consider increasing from 30s to 60s
+6. Implement circuit breaker pattern for external service dependencies
+
+**Monitoring (Medium Priority):**
+7. Add Application Signals monitoring for canary success rates
+8. Set up alerts for consecutive canary failures (>3 failures)
+9. Implement canary health dashboard with real-time status
+
+🎯 EXPECTED OUTCOMES:
+• S3 access fix: Immediate resolution of pc-visit-vet failures
+• Backend service fix: 80%+ improvement in Pet Clinic canary success rates
+• Infrastructure improvements: Reduced browser target close errors
+• Enhanced monitoring: Proactive failure detection and faster resolution
+```
+
 ## Recommended Workflows
 
 ### 🎯 Primary Audit Workflow (Most Common)
@@ -0,0 +1,19 @@
+awslabs/__init__.py,sha256=WuqxdDgUZylWNmVoPKiK7qGsTB_G4UmuXIrJ-VBwDew,731
+awslabs/cloudwatch_appsignals_mcp_server/__init__.py,sha256=-03Q_rK9fNWDftPT4JwNlDDVrjN7thel1i8Q5pByG6o,682
+awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py,sha256=xYJz0I-wdigYKxAaVLjyoMUh2UQpwlZM7sFxfL2pPmw,8923
+awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py,sha256=mcXxVjla0Wnh3ItuSralPFBhRWvhoWLgtvIHcAGJQog,31031
+awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py,sha256=YbUeyz_yz1n5e9EfOYYXSTZNmnkvkVF0iXrByT5aB-A,4726
+awslabs/cloudwatch_appsignals_mcp_server/canary_utils.py,sha256=bymLDQ1kFeNIJwMRcWOLXOd2b0NyWSqmmwMrdXMxqPg,37456
+awslabs/cloudwatch_appsignals_mcp_server/server.py,sha256=ZoYd-UgY7jFOsXLnV5gWTKDzFkmbvF2vpqEnHLgbO5Q,67903
+awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py,sha256=i-6emomFio4xsVsb5iRWvOzuHI7vo7WXe7VlLMD-qK8,9659
+awslabs/cloudwatch_appsignals_mcp_server/service_tools.py,sha256=iJnkROnR0FdxEgF0LJb5zYNcD-CCSa9LVXwUqxU1_tI,29093
+awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py,sha256=LGs7tDLVVa3mbT_maTefwGEA3cl3fNVft9brh3lVTzM,12374
+awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py,sha256=dMLGqeZYHu2adk9uquBGIZkMZStb-puzlI0xtKhxYNI,17824
+awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py,sha256=SMIaxStaJNZOU4GaAFkUiNXrb978bPTlF7MRBRJVEP8,31785
+awslabs/cloudwatch_appsignals_mcp_server/utils.py,sha256=nZBqiCBAUewQft26FVf4IGL4P1b152VAcG9Y7Mh0gZY,5782
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/METADATA,sha256=iXciAJl-rIk4WiFMWy6AQcLGpCdFgKgcjue52r1BkWQ,31858
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/entry_points.txt,sha256=iGwIMLU6AsBawl2Fhqi9GoeWdMGIVtg86-McaaNQqAQ,114
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/licenses/LICENSE,sha256=zE1N4JILDTkSIDtdmqdnKKxKEQh_VdqeoAV2230eNOI,10141
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/licenses/NOTICE,sha256=Pfbul2Ga0IJU2RMZFHC8QwDvNk72WO2XMn9l3390VYs,108
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/RECORD,,
@@ -1,18 +0,0 @@
-awslabs/__init__.py,sha256=WuqxdDgUZylWNmVoPKiK7qGsTB_G4UmuXIrJ-VBwDew,731
-awslabs/cloudwatch_appsignals_mcp_server/__init__.py,sha256=cPlKPRqNSL5d4RkWGdDJ_7jay1qZs4ppnntzxDdr4nw,681
-awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py,sha256=xYJz0I-wdigYKxAaVLjyoMUh2UQpwlZM7sFxfL2pPmw,8923
-awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py,sha256=Mqa8q3MUMpDKRFJQgUoKYikknGO4sBIe2_-0e2BWlCA,30765
-awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py,sha256=I-amnrnVLGv-gAPkEYo-AxvmqktBjpdEuB1pjeTO1Fs,3542
-awslabs/cloudwatch_appsignals_mcp_server/server.py,sha256=nZ_s6lKQBf77Wdy8eyGLYPDmOBGAq1ZNH41BMpdYwQw,41569
-awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py,sha256=i-6emomFio4xsVsb5iRWvOzuHI7vo7WXe7VlLMD-qK8,9659
-awslabs/cloudwatch_appsignals_mcp_server/service_tools.py,sha256=iJnkROnR0FdxEgF0LJb5zYNcD-CCSa9LVXwUqxU1_tI,29093
-awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py,sha256=LGs7tDLVVa3mbT_maTefwGEA3cl3fNVft9brh3lVTzM,12374
-awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py,sha256=dMLGqeZYHu2adk9uquBGIZkMZStb-puzlI0xtKhxYNI,17824
-awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py,sha256=F-vanuK8preiRHyI-EbZoOcxTueycDk-rRRPtjmKg5E,28990
-awslabs/cloudwatch_appsignals_mcp_server/utils.py,sha256=nZBqiCBAUewQft26FVf4IGL4P1b152VAcG9Y7Mh0gZY,5782
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/METADATA,sha256=WQuVsqGbrqcKZp0SA4cPBIGA0fSRRc_UNJ1ueUcjW78,27162
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/entry_points.txt,sha256=iGwIMLU6AsBawl2Fhqi9GoeWdMGIVtg86-McaaNQqAQ,114
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/licenses/LICENSE,sha256=zE1N4JILDTkSIDtdmqdnKKxKEQh_VdqeoAV2230eNOI,10141
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/licenses/NOTICE,sha256=Pfbul2Ga0IJU2RMZFHC8QwDvNk72WO2XMn9l3390VYs,108
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/RECORD,,