awslabs.cloudwatch-appsignals-mcp-server 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -402,6 +402,26 @@ async def query_sampled_traces(
             return obj.isoformat()
         return obj
 
+    # Helper function to extract fault message from root causes for deduplication
+    def get_fault_message(trace_data):
+        """Extract fault message from a trace for deduplication.
+
+        Only checks FaultRootCauses (5xx server errors) since this is the primary
+        use case for root cause investigation. Traces without fault messages are
+        not deduplicated.
+        """
+        # Only check FaultRootCauses for deduplication
+        root_causes = trace_data.get('FaultRootCauses', [])
+        if root_causes:
+            for cause in root_causes:
+                services = cause.get('Services', [])
+                for service in services:
+                    exceptions = service.get('Exceptions', [])
+                    if exceptions and exceptions[0].get('Message'):
+                        return exceptions[0].get('Message')
+        return None
+
+    # Build trace summaries (original format)
     trace_summaries = []
     for trace in traces:
         # Create a simplified trace data structure to reduce size
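The new `get_fault_message` helper walks the nested `FaultRootCauses` → `Services` → `Exceptions` structure of a trace summary and returns the first exception message it finds. A hypothetical fragment in the shape the helper expects, for illustration:

```
# Hypothetical trace summary fragment, shaped the way get_fault_message
# traverses it; all values here are invented for illustration.
summary = {
    'Id': '1-00000000-example',
    'HasFault': True,
    'FaultRootCauses': [
        {
            'Services': [
                {
                    'Name': 'payment-service',
                    'Exceptions': [
                        {'Name': 'ConnectionError', 'Message': 'Connection refused to db:5432'},
                    ],
                }
            ]
        }
    ],
}

# get_fault_message(summary) would return the first exception message:
# 'Connection refused to db:5432'
```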
@@ -417,17 +437,11 @@ async def query_sampled_traces(
 
         # Only include root causes if they exist (to save space)
         if trace.get('ErrorRootCauses'):
-            trace_data['ErrorRootCauses'] = trace.get('ErrorRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['ErrorRootCauses'] = trace.get('ErrorRootCauses', [])[:3]
         if trace.get('FaultRootCauses'):
-            trace_data['FaultRootCauses'] = trace.get('FaultRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['FaultRootCauses'] = trace.get('FaultRootCauses', [])[:3]
         if trace.get('ResponseTimeRootCauses'):
-            trace_data['ResponseTimeRootCauses'] = trace.get('ResponseTimeRootCauses', [])[
-                :3
-            ]  # Limit to first 3
+            trace_data['ResponseTimeRootCauses'] = trace.get('ResponseTimeRootCauses', [])[:3]
 
         # Include limited annotations for key operations
         annotations = trace.get('Annotations', {})
@@ -447,15 +461,50 @@ async def query_sampled_traces(
         # Convert any datetime objects to ISO format strings
         for key, value in trace_data.items():
             trace_data[key] = convert_datetime(value)
+
         trace_summaries.append(trace_data)
 
+    # Deduplicate trace summaries by fault message
+    seen_faults = {}
+    deduped_trace_summaries = []
+
+    for trace_summary in trace_summaries:
+        # Check if this trace has an error
+        has_issues = (
+            trace_summary.get('HasError')
+            or trace_summary.get('HasFault')
+            or trace_summary.get('HasThrottle')
+        )
+
+        if not has_issues:
+            # Always include healthy traces
+            deduped_trace_summaries.append(trace_summary)
+            continue
+
+        # Extract fault message for deduplication (only checks FaultRootCauses)
+        fault_msg = get_fault_message(trace_summary)
+
+        if fault_msg and fault_msg in seen_faults:
+            # Skip this trace - we already have one with the same fault message
+            seen_faults[fault_msg]['count'] += 1
+            logger.debug(
+                f'Skipping duplicate trace {trace_summary.get("Id")} - fault message already seen: {fault_msg[:100]}...'
+            )
+            continue
+        else:
+            # First time seeing this fault (or no fault message) - include it
+            deduped_trace_summaries.append(trace_summary)
+            if fault_msg:
+                seen_faults[fault_msg] = {'count': 1}
+
     # Check transaction search status
     is_tx_search_enabled, tx_destination, tx_status = check_transaction_search_enabled(region)
 
+    # Build response with original format but deduplicated traces
     result_data = {
-        'TraceSummaries': trace_summaries,
-        'TraceCount': len(trace_summaries),
-        'Message': f'Retrieved {len(trace_summaries)} traces (limited to prevent size issues)',
+        'TraceSummaries': deduped_trace_summaries,
+        'TraceCount': len(deduped_trace_summaries),
+        'Message': f'Retrieved {len(deduped_trace_summaries)} unique traces from {len(trace_summaries)} total (deduplicated by fault message)',
         'SamplingNote': "⚠️ This data is from X-Ray's 5% sampling. Results may not show all errors or issues.",
         'TransactionSearchStatus': {
             'enabled': is_tx_search_enabled,
@@ -467,9 +516,18 @@ async def query_sampled_traces(
         },
     }
 
+    # Add dedup stats if we actually deduped anything
+    if len(deduped_trace_summaries) < len(trace_summaries):
+        duplicates_removed = len(trace_summaries) - len(deduped_trace_summaries)
+        result_data['DeduplicationStats'] = {
+            'OriginalTraceCount': len(trace_summaries),
+            'DuplicatesRemoved': duplicates_removed,
+            'UniqueFaultMessages': len(seen_faults),
+        }
+
     elapsed_time = timer() - start_time_perf
     logger.info(
-        f'query_sampled_traces completed in {elapsed_time:.3f}s - retrieved {len(trace_summaries)} traces'
+        f'query_sampled_traces completed in {elapsed_time:.3f}s - retrieved {len(deduped_trace_summaries)} unique traces from {len(trace_summaries)} total'
     )
     return json.dumps(result_data, indent=2)
 
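Taken together, the change is a single deduplication pass keyed on the first fault exception message: healthy traces always pass through, and faulted traces pass through once per distinct message. A condensed, standalone sketch of the same logic (the trace summaries below are invented for illustration):

```
def get_fault_message(trace_data):
    # Same traversal as the helper added in the diff above.
    for cause in trace_data.get('FaultRootCauses', []):
        for service in cause.get('Services', []):
            exceptions = service.get('Exceptions', [])
            if exceptions and exceptions[0].get('Message'):
                return exceptions[0].get('Message')
    return None


def dedupe_by_fault(trace_summaries):
    """Keep healthy traces, plus the first trace per distinct fault message."""
    seen_faults, deduped = {}, []
    for ts in trace_summaries:
        has_issues = ts.get('HasError') or ts.get('HasFault') or ts.get('HasThrottle')
        fault_msg = get_fault_message(ts) if has_issues else None
        if fault_msg and fault_msg in seen_faults:
            seen_faults[fault_msg]['count'] += 1  # duplicate fault: count it, drop it
            continue
        deduped.append(ts)  # healthy, first-of-its-fault, or fault without a message
        if fault_msg:
            seen_faults[fault_msg] = {'count': 1}
    return deduped, seen_faults


# Two faulted traces sharing one message collapse to one; the healthy trace is kept.
fault = {'Services': [{'Exceptions': [{'Message': 'boom'}]}]}
traces = [
    {'Id': 'a', 'HasFault': True, 'FaultRootCauses': [fault]},
    {'Id': 'b', 'HasFault': True, 'FaultRootCauses': [fault]},
    {'Id': 'c'},
]
deduped, seen = dedupe_by_fault(traces)
assert [t['Id'] for t in deduped] == ['a', 'c'] and seen['boom']['count'] == 2
```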
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.cloudwatch-appsignals-mcp-server
-Version: 0.1.9
+Version: 0.1.11
 Summary: An AWS Labs Model Context Protocol (MCP) server for AWS Application Signals
 Project-URL: Homepage, https://awslabs.github.io/mcp/
 Project-URL: Documentation, https://awslabs.github.io/mcp/servers/cloudwatch-appsignals-mcp-server/
@@ -181,7 +181,35 @@ FILTER attributes.aws.local.service = "payment-service" and attributes.aws.local
 - `duration > 5` - Find slow requests (over 5 seconds)
 - `annotation[aws.local.operation]="GET /api/orders"` - Filter by specific operation
 
-#### 12. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
+#### 12. **`analyze_canary_failures`** - Comprehensive Canary Failure Analysis
+**Deep dive into CloudWatch Synthetics canary failures with root cause identification**
+
+- Comprehensive canary failure analysis with deep dive into issues
+- Analyze historical patterns and specific incident details
+- Get comprehensive artifact analysis including logs, screenshots, and HAR files
+- Receive actionable recommendations based on AWS debugging methodology
+- Correlate canary failures with Application Signals telemetry data
+- Identify performance degradation and availability issues across service dependencies
+
+**Key Features:**
+- **Failure Pattern Analysis**: Identifies recurring failure modes and temporal patterns
+- **Artifact Deep Dive**: Analyzes canary logs, screenshots, and network traces for root causes
+- **Service Correlation**: Links canary failures to upstream/downstream service issues using Application Signals
+- **Performance Insights**: Detects latency spikes, fault rates, and connection issues
+- **Actionable Remediation**: Provides specific steps based on AWS operational best practices
+- **IAM Analysis**: Validates IAM roles and permissions for common canary access issues
+- **Backend Service Integration**: Correlates canary failures with backend service errors and exceptions
+
+**Common Use Cases:**
+- Incident Response: Rapid diagnosis of canary failures during outages
+- Performance Investigation: Understanding latency and availability degradation
+- Dependency Analysis: Identifying which services are causing canary failures
+- Historical Trending: Analyzing failure patterns over time for proactive improvements
+- Root Cause Analysis: Deep dive into specific failure scenarios with full context
+- Infrastructure Issues: Diagnose S3 access, VPC connectivity, and browser target problems
+- Backend Service Debugging: Identify application code issues affecting canary success
+
+#### 13. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
 **Use `audit_services()` as the PRIMARY tool for service auditing**
 
 - Basic report showing summary counts (total, healthy, breached, insufficient data)
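The README example that follows shows the new tool in conversational use; any MCP client can also call it by name. Below is a minimal sketch using the official `mcp` Python SDK over stdio, assuming the package's console script follows the usual awslabs naming convention and that the tool accepts the `canary_name` argument shown in Example 6:

```
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Assumes the package is installed so its console script is on PATH,
# e.g. `pip install awslabs.cloudwatch-appsignals-mcp-server`.
server = StdioServerParameters(command='awslabs.cloudwatch-appsignals-mcp-server')


async def main() -> None:
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Call the tool by name; canary_name follows Example 6 below.
            result = await session.call_tool(
                'analyze_canary_failures', {'canary_name': 'pc-visit-vet'}
            )
            print(result.content[0].text)


asyncio.run(main())
```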
@@ -526,6 +554,68 @@ Found 8 services being monitored:
 3. Review payment-api-service timeout configurations
 ```
 
+### Example 6: Canary Failure Analysis and Root Cause Investigation
+```
+User: "My Pet Clinic canaries are failing. Can you help me diagnose the issues?"
+Assistant: I'll analyze your Pet Clinic canaries to identify the root causes of failures.
+
+[Uses analyze_canary_failures for comprehensive canary analysis]
+
+analyze_canary_failures(canary_name="pc-visit-vet")
+analyze_canary_failures(canary_name="pc-add-visit")
+analyze_canary_failures(canary_name="webapp-erorrpagecanary")
+
+🔍 CANARY FAILURE ANALYSIS RESULTS:
+
+🔴 CRITICAL ISSUES IDENTIFIED:
+
+**pc-visit-vet canary:**
+• Root Cause: S3 bucket access issue
+• Error Pattern: Exit status 127, "No such file or directory"
+• Failure Count: 5 consecutive failures
+• IAM Analysis: ✅ Role exists but S3 bucket ARN patterns incorrect in policies
+
+**pc-add-visit canary:**
+• Root Cause: Selector timeout + backend service errors
+• Error Pattern: 30000ms timeout waiting for UI element + MissingFormatArgumentException
+• Backend Issue: Format specifier '% o' error in BedrockRuntimeV1Service.invokeTitanModel()
+• Performance: 34 second average response time, 0% success rate
+
+**webapp-erorrpagecanary:**
+• Root Cause: Browser target close during selector wait
+• Error Pattern: "Target closed" waiting for `#jsError` selector
+• Failure Count: 5 consecutive failures with 60000ms connection timeouts
+
+🔍 BACKEND SERVICE CORRELATION:
+• MissingFormatArgumentException detected in Pet Clinic backend
+• Location: org.springframework.samples.petclinic.customers.aws.BedrockRuntimeV1Service.invokeTitanModel (line 75)
+• Impact: Affects multiple canaries testing Pet Clinic functionality
+• 20% fault rate on GET /api/customer/diagnose/owners/{ownerId}/pets/{petId}
+
+🛠️ RECOMMENDED ACTIONS:
+
+**Immediate (Critical):**
+1. Fix S3 bucket ARN patterns in pc-visit-vet IAM policy
+2. Fix format string bug in BedrockRuntimeV1Service: change '% o' to '%s' or correct format
+3. Add VPC permissions to canary IAM roles if Lambda runs in VPC
+
+**Infrastructure (High Priority):**
+4. Investigate browser target stability issues (webapp-erorrpagecanary)
+5. Review canary timeout configurations - consider increasing from 30s to 60s
+6. Implement circuit breaker pattern for external service dependencies
+
+**Monitoring (Medium Priority):**
+7. Add Application Signals monitoring for canary success rates
+8. Set up alerts for consecutive canary failures (>3 failures)
+9. Implement canary health dashboard with real-time status
+
+🎯 EXPECTED OUTCOMES:
+• S3 access fix: Immediate resolution of pc-visit-vet failures
+• Backend service fix: 80%+ improvement in Pet Clinic canary success rates
+• Infrastructure improvements: Reduced browser target close errors
+• Enhanced monitoring: Proactive failure detection and faster resolution
+```
+
 ## Recommended Workflows
 
 ### 🎯 Primary Audit Workflow (Most Common)
@@ -0,0 +1,19 @@
+awslabs/__init__.py,sha256=WuqxdDgUZylWNmVoPKiK7qGsTB_G4UmuXIrJ-VBwDew,731
+awslabs/cloudwatch_appsignals_mcp_server/__init__.py,sha256=-03Q_rK9fNWDftPT4JwNlDDVrjN7thel1i8Q5pByG6o,682
+awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py,sha256=xYJz0I-wdigYKxAaVLjyoMUh2UQpwlZM7sFxfL2pPmw,8923
+awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py,sha256=mcXxVjla0Wnh3ItuSralPFBhRWvhoWLgtvIHcAGJQog,31031
+awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py,sha256=YbUeyz_yz1n5e9EfOYYXSTZNmnkvkVF0iXrByT5aB-A,4726
+awslabs/cloudwatch_appsignals_mcp_server/canary_utils.py,sha256=bymLDQ1kFeNIJwMRcWOLXOd2b0NyWSqmmwMrdXMxqPg,37456
+awslabs/cloudwatch_appsignals_mcp_server/server.py,sha256=ZoYd-UgY7jFOsXLnV5gWTKDzFkmbvF2vpqEnHLgbO5Q,67903
+awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py,sha256=i-6emomFio4xsVsb5iRWvOzuHI7vo7WXe7VlLMD-qK8,9659
+awslabs/cloudwatch_appsignals_mcp_server/service_tools.py,sha256=iJnkROnR0FdxEgF0LJb5zYNcD-CCSa9LVXwUqxU1_tI,29093
+awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py,sha256=LGs7tDLVVa3mbT_maTefwGEA3cl3fNVft9brh3lVTzM,12374
+awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py,sha256=dMLGqeZYHu2adk9uquBGIZkMZStb-puzlI0xtKhxYNI,17824
+awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py,sha256=SMIaxStaJNZOU4GaAFkUiNXrb978bPTlF7MRBRJVEP8,31785
+awslabs/cloudwatch_appsignals_mcp_server/utils.py,sha256=nZBqiCBAUewQft26FVf4IGL4P1b152VAcG9Y7Mh0gZY,5782
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/METADATA,sha256=iXciAJl-rIk4WiFMWy6AQcLGpCdFgKgcjue52r1BkWQ,31858
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/entry_points.txt,sha256=iGwIMLU6AsBawl2Fhqi9GoeWdMGIVtg86-McaaNQqAQ,114
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/licenses/LICENSE,sha256=zE1N4JILDTkSIDtdmqdnKKxKEQh_VdqeoAV2230eNOI,10141
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/licenses/NOTICE,sha256=Pfbul2Ga0IJU2RMZFHC8QwDvNk72WO2XMn9l3390VYs,108
+awslabs_cloudwatch_appsignals_mcp_server-0.1.11.dist-info/RECORD,,
@@ -1,18 +0,0 @@
-awslabs/__init__.py,sha256=WuqxdDgUZylWNmVoPKiK7qGsTB_G4UmuXIrJ-VBwDew,731
-awslabs/cloudwatch_appsignals_mcp_server/__init__.py,sha256=cPlKPRqNSL5d4RkWGdDJ_7jay1qZs4ppnntzxDdr4nw,681
-awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py,sha256=xYJz0I-wdigYKxAaVLjyoMUh2UQpwlZM7sFxfL2pPmw,8923
-awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py,sha256=Mqa8q3MUMpDKRFJQgUoKYikknGO4sBIe2_-0e2BWlCA,30765
-awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py,sha256=I-amnrnVLGv-gAPkEYo-AxvmqktBjpdEuB1pjeTO1Fs,3542
-awslabs/cloudwatch_appsignals_mcp_server/server.py,sha256=nZ_s6lKQBf77Wdy8eyGLYPDmOBGAq1ZNH41BMpdYwQw,41569
-awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py,sha256=i-6emomFio4xsVsb5iRWvOzuHI7vo7WXe7VlLMD-qK8,9659
-awslabs/cloudwatch_appsignals_mcp_server/service_tools.py,sha256=iJnkROnR0FdxEgF0LJb5zYNcD-CCSa9LVXwUqxU1_tI,29093
-awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py,sha256=LGs7tDLVVa3mbT_maTefwGEA3cl3fNVft9brh3lVTzM,12374
-awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py,sha256=dMLGqeZYHu2adk9uquBGIZkMZStb-puzlI0xtKhxYNI,17824
-awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py,sha256=F-vanuK8preiRHyI-EbZoOcxTueycDk-rRRPtjmKg5E,28990
-awslabs/cloudwatch_appsignals_mcp_server/utils.py,sha256=nZBqiCBAUewQft26FVf4IGL4P1b152VAcG9Y7Mh0gZY,5782
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/METADATA,sha256=WQuVsqGbrqcKZp0SA4cPBIGA0fSRRc_UNJ1ueUcjW78,27162
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/entry_points.txt,sha256=iGwIMLU6AsBawl2Fhqi9GoeWdMGIVtg86-McaaNQqAQ,114
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/licenses/LICENSE,sha256=zE1N4JILDTkSIDtdmqdnKKxKEQh_VdqeoAV2230eNOI,10141
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/licenses/NOTICE,sha256=Pfbul2Ga0IJU2RMZFHC8QwDvNk72WO2XMn9l3390VYs,108
-awslabs_cloudwatch_appsignals_mcp_server-0.1.9.dist-info/RECORD,,