awslabs.cloudwatch-appsignals-mcp-server 0.1.9__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/Dockerfile +2 -2
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/PKG-INFO +92 -2
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/README.md +91 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/__init__.py +1 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py +6 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py +28 -2
- awslabs_cloudwatch_appsignals_mcp_server-0.1.11/awslabs/cloudwatch_appsignals_mcp_server/canary_utils.py +910 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/server.py +536 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py +71 -13
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/pyproject.toml +1 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_audit_utils.py +45 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_aws_profile.py +4 -2
- awslabs_cloudwatch_appsignals_mcp_server-0.1.11/tests/test_canary_utils.py +1479 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_initialization.py +20 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_server.py +1310 -5
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/uv.lock +1 -1
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/.gitignore +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/.python-version +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/CHANGELOG.md +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/LICENSE +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/NOTICE +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/__init__.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/service_tools.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/awslabs/cloudwatch_appsignals_mcp_server/utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/docker-healthcheck.sh +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/conftest.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_audit_presentation_utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_server_audit_functions.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_server_audit_tools.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_service_audit_utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_service_tools_operations.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_sli_report_client.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_slo_tools.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/tests/test_utils.py +0 -0
- {awslabs_cloudwatch_appsignals_mcp_server-0.1.9 → awslabs_cloudwatch_appsignals_mcp_server-0.1.11}/uv-requirements.txt +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
# dependabot should continue to update this to the latest hash.
|
|
16
|
-
FROM public.ecr.aws/docker/library/python:3.13
|
|
16
|
+
FROM public.ecr.aws/docker/library/python:3.13-alpine@sha256:070342a0cc1011532c0e69972cce2bbc6cc633eba294bae1d12abea8bd05303b AS uv
|
|
17
17
|
|
|
18
18
|
# Install the project into `/app`
|
|
19
19
|
WORKDIR /app
|
|
@@ -61,7 +61,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|
|
61
61
|
# Make the directory just in case it doesn't exist
|
|
62
62
|
RUN mkdir -p /root/.local
|
|
63
63
|
|
|
64
|
-
FROM public.ecr.aws/docker/library/python:3.13
|
|
64
|
+
FROM public.ecr.aws/docker/library/python:3.13-alpine@sha256:070342a0cc1011532c0e69972cce2bbc6cc633eba294bae1d12abea8bd05303b
|
|
65
65
|
|
|
66
66
|
# Place executables in the environment at the front of the path and include other binaries
|
|
67
67
|
ENV PATH="/app/.venv/bin:$PATH" \
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: awslabs.cloudwatch-appsignals-mcp-server
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.1.11
|
|
4
4
|
Summary: An AWS Labs Model Context Protocol (MCP) server for AWS Application Signals
|
|
5
5
|
Project-URL: Homepage, https://awslabs.github.io/mcp/
|
|
6
6
|
Project-URL: Documentation, https://awslabs.github.io/mcp/servers/cloudwatch-appsignals-mcp-server/
|
|
@@ -181,7 +181,35 @@ FILTER attributes.aws.local.service = "payment-service" and attributes.aws.local
|
|
|
181
181
|
- `duration > 5` - Find slow requests (over 5 seconds)
|
|
182
182
|
- `annotation[aws.local.operation]="GET /api/orders"` - Filter by specific operation
|
|
183
183
|
|
|
184
|
-
#### 12. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
|
|
184
|
+
#### 12. **`analyze_canary_failures`** - Comprehensive Canary Failure Analysis
|
|
185
|
+
**Deep dive into CloudWatch Synthetics canary failures with root cause identification**
|
|
186
|
+
|
|
187
|
+
- Comprehensive canary failure analysis with deep dive into issues
|
|
188
|
+
- Analyze historical patterns and specific incident details
|
|
189
|
+
- Get comprehensive artifact analysis including logs, screenshots, and HAR files
|
|
190
|
+
- Receive actionable recommendations based on AWS debugging methodology
|
|
191
|
+
- Correlate canary failures with Application Signals telemetry data
|
|
192
|
+
- Identify performance degradation and availability issues across service dependencies
|
|
193
|
+
|
|
194
|
+
**Key Features:**
|
|
195
|
+
- **Failure Pattern Analysis**: Identifies recurring failure modes and temporal patterns
|
|
196
|
+
- **Artifact Deep Dive**: Analyzes canary logs, screenshots, and network traces for root causes
|
|
197
|
+
- **Service Correlation**: Links canary failures to upstream/downstream service issues using Application Signals
|
|
198
|
+
- **Performance Insights**: Detects latency spikes, fault rates, and connection issues
|
|
199
|
+
- **Actionable Remediation**: Provides specific steps based on AWS operational best practices
|
|
200
|
+
- **IAM Analysis**: Validates IAM roles and permissions for common canary access issues
|
|
201
|
+
- **Backend Service Integration**: Correlates canary failures with backend service errors and exceptions
|
|
202
|
+
|
|
203
|
+
**Common Use Cases:**
|
|
204
|
+
- Incident Response: Rapid diagnosis of canary failures during outages
|
|
205
|
+
- Performance Investigation: Understanding latency and availability degradation
|
|
206
|
+
- Dependency Analysis: Identifying which services are causing canary failures
|
|
207
|
+
- Historical Trending: Analyzing failure patterns over time for proactive improvements
|
|
208
|
+
- Root Cause Analysis: Deep dive into specific failure scenarios with full context
|
|
209
|
+
- Infrastructure Issues: Diagnose S3 access, VPC connectivity, and browser target problems
|
|
210
|
+
- Backend Service Debugging: Identify application code issues affecting canary success
|
|
211
|
+
|
|
212
|
+
#### 13. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
|
|
185
213
|
**Use `audit_services()` as the PRIMARY tool for service auditing**
|
|
186
214
|
|
|
187
215
|
- Basic report showing summary counts (total, healthy, breached, insufficient data)
|
|
@@ -526,6 +554,68 @@ Found 8 services being monitored:
|
|
|
526
554
|
3. Review payment-api-service timeout configurations
|
|
527
555
|
```
|
|
528
556
|
|
|
557
|
+
### Example 6: Canary Failure Analysis and Root Cause Investigation
|
|
558
|
+
```
|
|
559
|
+
User: "My Pet Clinic canaries are failing. Can you help me diagnose the issues?"
|
|
560
|
+
Assistant: I'll analyze your Pet Clinic canaries to identify the root causes of failures.
|
|
561
|
+
|
|
562
|
+
[Uses analyze_canary_failures for comprehensive canary analysis]
|
|
563
|
+
|
|
564
|
+
analyze_canary_failures(canary_name="pc-visit-vet")
|
|
565
|
+
analyze_canary_failures(canary_name="pc-add-visit")
|
|
566
|
+
analyze_canary_failures(canary_name="webapp-erorrpagecanary")
|
|
567
|
+
|
|
568
|
+
🔍 CANARY FAILURE ANALYSIS RESULTS:
|
|
569
|
+
|
|
570
|
+
🔴 CRITICAL ISSUES IDENTIFIED:
|
|
571
|
+
|
|
572
|
+
**pc-visit-vet canary:**
|
|
573
|
+
• Root Cause: S3 bucket access issue
|
|
574
|
+
• Error Pattern: Exit status 127, "No such file or directory"
|
|
575
|
+
• Failure Count: 5 consecutive failures
|
|
576
|
+
• IAM Analysis: ✅ Role exists but S3 bucket ARN patterns incorrect in policies
|
|
577
|
+
|
|
578
|
+
**pc-add-visit canary:**
|
|
579
|
+
• Root Cause: Selector timeout + backend service errors
|
|
580
|
+
• Error Pattern: 30000ms timeout waiting for UI element + MissingFormatArgumentException
|
|
581
|
+
• Backend Issue: Format specifier '% o' error in BedrockRuntimeV1Service.invokeTitanModel()
|
|
582
|
+
• Performance: 34 second average response time, 0% success rate
|
|
583
|
+
|
|
584
|
+
**webapp-erorrpagecanary:**
|
|
585
|
+
• Root Cause: Browser target close during selector wait
|
|
586
|
+
• Error Pattern: "Target closed" waiting for `#jsError` selector
|
|
587
|
+
• Failure Count: 5 consecutive failures with 60000ms connection timeouts
|
|
588
|
+
|
|
589
|
+
🔍 BACKEND SERVICE CORRELATION:
|
|
590
|
+
• MissingFormatArgumentException detected in Pet Clinic backend
|
|
591
|
+
• Location: org.springframework.samples.petclinic.customers.aws.BedrockRuntimeV1Service.invokeTitanModel (line 75)
|
|
592
|
+
• Impact: Affects multiple canaries testing Pet Clinic functionality
|
|
593
|
+
• 20% fault rate on GET /api/customer/diagnose/owners/{ownerId}/pets/{petId}
|
|
594
|
+
|
|
595
|
+
🛠️ RECOMMENDED ACTIONS:
|
|
596
|
+
|
|
597
|
+
**Immediate (Critical):**
|
|
598
|
+
1. Fix S3 bucket ARN patterns in pc-visit-vet IAM policy
|
|
599
|
+
2. Fix format string bug in BedrockRuntimeV1Service: change '% o' to '%s' or correct format
|
|
600
|
+
3. Add VPC permissions to canary IAM roles if Lambda runs in VPC
|
|
601
|
+
|
|
602
|
+
**Infrastructure (High Priority):**
|
|
603
|
+
4. Investigate browser target stability issues (webapp-erorrpagecanary)
|
|
604
|
+
5. Review canary timeout configurations - consider increasing from 30s to 60s
|
|
605
|
+
6. Implement circuit breaker pattern for external service dependencies
|
|
606
|
+
|
|
607
|
+
**Monitoring (Medium Priority):**
|
|
608
|
+
7. Add Application Signals monitoring for canary success rates
|
|
609
|
+
8. Set up alerts for consecutive canary failures (>3 failures)
|
|
610
|
+
9. Implement canary health dashboard with real-time status
|
|
611
|
+
|
|
612
|
+
🎯 EXPECTED OUTCOMES:
|
|
613
|
+
• S3 access fix: Immediate resolution of pc-visit-vet failures
|
|
614
|
+
• Backend service fix: 80%+ improvement in Pet Clinic canary success rates
|
|
615
|
+
• Infrastructure improvements: Reduced browser target close errors
|
|
616
|
+
• Enhanced monitoring: Proactive failure detection and faster resolution
|
|
617
|
+
```
|
|
618
|
+
|
|
529
619
|
## Recommended Workflows
|
|
530
620
|
|
|
531
621
|
### 🎯 Primary Audit Workflow (Most Common)
|
|
@@ -151,7 +151,35 @@ FILTER attributes.aws.local.service = "payment-service" and attributes.aws.local
|
|
|
151
151
|
- `duration > 5` - Find slow requests (over 5 seconds)
|
|
152
152
|
- `annotation[aws.local.operation]="GET /api/orders"` - Filter by specific operation
|
|
153
153
|
|
|
154
|
-
#### 12. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
|
|
154
|
+
#### 12. **`analyze_canary_failures`** - Comprehensive Canary Failure Analysis
|
|
155
|
+
**Deep dive into CloudWatch Synthetics canary failures with root cause identification**
|
|
156
|
+
|
|
157
|
+
- Comprehensive canary failure analysis with deep dive into issues
|
|
158
|
+
- Analyze historical patterns and specific incident details
|
|
159
|
+
- Get comprehensive artifact analysis including logs, screenshots, and HAR files
|
|
160
|
+
- Receive actionable recommendations based on AWS debugging methodology
|
|
161
|
+
- Correlate canary failures with Application Signals telemetry data
|
|
162
|
+
- Identify performance degradation and availability issues across service dependencies
|
|
163
|
+
|
|
164
|
+
**Key Features:**
|
|
165
|
+
- **Failure Pattern Analysis**: Identifies recurring failure modes and temporal patterns
|
|
166
|
+
- **Artifact Deep Dive**: Analyzes canary logs, screenshots, and network traces for root causes
|
|
167
|
+
- **Service Correlation**: Links canary failures to upstream/downstream service issues using Application Signals
|
|
168
|
+
- **Performance Insights**: Detects latency spikes, fault rates, and connection issues
|
|
169
|
+
- **Actionable Remediation**: Provides specific steps based on AWS operational best practices
|
|
170
|
+
- **IAM Analysis**: Validates IAM roles and permissions for common canary access issues
|
|
171
|
+
- **Backend Service Integration**: Correlates canary failures with backend service errors and exceptions
|
|
172
|
+
|
|
173
|
+
**Common Use Cases:**
|
|
174
|
+
- Incident Response: Rapid diagnosis of canary failures during outages
|
|
175
|
+
- Performance Investigation: Understanding latency and availability degradation
|
|
176
|
+
- Dependency Analysis: Identifying which services are causing canary failures
|
|
177
|
+
- Historical Trending: Analyzing failure patterns over time for proactive improvements
|
|
178
|
+
- Root Cause Analysis: Deep dive into specific failure scenarios with full context
|
|
179
|
+
- Infrastructure Issues: Diagnose S3 access, VPC connectivity, and browser target problems
|
|
180
|
+
- Backend Service Debugging: Identify application code issues affecting canary success
|
|
181
|
+
|
|
182
|
+
#### 13. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
|
|
155
183
|
**Use `audit_services()` as the PRIMARY tool for service auditing**
|
|
156
184
|
|
|
157
185
|
- Basic report showing summary counts (total, healthy, breached, insufficient data)
|
|
@@ -496,6 +524,68 @@ Found 8 services being monitored:
|
|
|
496
524
|
3. Review payment-api-service timeout configurations
|
|
497
525
|
```
|
|
498
526
|
|
|
527
|
+
### Example 6: Canary Failure Analysis and Root Cause Investigation
|
|
528
|
+
```
|
|
529
|
+
User: "My Pet Clinic canaries are failing. Can you help me diagnose the issues?"
|
|
530
|
+
Assistant: I'll analyze your Pet Clinic canaries to identify the root causes of failures.
|
|
531
|
+
|
|
532
|
+
[Uses analyze_canary_failures for comprehensive canary analysis]
|
|
533
|
+
|
|
534
|
+
analyze_canary_failures(canary_name="pc-visit-vet")
|
|
535
|
+
analyze_canary_failures(canary_name="pc-add-visit")
|
|
536
|
+
analyze_canary_failures(canary_name="webapp-erorrpagecanary")
|
|
537
|
+
|
|
538
|
+
🔍 CANARY FAILURE ANALYSIS RESULTS:
|
|
539
|
+
|
|
540
|
+
🔴 CRITICAL ISSUES IDENTIFIED:
|
|
541
|
+
|
|
542
|
+
**pc-visit-vet canary:**
|
|
543
|
+
• Root Cause: S3 bucket access issue
|
|
544
|
+
• Error Pattern: Exit status 127, "No such file or directory"
|
|
545
|
+
• Failure Count: 5 consecutive failures
|
|
546
|
+
• IAM Analysis: ✅ Role exists but S3 bucket ARN patterns incorrect in policies
|
|
547
|
+
|
|
548
|
+
**pc-add-visit canary:**
|
|
549
|
+
• Root Cause: Selector timeout + backend service errors
|
|
550
|
+
• Error Pattern: 30000ms timeout waiting for UI element + MissingFormatArgumentException
|
|
551
|
+
• Backend Issue: Format specifier '% o' error in BedrockRuntimeV1Service.invokeTitanModel()
|
|
552
|
+
• Performance: 34 second average response time, 0% success rate
|
|
553
|
+
|
|
554
|
+
**webapp-erorrpagecanary:**
|
|
555
|
+
• Root Cause: Browser target close during selector wait
|
|
556
|
+
• Error Pattern: "Target closed" waiting for `#jsError` selector
|
|
557
|
+
• Failure Count: 5 consecutive failures with 60000ms connection timeouts
|
|
558
|
+
|
|
559
|
+
🔍 BACKEND SERVICE CORRELATION:
|
|
560
|
+
• MissingFormatArgumentException detected in Pet Clinic backend
|
|
561
|
+
• Location: org.springframework.samples.petclinic.customers.aws.BedrockRuntimeV1Service.invokeTitanModel (line 75)
|
|
562
|
+
• Impact: Affects multiple canaries testing Pet Clinic functionality
|
|
563
|
+
• 20% fault rate on GET /api/customer/diagnose/owners/{ownerId}/pets/{petId}
|
|
564
|
+
|
|
565
|
+
🛠️ RECOMMENDED ACTIONS:
|
|
566
|
+
|
|
567
|
+
**Immediate (Critical):**
|
|
568
|
+
1. Fix S3 bucket ARN patterns in pc-visit-vet IAM policy
|
|
569
|
+
2. Fix format string bug in BedrockRuntimeV1Service: change '% o' to '%s' or correct format
|
|
570
|
+
3. Add VPC permissions to canary IAM roles if Lambda runs in VPC
|
|
571
|
+
|
|
572
|
+
**Infrastructure (High Priority):**
|
|
573
|
+
4. Investigate browser target stability issues (webapp-erorrpagecanary)
|
|
574
|
+
5. Review canary timeout configurations - consider increasing from 30s to 60s
|
|
575
|
+
6. Implement circuit breaker pattern for external service dependencies
|
|
576
|
+
|
|
577
|
+
**Monitoring (Medium Priority):**
|
|
578
|
+
7. Add Application Signals monitoring for canary success rates
|
|
579
|
+
8. Set up alerts for consecutive canary failures (>3 failures)
|
|
580
|
+
9. Implement canary health dashboard with real-time status
|
|
581
|
+
|
|
582
|
+
🎯 EXPECTED OUTCOMES:
|
|
583
|
+
• S3 access fix: Immediate resolution of pc-visit-vet failures
|
|
584
|
+
• Backend service fix: 80%+ improvement in Pet Clinic canary success rates
|
|
585
|
+
• Infrastructure improvements: Reduced browser target close errors
|
|
586
|
+
• Enhanced monitoring: Proactive failure detection and faster resolution
|
|
587
|
+
```
|
|
588
|
+
|
|
499
589
|
## Recommended Workflows
|
|
500
590
|
|
|
501
591
|
### 🎯 Primary Audit Workflow (Most Common)
|
|
@@ -654,7 +654,12 @@ def expand_service_operation_wildcard_patterns(
|
|
|
654
654
|
# Check if this operation has the required metric type
|
|
655
655
|
metric_refs = operation.get('MetricReferences', [])
|
|
656
656
|
has_metric_type = any(
|
|
657
|
-
ref.get('MetricType', '') == metric_type
|
|
657
|
+
ref.get('MetricType', '') == metric_type
|
|
658
|
+
or (
|
|
659
|
+
metric_type == 'Availability'
|
|
660
|
+
and ref.get('MetricType', '') == 'Fault'
|
|
661
|
+
)
|
|
662
|
+
for ref in metric_refs
|
|
658
663
|
)
|
|
659
664
|
|
|
660
665
|
if has_metric_type:
|
|
@@ -35,6 +35,7 @@ def _initialize_aws_clients():
|
|
|
35
35
|
logs_endpoint = os.environ.get('MCP_LOGS_ENDPOINT')
|
|
36
36
|
cloudwatch_endpoint = os.environ.get('MCP_CLOUDWATCH_ENDPOINT')
|
|
37
37
|
xray_endpoint = os.environ.get('MCP_XRAY_ENDPOINT')
|
|
38
|
+
synthetics_endpoint = os.environ.get('MCP_SYNTHETICS_ENDPOINT')
|
|
38
39
|
|
|
39
40
|
# Log endpoint overrides
|
|
40
41
|
if appsignals_endpoint:
|
|
@@ -45,6 +46,8 @@ def _initialize_aws_clients():
|
|
|
45
46
|
logger.debug(f'Using CloudWatch endpoint override: {cloudwatch_endpoint}')
|
|
46
47
|
if xray_endpoint:
|
|
47
48
|
logger.debug(f'Using X-Ray endpoint override: {xray_endpoint}')
|
|
49
|
+
if synthetics_endpoint:
|
|
50
|
+
logger.debug(f'Using Synthetics endpoint override: {synthetics_endpoint}')
|
|
48
51
|
|
|
49
52
|
# Check for AWS_PROFILE environment variable
|
|
50
53
|
if aws_profile := os.environ.get('AWS_PROFILE'):
|
|
@@ -59,6 +62,11 @@ def _initialize_aws_clients():
|
|
|
59
62
|
)
|
|
60
63
|
cloudwatch = session.client('cloudwatch', config=config, endpoint_url=cloudwatch_endpoint)
|
|
61
64
|
xray = session.client('xray', config=config, endpoint_url=xray_endpoint)
|
|
65
|
+
synthetics = session.client('synthetics', config=config, endpoint_url=synthetics_endpoint)
|
|
66
|
+
s3 = session.client('s3', config=config)
|
|
67
|
+
iam = session.client('iam', config=config)
|
|
68
|
+
lambda_client = session.client('lambda', config=config)
|
|
69
|
+
sts = session.client('sts', config=config)
|
|
62
70
|
else:
|
|
63
71
|
logs = boto3.client(
|
|
64
72
|
'logs', region_name=AWS_REGION, config=config, endpoint_url=logs_endpoint
|
|
@@ -75,14 +83,32 @@ def _initialize_aws_clients():
|
|
|
75
83
|
xray = boto3.client(
|
|
76
84
|
'xray', region_name=AWS_REGION, config=config, endpoint_url=xray_endpoint
|
|
77
85
|
)
|
|
86
|
+
# Additional clients for canary functionality
|
|
87
|
+
synthetics = boto3.client(
|
|
88
|
+
'synthetics', region_name=AWS_REGION, config=config, endpoint_url=synthetics_endpoint
|
|
89
|
+
)
|
|
90
|
+
s3 = boto3.client('s3', region_name=AWS_REGION, config=config)
|
|
91
|
+
iam = boto3.client('iam', region_name=AWS_REGION, config=config)
|
|
92
|
+
lambda_client = boto3.client('lambda', region_name=AWS_REGION, config=config)
|
|
93
|
+
sts = boto3.client('sts', region_name=AWS_REGION, config=config)
|
|
78
94
|
|
|
79
95
|
logger.debug('AWS clients initialized successfully')
|
|
80
|
-
return logs, appsignals, cloudwatch, xray
|
|
96
|
+
return logs, appsignals, cloudwatch, xray, synthetics, s3, iam, lambda_client, sts
|
|
81
97
|
|
|
82
98
|
|
|
83
99
|
# Initialize clients at module level
|
|
84
100
|
try:
|
|
85
|
-
|
|
101
|
+
(
|
|
102
|
+
logs_client,
|
|
103
|
+
appsignals_client,
|
|
104
|
+
cloudwatch_client,
|
|
105
|
+
xray_client,
|
|
106
|
+
synthetics_client,
|
|
107
|
+
s3_client,
|
|
108
|
+
iam_client,
|
|
109
|
+
lambda_client,
|
|
110
|
+
sts_client,
|
|
111
|
+
) = _initialize_aws_clients()
|
|
86
112
|
except Exception as e:
|
|
87
113
|
logger.error(f'Failed to initialize AWS clients: {str(e)}')
|
|
88
114
|
raise
|