awslabs.cloudwatch-appsignals-mcp-server 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/PKG-INFO +636 -0
  2. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/README.md +606 -0
  3. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/awslabs/cloudwatch_appsignals_mcp_server/__init__.py +1 -1
  4. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/audit_presentation_utils.py +231 -0
  5. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/audit_utils.py +699 -0
  6. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/aws_clients.py +88 -0
  7. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/server.py +820 -0
  8. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/service_audit_utils.py +231 -0
  9. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/service_tools.py +659 -0
  10. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/awslabs/cloudwatch_appsignals_mcp_server/sli_report_client.py +5 -12
  11. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/slo_tools.py +386 -0
  12. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/trace_tools.py +658 -0
  13. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/awslabs/cloudwatch_appsignals_mcp_server/utils.py +172 -0
  14. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/pyproject.toml +2 -1
  15. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_audit_presentation_utils.py +361 -0
  16. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_audit_utils.py +647 -0
  17. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/tests/test_aws_profile.py +5 -3
  18. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/tests/test_initialization.py +3 -3
  19. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/tests/test_server.py +136 -58
  20. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_server_audit_functions.py +542 -0
  21. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_server_audit_tools.py +165 -0
  22. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_service_audit_utils.py +446 -0
  23. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_service_tools_operations.py +584 -0
  24. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/tests/test_sli_report_client.py +15 -27
  25. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_slo_tools.py +536 -0
  26. awslabs_cloudwatch_appsignals_mcp_server-0.1.8/tests/test_utils.py +294 -0
  27. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/uv.lock +743 -717
  28. awslabs_cloudwatch_appsignals_mcp_server-0.1.7/PKG-INFO +0 -350
  29. awslabs_cloudwatch_appsignals_mcp_server-0.1.7/README.md +0 -320
  30. awslabs_cloudwatch_appsignals_mcp_server-0.1.7/awslabs/cloudwatch_appsignals_mcp_server/server.py +0 -1365
  31. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/.gitignore +0 -0
  32. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/.python-version +0 -0
  33. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/CHANGELOG.md +0 -0
  34. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/Dockerfile +0 -0
  35. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/LICENSE +0 -0
  36. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/NOTICE +0 -0
  37. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/awslabs/__init__.py +0 -0
  38. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/docker-healthcheck.sh +0 -0
  39. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/tests/conftest.py +0 -0
  40. {awslabs_cloudwatch_appsignals_mcp_server-0.1.7 → awslabs_cloudwatch_appsignals_mcp_server-0.1.8}/uv-requirements.txt +0 -0
@@ -0,0 +1,636 @@
1
+ Metadata-Version: 2.4
2
+ Name: awslabs.cloudwatch-appsignals-mcp-server
3
+ Version: 0.1.8
4
+ Summary: An AWS Labs Model Context Protocol (MCP) server for AWS Application Signals
5
+ Project-URL: Homepage, https://awslabs.github.io/mcp/
6
+ Project-URL: Documentation, https://awslabs.github.io/mcp/servers/cloudwatch-appsignals-mcp-server/
7
+ Project-URL: Source, https://github.com/awslabs/mcp.git
8
+ Project-URL: Bug Tracker, https://github.com/awslabs/mcp/issues
9
+ Project-URL: Changelog, https://github.com/awslabs/mcp/blob/main/src/cloudwatch-appsignals-mcp-server/CHANGELOG.md
10
+ Author: Amazon Web Services
11
+ Author-email: AWSLabs MCP <203918161+awslabs-mcp@users.noreply.github.com>
12
+ License: Apache-2.0
13
+ License-File: LICENSE
14
+ License-File: NOTICE
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: boto3>=1.37.24
25
+ Requires-Dist: httpx>=0.24.0
26
+ Requires-Dist: loguru>=0.7.3
27
+ Requires-Dist: mcp[cli]>=1.11.0
28
+ Requires-Dist: pydantic>=2.11.1
29
+ Description-Content-Type: text/markdown
30
+
31
+ # CloudWatch Application Signals MCP Server
32
+
33
+ An MCP (Model Context Protocol) server that provides comprehensive tools for monitoring and analyzing AWS services using [AWS Application Signals](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Application-Signals.html).
34
+
35
+ This server enables AI assistants like Claude, GitHub Copilot, and Amazon Q to help you monitor service health, analyze performance metrics, track SLO compliance, and investigate issues using distributed tracing with advanced audit capabilities and root cause analysis.
36
+
37
+ ## Key Features
38
+
39
+ 1. **Comprehensive Service Auditing** - Monitor overall service health, diagnose root causes, and recommend actionable fixes with built-in APM expertise
40
+ 2. **Advanced SLO Compliance Monitoring** - Track Service Level Objectives with breach detection and root cause analysis
41
+ 3. **Operation-Level Performance Analysis** - Deep dive into specific API endpoints and operations
42
+ 4. **100% Trace Visibility** - Query OpenTelemetry spans data via Transaction Search for complete observability
43
+ 5. **Multi-Service Analysis** - Audit multiple services simultaneously with automatic batching
44
+ 6. **Natural Language Insights** - Generate business insights from telemetry data through natural language queries
45
+
46
+ ## Prerequisites
47
+
48
+ 1. [Sign-Up for an AWS account](https://aws.amazon.com/free/?trk=78b916d7-7c94-4cab-98d9-0ce5e648dd5f&sc_channel=ps&ef_id=Cj0KCQjwxJvBBhDuARIsAGUgNfjOZq8r2bH2OfcYfYTht5v5I1Bn0lBKiI2Ii71A8Gk39ZU5cwMLPkcaAo_CEALw_wcB:G:s&s_kwcid=AL!4422!3!432339156162!e!!g!!aws%20sign%20up!9572385111!102212379327&gad_campaignid=9572385111&gbraid=0AAAAADjHtp99c5A9DUyUaUQVhVEoi8of3&gclid=Cj0KCQjwxJvBBhDuARIsAGUgNfjOZq8r2bH2OfcYfYTht5v5I1Bn0lBKiI2Ii71A8Gk39ZU5cwMLPkcaAo_CEALw_wcB)
49
+ 2. [Enable Application Signals](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Application-Monitoring-Sections.html) for your applications
50
+ 3. Install `uv` from [Astral](https://docs.astral.sh/uv/getting-started/installation/) or the [GitHub README](https://github.com/astral-sh/uv#installation)
51
+ 4. Install Python using `uv python install 3.10`
52
+
53
+ ## Available Tools
54
+
55
+ ### 🥇 Primary Audit Tools (Use These First)
56
+
57
+ #### 1. **`audit_services`** ⭐ **PRIMARY SERVICE AUDIT TOOL**
58
+ **The #1 tool for comprehensive AWS service health auditing and monitoring**
59
+
60
+ - **USE THIS FIRST** for all service-level auditing tasks
61
+ - Comprehensive health assessment with actionable insights and recommendations
62
+ - Multi-service analysis with automatic batching (audit 1-100+ services simultaneously)
63
+ - SLO compliance monitoring with automatic breach detection
64
+ - Root cause analysis with traces, logs, and metrics correlation
65
+ - Issue prioritization by severity (critical, warning, info findings)
66
+ - **Wildcard Pattern Support**: Use `*payment*` for automatic service discovery
67
+ - Performance optimized for fast execution across multiple targets
68
+
69
+ **Key Use Cases:**
70
+ - `audit_services(service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*"}}}]')` - Audit all services
71
+ - `audit_services(service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*payment*"}}}]')` - Audit payment services
72
+ - `audit_services(..., auditors="all")` - Comprehensive root cause analysis with all auditors
73
+
74
+ #### 2. **`audit_slos`** ⭐ **PRIMARY SLO AUDIT TOOL**
75
+ **The #1 tool for comprehensive SLO compliance monitoring and breach analysis**
76
+
77
+ - **PREFERRED TOOL** for SLO root cause analysis after using `get_slo()`
78
+ - Much more comprehensive than individual trace tools - provides integrated analysis
79
+ - Combines traces, logs, metrics, and dependencies in a single audit
80
+ - Automatic SLO breach detection with prioritized findings
81
+ - **Wildcard Pattern Support**: Use `*payment*` for automatic SLO discovery
82
+ - Actionable recommendations based on multi-dimensional analysis
83
+
84
+ **Key Use Cases:**
85
+ - `audit_slos(slo_targets='[{"Type":"slo","Data":{"Slo":{"SloName":"*"}}}]')` - Audit all SLOs
86
+ - `audit_slos(..., auditors="all")` - Comprehensive root cause analysis for SLO breaches
87
+
88
+ #### 3. **`audit_service_operations`** 🥇 **PRIMARY OPERATION AUDIT TOOL**
89
+ **The #1 RECOMMENDED tool for operation-specific analysis and performance investigation**
90
+
91
+ - **PREFERRED OVER audit_services()** for operation-level auditing
92
+ - Precision targeting of exact operation behavior vs. service-wide averages
93
+ - Actionable insights with specific error traces and dependency failures
94
+ - Code-level detail with exact stack traces and timeout locations
95
+ - **Wildcard Pattern Support**: Use `*GET*` for specific operation types
96
+ - Focused analysis that eliminates noise from other operations
97
+
98
+ **Key Use Cases:**
99
+ - `audit_service_operations(operation_targets='[{"Type":"service_operation","Data":{"ServiceOperation":{"Service":{"Type":"Service","Name":"*payment*"},"Operation":"*GET*","MetricType":"Latency"}}}]')` - Audit GET operations in payment services
100
+ - `audit_service_operations(..., auditors="all")` - Root cause analysis for specific operations
101
+
102
+ ### 📊 Service Discovery & Information Tools
103
+
104
+ #### 4. **`list_monitored_services`** - Service Discovery Tool
105
+ **OPTIONAL TOOL** - `audit_services()` can automatically discover services using wildcard patterns
106
+
107
+ - Get detailed overview of all monitored services in your environment
108
+ - Discover specific service names and environments for manual audit target construction
109
+ - **RECOMMENDED**: Use `audit_services()` with wildcard patterns instead for comprehensive discovery AND analysis
110
+
111
+ #### 5. **`get_service_detail`** - Service Metadata Tool
112
+ **For basic service metadata and configuration details**
113
+
114
+ - Service metadata and configuration (platform information, key attributes)
115
+ - Service-level metrics (Latency, Error, Fault aggregates)
116
+ - Log groups associated with the service
117
+ - **IMPORTANT**: This tool does NOT provide operation names - use `audit_services()` for operation discovery
118
+
119
+ #### 6. **`list_service_operations`** - Operation Discovery Tool
120
+ **CRITICAL LIMITATION**: Only discovers operations that have been ACTIVELY INVOKED in the specified time window
121
+
122
+ - Basic operation inventory for RECENTLY ACTIVE operations only (max 24 hours)
123
+ - Empty results ≠ no operations exist, just no recent invocations
124
+ - **RECOMMENDED**: Use `audit_services()` FIRST for comprehensive operation discovery and analysis
125
+
126
+ ### 🎯 SLO Management Tools
127
+
128
+ #### 7. **`get_slo`** - SLO Configuration Details
129
+ **Essential for understanding SLO configuration before deep investigation**
130
+
131
+ - Comprehensive SLO configuration details (metrics, thresholds, goals)
132
+ - Operation names and key attributes for further investigation
133
+ - Metric type (LATENCY or AVAILABILITY) and comparison operators
134
+ - **NEXT STEP**: Use `audit_slos()` with `auditors="all"` for root cause analysis
135
+
136
+ #### 8. **`list_slos`** - SLO Discovery
137
+ **List all Service Level Objectives in Application Signals**
138
+
139
+ - Complete list of all SLOs in your account with names and ARNs
140
+ - Filter SLOs by service attributes
141
+ - Basic SLO information including creation time and operation names
142
+ - Useful for SLO discovery and finding SLO names for use with other tools
143
+
144
+ ### 📈 Metrics & Performance Tools
145
+
146
+ #### 9. **`query_service_metrics`** - CloudWatch Metrics Analysis
147
+ **Get CloudWatch metrics for specific Application Signals services**
148
+
149
+ - Analyze service performance (latency, throughput, error rates)
150
+ - View trends over time with both standard statistics and percentiles
151
+ - Automatic granularity adjustment based on time range
152
+ - Summary statistics with recent data points and timestamps
153
+
154
+ ### 🔍 Advanced Trace & Log Analysis Tools
155
+
156
+ #### 10. **`search_transaction_spans`** - 100% Trace Visibility
157
+ **Query OpenTelemetry Spans data via Transaction Search (100% sampled data)**
158
+
159
+ - **100% sampled data** vs X-Ray's 5% sampling for more accurate results
160
+ - Query "aws/spans" log group with CloudWatch Logs Insights
161
+ - Generate business performance insights and summaries
162
+ - **IMPORTANT**: Always include a limit in queries to prevent overwhelming context
163
+
164
+ **Example Query:**
165
+ ```
166
+ FILTER attributes.aws.local.service = "payment-service" and attributes.aws.local.environment = "eks:production"
167
+ | STATS avg(duration) as avg_latency by attributes.aws.local.operation
168
+ | LIMIT 50
169
+ ```
170
+
171
+ #### 11. **`query_sampled_traces`** - X-Ray Trace Analysis (Secondary Tool)
172
+ **Query AWS X-Ray traces (5% sampled data) for trace investigation**
173
+
174
+ - **⚠️ IMPORTANT**: Consider using `audit_slos()` with `auditors="all"` instead for comprehensive root cause analysis
175
+ - Uses X-Ray's 5% sampled trace data - may miss critical errors
176
+ - Limited context compared to comprehensive audit tools
177
+ - **RECOMMENDATION**: Use `audit_services()` for operation discovery and `audit_slos()` for root cause analysis
178
+
179
+ **Common Filter Expressions:**
180
+ - `service("service-name"){fault = true}` - Find traces with faults (5xx errors)
181
+ - `duration > 5` - Find slow requests (over 5 seconds)
182
+ - `annotation[aws.local.operation]="GET /api/orders"` - Filter by specific operation
183
+
184
+ #### 12. **`list_slis`** - Legacy SLI Status Report (Specialized Tool)
185
+ **Use `audit_services()` as the PRIMARY tool for service auditing**
186
+
187
+ - Basic report showing summary counts (total, healthy, breached, insufficient data)
188
+ - Simple list of breached services with SLO names
189
+ - **IMPORTANT**: `audit_services()` is the PRIMARY and PREFERRED tool for all service auditing tasks
190
+ - Only use this tool for legacy SLI status report format specifically
191
+
192
+ ## Installation
193
+
194
+ ### One-Click Installation
195
+
196
+ | Cursor | VS Code |
197
+ |:------:|:-------:|
198
+ | [![Install MCP Server](https://cursor.com/deeplink/mcp-install-light.svg)](https://cursor.com/en/install-mcp?name=awslabs.cloudwatch-appsignals-mcp-server&config=eyJhdXRvQXBwcm92ZSI6W10sImRpc2FibGVkIjpmYWxzZSwidGltZW91dCI6NjAsImNvbW1hbmQiOiJ1dnggYXdzbGFicy5jbG91ZHdhdGNoLWFwcHNpZ25hbHMtbWNwLXNlcnZlckBsYXRlc3QiLCJlbnYiOnsiQVdTX1BST0ZJTEUiOiJbVGhlIEFXUyBQcm9maWxlIE5hbWUgdG8gdXNlIGZvciBBV1MgYWNjZXNzXSIsIkFXU19SRUdJT04iOiJbVGhlIEFXUyByZWdpb24gdG8gcnVuIGluXSIsIkZBU1RNQ1BfTE9HX0xFVkVMIjoiRVJST1IifSwidHJhbnNwb3J0VHlwZSI6InN0ZGlvIn0%3D) | [![Install on VS Code](https://img.shields.io/badge/Install_on-VS_Code-FF9900?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=CloudWatch%20Application%20Signals%20MCP%20Server&config=%7B%22autoApprove%22%3A%5B%5D%2C%22disabled%22%3Afalse%2C%22timeout%22%3A60%2C%22command%22%3A%22uvx%22%2C%22args%22%3A%5B%22awslabs.cloudwatch-appsignals-mcp-server%40latest%22%5D%2C%22env%22%3A%7B%22AWS_PROFILE%22%3A%22%5BThe%20AWS%20Profile%20Name%20to%20use%20for%20AWS%20access%5D%22%2C%22AWS_REGION%22%3A%22%5BThe%20AWS%20region%20to%20run%20in%5D%22%2C%22FASTMCP_LOG_LEVEL%22%3A%22ERROR%22%7D%2C%22transportType%22%3A%22stdio%22%7D) |
199
+
200
+ ### Installing via `uv`
201
+
202
+ When using [`uv`](https://docs.astral.sh/uv/) no specific installation is needed. We will
203
+ use [`uvx`](https://docs.astral.sh/uv/guides/tools/) to directly run *awslabs.cloudwatch-appsignals-mcp-server*.
204
+
205
+ ### Installing for Amazon Q (Preview)
206
+
207
+ - Install the Amazon Q Developer CLI from [here](https://github.com/aws/amazon-q-developer-cli) and start it.
208
+ - Add the following configuration in `~/.aws/amazonq/mcp.json` file.
209
+ ```json
210
+ {
211
+ "mcpServers": {
212
+ "awslabs.cloudwatch-appsignals-mcp": {
213
+ "autoApprove": [],
214
+ "disabled": false,
215
+ "command": "uvx",
216
+ "args": [
217
+ "awslabs.cloudwatch-appsignals-mcp-server@latest"
218
+ ],
219
+ "env": {
220
+ "AWS_PROFILE": "[The AWS Profile Name to use for AWS access]",
221
+ "AWS_REGION": "[AWS Region]",
222
+ "FASTMCP_LOG_LEVEL": "ERROR"
223
+ },
224
+ "transportType": "stdio"
225
+ }
226
+ }
227
+ }
228
+ ```
229
+
230
+ ### Installing via Claude Desktop
231
+
232
+ On macOS: `~/Library/Application\ Support/Claude/claude_desktop_config.json`
233
+ On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
234
+
235
+ <details>
236
+ <summary>Development/Unpublished Servers Configuration</summary>
237
+ When installing a development or unpublished server, add the `--from` flag with the absolute path to the server directory:
238
+
239
+ ```json
240
+ {
241
+ "mcpServers": {
242
+ "awslabs.cloudwatch-appsignals-mcp-server": {
243
+ "command": "uvx",
244
+ "args": ["--from", "/absolute/path/to/cloudwatch-appsignals-mcp-server", "awslabs.cloudwatch-appsignals-mcp-server"],
245
+ "env": {
246
+ "AWS_PROFILE": "[The AWS Profile Name to use for AWS access]",
247
+ "AWS_REGION": "[AWS Region]"
248
+ }
249
+ }
250
+ }
251
+ }
252
+ ```
253
+ </details>
254
+
255
+ <details>
256
+ <summary>Published Servers Configuration</summary>
257
+
258
+ ```json
259
+ {
260
+ "mcpServers": {
261
+ "awslabs.cloudwatch-appsignals-mcp-server": {
262
+ "command": "uvx",
263
+ "args": ["awslabs.cloudwatch-appsignals-mcp-server@latest"],
264
+ "env": {
265
+ "AWS_PROFILE": "[The AWS Profile Name to use for AWS access]",
266
+ "AWS_REGION": "[AWS Region]"
267
+ }
268
+ }
269
+ }
270
+ }
271
+ ```
272
+ </details>
273
+
274
+ ### Windows Installation
275
+
276
+ For Windows users, the MCP server configuration format is slightly different:
277
+
278
+ ```json
279
+ {
280
+ "mcpServers": {
281
+ "awslabs.cloudwatch-appsignals-mcp-server": {
282
+ "disabled": false,
283
+ "timeout": 60,
284
+ "type": "stdio",
285
+ "command": "uv",
286
+ "args": [
287
+ "tool",
288
+ "run",
289
+ "--from",
290
+ "awslabs.cloudwatch-appsignals-mcp-server@latest",
291
+ "awslabs.cloudwatch-appsignals-mcp-server.exe"
292
+ ],
293
+ "env": {
294
+ "FASTMCP_LOG_LEVEL": "ERROR",
295
+ "AWS_PROFILE": "your-aws-profile",
296
+ "AWS_REGION": "us-east-1"
297
+ }
298
+ }
299
+ }
300
+ }
301
+ ```
302
+
303
+ ### Build and install the Docker image locally on the same host as your LLM client
304
+
305
+ 1. `git clone https://github.com/awslabs/mcp.git`
306
+ 2. Go to the sub-directory `src/cloudwatch-appsignals-mcp-server/`
307
+ 3. Run `docker build -t awslabs/cloudwatch-appsignals-mcp-server:latest .`
308
+
309
+ ### Add or update your LLM client's config with the following:
310
+ ```json
311
+ {
312
+ "mcpServers": {
313
+ "awslabs.cloudwatch-appsignals-mcp-server": {
314
+ "command": "docker",
315
+ "args": [
316
+ "run",
317
+ "-i",
318
+ "--rm",
319
+ "-v", "${HOME}/.aws:/root/.aws:ro",
320
+ "-e", "AWS_PROFILE=[The AWS Profile Name to use for AWS access]",
321
+ "-e", "AWS_REGION=[AWS Region]",
322
+ "awslabs/cloudwatch-appsignals-mcp-server:latest"
323
+ ]
324
+ }
325
+ }
326
+ }
327
+ ```
328
+
329
+ ### Debugging
330
+
331
+ You can use the MCP inspector to debug the server. For `uvx` installations:
332
+
333
+ ```bash
334
+ npx @modelcontextprotocol/inspector uvx awslabs.cloudwatch-appsignals-mcp-server
335
+ ```
336
+
337
+ Or if you've installed the package in a specific directory:
338
+
339
+ ```bash
340
+ cd /path/to/cloudwatch-appsignals-mcp-server
341
+ npx @modelcontextprotocol/inspector uv run awslabs.cloudwatch-appsignals-mcp-server
342
+ ```
343
+
344
+ ## Usage Examples
345
+
346
+ ### Example 1: Comprehensive Service Health Audit (Recommended Workflow)
347
+ ```
348
+ User: "Audit the health of all my payment services"
349
+ Assistant: I'll perform a comprehensive health audit of all your payment services using the primary audit tool.
350
+
351
+ [Uses audit_services with wildcard pattern]
352
+
353
+ audit_services(
354
+ service_targets='[{"Type":"service","Data":{"Service":{"Type":"Service","Name":"*payment*"}}}]',
355
+ auditors="slo,operation_metric"
356
+ )
357
+
358
+ Results: Found 4 payment services with the following findings:
359
+
360
+ 🔴 CRITICAL FINDINGS:
361
+ • payment-api-service: SLO breach detected - Latency SLO violated (p99: 2.1s > 1.5s threshold)
362
+ • payment-processor: High error rate detected (8.2% > 5% threshold)
363
+
364
+ ⚠️ WARNING FINDINGS:
365
+ • payment-gateway: Dependency timeout issues with external payment provider
366
+
367
+ ✅ HEALTHY:
368
+ • payment-validator: All metrics within normal ranges
369
+
370
+ Recommendations:
371
+ 1. Investigate payment-api-service latency issues - check database connection pool
372
+ 2. Review payment-processor error logs for root cause analysis
373
+ 3. Consider circuit breaker pattern for payment-gateway external dependencies
374
+ ```
375
+
376
+ ### Example 2: SLO Breach Root Cause Analysis
377
+ ```
378
+ User: "I have an SLO breach for my checkout service. Can you help me investigate?"
379
+ Assistant: I'll help you investigate the SLO breach. Let me first get the SLO configuration and then perform a comprehensive root cause analysis.
380
+
381
+ [Uses get_slo to understand configuration]
382
+ [Then uses audit_slos with auditors="all" for comprehensive analysis]
383
+
384
+ audit_slos(
385
+ slo_targets='[{"Type":"slo","Data":{"Slo":{"SloName":"checkout-service-latency-slo"}}}]',
386
+ auditors="all"
387
+ )
388
+
389
+ Root Cause Analysis Results:
390
+
391
+ 🔴 SLO BREACH CONFIRMED:
392
+ • checkout-service-latency-slo: 87.3% attainment (< 95% goal)
393
+ • Breach duration: 2.5 hours
394
+ • Impact: 1,247 requests affected
395
+
396
+ 🔍 ROOT CAUSE IDENTIFIED:
397
+ • Database connection pool exhaustion detected
398
+ • Trace analysis shows 95% of slow requests waiting for DB connections
399
+ • Dependency on payment-service also showing elevated latency
400
+
401
+ 📊 SUPPORTING EVIDENCE:
402
+ • P99 latency: 3.2s (threshold: 2.0s)
403
+ • Error rate spike: 12% during breach period
404
+ • 15 error traces with "connection timeout" messages
405
+
406
+ 🛠️ RECOMMENDED ACTIONS:
407
+ 1. Increase database connection pool size from 10 to 25
408
+ 2. Implement connection pool monitoring alerts
409
+ 3. Review payment-service dependency performance
410
+ 4. Consider implementing request queuing for high load periods
411
+ ```
412
+
413
+ ### Example 3: Operation-Level Performance Investigation
414
+ ```
415
+ User: "The GET operations in my payment services are slow. Can you investigate?"
416
+ Assistant: I'll investigate the performance of GET operations specifically in your payment services using operation-level analysis.
417
+
418
+ [Uses audit_service_operations for precise operation targeting]
419
+
420
+ audit_service_operations(
421
+ operation_targets='[{"Type":"service_operation","Data":{"ServiceOperation":{"Service":{"Type":"Service","Name":"*payment*"},"Operation":"*GET*","MetricType":"Latency"}}}]',
422
+ auditors="operation_metric,trace"
423
+ )
424
+
425
+ Operation Performance Analysis Results:
426
+
427
+ 🎯 GET OPERATIONS ANALYZED:
428
+ • payment-api: GET /api/payments/{id} - P99: 1.8s
429
+ • payment-api: GET /api/payments/history - P99: 3.2s ⚠️
430
+ • payment-processor: GET /api/process/status - P99: 0.8s ✅
431
+
432
+ 🔴 PERFORMANCE ISSUES IDENTIFIED:
433
+ • GET /api/payments/history: Consistently slow (P99: 3.2s)
434
+ - Root cause: Full table scan on payments table
435
+ - 89% of requests show database query time > 2.5s
436
+ - Missing index on user_id + created_date columns
437
+
438
+ 📈 PERFORMANCE RECOMMENDATIONS:
439
+ 1. Add composite index: CREATE INDEX idx_payments_user_date ON payments(user_id, created_date)
440
+ 2. Implement pagination for history endpoint (limit 50 records)
441
+ 3. Consider caching for frequently accessed payment histories
442
+ 4. Add query timeout protection (max 5s)
443
+
444
+ 🚀 EXPECTED IMPROVEMENT:
445
+ • Estimated latency reduction: 3.2s → 0.4s (87% improvement)
446
+ • Database load reduction: ~75%
447
+ ```
448
+
449
+ ### Example 4: Transaction Search for 100% Trace Visibility
450
+ ```
451
+ User: "I want to analyze all the errors in my checkout service from the last hour"
452
+ Assistant: I'll use Transaction Search to analyze all errors with 100% trace visibility (no sampling).
453
+
454
+ [Uses search_transaction_spans for complete trace data]
455
+
456
+ search_transaction_spans(
457
+ log_group_name="aws/spans",
458
+ query_string='FILTER attributes.aws.local.service = "checkout-service" and attributes.http.status_code >= 400 | STATS count() as error_count by attributes.http.status_code, attributes.aws.local.operation | SORT error_count DESC | LIMIT 20',
459
+ start_time="2024-01-15T10:00:00Z",
460
+ end_time="2024-01-15T11:00:00Z"
461
+ )
462
+
463
+ ✅ Transaction Search Results (100% sampled data):
464
+
465
+ ERROR BREAKDOWN (Last Hour):
466
+ • HTTP 500 errors: 47 occurrences
467
+ - POST /api/checkout/complete: 31 errors
468
+ - POST /api/checkout/validate: 16 errors
469
+
470
+ • HTTP 404 errors: 23 occurrences
471
+ - GET /api/checkout/{id}: 23 errors
472
+
473
+ • HTTP 503 errors: 12 occurrences
474
+ - POST /api/checkout/payment: 12 errors
475
+
476
+ 🔍 KEY INSIGHTS:
477
+ • 82 total errors out of 1,247 requests (6.6% error rate)
478
+ • Largest share of errors (38%, 31 of 82) in checkout completion flow
479
+ • Service unavailable errors correlate with payment service dependency
480
+
481
+ 🛠️ IMMEDIATE ACTIONS NEEDED:
482
+ 1. Investigate checkout completion logic for 500 errors
483
+ 2. Add validation for checkout ID existence (404s)
484
+ 3. Implement circuit breaker for payment service calls (503s)
485
+ ```
486
+
487
+ ### Example 5: Service Discovery and Overview
488
+ ```
489
+ User: "What services are being monitored and what's their current health status?"
490
+ Assistant: I'll get an overview of all monitored services and their health status.
491
+
492
+ [Uses list_monitored_services for discovery, then audit_services for health analysis]
493
+
494
+ Found 8 services being monitored:
495
+
496
+ 📊 SERVICE INVENTORY:
497
+ • api-gateway-service (AWS::ApiGateway::RestApi) - Production
498
+ • checkout-service (AWS::ECS::Service) - Production
499
+ • payment-api-service (AWS::ECS::Service) - Production
500
+ • payment-processor (AWS::Lambda::Function) - Production
501
+ • user-service (AWS::EKS::Service) - Production
502
+ • inventory-service (AWS::EKS::Service) - Production
503
+ • notification-lambda (AWS::Lambda::Function) - Production
504
+ • analytics-service (AWS::ECS::Service) - Production
505
+
506
+ [Then performs health audit]
507
+
508
+ 🏥 HEALTH STATUS SUMMARY:
509
+ ✅ HEALTHY (5 services):
510
+ • api-gateway-service: All SLOs met, low latency
511
+ • user-service: Excellent performance metrics
512
+ • inventory-service: All operations within thresholds
513
+ • notification-lambda: Fast execution, no errors
514
+ • analytics-service: Stable performance
515
+
516
+ ⚠️ NEEDS ATTENTION (2 services):
517
+ • checkout-service: Elevated latency on completion operations
518
+ • payment-api-service: Intermittent timeout issues
519
+
520
+ 🔴 CRITICAL (1 service):
521
+ • payment-processor: SLO breach - error rate 8.2% (threshold: 5%)
522
+
523
+ 💡 RECOMMENDATIONS:
524
+ 1. Focus immediate attention on payment-processor error investigation
525
+ 2. Monitor checkout-service latency trends
526
+ 3. Review payment-api-service timeout configurations
527
+ ```
528
+
529
+ ## Recommended Workflows
530
+
531
+ ### 🎯 Primary Audit Workflow (Most Common)
532
+ 1. **Start with `audit_services()`** - Use wildcard patterns for automatic service discovery
533
+ 2. **Review findings summary** - Let user choose which issues to investigate further
534
+ 3. **Deep dive with `auditors="all"`** - For selected services needing root cause analysis
535
+
536
+ ### 🔍 SLO Investigation Workflow
537
+ 1. **Use `get_slo()`** - Understand SLO configuration and thresholds
538
+ 2. **Use `audit_slos()` with `auditors="all"`** - Comprehensive root cause analysis
539
+ 3. **Follow actionable recommendations** - Implement suggested fixes
540
+
541
+ ### ⚡ Operation Performance Workflow
542
+ 1. **Use `audit_service_operations()`** - Target specific operations with precision
543
+ 2. **Apply wildcard patterns** - e.g., `*GET*` for all GET operations
544
+ 3. **Root cause analysis** - Use `auditors="all"` for detailed investigation
545
+
546
+ ### 📊 Complete Observability Workflow
547
+ 1. **Service Discovery** - `audit_services()` with wildcard patterns
548
+ 2. **SLO Compliance** - `audit_slos()` for breach detection
549
+ 3. **Operation Analysis** - `audit_service_operations()` for endpoint-specific issues
550
+ 4. **Trace Investigation** - `search_transaction_spans()` for 100% trace visibility
551
+
552
+ ## Configuration
553
+
554
+ ### Required AWS Permissions
555
+
556
+ The server requires the following AWS IAM permissions:
557
+
558
+ ```json
559
+ {
560
+ "Version": "2012-10-17",
561
+ "Statement": [
562
+ {
563
+ "Effect": "Allow",
564
+ "Action": [
565
+ "application-signals:ListServices",
566
+ "application-signals:GetService",
567
+ "application-signals:ListServiceOperations",
568
+ "application-signals:ListServiceLevelObjectives",
569
+ "application-signals:GetServiceLevelObjective",
570
+ "application-signals:BatchGetServiceLevelObjectiveBudgetReport",
571
+ "cloudwatch:GetMetricData",
572
+ "cloudwatch:GetMetricStatistics",
573
+ "logs:GetQueryResults",
574
+ "logs:StartQuery",
575
+ "logs:StopQuery",
576
+ "xray:GetTraceSummaries",
577
+ "xray:BatchGetTraces",
578
+ "xray:GetTraceSegmentDestination"
579
+ ],
580
+ "Resource": "*"
581
+ }
582
+ ]
583
+ }
584
+ ```
585
+
586
+ ### Environment Variables
587
+
588
+ - `AWS_PROFILE` - AWS profile name to use for authentication (defaults to `default` profile)
589
+ - `AWS_REGION` - AWS region (defaults to us-east-1)
590
+ - `MCP_CLOUDWATCH_APPSIGNALS_LOG_LEVEL` - Logging level (defaults to INFO)
591
+ - `AUDITOR_LOG_PATH` - Path for audit log files (defaults to /tmp)
592
+
593
+ ### AWS Credentials
594
+
595
+ This server uses AWS profiles for authentication. Set the `AWS_PROFILE` environment variable to use a specific profile from your `~/.aws/credentials` file.
596
+
597
+ The server will use the standard AWS credential chain via boto3, which includes:
598
+ - AWS Profile specified by `AWS_PROFILE` environment variable
599
+ - Default profile from AWS credentials file
600
+ - IAM roles when running on EC2, ECS, Lambda, etc.
601
+
602
+ ### Transaction Search Configuration
603
+
604
+ For 100% trace visibility, enable AWS X-Ray Transaction Search:
605
+ 1. Configure X-Ray to send traces to CloudWatch Logs
606
+ 2. Set destination to 'CloudWatchLogs' with status 'ACTIVE'
607
+ 3. This enables the `search_transaction_spans()` tool for complete observability
608
+
609
+ Without Transaction Search, you'll only have access to 5% sampled trace data through X-Ray.
610
+
611
+ ## Development
612
+
613
+ This server is part of the AWS Labs MCP collection. For development and contribution guidelines, please see the main repository documentation.
614
+
615
+ ### Running Tests
616
+
617
+ To run the comprehensive test suite that validates all use case examples and tool functionality:
618
+
619
+ ```bash
620
+ cd src/cloudwatch-appsignals-mcp-server
621
+ python -m pytest tests/test_use_case_examples.py -v
622
+ ```
623
+
624
+ This test file verifies that all use case examples in the tool documentation call the correct tools with the right parameters and target formats. It includes tests for:
625
+
626
+ - All documented use cases for `audit_services()`, `audit_slos()`, and `audit_service_operations()`
627
+ - Target format validation (service, SLO, and operation targets)
628
+ - Wildcard pattern expansion functionality
629
+ - Auditor selection for different scenarios
630
+ - JSON format validation for all documentation examples
631
+
632
+ The tests use mocked AWS clients to prevent real API calls while validating the tool logic and parameter handling.
633
+
634
+ ## License
635
+
636
+ This project is licensed under the Apache License, Version 2.0. See the LICENSE file for details.