awslabs.dynamodb-mcp-server 1.0.9__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

This version of awslabs.dynamodb-mcp-server has been flagged as potentially problematic.

@@ -0,0 +1,383 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Database analyzer classes for source database analysis."""
+
+import json
+import os
+from awslabs.dynamodb_mcp_server.database_analysis_queries import get_query_resource
+from awslabs.mysql_mcp_server.server import DBConnection, DummyCtx
+from awslabs.mysql_mcp_server.server import run_query as mysql_query
+from datetime import datetime
+from loguru import logger
+from typing import Any, Dict, List, Tuple
+
+
+DEFAULT_ANALYSIS_DAYS = 30
+DEFAULT_MAX_QUERY_RESULTS = 500
+SECONDS_PER_DAY = 86400
+DDL_PREFIXES = ('CREATE ', 'DROP ', 'ALTER ', 'TRUNCATE ')
+
+
+class DatabaseAnalyzer:
+    """Base class for database analyzers."""
+
+    @staticmethod
+    def build_connection_params(source_db_type: str, **kwargs) -> Dict[str, Any]:
+        """Build connection parameters for database analysis.
+
+        Args:
+            source_db_type: Type of source database (e.g., 'mysql')
+            **kwargs: Connection parameters (aws_cluster_arn, aws_secret_arn, etc.)
+
+        Returns:
+            Dictionary of connection parameters
+
+        Raises:
+            ValueError: If database type is not supported
+        """
+        if source_db_type == 'mysql':
+            user_provided_dir = kwargs.get('output_dir')
+
+            # Validate user-provided directory
+            if not os.path.isabs(user_provided_dir):
+                raise ValueError(f'Output directory must be an absolute path: {user_provided_dir}')
+            if not os.path.isdir(user_provided_dir) or not os.access(user_provided_dir, os.W_OK):
+                raise ValueError(
+                    f'Output directory does not exist or is not writable: {user_provided_dir}'
+                )
+            output_dir = user_provided_dir
+
+            return {
+                'cluster_arn': kwargs.get('aws_cluster_arn') or os.getenv('MYSQL_CLUSTER_ARN'),
+                'secret_arn': kwargs.get('aws_secret_arn') or os.getenv('MYSQL_SECRET_ARN'),
+                'database': kwargs.get('database_name') or os.getenv('MYSQL_DATABASE'),
+                'region': kwargs.get('aws_region') or os.getenv('AWS_REGION'),
+                'max_results': kwargs.get('max_query_results')
+                or int(os.getenv('MYSQL_MAX_QUERY_RESULTS', str(DEFAULT_MAX_QUERY_RESULTS))),
+                'pattern_analysis_days': kwargs.get(
+                    'pattern_analysis_days', DEFAULT_ANALYSIS_DAYS
+                ),
+                'output_dir': output_dir,
+            }
+        raise ValueError(f'Unsupported database type: {source_db_type}')
+
+    @staticmethod
+    def validate_connection_params(
+        source_db_type: str, connection_params: Dict[str, Any]
+    ) -> Tuple[List[str], Dict[str, str]]:
+        """Validate connection parameters for database type.
+
+        Args:
+            source_db_type: Type of source database
+            connection_params: Dictionary of connection parameters
+
+        Returns:
+            Tuple of (missing_params, param_descriptions)
+        """
+        if source_db_type == 'mysql':
+            required_params = ['cluster_arn', 'secret_arn', 'database', 'region']
+            missing_params = [
+                param
+                for param in required_params
+                if not connection_params.get(param)
+                or (
+                    isinstance(connection_params[param], str)
+                    and connection_params[param].strip() == ''
+                )
+            ]
+
+            param_descriptions = {
+                'cluster_arn': 'AWS cluster ARN',
+                'secret_arn': 'AWS secret ARN',
+                'database': 'Database name',
+                'region': 'AWS region',
+            }
+            return missing_params, param_descriptions
+        return [], {}
+
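# --- Illustrative usage sketch, not part of the packaged file. The ARNs, database name,
# --- and output directory below are placeholders; output_dir must be an existing,
# --- writable absolute path.
params = DatabaseAnalyzer.build_connection_params(
    'mysql',
    aws_cluster_arn='arn:aws:rds:us-east-1:123456789012:cluster:example',
    aws_secret_arn='arn:aws:secretsmanager:us-east-1:123456789012:secret:example',
    database_name='appdb',
    aws_region='us-east-1',
    output_dir='/tmp/analysis',
)
missing, descriptions = DatabaseAnalyzer.validate_connection_params('mysql', params)
if missing:
    print('Missing parameters:', [descriptions[p] for p in missing])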
+    @staticmethod
+    def save_analysis_files(
+        results: Dict[str, Any],
+        source_db_type: str,
+        database: str,
+        pattern_analysis_days: int,
+        max_results: int,
+        output_dir: str,
+    ) -> Tuple[List[str], List[str]]:
+        """Save analysis results to JSON files.
+
+        Args:
+            results: Dictionary of query results
+            source_db_type: Type of source database
+            database: Database name
+            pattern_analysis_days: Number of days to analyze the logs for pattern analysis query
+            max_results: Maximum results per query
+            output_dir: Absolute directory path where the timestamped output analysis folder will be created
+
+        Returns:
+            Tuple of (saved_files, save_errors)
+        """
+        saved_files = []
+        save_errors = []
+
+        logger.info(f'save_analysis_files called with {len(results) if results else 0} results')
+
+        if not results:
+            logger.warning('No results to save - returning empty lists')
+            return saved_files, save_errors
+
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        analysis_folder = os.path.join(output_dir, f'database_analysis_{timestamp}')
+        logger.info(f'Creating analysis folder: {analysis_folder}')
+
+        try:
+            os.makedirs(analysis_folder, exist_ok=True)
+            logger.info(f'Created folder at: {analysis_folder}')
+        except OSError as e:
+            logger.error(f'Failed to create analysis folder: {str(e)}')
+            save_errors.append(f'Failed to create folder {analysis_folder}: {str(e)}')
+            return saved_files, save_errors
+
+        for query_name, query_result in results.items():
+            filename = os.path.join(analysis_folder, f'{query_name}_results.json')
+
+            analysis_data = query_result['data']
+            if query_name == 'query_pattern_analysis':
+                analysis_data = DatabaseAnalyzer.filter_pattern_data(
+                    analysis_data, pattern_analysis_days
+                )
+
+            try:
+                with open(filename, 'w') as f:
+                    json.dump(
+                        {
+                            'query_name': query_name,
+                            'description': query_result['description'],
+                            'source_db_type': source_db_type,
+                            'database': database,
+                            'pattern_analysis_days': pattern_analysis_days,
+                            'max_query_results': max_results,
+                            'data': analysis_data,
+                        },
+                        f,
+                        indent=2,
+                        default=str,
+                    )
+                saved_files.append(filename)
+                logger.info(f'Saved {query_name} results to {filename}')
+            except Exception as e:
+                logger.error(f'Failed to save {query_name}: {str(e)}')
+                save_errors.append(f'Failed to save {query_name}: {str(e)}')
+
+        return saved_files, save_errors
+
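# --- Illustrative sketch, not part of the packaged file: with the placeholder arguments
# --- below, each query's results land in
# --- <output_dir>/database_analysis_<YYYYMMDD_HHMMSS>/<query_name>_results.json.
saved, errors = DatabaseAnalyzer.save_analysis_files(
    results={'table_analysis': {'description': 'Table metadata', 'data': [{'TABLE_NAME': 'users'}]}},
    source_db_type='mysql',
    database='appdb',
    pattern_analysis_days=30,
    max_results=500,
    output_dir='/tmp/analysis',
)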
+    @staticmethod
+    def filter_pattern_data(
+        data: List[Dict[str, Any]], pattern_analysis_days: int
+    ) -> List[Dict[str, Any]]:
+        """Filter pattern analysis data to exclude DDL statements and add RPS calculations.
+
+        Args:
+            data: List of query pattern dictionaries
+            pattern_analysis_days: Number of days in analysis period
+
+        Returns:
+            Filtered list with calculated RPS added to each pattern
+        """
+        if not data:
+            return data
+
+        total_seconds = (pattern_analysis_days or DEFAULT_ANALYSIS_DAYS) * SECONDS_PER_DAY
+        filtered_patterns = []
+
+        for pattern in data:
+            digest = pattern.get('DIGEST_TEXT', '')
+            # Skip DDL statements
+            if not any(digest.upper().startswith(prefix) for prefix in DDL_PREFIXES):
+                pattern_with_rps = pattern.copy()
+                count = pattern.get('COUNT_STAR', 0)
+                pattern_with_rps['calculated_rps'] = (
+                    round(count / total_seconds, 6) if total_seconds > 0 else 0
+                )
+                filtered_patterns.append(pattern_with_rps)
+
+        return filtered_patterns
+
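# --- Illustrative sketch, not part of the packaged file: the RPS arithmetic performed above.
# --- A digest executed 2,592,000 times over a 30-day window works out to
# --- 2,592,000 / (30 * 86400) = 1.0 requests per second; DDL digests are dropped.
sample = [
    {'DIGEST_TEXT': 'SELECT * FROM users WHERE id = ?', 'COUNT_STAR': 2_592_000},
    {'DIGEST_TEXT': 'CREATE TABLE audit_log (id INT)', 'COUNT_STAR': 3},
]
filtered = DatabaseAnalyzer.filter_pattern_data(sample, pattern_analysis_days=30)
assert len(filtered) == 1 and filtered[0]['calculated_rps'] == 1.0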
+
+class MySQLAnalyzer(DatabaseAnalyzer):
+    """MySQL-specific database analyzer."""
+
+    SCHEMA_QUERIES = [
+        'table_analysis',
+        'column_analysis',
+        'foreign_key_analysis',
+        'index_analysis',
+    ]
+    ACCESS_PATTERN_QUERIES = ['performance_schema_check', 'query_pattern_analysis']
+
+    @staticmethod
+    def is_performance_schema_enabled(result):
+        """Check if MySQL performance schema is enabled from query result."""
+        if result and len(result) > 0:
+            performance_schema_value = str(
+                result[0].get('', '0')
+            )  # Key is empty string by mysql package design, so checking only value here
+            return performance_schema_value == '1'
+        return False
+
+    def __init__(self, connection_params):
+        """Initialize MySQL analyzer with connection parameters."""
+        self.cluster_arn = connection_params['cluster_arn']
+        self.secret_arn = connection_params['secret_arn']
+        self.database = connection_params['database']
+        self.region = connection_params['region']
+        self.max_results = connection_params['max_results']
+        self.pattern_analysis_days = connection_params['pattern_analysis_days']
+
+    async def _run_query(self, sql, query_parameters=None):
+        """Internal method to run SQL queries against MySQL database."""
+        try:
+            # Create a new connection with current parameters
+            db_connection = DBConnection(
+                self.cluster_arn, self.secret_arn, self.database, self.region, True
+            )
+            # Pass connection parameter directly to mysql_query
+            result = await mysql_query(sql, DummyCtx(), db_connection, query_parameters)
+            return result
+        except Exception as e:
+            logger.error(f'MySQL query execution failed - {type(e).__name__}: {str(e)}')
+            return [{'error': f'MySQL query failed: {str(e)}'}]
+
+    async def execute_query_batch(
+        self, query_names: List[str], pattern_analysis_days: int = None
+    ) -> Tuple[Dict[str, Any], List[str]]:
+        """Execute a batch of analysis queries.
+
+        Args:
+            query_names: List of query names to execute
+            pattern_analysis_days: Optional analysis period for pattern queries
+
+        Returns:
+            Tuple of (results_dict, errors_list)
+        """
+        results = {}
+        errors = []
+
+        for query_name in query_names:
+            try:
+                # Get query with appropriate parameters
+                if query_name == 'query_pattern_analysis' and pattern_analysis_days:
+                    query = get_query_resource(
+                        query_name,
+                        max_query_results=self.max_results,
+                        target_database=self.database,
+                        pattern_analysis_days=pattern_analysis_days,
+                    )
+                else:
+                    query = get_query_resource(
+                        query_name,
+                        max_query_results=self.max_results,
+                        target_database=self.database,
+                    )
+
+                result = await self._run_query(query['sql'])
+
+                if result and isinstance(result, list) and len(result) > 0:
+                    if 'error' in result[0]:
+                        errors.append(f'{query_name}: {result[0]["error"]}')
+                    else:
+                        results[query_name] = {
+                            'description': query['description'],
+                            'data': result,
+                        }
+                else:
+                    # Handle empty results
+                    results[query_name] = {
+                        'description': query['description'],
+                        'data': [],
+                    }
+
+            except Exception as e:
+                errors.append(f'{query_name}: {str(e)}')
+
+        return results, errors
+
+    @classmethod
+    async def analyze(cls, connection_params: Dict[str, Any]) -> Dict[str, Any]:
+        """Execute MySQL-specific analysis workflow.
+
+        Args:
+            connection_params: Dictionary of connection parameters
+
+        Returns:
+            Dictionary containing results, errors, and performance schema status
+        """
+        analyzer = cls(connection_params)
+
+        # Execute schema analysis
+        schema_results, schema_errors = await analyzer.execute_query_batch(cls.SCHEMA_QUERIES)
+
+        # Execute performance schema check
+        (
+            performance_schema_check_results,
+            performance_schema_check_errors,
+        ) = await analyzer.execute_query_batch(['performance_schema_check'])
+
+        performance_enabled = False
+        all_results = {**schema_results}
+        all_errors = schema_errors + performance_schema_check_errors
+
+        # Check performance schema status and run pattern analysis if enabled
+        if 'performance_schema_check' in performance_schema_check_results:
+            performance_enabled = cls.is_performance_schema_enabled(
+                performance_schema_check_results['performance_schema_check']['data']
+            )
+
+        if performance_enabled:
+            pattern_results, pattern_errors = await analyzer.execute_query_batch(
+                ['query_pattern_analysis'], analyzer.pattern_analysis_days
+            )
+            all_results.update(pattern_results)
+            all_errors.extend(pattern_errors)
+        if not performance_enabled:
+            all_errors.append('Performance Schema disabled - skipping query_pattern_analysis')
+
+        return {
+            'results': all_results,
+            'errors': all_errors,
+            'performance_enabled': performance_enabled,
+            'performance_feature': 'Performance Schema',
+        }
+
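# --- Illustrative sketch, not part of the packaged file: driving the MySQL workflow end to
# --- end. Assumes connection_params built by DatabaseAnalyzer.build_connection_params above
# --- and an Aurora MySQL cluster reachable through the RDS Data API.
import asyncio


async def run_analysis(connection_params):
    summary = await MySQLAnalyzer.analyze(connection_params)
    return DatabaseAnalyzer.save_analysis_files(
        results=summary['results'],
        source_db_type='mysql',
        database=connection_params['database'],
        pattern_analysis_days=connection_params['pattern_analysis_days'],
        max_results=connection_params['max_results'],
        output_dir=connection_params['output_dir'],
    )


# saved_files, save_errors = asyncio.run(run_analysis(connection_params))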
+
+class DatabaseAnalyzerRegistry:
+    """Registry for database-specific analyzers."""
+
+    _analyzers = {
+        'mysql': MySQLAnalyzer,
+    }
+
+    @classmethod
+    def get_analyzer(cls, source_db_type: str):
+        """Get the appropriate analyzer class for the database type."""
+        analyzer = cls._analyzers.get(source_db_type.lower())
+        if not analyzer:
+            raise ValueError(f'Unsupported database type: {source_db_type}')
+        return analyzer
+
+    @classmethod
+    def get_supported_types(cls) -> List[str]:
+        """Get list of supported database types."""
+        return list(cls._analyzers.keys())
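A minimal usage sketch (not part of the diff above) of how the registry resolves an analyzer; the class it returns exposes the async `analyze` entry point shown earlier:

analyzer_cls = DatabaseAnalyzerRegistry.get_analyzer('mysql')  # -> MySQLAnalyzer
print(DatabaseAnalyzerRegistry.get_supported_types())  # -> ['mysql']
# result = await analyzer_cls.analyze(connection_params)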
@@ -9,6 +9,19 @@ You are an AI pair programming with a USER. Your goal is to help the USER create
 
 🔴 **CRITICAL**: You MUST limit the number of questions you ask at any given time, try to limit it to one question, or AT MOST: three related questions.
 
+## Initial Assessment for Requirement Gathering
+
+**If the user provides specific context, respond accordingly. Otherwise, present these options:**
+"How would you like to gather requirements for your DynamoDB model?
+
+**Natural Language Requirement Gathering** - We'll gather requirements through Q&A (for new or existing applications)
+**Existing Database Analysis** - I can analyze your database to discover schema and patterns automatically using the `source_db_analyzer` tool
+
+Which approach would you prefer?"
+
+🔴 **CRITICAL DATABASE ANALYSIS WORKFLOW**:
+After running `source_db_analyzer`, you MUST IMMEDIATELY read ALL JSON files from the timestamped analysis directory (database_analysis_YYYYMMDD_HHMMSS) and proceed with DynamoDB Data Modeling using the complete analysis.
+
 ## Documentation Workflow
 
 🔴 CRITICAL FILE MANAGEMENT: