awslabs.dynamodb-mcp-server 2.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. awslabs/__init__.py +17 -0
  2. awslabs/dynamodb_mcp_server/__init__.py +17 -0
  3. awslabs/dynamodb_mcp_server/cdk_generator/__init__.py +19 -0
  4. awslabs/dynamodb_mcp_server/cdk_generator/generator.py +276 -0
  5. awslabs/dynamodb_mcp_server/cdk_generator/models.py +521 -0
  6. awslabs/dynamodb_mcp_server/cdk_generator/templates/README.md +57 -0
  7. awslabs/dynamodb_mcp_server/cdk_generator/templates/stack.ts.j2 +70 -0
  8. awslabs/dynamodb_mcp_server/common.py +94 -0
  9. awslabs/dynamodb_mcp_server/db_analyzer/__init__.py +30 -0
  10. awslabs/dynamodb_mcp_server/db_analyzer/analyzer_utils.py +394 -0
  11. awslabs/dynamodb_mcp_server/db_analyzer/base_plugin.py +355 -0
  12. awslabs/dynamodb_mcp_server/db_analyzer/mysql.py +450 -0
  13. awslabs/dynamodb_mcp_server/db_analyzer/plugin_registry.py +73 -0
  14. awslabs/dynamodb_mcp_server/db_analyzer/postgresql.py +215 -0
  15. awslabs/dynamodb_mcp_server/db_analyzer/sqlserver.py +255 -0
  16. awslabs/dynamodb_mcp_server/markdown_formatter.py +513 -0
  17. awslabs/dynamodb_mcp_server/model_validation_utils.py +845 -0
  18. awslabs/dynamodb_mcp_server/prompts/dynamodb_architect.md +851 -0
  19. awslabs/dynamodb_mcp_server/prompts/json_generation_guide.md +185 -0
  20. awslabs/dynamodb_mcp_server/prompts/transform_model_validation_result.md +168 -0
  21. awslabs/dynamodb_mcp_server/server.py +524 -0
  22. awslabs_dynamodb_mcp_server-2.0.10.dist-info/METADATA +306 -0
  23. awslabs_dynamodb_mcp_server-2.0.10.dist-info/RECORD +27 -0
  24. awslabs_dynamodb_mcp_server-2.0.10.dist-info/WHEEL +4 -0
  25. awslabs_dynamodb_mcp_server-2.0.10.dist-info/entry_points.txt +2 -0
  26. awslabs_dynamodb_mcp_server-2.0.10.dist-info/licenses/LICENSE +175 -0
  27. awslabs_dynamodb_mcp_server-2.0.10.dist-info/licenses/NOTICE +2 -0
awslabs/dynamodb_mcp_server/common.py
@@ -0,0 +1,94 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+ import re
+ from functools import wraps
+ from typing import Callable
+
+
+ def validate_database_name(database_name: str) -> None:
+     """Validate database name.
+
+     Args:
+         database_name: The database name to validate
+
+     Raises:
+         ValueError: If the database name contains invalid characters or exceeds length limit
+     """
+     # Max identifier length: SQL Server=128, MySQL=64, PostgreSQL=63
+     # Use 128 as upper bound; each database will enforce its own limit
+     MAX_DB_NAME_LENGTH = 128
+
+     if len(database_name) > MAX_DB_NAME_LENGTH:
+         raise ValueError(
+             f'Invalid database name: {database_name}. '
+             f'Database name must not exceed {MAX_DB_NAME_LENGTH} characters.'
+         )
+
+     if not re.match(r'^[a-zA-Z0-9_.$-]+$', database_name):
+         raise ValueError(
+             f'Invalid database name: {database_name}. '
+             'Only alphanumeric characters, underscores, periods, dollar signs, and hyphens are allowed.'
+         )
+
+
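For context, a minimal usage sketch (the names passed in are illustrative): the validator returns None on success and raises ValueError otherwise, so callers simply invoke it before building SQL.

    from awslabs.dynamodb_mcp_server.common import validate_database_name

    validate_database_name('sales_db')  # passes: only allowed characters, under 128 chars
    try:
        validate_database_name('sales;DROP TABLE x')  # semicolons and spaces are rejected
    except ValueError as e:
        print(e)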
+ def validate_path_within_directory(
+     file_path: str, base_dir: str, path_description: str = 'file path'
+ ) -> str:
+     """Validate that a resolved path is within the base directory.
+
+     Args:
+         file_path: The file path to validate (can be relative or absolute)
+         base_dir: The base directory that the file must be within
+         path_description: Description of the path for error messages (e.g., "query output file")
+
+     Returns:
+         The canonical absolute path if validation succeeds
+
+     Raises:
+         ValueError: If the path resolves outside the base directory
+     """
+     real_base = os.path.normpath(os.path.realpath(base_dir))
+     real_file = os.path.normpath(os.path.realpath(file_path))
+
+     if not (real_file.startswith(real_base + os.sep) or real_file == real_base):
+         raise ValueError(
+             f'Path traversal detected: {path_description} resolves outside {base_dir}'
+         )
+
+     return real_file
+
+
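A short sketch of the traversal check (paths are illustrative). Both arguments are canonicalized with os.path.realpath, so symlinks cannot be used to escape base_dir:

    from awslabs.dynamodb_mcp_server.common import validate_path_within_directory

    validate_path_within_directory('/tmp/reports/out.md', '/tmp/reports')  # returns the canonical path
    try:
        validate_path_within_directory('/tmp/reports/../../etc/passwd', '/tmp/reports')
    except ValueError as e:
        print(e)  # Path traversal detected: file path resolves outside /tmp/reports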
+ def handle_exceptions(func: Callable) -> Callable:
+     """Decorator to handle exceptions in DynamoDB operations.
+
+     Wraps the function in a try/except block and returns any exceptions
+     in a standardized error format.
+
+     Args:
+         func: The function to wrap
+
+     Returns:
+         The wrapped function that handles exceptions
+     """
+
+     @wraps(func)
+     async def wrapper(*args, **kwargs):
+         try:
+             return await func(*args, **kwargs)
+         except Exception as e:
+             return {'error': str(e)}
+
+     return wrapper
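Because the wrapper awaits the wrapped callable, the decorator only suits async functions. A minimal sketch (the decorated function is hypothetical):

    import asyncio

    from awslabs.dynamodb_mcp_server.common import handle_exceptions

    @handle_exceptions
    async def describe_table(name: str):  # hypothetical MCP tool body
        raise RuntimeError(f'table {name} not found')

    print(asyncio.run(describe_table('orders')))  # {'error': 'table orders not found'}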
awslabs/dynamodb_mcp_server/db_analyzer/__init__.py
@@ -0,0 +1,30 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Database analyzer plugins package."""
+
+ from awslabs.dynamodb_mcp_server.db_analyzer.base_plugin import DatabasePlugin
+ from awslabs.dynamodb_mcp_server.db_analyzer.mysql import MySQLPlugin
+ from awslabs.dynamodb_mcp_server.db_analyzer.plugin_registry import PluginRegistry
+ from awslabs.dynamodb_mcp_server.db_analyzer.postgresql import PostgreSQLPlugin
+ from awslabs.dynamodb_mcp_server.db_analyzer.sqlserver import SQLServerPlugin
+
+
+ __all__ = [
+     'DatabasePlugin',
+     'MySQLPlugin',
+     'PostgreSQLPlugin',
+     'SQLServerPlugin',
+     'PluginRegistry',
+ ]
awslabs/dynamodb_mcp_server/db_analyzer/analyzer_utils.py
@@ -0,0 +1,394 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Utility functions for source database analyzer."""
+
+ import os
+ from awslabs.dynamodb_mcp_server.common import validate_path_within_directory
+ from awslabs.dynamodb_mcp_server.db_analyzer.base_plugin import DatabasePlugin
+ from awslabs.dynamodb_mcp_server.markdown_formatter import MarkdownFormatter
+ from datetime import datetime
+ from loguru import logger
+ from typing import Any, Dict, List, Optional, Tuple
+
+
+ DEFAULT_ANALYSIS_DAYS = 30
+ DEFAULT_MAX_QUERY_RESULTS = 500
+
+
+ def resolve_and_validate_path(file_path: str, base_dir: str, path_type: str) -> str:
+     """Resolve and validate file path within base directory."""
+     if not os.path.isabs(file_path):
+         resolved = os.path.join(base_dir, file_path.lstrip('./'))
+     else:
+         resolved = file_path
+     return validate_path_within_directory(resolved, base_dir, path_type)
+
+
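Note that lstrip('./') strips leading '.' and '/' characters rather than a literal './' prefix, so relative inputs like '../x' are flattened under base_dir before the traversal check runs. A quick sketch with illustrative paths:

    resolve_and_validate_path('./queries/analysis.sql', '/tmp/work', 'query output file')
    # -> '/tmp/work/queries/analysis.sql'
    resolve_and_validate_path('/tmp/work/results.txt', '/tmp/work', 'result input file')
    # absolute inputs skip the join and go straight to the traversal check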
+ DEFAULT_MYSQL_PORT = 3306
+
+
+ def build_connection_params(source_db_type: str, **kwargs) -> Dict[str, Any]:
+     """Build connection parameters for database analysis.
+
+     Args:
+         source_db_type: Type of source database (e.g., 'mysql')
+         **kwargs: Connection parameters (aws_cluster_arn, aws_secret_arn, hostname, port, etc.)
+
+     Returns:
+         Dictionary of connection parameters
+
+     Raises:
+         ValueError: If database type is not supported
+     """
+     if source_db_type == 'mysql':
+         user_provided_dir = kwargs.get('output_dir')
+
+         # Validate user-provided directory
+         if not os.path.isabs(user_provided_dir):
+             raise ValueError(f'Output directory must be an absolute path: {user_provided_dir}')
+         if not os.path.isdir(user_provided_dir) or not os.access(user_provided_dir, os.W_OK):
+             raise ValueError(
+                 f'Output directory does not exist or is not writable: {user_provided_dir}'
+             )
+         output_dir = user_provided_dir
+
+         # Validate port parameter
+         port_value = kwargs.get('port') or os.getenv('MYSQL_PORT', str(DEFAULT_MYSQL_PORT))
+         port = int(port_value) if str(port_value).isdigit() else DEFAULT_MYSQL_PORT
+
+         # Determine connection method
+         # Priority: explicit args > env vars, and cluster_arn > hostname within each level
+         cluster_arn = kwargs.get('aws_cluster_arn')
+         hostname = kwargs.get('hostname')
+
+         if cluster_arn:
+             # Explicit cluster_arn - use RDS Data API-based access
+             hostname = None
+         elif hostname:
+             # Explicit hostname - use connection-based access
+             cluster_arn = None
+         else:
+             # Fall back to env vars with same precedence
+             cluster_arn = os.getenv('MYSQL_CLUSTER_ARN')
+             hostname = os.getenv('MYSQL_HOSTNAME') if not cluster_arn else None
+
+         return {
+             'cluster_arn': cluster_arn,
+             'secret_arn': kwargs.get('aws_secret_arn') or os.getenv('MYSQL_SECRET_ARN'),
+             'database': kwargs.get('database_name') or os.getenv('MYSQL_DATABASE'),
+             'region': kwargs.get('aws_region') or os.getenv('AWS_REGION'),
+             'hostname': hostname,
+             'port': port,
+             'max_results': kwargs.get('max_query_results')
+             or int(os.getenv('MYSQL_MAX_QUERY_RESULTS', str(DEFAULT_MAX_QUERY_RESULTS))),
+             'pattern_analysis_days': kwargs.get('pattern_analysis_days', DEFAULT_ANALYSIS_DAYS),
+             'output_dir': output_dir,
+         }
+     raise ValueError(f'Unsupported database type: {source_db_type}')
+
+
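A hedged call sketch; the ARNs, region, and directory below are placeholders, and output_dir must already exist as a writable absolute path or the function raises ValueError:

    params = build_connection_params(
        'mysql',
        aws_cluster_arn='arn:aws:rds:us-east-1:123456789012:cluster:example',  # placeholder
        aws_secret_arn='arn:aws:secretsmanager:us-east-1:123456789012:secret:example',  # placeholder
        database_name='sales',
        aws_region='us-east-1',
        output_dir='/tmp/analysis',  # must exist and be writable
    )
    # The explicit aws_cluster_arn wins, so params['hostname'] is None and the RDS
    # Data API path is selected; port and max_results fall back to their defaults
    # (3306 and 500) unless the MYSQL_PORT / MYSQL_MAX_QUERY_RESULTS env vars override them.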
+ def validate_connection_params(
+     source_db_type: str, connection_params: Dict[str, Any]
+ ) -> Tuple[List[str], Dict[str, str]]:
+     """Validate connection parameters for database type.
+
+     Args:
+         source_db_type: Type of source database
+         connection_params: Dictionary of connection parameters
+
+     Returns:
+         Tuple of (missing_params, param_descriptions)
+     """
+     if source_db_type == 'mysql':
+         missing_params = []
+         param_descriptions = {}
+         cluster_arn = connection_params.get('cluster_arn')
+         hostname = connection_params.get('hostname')
+
+         # Check for either RDS Data API-based or connection-based access
+         has_rds_data_api = bool(isinstance(cluster_arn, str) and cluster_arn.strip())
+         has_connection_based = bool(isinstance(hostname, str) and hostname.strip())
+
+         # Check that we have a connection method
+         if not has_rds_data_api and not has_connection_based:
+             missing_params.append('cluster_arn OR hostname')
+             param_descriptions['cluster_arn OR hostname'] = (
+                 'Required: Either aws_cluster_arn (for RDS Data API-based access) '
+                 'OR hostname (for connection-based access)'
+             )
+
+         # Check common required parameters
+         common_required_params = ['secret_arn', 'database', 'region']
+         for param in common_required_params:
+             if not connection_params.get(param) or (
+                 isinstance(connection_params[param], str)
+                 and connection_params[param].strip() == ''
+             ):
+                 missing_params.append(param)
+                 param_descriptions.update(
+                     {
+                         'secret_arn': 'Secrets Manager secret ARN containing DB credentials',  # pragma: allowlist secret
+                         'database': 'Database name to analyze',
+                         'region': 'AWS region where your database instance and Secrets Manager are located',
+                     }
+                 )
+         return missing_params, param_descriptions
+     return [], {}
+
+
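A small sketch of the validation result (values illustrative); an empty-string secret_arn counts as missing, while the hostname satisfies the connection-method check:

    missing, descriptions = validate_connection_params(
        'mysql',
        {
            'cluster_arn': None,
            'hostname': 'db.example.com',
            'secret_arn': '',
            'database': 'sales',
            'region': 'us-east-1',
        },
    )
    print(missing)  # ['secret_arn']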
+ def save_analysis_files(
+     results: Dict[str, Any],
+     source_db_type: str,
+     database: str,
+     pattern_analysis_days: int,
+     max_results: int,
+     output_dir: str,
+     plugin: DatabasePlugin,
+     performance_enabled: bool = True,
+     skipped_queries: Optional[List[str]] = None,
+ ) -> Tuple[List[str], List[str]]:
+     """Save analysis results to Markdown files using MarkdownFormatter.
+
+     Args:
+         results: Dictionary of query results
+         source_db_type: Type of source database
+         database: Database name
+         pattern_analysis_days: Number of days of logs covered by the pattern analysis query
+         max_results: Maximum results per query
+         output_dir: Absolute directory path where the timestamped output folder will be created
+         plugin: DatabasePlugin instance for getting query definitions (REQUIRED)
+         performance_enabled: Whether performance schema is enabled
+         skipped_queries: List of query names that were skipped during analysis
+
+     Returns:
+         Tuple of (saved_files, save_errors)
+     """
+     if plugin is None:
+         raise ValueError('plugin parameter is required and cannot be None')
+
+     saved_files = []
+     save_errors = []
+
+     logger.info(f'save_analysis_files called with {len(results) if results else 0} results')
+
+     if not results:
+         logger.warning('No results to save - returning empty lists')
+         return saved_files, save_errors
+
+     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+     analysis_folder = os.path.join(output_dir, f'database_analysis_{timestamp}')
+     logger.info(f'Creating analysis folder: {analysis_folder}')
+
+     try:
+         os.makedirs(analysis_folder, exist_ok=True)
+         logger.info(f'Created folder at: {analysis_folder}')
+     except OSError as e:
+         logger.error(f'Failed to create analysis folder: {str(e)}')
+         save_errors.append(f'Failed to create folder {analysis_folder}: {str(e)}')
+         return saved_files, save_errors
+
+     # Prepare metadata for MarkdownFormatter
+     metadata = {
+         'database': database,
+         'source_db_type': source_db_type,
+         'analysis_period': f'{pattern_analysis_days} days',
+         'max_query_results': max_results,
+         'performance_enabled': performance_enabled,
+         'skipped_queries': skipped_queries or [],
+     }
+
+     # Use MarkdownFormatter to generate files
+     try:
+         formatter = MarkdownFormatter(results, metadata, analysis_folder, plugin=plugin)
+         generated_files, generation_errors = formatter.generate_all_files()
+         saved_files = generated_files
+
+         # Convert error tuples to error strings
+         if generation_errors:
+             for query_name, error_msg in generation_errors:
+                 save_errors.append(f'{query_name}: {error_msg}')
+
+         logger.info(
+             f'Successfully generated {len(saved_files)} Markdown files with {len(save_errors)} errors'
+         )
+     except Exception as e:
+         logger.error(f'Failed to generate Markdown files: {str(e)}')
+         save_errors.append(f'Failed to generate Markdown files: {str(e)}')
+
+     return saved_files, save_errors
+
+
+ def generate_query_file(
+     plugin,
+     database_name: str,
+     max_results: int,
+     query_output_file: str,
+     output_dir: str,
+     source_db_type: str,
+ ) -> str:
+     """Generate SQL query file for self-service mode."""
+     if not database_name:
+         return 'database_name is required for self-service mode to generate queries.'
+
+     resolved_query_file = resolve_and_validate_path(
+         query_output_file, output_dir, 'query output file'
+     )
+
+     query_dir = os.path.dirname(resolved_query_file)
+     if query_dir and not os.path.exists(query_dir):
+         os.makedirs(query_dir, exist_ok=True)
+
+     output_file = plugin.write_queries_to_file(database_name, max_results, resolved_query_file)
+
+     return f"""SQL queries have been written to: {output_file}
+
+ Next Steps:
+ 1. Run these queries against your {source_db_type} database
+ 2. Save the results to a text file (pipe-separated format)
+ 3. Call this tool again with:
+    - execution_mode='self_service'
+    - result_input_file='<path_to_your_results_file>'
+    - Same database_name and output_dir
+
+ Example commands:
+ - MySQL: mysql -u user -p -D {database_name} --table < {output_file} > results.txt
+ - PostgreSQL: psql -d {database_name} -f {output_file} > results.txt
+ - SQL Server: sqlcmd -d {database_name} -i {output_file} -o results.txt
+
+ IMPORTANT for MySQL: The --table flag is required to produce pipe-separated output that can be parsed correctly.
+
+ After running queries, provide the results file path to continue analysis."""
+
+
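A sketch of the self-service flow. Constructing the plugin directly is hypothetical (the server may obtain it via PluginRegistry instead), but write_queries_to_file is the method this helper calls on whatever plugin it receives:

    from awslabs.dynamodb_mcp_server.db_analyzer import MySQLPlugin

    plugin = MySQLPlugin()  # hypothetical construction; the actual lookup may differ
    message = generate_query_file(
        plugin,
        database_name='sales',
        max_results=500,
        query_output_file='queries/analysis.sql',  # resolved under output_dir
        output_dir='/tmp/analysis',
        source_db_type='mysql',
    )
    print(message)  # the next-steps instructions shown above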
+ def parse_results_and_generate_analysis(
+     plugin,
+     result_input_file: str,
+     output_dir: str,
+     database_name: str,
+     pattern_analysis_days: int,
+     max_results: int,
+     source_db_type: str,
+ ) -> str:
+     """Parse query results and generate analysis files."""
+     resolved_result_file = validate_path_within_directory(
+         result_input_file, output_dir, 'result input file'
+     )
+     if not os.path.exists(resolved_result_file):
+         raise FileNotFoundError(f'Result file not found: {resolved_result_file}')
+
+     logger.info(f'Parsing query results from: {resolved_result_file}')
+     results = plugin.parse_results_from_file(resolved_result_file)
+
+     if not results:
+         return f'No query results found in file: {resolved_result_file}. Please check the file format.'
+
+     saved_files, save_errors = save_analysis_files(
+         results,
+         source_db_type,
+         database_name,
+         pattern_analysis_days or DEFAULT_ANALYSIS_DAYS,
+         max_results,
+         output_dir,
+         plugin,
+         performance_enabled=True,
+         skipped_queries=[],
+     )
+
+     return build_analysis_report(
+         saved_files, save_errors, database_name, result_input_file, is_self_service=True
+     )
+
+
+ async def execute_managed_analysis(plugin, connection_params: dict, source_db_type: str) -> str:
+     """Execute managed mode analysis via AWS RDS Data API."""
+     analysis_result = await plugin.execute_managed_mode(connection_params)
+
+     saved_files, save_errors = save_analysis_files(
+         analysis_result['results'],
+         source_db_type,
+         connection_params.get('database'),
+         connection_params.get('pattern_analysis_days'),
+         connection_params.get('max_results'),
+         connection_params.get('output_dir'),
+         plugin,
+         analysis_result.get('performance_enabled', True),
+         analysis_result.get('skipped_queries', []),
+     )
+
+     if analysis_result['results']:
+         return build_analysis_report(
+             saved_files,
+             save_errors,
+             connection_params.get('database'),
+             None,
+             is_self_service=False,
+             analysis_period=connection_params.get('pattern_analysis_days'),
+         )
+     else:
+         return build_failure_report(analysis_result['errors'])
+
+
+ def build_analysis_report(
+     saved_files: list,
+     save_errors: list,
+     database_name: str,
+     source_file: Optional[str] = None,
+     is_self_service: bool = False,
+     analysis_period: Optional[int] = None,
+ ) -> str:
+     """Build analysis completion report."""
+     mode = 'Self-Service Mode' if is_self_service else 'Managed Mode'
+     report = [f'Database Analysis Complete ({mode})', '']
+
+     summary = ['Summary:', f'- Database: {database_name}']
+     if source_file:
+         summary.append(f'- Source: {source_file}')
+     if analysis_period:
+         summary.append(f'- Analysis Period: {analysis_period} days')
+     summary.extend(
+         ['**CRITICAL: Read ALL Analysis Files**', '', 'Follow these steps IN ORDER:', '']
+     )
+     report.extend(summary)
+
+     workflow = [
+         '1. Read manifest.md from the timestamped analysis directory',
+         '   - Lists all generated analysis files by category',
+         '',
+         '2. Read EVERY file listed in the manifest',
+         '   - Each file contains critical information for data modeling',
+         '',
+         '3. After reading all files, use dynamodb_data_modeling tool',
+         '   - Extract entities and relationships from schema files',
+         '   - Identify access patterns from performance files',
+         '   - Document findings in dynamodb_requirement.md',
+     ]
+     report.extend(workflow)
+
+     if saved_files:
+         report.extend(['', 'Generated Analysis Files (Read All):'])
+         report.extend(f'- {f}' for f in saved_files)
+
+     if save_errors:
+         report.extend(['', 'File Save Errors:'])
+         report.extend(f'- {e}' for e in save_errors)
+
+     return '\n'.join(report)
+
+
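A quick sketch of the report assembly (the file path and values are illustrative):

    report = build_analysis_report(
        saved_files=['/tmp/analysis/database_analysis_20250101_000000/manifest.md'],
        save_errors=[],
        database_name='sales',
        is_self_service=False,
        analysis_period=30,
    )
    # Starts with 'Database Analysis Complete (Managed Mode)', then the summary,
    # the read-all-files workflow, and the generated-file list.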
+ def build_failure_report(errors: list) -> str:
+     """Build failure report when all queries fail."""
+     return f'Database Analysis Failed\n\nAll {len(errors)} queries failed:\n' + '\n'.join(
+         f'{i}. {error}' for i, error in enumerate(errors, 1)
+     )
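And the corresponding failure path (error strings illustrative):

    print(build_failure_report(['connection timed out', 'access denied']))
    # Database Analysis Failed
    #
    # All 2 queries failed:
    # 1. connection timed out
    # 2. access denied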