atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. application_sdk/activities/.cursor/BUGBOT.md +424 -0
  2. application_sdk/activities/metadata_extraction/sql.py +400 -25
  3. application_sdk/application/__init__.py +2 -0
  4. application_sdk/application/metadata_extraction/sql.py +3 -0
  5. application_sdk/clients/.cursor/BUGBOT.md +280 -0
  6. application_sdk/clients/models.py +42 -0
  7. application_sdk/clients/sql.py +127 -87
  8. application_sdk/clients/temporal.py +3 -1
  9. application_sdk/common/.cursor/BUGBOT.md +316 -0
  10. application_sdk/common/aws_utils.py +259 -11
  11. application_sdk/common/utils.py +145 -9
  12. application_sdk/constants.py +8 -0
  13. application_sdk/decorators/.cursor/BUGBOT.md +279 -0
  14. application_sdk/handlers/__init__.py +8 -1
  15. application_sdk/handlers/sql.py +63 -22
  16. application_sdk/inputs/.cursor/BUGBOT.md +250 -0
  17. application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
  18. application_sdk/interceptors/cleanup.py +171 -0
  19. application_sdk/interceptors/events.py +6 -6
  20. application_sdk/observability/decorators/observability_decorator.py +36 -22
  21. application_sdk/outputs/.cursor/BUGBOT.md +295 -0
  22. application_sdk/outputs/iceberg.py +4 -0
  23. application_sdk/outputs/json.py +6 -0
  24. application_sdk/outputs/parquet.py +13 -3
  25. application_sdk/server/.cursor/BUGBOT.md +442 -0
  26. application_sdk/server/fastapi/__init__.py +59 -3
  27. application_sdk/server/fastapi/models.py +27 -0
  28. application_sdk/services/objectstore.py +16 -3
  29. application_sdk/version.py +1 -1
  30. application_sdk/workflows/.cursor/BUGBOT.md +218 -0
  31. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
  32. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
  33. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
  34. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
  35. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,316 @@
1
+ # Common Code Review Guidelines - Shared Utilities and Constants
2
+
3
+ ## Context-Specific Patterns
4
+
5
+ This directory contains shared utilities, constants, error codes, and common functionality used across the SDK. Code here must be high-quality, well-tested, and designed for reuse.
6
+
7
+ ### Phase 1: Critical Common Code Safety Issues
8
+
9
+ **Constants Management:**
10
+
11
+ - **All magic strings and numbers must be moved to constants.py**: No hardcoded values scattered across the codebase
12
+ - **Centralized configuration**: Related constants should be grouped together with clear naming
13
+ - **Environment variable patterns**: Use consistent naming conventions for environment variables
14
+ - **Shared constant keys**: Constants used by multiple modules (like configuration keys) must be defined here
15
+
16
+ **Error Code Standardization:**
17
+
18
+ - **Use internal SDK error codes**: All custom exceptions should be defined in `error_codes.py`
19
+ - **Specific exception types**: No generic `Exception` or `ValueError` for SDK-specific errors
20
+ - **Error hierarchies**: Related errors should inherit from common base exceptions
21
+ - **Consistent error messages**: Similar errors should have consistent message formats
22
+
23
+ ```python
24
+ # ✅ DO: Proper constants and error management
25
+ # In constants.py
26
+ DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
27
+ DEFAULT_LOCK_TTL_SECONDS = 300
28
+ DEFAULT_MAX_LOCKS = 10
29
+ REDIS_KEY_PREFIX = "application_sdk"
30
+
31
+ # Environment variable naming conventions
32
+ FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE = os.getenv("FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE", "false").lower() == "true"
33
+ DATABASE_TIMEOUT_SECONDS = int(os.getenv("DATABASE_TIMEOUT_SECONDS", "30"))
34
+
35
+ # In error_codes.py
36
+ class SDKError(Exception):
37
+ """Base exception for all SDK errors."""
38
+
39
+ class ClientError(SDKError):
40
+ """Errors related to client operations."""
41
+
42
+ class LockAcquisitionError(SDKError):
43
+ """Errors related to distributed lock operations."""
44
+
45
+ # ❌ REJECT: Scattered constants and generic errors
46
+ # Found across multiple files:
47
+ LOCK_TTL = 300 # In one file
48
+ DEFAULT_TIMEOUT = 300 # In another file
49
+ "distributed_lock" # Hardcoded string in various places
50
+
51
+ # Using generic exceptions:
52
+ raise Exception("Lock failed") # Should be LockAcquisitionError
53
+ raise ValueError("Invalid config") # Should be ConfigurationError
54
+ ```
55
+
56
+ ### Phase 2: Utility Architecture Patterns
57
+
58
+ **Utility Function Design:**
59
+
60
+ - **Single responsibility**: Each utility function should do exactly one thing
61
+ - **Pure functions**: Utilities should avoid side effects where possible
62
+ - **Type safety**: All utility functions must have comprehensive type hints
63
+ - **Error handling**: Utilities must handle edge cases gracefully
64
+ - **Documentation**: Complete docstrings with usage examples
65
+
66
+ **Code Reuse and DRY Principles:**
67
+
68
+ - **Extract repeated logic**: Common patterns across modules should become utility functions
69
+ - **Consolidate similar utilities**: Functions with overlapping purposes should be unified
70
+ - **Shared abstractions**: Common interface patterns should be abstracted into base classes
71
+ - **Configuration utilities**: Common configuration patterns should be centralized
72
+
73
+ ```python
74
+ # ✅ DO: Proper utility function design
75
+ def validate_environment_variable(
76
+ var_name: str,
77
+ default_value: str,
78
+ valid_values: Optional[List[str]] = None,
79
+ value_type: type = str
80
+ ) -> Any:
81
+ """
82
+ Validate and convert environment variable with comprehensive error handling.
83
+
84
+ Args:
85
+ var_name: Name of environment variable
86
+ default_value: Fallback value if not set
87
+ valid_values: List of allowed values (optional)
88
+ value_type: Expected type for conversion
89
+
90
+ Returns:
91
+ Validated and converted value
92
+
93
+ Raises:
94
+ ConfigurationError: If value is invalid or conversion fails
95
+
96
+ Example:
97
+ >>> timeout = validate_environment_variable(
98
+ ... "DB_TIMEOUT", "30", value_type=int
99
+ ... )
100
+ >>> mode = validate_environment_variable(
101
+ ... "LOG_LEVEL", "INFO", valid_values=["DEBUG", "INFO", "WARNING", "ERROR"]
102
+ ... )
103
+ """
104
+ raw_value = os.getenv(var_name, default_value)
105
+
106
+ try:
107
+ # Type conversion
108
+ if value_type == bool:
109
+ converted_value = raw_value.lower() in ('true', '1', 'yes', 'on')
110
+ elif value_type == int:
111
+ converted_value = int(raw_value)
112
+ elif value_type == float:
113
+ converted_value = float(raw_value)
114
+ else:
115
+ converted_value = raw_value
116
+
117
+ # Validation
118
+ if valid_values and converted_value not in valid_values:
119
+ raise ConfigurationError(
120
+ f"Invalid value for {var_name}: {raw_value}. "
121
+ f"Valid values: {valid_values}"
122
+ )
123
+
124
+ return converted_value
125
+
126
+ except (ValueError, TypeError) as e:
127
+ raise ConfigurationError(
128
+ f"Failed to convert {var_name}={raw_value} to {value_type.__name__}: {e}"
129
+ )
130
+
131
+ # ❌ REJECT: Poor utility design
132
+ def bad_get_config(name): # No type hints, no validation, no documentation
133
+ return os.getenv(name, "") # Silent empty-string fallback, no error handling
134
+ ```
135
+
136
+ ### Phase 3: Common Code Testing Requirements
137
+
138
+ **Utility Testing Standards:**
139
+
140
+ - **Comprehensive edge case testing**: Test all possible input combinations
141
+ - **Error condition testing**: Verify proper error handling for invalid inputs
142
+ - **Type safety testing**: Test with various input types to verify type hints
143
+ - **Integration testing**: Test utilities in context of actual usage
144
+ - **Performance testing**: Ensure utilities don't create performance bottlenecks
145
+
146
+ **Shared Code Quality:**
147
+
148
+ - All utility functions must have corresponding unit tests
149
+ - Test coverage must be >90% for common utilities
150
+ - Include property-based testing with hypothesis for complex utilities
151
+ - Mock external dependencies in utility tests
152
+ - Test thread safety for utilities used in concurrent contexts
153
+
154
+ ### Phase 4: Performance and Reusability
155
+
156
+ **Utility Performance:**
157
+
158
+ - **Caching for expensive operations**: Cache results of expensive utility calculations
159
+ - **Async where appropriate**: Use async for I/O utilities, sync for CPU-bound utilities
160
+ - **Memory efficiency**: Avoid creating unnecessary object copies in utilities
161
+ - **Algorithm efficiency**: Use appropriate data structures and algorithms
162
+
163
+ **Reusability Patterns:**
164
+
165
+ - **Generic implementations**: Write utilities that work for multiple use cases
166
+ - **Parameterizable behavior**: Allow customization through parameters, not hardcoded behavior
167
+ - **Composable utilities**: Design utilities that can be easily combined
168
+ - **Backwards compatibility**: Maintain API stability for widely-used utilities
169
+
170
+ ### Phase 5: Common Code Maintainability
171
+
172
+ **Documentation and Examples:**
173
+
174
+ - **Complete documentation**: All public utilities must have comprehensive docstrings
175
+ - **Usage examples**: Include realistic examples showing typical usage patterns
176
+ - **Performance characteristics**: Document time/space complexity for non-trivial utilities
177
+ - **Thread safety**: Document whether utilities are thread-safe
178
+ - **Version compatibility**: Document any version-specific behaviors
179
+
180
+ **Code Organization:**
181
+
182
+ - **Logical grouping**: Group related utilities in appropriately named modules
183
+ - **Consistent interfaces**: Similar utilities should have consistent parameter patterns
184
+ - **Clear abstractions**: Separate interface definitions from implementations
185
+ - **Dependency management**: Minimize dependencies in common utilities
186
+
187
+ ---
188
+
189
+ ## Common Code Anti-Patterns
190
+
191
+ **Always Reject:**
192
+
193
+ - **Scattered constants**: Magic numbers or strings not centralized in constants.py
194
+ - **Generic exceptions**: Using `Exception`, `ValueError`, or `RuntimeError` instead of SDK-specific errors
195
+ - **Duplicate utilities**: Multiple functions doing essentially the same thing
196
+ - **Poor error handling**: Utilities without proper exception handling
197
+ - **Missing validation**: Utilities that don't validate their inputs
198
+ - **Undocumented utilities**: Shared code without proper documentation
199
+
200
+ **Constants Management Anti-Patterns:**
201
+
202
+ ```python
203
+ # ❌ REJECT: Scattered constants across files
204
+ # In multiple different files:
205
+ LOCK_TTL = 300 # locks.py
206
+ DEFAULT_TIMEOUT = 300 # client.py
207
+ MAX_RETRIES = 3 # activities.py
208
+ "distributed_lock_config" # Hardcoded string in 5 different places
209
+
210
+ # ✅ REQUIRE: Centralized constants
211
+ # In constants.py only:
212
+ DEFAULT_LOCK_TTL_SECONDS = 300
213
+ DEFAULT_DATABASE_TIMEOUT_SECONDS = 300
214
+ DEFAULT_MAX_RETRY_ATTEMPTS = 3
215
+ DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
216
+
217
+ # Other files import from constants:
218
+ from application_sdk.constants import DISTRIBUTED_LOCK_CONFIG_KEY, DEFAULT_LOCK_TTL_SECONDS
219
+ ```
220
+
221
+ **Error Handling Anti-Patterns:**
222
+
223
+ ```python
224
+ # ❌ REJECT: Generic error handling
225
+ def bad_utility_function(value: str) -> dict:
226
+ if not value:
227
+ raise ValueError("Invalid value") # Generic error
228
+
229
+ try:
230
+ result = process_value(value)
231
+ return result
232
+ except Exception as e:
233
+ raise Exception(f"Processing failed: {e}") # Generic error
234
+
235
+ # ✅ REQUIRE: SDK-specific error handling
236
+ from application_sdk.common.error_codes import ValidationError, ProcessingError
237
+
238
+ def good_utility_function(value: str) -> dict:
239
+ """Utility function with proper error handling."""
240
+
241
+ if not value or not value.strip():
242
+ raise ValidationError(f"Value cannot be empty or whitespace: '{value}'")
243
+
244
+ try:
245
+ result = process_value(value)
246
+ if not result:
247
+ raise ProcessingError(f"Processing returned empty result for value: '{value}'")
248
+ return result
249
+
250
+ except ProcessingError:
251
+ raise # Re-raise SDK errors
252
+ except Exception as e:
253
+ raise ProcessingError(f"Unexpected error processing '{value}': {e}")
254
+ ```
255
+
256
+ **Code Duplication Anti-Patterns:**
257
+
258
+ ```python
259
+ # ❌ REJECT: Repeated logic in multiple files
260
+ # Found in client.py:
261
+ def setup_database_connection(host, port, user, password):
262
+ connection_string = f"postgresql://{user}:{password}@{host}:{port}"
263
+ return create_connection(connection_string)
264
+
265
+ # Found in activities.py:
266
+ def create_db_connection(host, port, user, password):
267
+ conn_str = f"postgresql://{user}:{password}@{host}:{port}"
268
+ return establish_connection(conn_str)
269
+
270
+ # ✅ REQUIRE: Extracted shared utility
271
+ # In common/utils.py:
272
+ def build_database_connection_string(
273
+ host: str,
274
+ port: int,
275
+ username: str,
276
+ password: str,
277
+ database: Optional[str] = None,
278
+ ssl_mode: str = "require"
279
+ ) -> str:
280
+ """
281
+ Build a standardized database connection string.
282
+
283
+ Used consistently across all database clients and activities.
284
+ """
285
+ base_url = f"postgresql://{username}:{password}@{host}:{port}"
286
+ if database:
287
+ base_url += f"/{database}"
288
+
289
+ params = []
290
+ if ssl_mode:
291
+ params.append(f"sslmode={ssl_mode}")
292
+
293
+ if params:
294
+ base_url += "?" + "&".join(params)
295
+
296
+ return base_url
297
+
298
+ # Other modules import and use the shared utility:
299
+ from application_sdk.common.utils import build_database_connection_string
300
+ ```
301
+
302
+ ## Educational Context for Common Code Reviews
303
+
304
+ When reviewing common code, emphasize:
305
+
306
+ 1. **Consistency Impact**: "Centralized constants and utilities ensure consistency across the entire SDK. Scattered constants lead to inconsistencies and make global changes nearly impossible."
307
+
308
+ 2. **Maintainability Impact**: "Well-designed utilities reduce code duplication and make the codebase easier to maintain. Changes to common functionality only need to be made in one place."
309
+
310
+ 3. **Error Handling Impact**: "SDK-specific exceptions provide clearer error messages and enable better error handling throughout the application. Generic exceptions hide the root cause and make debugging difficult."
311
+
312
+ 4. **Reusability Impact**: "Properly designed common utilities can be reused across multiple contexts, reducing development time and ensuring consistent behavior."
313
+
314
+ 5. **Performance Impact**: "Shared utilities are called frequently throughout the application. Performance issues in common code have amplified impact across the entire system."
315
+
316
+ 6. **Testing Impact**: "Common utilities require especially thorough testing because they're used in many contexts. Bugs in utilities affect multiple parts of the system simultaneously."
@@ -1,4 +1,13 @@
1
+ import re
2
+ from typing import Any, Dict, Optional
3
+
4
+ import boto3
5
+ from sqlalchemy.engine.url import URL
6
+
1
7
  from application_sdk.constants import AWS_SESSION_NAME
8
+ from application_sdk.observability.logger_adaptor import get_logger
9
+
10
+ logger = get_logger(__name__)
2
11
 
3
12
 
4
13
  def get_region_name_from_hostname(hostname: str) -> str:
@@ -12,11 +21,14 @@ def get_region_name_from_hostname(hostname: str) -> str:
12
21
  Returns:
13
22
  str: AWS region name
14
23
  """
15
- parts = hostname.split(".")
16
- for part in parts:
17
- if part.startswith(("us-", "eu-", "ap-", "ca-", "me-", "sa-", "af-")):
18
- return part
19
- raise ValueError(f"Could not find valid AWS region in hostname: {hostname}")
24
+ match = re.search(r"\.([a-z]{2}-[a-z]+-\d)\.", hostname)
25
+ if match:
26
+ return match.group(1)
27
+ # Some services may use - instead of . (rare)
28
+ match = re.search(r"-([a-z]{2}-[a-z]+-\d)\.", hostname)
29
+ if match:
30
+ return match.group(1)
31
+ raise ValueError("Could not find valid AWS region from hostname")
20
32
 
21
33
 
22
34
  def generate_aws_rds_token_with_iam_role(
@@ -55,12 +67,10 @@ def generate_aws_rds_token_with_iam_role(
55
67
  )
56
68
 
57
69
  credentials = assumed_role["Credentials"]
58
- aws_client = client(
59
- "rds",
60
- aws_access_key_id=credentials["AccessKeyId"],
61
- aws_secret_access_key=credentials["SecretAccessKey"],
62
- aws_session_token=credentials["SessionToken"],
63
- region_name=region or get_region_name_from_hostname(host),
70
+ aws_client = create_aws_client(
71
+ service="rds",
72
+ region=region or get_region_name_from_hostname(host),
73
+ temp_credentials=credentials,
64
74
  )
65
75
  token: str = aws_client.generate_db_auth_token(
66
76
  DBHostname=host, Port=port, DBUsername=user
@@ -107,3 +117,241 @@ def generate_aws_rds_token_with_iam_user(
107
117
  return token
108
118
  except Exception as e:
109
119
  raise Exception(f"Failed to get user credentials: {str(e)}")
120
+
121
+
122
def get_cluster_identifier(aws_client) -> Optional[str]:
    """
    Return the identifier of the first Redshift cluster the client can see.

    Calls ``describe_clusters`` once and scans the returned ``Clusters`` list
    in order, skipping entries whose ``ClusterIdentifier`` is missing or empty.

    Args:
        aws_client: Boto3 Redshift client instance (any object exposing
            ``describe_clusters()``).

    Returns:
        Optional[str]: The first non-empty cluster identifier, or ``None``
        when no listed cluster carries one. The "no clusters" case does not
        raise; callers must handle ``None`` themselves.
    """
    response = aws_client.describe_clusters()

    # No filtering is applied: the first cluster with a usable identifier wins.
    for cluster in response["Clusters"]:
        identifier = cluster.get("ClusterIdentifier")
        if identifier:
            return identifier
    return None
144
+
145
+
146
def create_aws_session(credentials: Dict[str, Any]) -> boto3.Session:
    """
    Build a boto3 session from a credentials mapping.

    The access key is read from ``aws_access_key_id`` and the secret key from
    ``aws_secret_access_key``. When either entry is missing or falsy, the
    ``username`` / ``password`` entries are used instead, respectively.

    Args:
        credentials: Dictionary containing AWS credentials.

    Returns:
        boto3.Session: Session constructed with the resolved key pair.
    """

    def _pick(primary: str, fallback: str) -> Any:
        # Prefer the AWS-specific key; fall back to the generic one.
        return credentials.get(primary) or credentials.get(fallback)

    return boto3.Session(
        aws_access_key_id=_pick("aws_access_key_id", "username"),
        aws_secret_access_key=_pick("aws_secret_access_key", "password"),
    )
167
+
168
+
169
def get_cluster_credentials(
    aws_client, credentials: Dict[str, Any], extra: Dict[str, Any]
) -> Dict[str, str]:
    """
    Retrieve cluster credentials using IAM authentication.

    The cluster is taken from ``credentials["cluster_id"]`` when present and
    truthy; otherwise it is discovered via ``get_cluster_identifier``.

    Args:
        aws_client: Boto3 Redshift client instance.
        credentials: Dictionary containing connection credentials; may carry
            an explicit ``cluster_id``.
        extra: Dictionary of additional connection settings; must contain
            ``database``.

    Returns:
        Dict[str, str]: Response of ``get_cluster_credentials_with_iam``
        (expected to contain ``DbUser`` and ``DbPassword``).

    Raises:
        KeyError: If ``extra`` has no ``"database"`` entry.
        ValueError: If no cluster identifier was supplied and none could be
            discovered.
    """
    database = extra["database"]
    cluster_identifier = credentials.get("cluster_id") or get_cluster_identifier(
        aws_client
    )
    # Fail early with a readable message instead of handing ``None`` to boto3.
    if not cluster_identifier:
        raise ValueError(
            "No Redshift cluster identifier provided in credentials "
            "('cluster_id') and none could be discovered"
        )
    return aws_client.get_cluster_credentials_with_iam(
        DbName=database,
        ClusterIdentifier=cluster_identifier,
    )
190
+
191
+
192
def create_aws_client(
    service: str,
    region: str,
    session: Optional[boto3.Session] = None,
    temp_credentials: Optional[Dict[str, str]] = None,
    use_default_credentials: bool = False,
) -> Any:
    """
    Create an AWS client with flexible credential options.

    Exactly one credential source must be selected: ``session``,
    ``temp_credentials`` or ``use_default_credentials``.

    Args:
        service: AWS service name (e.g., 'redshift', 'redshift-serverless', 'sts', 'rds')
        region: AWS region name
        session: Optional boto3 session instance. If provided, the client is
            created from this session.
        temp_credentials: Optional dictionary containing temporary credentials from assume_role.
            Must contain 'AccessKeyId', 'SecretAccessKey', and 'SessionToken'
        use_default_credentials: If True, the client is created with
            ``boto3.client`` and no explicit credentials.

    Returns:
        AWS client instance

    Raises:
        ValueError: If zero or more than one credential source is provided,
            or if ``temp_credentials`` lacks a required key.
        Exception: If client creation fails; the original error is chained
            as ``__cause__``.

    Examples:
        Using temporary credentials::

            client = create_aws_client(
                service="redshift",
                region="us-east-1",
                temp_credentials={
                    "AccessKeyId": "AKIA...",
                    "SecretAccessKey": "...",
                    "SessionToken": "..."
                }
            )

        Using a session::

            session = boto3.Session(profile_name="my-profile")
            client = create_aws_client(
                service="rds",
                region="us-west-2",
                session=session
            )

        Using default credentials::

            client = create_aws_client(
                service="sts",
                region="us-east-1",
                use_default_credentials=True
            )
    """
    # Validate credential options: exactly one source may be selected.
    credential_sources = sum(
        [session is not None, temp_credentials is not None, use_default_credentials]
    )

    if credential_sources == 0:
        raise ValueError("At least one credential source must be provided")
    if credential_sources > 1:
        raise ValueError("Only one credential source should be provided at a time")

    if temp_credentials is not None:
        # Report incomplete credentials as a caller error rather than letting
        # a KeyError be wrapped into the generic failure below.
        missing_keys = [
            key
            for key in ("AccessKeyId", "SecretAccessKey", "SessionToken")
            if key not in temp_credentials
        ]
        if missing_keys:
            raise ValueError(
                f"temp_credentials is missing required keys: {', '.join(missing_keys)}"
            )

    try:
        if session is not None:
            logger.debug(
                f"Creating {service} client using provided session in region {region}"
            )
            return session.client(service, region_name=region)  # type: ignore

        if temp_credentials is not None:
            logger.debug(
                f"Creating {service} client using temporary credentials in region {region}"
            )
            return boto3.client(  # type: ignore
                service,
                aws_access_key_id=temp_credentials["AccessKeyId"],
                aws_secret_access_key=temp_credentials["SecretAccessKey"],
                aws_session_token=temp_credentials["SessionToken"],
                region_name=region,
            )

        # The validation above guarantees the only remaining source is the
        # default credentials, so every path returns a client.
        logger.debug(
            f"Creating {service} client using default credentials in region {region}"
        )
        return boto3.client(service, region_name=region)  # type: ignore

    except Exception as e:
        logger.error(f"Failed to create {service} client in region {region}: {e}")
        # Chain the cause so the original traceback is preserved for callers.
        raise Exception(f"Failed to create {service} client: {str(e)}") from e
289
+
290
+
291
def create_engine_url(
    drivername: str,
    credentials: Dict[str, Any],
    cluster_credentials: Dict[str, str],
    extra: Dict[str, Any],
) -> URL:
    """
    Assemble a SQLAlchemy engine URL from connection and cluster credentials.

    Args:
        drivername: SQLAlchemy driver name passed through to ``URL.create``.
        credentials: Dictionary containing connection credentials; ``host`` is
            required, ``port`` is optional.
        cluster_credentials: Dictionary containing ``DbUser`` and ``DbPassword``.
        extra: Dictionary of additional connection settings; must contain
            ``database``.

    Returns:
        URL: SQLAlchemy engine URL
    """
    # Endpoint fields are resolved before the login fields.
    endpoint = {
        "host": credentials["host"],
        "port": credentials.get("port"),
        "database": extra["database"],
    }
    login = {
        "username": cluster_credentials["DbUser"],
        "password": cluster_credentials["DbPassword"],
    }
    return URL.create(drivername=drivername, **login, **endpoint)
319
+
320
+
321
def get_all_aws_regions() -> list[str]:
    """
    List AWS region names, preferring a live EC2 ``describe_regions`` lookup.

    Returns:
        list[str]: Region names sorted alphabetically when the lookup
        succeeds; otherwise the static fallback list defined below.

    Note:
        Any exception raised during the lookup is logged as a warning and
        swallowed, so this function does not propagate lookup failures.
    """
    # Static list used only when the EC2 lookup fails.
    # NOTE(review): "aws-global" does not look like a regular region name -
    # confirm it is intended here.
    static_regions = [
        "ap-northeast-1",
        "ap-south-1",
        "ap-southeast-1",
        "ap-southeast-2",
        "aws-global",
        "ca-central-1",
        "eu-central-1",
        "eu-north-1",
        "eu-west-1",
        "eu-west-2",
        "eu-west-3",
        "sa-east-1",
        "us-east-1",
        "us-east-2",
        "us-west-1",
        "us-west-2",
    ]

    try:
        # us-east-1 is used as the fixed region for the EC2 client.
        ec2 = boto3.client("ec2", region_name="us-east-1")
        described = ec2.describe_regions()
        names = sorted(entry["RegionName"] for entry in described["Regions"])
    except Exception as err:
        logger.warning(
            f"Failed to retrieve AWS regions dynamically: {err}. Using fallback list."
        )
        return static_regions
    return names