atlan-application-sdk 0.1.1rc38__py3-none-any.whl → 0.1.1rc40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/sql.py +110 -74
- application_sdk/clients/temporal.py +4 -2
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +89 -34
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/services/objectstore.py +98 -20
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc38.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc38.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/RECORD +24 -14
- {atlan_application_sdk-0.1.1rc38.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc38.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc38.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/NOTICE +0 -0
application_sdk/clients/sql.py
CHANGED
```diff
@@ -71,25 +71,36 @@ class BaseSQLClient(ClientInterface):
         self.sql_alchemy_connect_args = sql_alchemy_connect_args
 
     async def load(self, credentials: Dict[str, Any]) -> None:
-        """Load and
+        """Load credentials and prepare engine for lazy connections.
+
+        This method now only stores credentials and creates the engine without
+        establishing a persistent connection. Connections are created on-demand.
 
         Args:
             credentials (Dict[str, Any]): Database connection credentials.
 
         Raises:
-            ClientError: If
+            ClientError: If credentials are invalid or engine creation fails
         """
         self.credentials = credentials  # Update the instance credentials
         try:
             from sqlalchemy import create_engine
 
+            # Create engine but no persistent connection
             self.engine = create_engine(
                 self.get_sqlalchemy_connection_string(),
                 connect_args=self.sql_alchemy_connect_args,
                 pool_pre_ping=True,
             )
-
-
+
+            # Test connection briefly to validate credentials
+            with self.engine.connect() as _:
+                pass  # Connection test successful
+
+            # Don't store persistent connection
+            self.connection = None
+
+        except Exception as e:
             logger.error(
                 f"{ClientError.SQL_CLIENT_AUTH_ERROR}: Error loading SQL client: {str(e)}"
             )
@@ -100,8 +111,10 @@ class BaseSQLClient(ClientInterface):
 
     async def close(self) -> None:
         """Close the database connection."""
-        if self.
-            self.
+        if self.engine:
+            self.engine.dispose()
+            self.engine = None
+        self.connection = None  # Should already be None, but ensure cleanup
 
     def get_iam_user_token(self):
         """Get an IAM user token for AWS RDS database authentication.
@@ -309,11 +322,11 @@ class BaseSQLClient(ClientInterface):
         return conn_str
 
     async def run_query(self, query: str, batch_size: int = 100000):
-        """Execute a SQL query and return results in batches.
+        """Execute a SQL query and return results in batches using lazy connections.
 
-        This method
-
-
+        This method creates a connection on-demand, executes the query in batches,
+        and automatically closes the connection when done. This prevents memory
+        leaks from persistent connections.
 
         Args:
             query (str): SQL query to execute.
@@ -325,44 +338,47 @@ class BaseSQLClient(ClientInterface):
             a dictionary mapping column names to values.
 
         Raises:
-            ValueError: If
+            ValueError: If engine is not initialized.
             Exception: If query execution fails.
         """
-        if not self.
-            raise ValueError("
-        loop = asyncio.get_running_loop()
-
-        if self.use_server_side_cursor:
-            self.connection.execution_options(yield_per=batch_size)
+        if not self.engine:
+            raise ValueError("Engine is not initialized. Call load() first.")
 
+        loop = asyncio.get_running_loop()
         logger.info(f"Running query: {query}")
 
-
-
-
+        # Use context manager for automatic connection cleanup
+        with self.engine.connect() as connection:
+            if self.use_server_side_cursor:
+                connection = connection.execution_options(yield_per=batch_size)
 
-
-
-
-            if not cursor or not cursor.cursor:
-                raise ValueError("Cursor is not supported")
-            column_names: List[str] = [
-                description.name.lower()
-                for description in cursor.cursor.description
-            ]
+            with ThreadPoolExecutor() as pool:
+                try:
+                    from sqlalchemy import text
 
-
-
-                pool, cursor.fetchmany, batch_size
+                    cursor = await loop.run_in_executor(
+                        pool, connection.execute, text(query)
                     )
-            if not
-
-
-
-
-
-
-
+                    if not cursor or not cursor.cursor:
+                        raise ValueError("Cursor is not supported")
+                    column_names: List[str] = [
+                        description.name.lower()
+                        for description in cursor.cursor.description
+                    ]
+
+                    while True:
+                        rows = await loop.run_in_executor(
+                            pool, cursor.fetchmany, batch_size
+                        )
+                        if not rows:
+                            break
+
+                        results = [dict(zip(column_names, row)) for row in rows]
+                        yield results
+                except Exception as e:
+                    logger.error("Error running query in batch: {error}", error=str(e))
+                    raise e
+            # Connection automatically closed by context manager
 
         logger.info("Query execution completed")
 
@@ -386,22 +402,23 @@ class AsyncBaseSQLClient(BaseSQLClient):
     engine: "AsyncEngine"
 
     async def load(self, credentials: Dict[str, Any]) -> None:
-        """Load and
+        """Load credentials and prepare async engine for lazy connections.
 
-        This method creates an async
-
+        This method stores credentials and creates an async engine without establishing
+        a persistent connection. Connections are created on-demand for better memory efficiency.
 
         Args:
             credentials (Dict[str, Any]): Database connection credentials including
                 host, port, username, password, and other connection parameters.
 
         Raises:
-            ValueError: If
+            ValueError: If credentials are invalid or engine creation fails.
         """
         self.credentials = credentials
         try:
             from sqlalchemy.ext.asyncio import create_async_engine
 
+            # Create async engine but no persistent connection
             self.engine = create_async_engine(
                 self.get_sqlalchemy_connection_string(),
                 connect_args=self.sql_alchemy_connect_args,
@@ -409,7 +426,14 @@ class AsyncBaseSQLClient(BaseSQLClient):
             )
             if not self.engine:
                 raise ValueError("Failed to create async engine")
-
+
+            # Test connection briefly to validate credentials
+            async with self.engine.connect() as _:
+                pass  # Connection test successful
+
+            # Don't store persistent connection
+            self.connection = None
+
         except Exception as e:
             logger.error(f"Error establishing database connection: {str(e)}")
             if self.engine:
@@ -417,11 +441,19 @@ class AsyncBaseSQLClient(BaseSQLClient):
                 self.engine = None
             raise ValueError(str(e))
 
+    async def close(self) -> None:
+        """Close the async database connection and dispose of the engine."""
+        if self.engine:
+            await self.engine.dispose()
+            self.engine = None
+        self.connection = None
+
     async def run_query(self, query: str, batch_size: int = 100000):
-        """Execute a SQL query asynchronously and return results in batches.
+        """Execute a SQL query asynchronously and return results in batches using lazy connections.
 
-        This method
-
+        This method creates an async connection on-demand, executes the query in batches,
+        and automatically closes the connection when done. This prevents memory leaks
+        from persistent connections.
 
         Args:
             query (str): SQL query to execute.
@@ -433,42 +465,46 @@ class AsyncBaseSQLClient(BaseSQLClient):
             a dictionary mapping column names to values.
 
         Raises:
+            ValueError: If engine is not initialized.
             Exception: If query execution fails.
         """
-        if not self.
-            raise ValueError("
+        if not self.engine:
+            raise ValueError("Engine is not initialized. Call load() first.")
 
        logger.info(f"Running query: {query}")
        use_server_side_cursor = self.use_server_side_cursor
 
-
-
-
-
-            await self.connection.execution_options(yield_per=batch_size)
-
-        result = (
-            await self.connection.stream(text(query))
-            if use_server_side_cursor
-            else await self.connection.execute(text(query))
-        )
+        # Use async context manager for automatic connection cleanup
+        async with self.engine.connect() as connection:
+            try:
+                from sqlalchemy import text
 
-
+                if use_server_side_cursor:
+                    connection = connection.execution_options(yield_per=batch_size)
 
-
-
-            await result.fetchmany(batch_size)
+                result = (
+                    await connection.stream(text(query))
                    if use_server_side_cursor
-            else
-            if result.cursor
-            else None
+                    else await connection.execute(text(query))
                )
-            if not rows:
-                break
-            yield [dict(zip(column_names, row)) for row in rows]
 
-
-
-
+                column_names = list(result.keys())
+
+                while True:
+                    rows = (
+                        await result.fetchmany(batch_size)
+                        if use_server_side_cursor
+                        else result.cursor.fetchmany(batch_size)
+                        if result.cursor
+                        else None
+                    )
+                    if not rows:
+                        break
+                    yield [dict(zip(column_names, row)) for row in rows]
+
+            except Exception as e:
+                logger.error(f"Error executing query: {str(e)}")
+                raise
+            # Async connection automatically closed by context manager
 
         logger.info("Query execution completed")
```
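The docstrings above describe the new lazy-connection behaviour: `load()` only builds and smoke-tests the engine, each `run_query()` call opens its own connection inside a context manager, and `close()` disposes of the engine. A minimal caller-side sketch of that flow follows; the subclass, connection URL, credential keys, and query are illustrative assumptions, not SDK code.

```python
# Hypothetical usage sketch of the lazy-connection pattern shown in the diff.
# The subclass, URL, credential keys, and query are illustrative placeholders.
import asyncio

from application_sdk.clients.sql import BaseSQLClient


class IllustrativeSQLClient(BaseSQLClient):
    """Assumed connector subclass; real connectors supply their own URL logic."""

    def get_sqlalchemy_connection_string(self) -> str:
        # The diff shows load()/run_query() relying on this hook.
        return "postgresql+psycopg2://user:secret@localhost:5432/appdb"


async def main() -> None:
    client = IllustrativeSQLClient()  # constructor arguments may differ per connector
    # load() now only creates the engine and briefly tests a connection;
    # no persistent connection is stored on the client.
    await client.load({"username": "user", "password": "secret"})  # keys are illustrative
    try:
        # Each run_query() call opens a connection on demand, yields batches of
        # {column_name: value} dicts, and closes the connection when exhausted.
        async for batch in client.run_query("SELECT 1 AS one", batch_size=1000):
            print(batch)
    finally:
        # close() disposes of the engine and its pool; there is no persistent
        # connection left behind to leak.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
```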
application_sdk/clients/temporal.py
CHANGED
```diff
@@ -26,6 +26,7 @@ from application_sdk.constants import (
     WORKFLOW_PORT,
     WORKFLOW_TLS_ENABLED_KEY,
 )
+from application_sdk.interceptors.cleanup import CleanupInterceptor, cleanup
 from application_sdk.interceptors.events import EventInterceptor, publish_event
 from application_sdk.interceptors.lock import RedisLockInterceptor
 from application_sdk.observability.logger_adaptor import get_logger
@@ -151,7 +152,7 @@ class TemporalWorkflowClient(WorkflowClient):
                 await asyncio.sleep(refresh_interval)
 
                 # Get fresh token
-                token = await self.auth_manager.get_access_token()
+                token = await self.auth_manager.get_access_token(force_refresh=True)
                 if self.client:
                     self.client.api_key = token
                     logger.info("Updated client RPC metadata with fresh token")
@@ -359,7 +360,7 @@ class TemporalWorkflowClient(WorkflowClient):
         )
 
         # Start with provided activities and add system activities
-        final_activities = list(activities) + [publish_event]
+        final_activities = list(activities) + [publish_event, cleanup]
 
         # Add lock management activities if needed
         if not IS_LOCKING_DISABLED:
@@ -395,6 +396,7 @@ class TemporalWorkflowClient(WorkflowClient):
             activity_executor=activity_executor,
             interceptors=[
                 EventInterceptor(),
+                CleanupInterceptor(),
                 RedisLockInterceptor(activities_dict),
             ],
         )
```
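Taken together, these hunks append the new `cleanup` system activity next to `publish_event` and slot `CleanupInterceptor` into the worker's interceptor chain. A rough sketch of the resulting worker wiring, reconstructed only from the context lines visible here (the Temporal client, task queue name, workflows, and executor sizing are assumed placeholders):

```python
# Rough sketch of the worker composition implied by the hunks above; anything
# not visible in the context lines (client, task queue, workflows, executor
# sizing) is an assumed placeholder.
from concurrent.futures import ThreadPoolExecutor

from temporalio.client import Client
from temporalio.worker import Worker

from application_sdk.interceptors.cleanup import CleanupInterceptor, cleanup
from application_sdk.interceptors.events import EventInterceptor, publish_event
from application_sdk.interceptors.lock import RedisLockInterceptor


def build_worker(client: Client, activities, activities_dict, workflows) -> Worker:
    # System activities are appended to the caller-provided ones, as in the diff.
    final_activities = list(activities) + [publish_event, cleanup]

    return Worker(
        client,
        task_queue="example-task-queue",  # placeholder
        workflows=workflows,
        activities=final_activities,
        activity_executor=ThreadPoolExecutor(max_workers=5),  # placeholder sizing
        interceptors=[
            EventInterceptor(),
            CleanupInterceptor(),  # new in rc40; implementation lives in interceptors/cleanup.py
            RedisLockInterceptor(activities_dict),
        ],
    )
```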
application_sdk/common/.cursor/BUGBOT.md
ADDED
@@ -0,0 +1,316 @@

# Common Code Review Guidelines - Shared Utilities and Constants

## Context-Specific Patterns

This directory contains shared utilities, constants, error codes, and common functionality used across the SDK. Code here must be high-quality, well-tested, and designed for reuse.

### Phase 1: Critical Common Code Safety Issues

**Constants Management:**

- **All magic strings and numbers must be moved to constants.py**: No hardcoded values scattered across the codebase
- **Centralized configuration**: Related constants should be grouped together with clear naming
- **Environment variable patterns**: Use consistent naming conventions for environment variables
- **Shared constant keys**: Constants used by multiple modules (like configuration keys) must be defined here

**Error Code Standardization:**

- **Use internal SDK error codes**: All custom exceptions should be defined in `error_codes.py`
- **Specific exception types**: No generic `Exception` or `ValueError` for SDK-specific errors
- **Error hierarchies**: Related errors should inherit from common base exceptions
- **Consistent error messages**: Similar errors should have consistent message formats

```python
# ✅ DO: Proper constants and error management
# In constants.py
DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
DEFAULT_LOCK_TTL_SECONDS = 300
DEFAULT_MAX_LOCKS = 10
REDIS_KEY_PREFIX = "application_sdk"

# Environment variable naming conventions
FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE = os.getenv("FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE", "false").lower() == "true"
DATABASE_TIMEOUT_SECONDS = int(os.getenv("DATABASE_TIMEOUT_SECONDS", "30"))

# In error_codes.py
class SDKError(Exception):
    """Base exception for all SDK errors."""

class ClientError(SDKError):
    """Errors related to client operations."""

class LockAcquisitionError(SDKError):
    """Errors related to distributed lock operations."""

# ❌ REJECT: Scattered constants and generic errors
# Found across multiple files:
LOCK_TTL = 300  # In one file
DEFAULT_TIMEOUT = 300  # In another file
"distributed_lock"  # Hardcoded string in various places

# Using generic exceptions:
raise Exception("Lock failed")  # Should be LockAcquisitionError
raise ValueError("Invalid config")  # Should be ConfigurationError
```

### Phase 2: Utility Architecture Patterns

**Utility Function Design:**

- **Single responsibility**: Each utility function should do exactly one thing
- **Pure functions**: Utilities should avoid side effects where possible
- **Type safety**: All utility functions must have comprehensive type hints
- **Error handling**: Utilities must handle edge cases gracefully
- **Documentation**: Complete docstrings with usage examples

**Code Reuse and DRY Principles:**

- **Extract repeated logic**: Common patterns across modules should become utility functions
- **Consolidate similar utilities**: Functions with overlapping purposes should be unified
- **Shared abstractions**: Common interface patterns should be abstracted into base classes
- **Configuration utilities**: Common configuration patterns should be centralized

```python
# ✅ DO: Proper utility function design
def validate_environment_variable(
    var_name: str,
    default_value: str,
    valid_values: Optional[List[str]] = None,
    value_type: type = str
) -> Any:
    """
    Validate and convert environment variable with comprehensive error handling.

    Args:
        var_name: Name of environment variable
        default_value: Fallback value if not set
        valid_values: List of allowed values (optional)
        value_type: Expected type for conversion

    Returns:
        Validated and converted value

    Raises:
        ConfigurationError: If value is invalid or conversion fails

    Example:
        >>> timeout = validate_environment_variable(
        ...     "DB_TIMEOUT", "30", value_type=int
        ... )
        >>> mode = validate_environment_variable(
        ...     "LOG_LEVEL", "INFO", valid_values=["DEBUG", "INFO", "WARNING", "ERROR"]
        ... )
    """
    raw_value = os.getenv(var_name, default_value)

    try:
        # Type conversion
        if value_type == bool:
            converted_value = raw_value.lower() in ('true', '1', 'yes', 'on')
        elif value_type == int:
            converted_value = int(raw_value)
        elif value_type == float:
            converted_value = float(raw_value)
        else:
            converted_value = raw_value

        # Validation
        if valid_values and converted_value not in valid_values:
            raise ConfigurationError(
                f"Invalid value for {var_name}: {raw_value}. "
                f"Valid values: {valid_values}"
            )

        return converted_value

    except (ValueError, TypeError) as e:
        raise ConfigurationError(
            f"Failed to convert {var_name}={raw_value} to {value_type.__name__}: {e}"
        )

# ❌ REJECT: Poor utility design
def bad_get_config(name):  # No type hints, no validation, no documentation
    return os.getenv(name, "")  # No defaults, no error handling
```

### Phase 3: Common Code Testing Requirements

**Utility Testing Standards:**

- **Comprehensive edge case testing**: Test all possible input combinations
- **Error condition testing**: Verify proper error handling for invalid inputs
- **Type safety testing**: Test with various input types to verify type hints
- **Integration testing**: Test utilities in context of actual usage
- **Performance testing**: Ensure utilities don't create performance bottlenecks

**Shared Code Quality:**

- All utility functions must have corresponding unit tests
- Test coverage must be >90% for common utilities
- Include property-based testing with hypothesis for complex utilities
- Mock external dependencies in utility tests
- Test thread safety for utilities used in concurrent contexts

### Phase 4: Performance and Reusability

**Utility Performance:**

- **Caching for expensive operations**: Cache results of expensive utility calculations
- **Async where appropriate**: Use async for I/O utilities, sync for CPU-bound utilities
- **Memory efficiency**: Avoid creating unnecessary object copies in utilities
- **Algorithm efficiency**: Use appropriate data structures and algorithms

**Reusability Patterns:**

- **Generic implementations**: Write utilities that work for multiple use cases
- **Parameterizable behavior**: Allow customization through parameters, not hardcoded behavior
- **Composable utilities**: Design utilities that can be easily combined
- **Backwards compatibility**: Maintain API stability for widely-used utilities

### Phase 5: Common Code Maintainability

**Documentation and Examples:**

- **Complete documentation**: All public utilities must have comprehensive docstrings
- **Usage examples**: Include realistic examples showing typical usage patterns
- **Performance characteristics**: Document time/space complexity for non-trivial utilities
- **Thread safety**: Document whether utilities are thread-safe
- **Version compatibility**: Document any version-specific behaviors

**Code Organization:**

- **Logical grouping**: Group related utilities in appropriately named modules
- **Consistent interfaces**: Similar utilities should have consistent parameter patterns
- **Clear abstractions**: Separate interface definitions from implementations
- **Dependency management**: Minimize dependencies in common utilities

---

## Common Code Anti-Patterns

**Always Reject:**

- **Scattered constants**: Magic numbers or strings not centralized in constants.py
- **Generic exceptions**: Using `Exception`, `ValueError`, or `RuntimeError` instead of SDK-specific errors
- **Duplicate utilities**: Multiple functions doing essentially the same thing
- **Poor error handling**: Utilities without proper exception handling
- **Missing validation**: Utilities that don't validate their inputs
- **Undocumented utilities**: Shared code without proper documentation

**Constants Management Anti-Patterns:**

```python
# ❌ REJECT: Scattered constants across files
# In multiple different files:
LOCK_TTL = 300  # locks.py
DEFAULT_TIMEOUT = 300  # client.py
MAX_RETRIES = 3  # activities.py
"distributed_lock_config"  # Hardcoded string in 5 different places

# ✅ REQUIRE: Centralized constants
# In constants.py only:
DEFAULT_LOCK_TTL_SECONDS = 300
DEFAULT_DATABASE_TIMEOUT_SECONDS = 300
DEFAULT_MAX_RETRY_ATTEMPTS = 3
DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"

# Other files import from constants:
from application_sdk.constants import DISTRIBUTED_LOCK_CONFIG_KEY, DEFAULT_LOCK_TTL_SECONDS
```

**Error Handling Anti-Patterns:**

```python
# ❌ REJECT: Generic error handling
def bad_utility_function(value: str) -> dict:
    if not value:
        raise ValueError("Invalid value")  # Generic error

    try:
        result = process_value(value)
        return result
    except Exception as e:
        raise Exception(f"Processing failed: {e}")  # Generic error

# ✅ REQUIRE: SDK-specific error handling
from application_sdk.common.error_codes import ValidationError, ProcessingError

def good_utility_function(value: str) -> dict:
    """Utility function with proper error handling."""

    if not value or not value.strip():
        raise ValidationError(f"Value cannot be empty or whitespace: '{value}'")

    try:
        result = process_value(value)
        if not result:
            raise ProcessingError(f"Processing returned empty result for value: '{value}'")
        return result

    except ProcessingError:
        raise  # Re-raise SDK errors
    except Exception as e:
        raise ProcessingError(f"Unexpected error processing '{value}': {e}")
```

**Code Duplication Anti-Patterns:**

```python
# ❌ REJECT: Repeated logic in multiple files
# Found in client.py:
def setup_database_connection(host, port, user, password):
    connection_string = f"postgresql://{user}:{password}@{host}:{port}"
    return create_connection(connection_string)

# Found in activities.py:
def create_db_connection(host, port, user, password):
    conn_str = f"postgresql://{user}:{password}@{host}:{port}"
    return establish_connection(conn_str)

# ✅ REQUIRE: Extracted shared utility
# In common/utils.py:
def build_database_connection_string(
    host: str,
    port: int,
    username: str,
    password: str,
    database: Optional[str] = None,
    ssl_mode: str = "require"
) -> str:
    """
    Build a standardized database connection string.

    Used consistently across all database clients and activities.
    """
    base_url = f"postgresql://{username}:{password}@{host}:{port}"
    if database:
        base_url += f"/{database}"

    params = []
    if ssl_mode:
        params.append(f"sslmode={ssl_mode}")

    if params:
        base_url += "?" + "&".join(params)

    return base_url

# Other modules import and use the shared utility:
from application_sdk.common.utils import build_database_connection_string
```

## Educational Context for Common Code Reviews

When reviewing common code, emphasize:

1. **Consistency Impact**: "Centralized constants and utilities ensure consistency across the entire SDK. Scattered constants lead to inconsistencies and make global changes nearly impossible."

2. **Maintainability Impact**: "Well-designed utilities reduce code duplication and make the codebase easier to maintain. Changes to common functionality only need to be made in one place."

3. **Error Handling Impact**: "SDK-specific exceptions provide clearer error messages and enable better error handling throughout the application. Generic exceptions hide the root cause and make debugging difficult."

4. **Reusability Impact**: "Properly designed common utilities can be reused across multiple contexts, reducing development time and ensuring consistent behavior."

5. **Performance Impact**: "Shared utilities are called frequently throughout the application. Performance issues in common code have amplified impact across the entire system."

6. **Testing Impact**: "Common utilities require especially thorough testing because they're used in many contexts. Bugs in utilities affect multiple parts of the system simultaneously."
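The Phase 3 guidance in the file above asks for property-based tests (hypothesis) on shared utilities but stops short of an example. A minimal sketch of that pattern follows; `parse_bool_env` is a hypothetical helper written here for illustration, not a function from the SDK.

```python
# Minimal hypothesis-based property test, illustrating the Phase 3 guideline.
# `parse_bool_env` is a hypothetical helper, not part of application_sdk.
from hypothesis import given
from hypothesis import strategies as st


def parse_bool_env(raw: str) -> bool:
    """Mirror of the boolean branch shown in the guidelines' example."""
    return raw.strip().lower() in ("true", "1", "yes", "on")


@given(st.text())
def test_parse_bool_env_never_raises(raw: str) -> None:
    # Property: any input string parses to a bool without raising.
    assert isinstance(parse_bool_env(raw), bool)


@given(st.sampled_from(["true", "1", "yes", "on", "TRUE", " Yes "]))
def test_truthy_spellings_parse_as_true(raw: str) -> None:
    assert parse_bool_env(raw) is True
```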
application_sdk/constants.py
CHANGED
```diff
@@ -59,6 +59,14 @@ WORKFLOW_OUTPUT_PATH_TEMPLATE = (
 # Temporary Path (used to store intermediate files)
 TEMPORARY_PATH = os.getenv("ATLAN_TEMPORARY_PATH", "./local/tmp/")
 
+# Cleanup Paths (custom paths for cleanup operations, supports multiple paths separated by comma)
+# If empty, cleanup activities will default to workflow-specific paths at runtime
+CLEANUP_BASE_PATHS = [
+    path.strip()
+    for path in os.getenv("ATLAN_CLEANUP_BASE_PATHS", "").split(",")
+    if path.strip()
+]
+
 # State Store Constants
 #: Path template for state store files (example: objectstore://bucket/persistent-artifacts/apps/{application_name}/{state_type}/{id}/config.json)
 STATE_STORE_PATH_TEMPLATE = (
```