atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/activities/metadata_extraction/sql.py +400 -25
- application_sdk/application/__init__.py +2 -0
- application_sdk/application/metadata_extraction/sql.py +3 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/models.py +42 -0
- application_sdk/clients/sql.py +127 -87
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/common/aws_utils.py +259 -11
- application_sdk/common/utils.py +145 -9
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/handlers/__init__.py +8 -1
- application_sdk/handlers/sql.py +63 -22
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/observability/decorators/observability_decorator.py +36 -22
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/server/fastapi/__init__.py +59 -3
- application_sdk/server/fastapi/models.py +27 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
application_sdk/common/utils.py
CHANGED
|
@@ -17,8 +17,12 @@ from typing import (
|
|
|
17
17
|
Union,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
20
21
|
from application_sdk.common.error_codes import CommonError
|
|
22
|
+
from application_sdk.constants import TEMPORARY_PATH
|
|
23
|
+
from application_sdk.inputs.sql_query import SQLQueryInput
|
|
21
24
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
25
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
22
26
|
|
|
23
27
|
logger = get_logger(__name__)
|
|
24
28
|
|
|
@@ -106,10 +110,42 @@ def extract_database_names_from_regex_common(
|
|
|
106
110
|
return empty_default
|
|
107
111
|
|
|
108
112
|
|
|
113
|
+
def _split_top_level_alternatives(regex_pattern: str) -> List[str]:
    """Split ``regex_pattern`` on ``|`` characters that act as top-level alternation.

    A ``|`` is NOT treated as a separator when it is escaped (``\\|``), sits
    inside parentheses, or sits inside a bracket expression (``[...]``).

    NOTE(review): a ``]`` placed first inside a bracket expression (``[]a]``)
    is treated as closing the expression; such patterns are not expected in
    include/exclude filters — confirm if that ever changes.
    """
    alternatives: List[str] = []
    current: List[str] = []
    group_depth = 0
    in_bracket = False
    escaped = False

    for char in regex_pattern:
        if escaped:
            # The previous character was a backslash: this one is literal.
            escaped = False
        elif char == "\\":
            escaped = True
        elif in_bracket:
            if char == "]":
                in_bracket = False
        elif char == "[":
            in_bracket = True
        elif char == "(":
            group_depth += 1
        elif char == ")":
            # Clamp so an unbalanced ")" cannot push the depth negative.
            group_depth = max(group_depth - 1, 0)
        elif char == "|" and group_depth == 0:
            alternatives.append("".join(current))
            current = []
            continue
        current.append(char)

    alternatives.append("".join(current))
    return alternatives


def transform_posix_regex(regex_pattern: str) -> str:
    r"""
    Transform regex pattern for POSIX compatibility.

    Rules:
    1. Add ^ before each top-level alternative that does not already start with it.
    2. Add an additional . between \. and * if * follows \.

    Only ``|`` characters that act as top-level alternation separate the
    alternatives; an escaped pipe (``\|``) or a pipe inside ``(...)`` / ``[...]``
    is left untouched so no ``^`` is injected in the middle of a sub-pattern.

    Example: 'dev\.public$|dev\.atlan_test_schema$|wide_world_importers\.*'
    Becomes: '^dev\.public$|^dev\.atlan_test_schema$|^wide_world_importers\..*'

    Args:
        regex_pattern: The normalized regex pattern to transform.

    Returns:
        str: The transformed pattern. A falsy input (empty string or None) is
        returned unchanged.
    """
    if not regex_pattern:
        return regex_pattern

    transformed_patterns = []
    for pattern in _split_top_level_alternatives(regex_pattern):
        # Anchor the alternative at the start unless it is already anchored.
        if not pattern.startswith("^"):
            pattern = "^" + pattern

        # Literal "\.*" (escaped dot followed by star) becomes "\..*".
        transformed_patterns.append(pattern.replace("\\.*", "\\..*"))

    return "|".join(transformed_patterns)
|
|
142
|
+
|
|
143
|
+
|
|
109
144
|
def prepare_query(
|
|
110
145
|
query: Optional[str],
|
|
111
146
|
workflow_args: Dict[str, Any],
|
|
112
147
|
temp_table_regex_sql: Optional[str] = "",
|
|
148
|
+
use_posix_regex: Optional[bool] = False,
|
|
113
149
|
) -> Optional[str]:
|
|
114
150
|
"""
|
|
115
151
|
Prepares a SQL query by applying include and exclude filters, and optional
|
|
@@ -158,6 +194,14 @@ def prepare_query(
|
|
|
158
194
|
include_filter, exclude_filter
|
|
159
195
|
)
|
|
160
196
|
|
|
197
|
+
if use_posix_regex:
|
|
198
|
+
normalized_include_regex_posix = transform_posix_regex(
|
|
199
|
+
normalized_include_regex
|
|
200
|
+
)
|
|
201
|
+
normalized_exclude_regex_posix = transform_posix_regex(
|
|
202
|
+
normalized_exclude_regex
|
|
203
|
+
)
|
|
204
|
+
|
|
161
205
|
# Extract database names from the normalized regex patterns
|
|
162
206
|
include_databases = extract_database_names_from_regex_common(
|
|
163
207
|
normalized_regex=normalized_include_regex,
|
|
@@ -176,15 +220,26 @@ def prepare_query(
|
|
|
176
220
|
)
|
|
177
221
|
exclude_views = workflow_args.get("metadata", {}).get("exclude_views", False)
|
|
178
222
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
223
|
+
if use_posix_regex:
|
|
224
|
+
return query.format(
|
|
225
|
+
include_databases=include_databases,
|
|
226
|
+
exclude_databases=exclude_databases,
|
|
227
|
+
normalized_include_regex=normalized_include_regex_posix,
|
|
228
|
+
normalized_exclude_regex=normalized_exclude_regex_posix,
|
|
229
|
+
temp_table_regex_sql=temp_table_regex_sql,
|
|
230
|
+
exclude_empty_tables=exclude_empty_tables,
|
|
231
|
+
exclude_views=exclude_views,
|
|
232
|
+
)
|
|
233
|
+
else:
|
|
234
|
+
return query.format(
|
|
235
|
+
include_databases=include_databases,
|
|
236
|
+
exclude_databases=exclude_databases,
|
|
237
|
+
normalized_include_regex=normalized_include_regex,
|
|
238
|
+
normalized_exclude_regex=normalized_exclude_regex,
|
|
239
|
+
temp_table_regex_sql=temp_table_regex_sql,
|
|
240
|
+
exclude_empty_tables=exclude_empty_tables,
|
|
241
|
+
exclude_views=exclude_views,
|
|
242
|
+
)
|
|
188
243
|
except CommonError as e:
|
|
189
244
|
# Extract the original error message from the CommonError
|
|
190
245
|
error_message = str(e).split(": ", 1)[-1] if ": " in str(e) else str(e)
|
|
@@ -195,6 +250,47 @@ def prepare_query(
|
|
|
195
250
|
return None
|
|
196
251
|
|
|
197
252
|
|
|
253
|
+
async def get_database_names(
    sql_client, workflow_args: Dict[str, Any], fetch_database_sql: Optional[str]
) -> Optional[List[str]]:
    """
    Get the database names to process for a workflow.

    The names are taken from the keys of the ``include-filter`` in
    ``workflow_args["metadata"]``. When the filter yields no names, the
    ``fetch_database_sql`` query is prepared and executed instead, because an
    empty include-filter means details are fetched for all the databases.

    Args:
        sql_client: Client whose ``engine`` attribute is used to run the query.
        workflow_args: The workflow args (reads ``metadata.include-filter`` and
            ``metadata.temp-table-regex``).
        fetch_database_sql: Query template passed to ``prepare_query``; its
            result must expose a ``database_name`` column.

    Returns:
        List[str]: The database names

    Raises:
        ValueError: If ``prepare_query`` does not produce a query to run.
    """
    metadata = workflow_args.get("metadata", {})
    include_filter = parse_filter_input(metadata.get("include-filter", {}))

    # Filter keys are regex fragments; strip leading/trailing non-word
    # characters (anchors, escapes) to recover the plain database name.
    database_names = [
        re.sub(r"^[^\w]+|[^\w]+$", "", database_name)
        for database_name in include_filter
    ]
    if database_names:
        return database_names

    # if database_names are not provided in the include-filter, we'll run the query to get all the database names
    # because by default for an empty include-filter, we fetch details corresponding to all the databases.
    prepared_query = prepare_query(
        query=fetch_database_sql,
        workflow_args=workflow_args,
        temp_table_regex_sql=metadata.get("temp-table-regex", ""),
        use_posix_regex=True,
    )
    if not prepared_query:
        # prepare_query is typed Optional[str]; fail with a clear message
        # instead of handing None to the SQL input.
        raise ValueError("fetch_database_sql could not be prepared")

    database_sql_input = SQLQueryInput(
        engine=sql_client.engine,
        query=prepared_query,
        chunk_size=None,
    )
    database_dataframe = await database_sql_input.get_dataframe()
    return list(database_dataframe["database_name"])
|
|
292
|
+
|
|
293
|
+
|
|
198
294
|
def parse_filter_input(
|
|
199
295
|
filter_input: Union[str, Dict[str, Any], None],
|
|
200
296
|
) -> Dict[str, Any]:
|
|
@@ -416,6 +512,46 @@ def parse_credentials_extra(credentials: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
416
512
|
return extra # We know it's a Dict[str, Any] due to the Union type and str check
|
|
417
513
|
|
|
418
514
|
|
|
515
|
+
def has_custom_control_config(workflow_args: Dict[str, Any]) -> bool:
    """
    Tell whether the workflow arguments carry a custom control configuration.

    Args:
        workflow_args: The workflow arguments

    Returns:
        bool: True when ``control-config-strategy`` equals ``"custom"`` and a
        ``control-config`` value other than None is present, False otherwise
    """
    # Guard clause: any strategy other than "custom" rules the config out.
    if workflow_args.get("control-config-strategy") != "custom":
        return False

    control_config = workflow_args.get("control-config")
    return control_config is not None
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
async def get_file_names(output_path: str, typename: str) -> List[str]:
    """
    Get file names for a specific asset type from the transformed directory.

    The object-store prefix for ``output_path/typename`` is downloaded to
    ``TEMPORARY_PATH`` first, then ``*.json`` files are globbed under
    ``output_path/typename``.

    NOTE(review): the download target is ``TEMPORARY_PATH`` while the glob runs
    under ``output_path`` — presumably ``output_path`` lives under
    ``TEMPORARY_PATH``; confirm against callers.

    Args:
        output_path (str): The base output path
        typename (str): The asset type (e.g., 'table', 'schema', 'column')

    Returns:
        List[str]: List of relative file paths for the asset type, each made of
        the last two path components joined with "/" (e.g. ``table/1.json``)
    """
    asset_dir = os.path.join(output_path, typename)
    await ObjectStore.download_prefix(
        get_object_store_prefix(asset_dir), TEMPORARY_PATH
    )

    # glob returns OS-native separators; normalise them to "/" before keeping
    # the trailing "<dir>/<file>" pair so the result is the same on every
    # platform (a no-op where os.sep is already "/").
    return [
        "/".join(file_name.replace(os.sep, "/").rsplit("/", 2)[-2:])
        for file_name in glob.glob(os.path.join(asset_dir, "*.json"))
    ]
|
|
553
|
+
|
|
554
|
+
|
|
419
555
|
def run_sync(func):
|
|
420
556
|
"""Run a function in a thread pool executor.
|
|
421
557
|
|
application_sdk/constants.py
CHANGED
|
@@ -59,6 +59,14 @@ WORKFLOW_OUTPUT_PATH_TEMPLATE = (
|
|
|
59
59
|
# Temporary Path (used to store intermediate files)
|
|
60
60
|
TEMPORARY_PATH = os.getenv("ATLAN_TEMPORARY_PATH", "./local/tmp/")
|
|
61
61
|
|
|
62
|
+
# Cleanup Paths: custom base paths for cleanup operations, read from the
# comma-separated ATLAN_CLEANUP_BASE_PATHS environment variable.
# Each entry is whitespace-stripped and blank entries are dropped.
# If empty, cleanup activities will default to workflow-specific paths at runtime
CLEANUP_BASE_PATHS = list(
    filter(None, map(str.strip, os.getenv("ATLAN_CLEANUP_BASE_PATHS", "").split(",")))
)
|
|
69
|
+
|
|
62
70
|
# State Store Constants
|
|
63
71
|
#: Path template for state store files (example: objectstore://bucket/persistent-artifacts/apps/{application_name}/{state_type}/{id}/config.json)
|
|
64
72
|
STATE_STORE_PATH_TEMPLATE = (
|
|
application_sdk/decorators/.cursor/BUGBOT.md
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# Decorator Code Review Guidelines - Centralized Function Decorators
|
|
2
|
+
|
|
3
|
+
## Context-Specific Patterns
|
|
4
|
+
|
|
5
|
+
This directory contains all decorator implementations for the Application SDK. Decorators must be centralized here to avoid scattered functionality and ensure consistent patterns.
|
|
6
|
+
|
|
7
|
+
### Phase 1: Critical Decorator Safety Issues
|
|
8
|
+
|
|
9
|
+
**Decorator Centralization:**
|
|
10
|
+
|
|
11
|
+
- **ALL decorators must be in this directory**: No decorators should exist in other modules (lock/, observability/, etc.)
|
|
12
|
+
- **Consolidate scattered decorators**: If decorators are found elsewhere, they must be moved here
|
|
13
|
+
- **Single responsibility per file**: Each decorator type should have its own file (locks.py, observability_decorator.py)
|
|
14
|
+
- **Proper imports**: Other modules should import decorators from here, not define their own
|
|
15
|
+
|
|
16
|
+
**Type Safety and Function Signatures:**
|
|
17
|
+
|
|
18
|
+
- All decorators must preserve function signatures and type hints
|
|
19
|
+
- Use `functools.wraps` to maintain function metadata
|
|
20
|
+
- Generic decorators must use proper type annotations
|
|
21
|
+
- Return types must match the original function's return type
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
# ✅ DO: Proper decorator type safety
|
|
25
|
+
from typing import Callable, Any, TypeVar, ParamSpec
|
|
26
|
+
from functools import wraps
|
|
27
|
+
|
|
28
|
+
P = ParamSpec('P')
|
|
29
|
+
T = TypeVar('T')
|
|
30
|
+
|
|
31
|
+
def my_decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
32
|
+
@wraps(func)
|
|
33
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
34
|
+
# Decorator logic
|
|
35
|
+
return func(*args, **kwargs)
|
|
36
|
+
return wrapper
|
|
37
|
+
|
|
38
|
+
# ❌ NEVER: Poor type annotations
|
|
39
|
+
def bad_decorator(func): # No type hints
|
|
40
|
+
def wrapper(*args, **kwargs): # No type preservation
|
|
41
|
+
return func(*args, **kwargs)
|
|
42
|
+
return wrapper # Missing @wraps
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Phase 2: Decorator Architecture Patterns
|
|
46
|
+
|
|
47
|
+
**Proper Decorator Structure:**
|
|
48
|
+
|
|
49
|
+
- **Parameterized decorators**: Support both `@decorator` and `@decorator(param=value)` usage patterns
|
|
50
|
+
- **Error handling**: Decorators must not swallow exceptions unless explicitly designed to do so
|
|
51
|
+
- **Resource cleanup**: Decorators that acquire resources must ensure cleanup in finally blocks
|
|
52
|
+
- **Context preservation**: Maintain original function context and metadata
|
|
53
|
+
|
|
54
|
+
**Configuration Management:**
|
|
55
|
+
|
|
56
|
+
- **Centralized constants**: All decorator configuration should use constants from this directory
|
|
57
|
+
- **Shared configuration**: Related decorators should share configuration patterns
|
|
58
|
+
- **Environment awareness**: Decorators should work in both development and production environments
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
# ✅ DO: Proper decorator configuration
|
|
62
|
+
from application_sdk.constants import DEFAULT_LOCK_TTL, DEFAULT_MAX_LOCKS
|
|
63
|
+
|
|
64
|
+
# Shared configuration for lock decorators
|
|
65
|
+
LOCK_CONFIG_KEY = "distributed_lock_config" # Centralized key
|
|
66
|
+
|
|
67
|
+
def distributed_lock(
|
|
68
|
+
lock_name: Optional[str] = None,
|
|
69
|
+
max_locks: int = DEFAULT_MAX_LOCKS,
|
|
70
|
+
ttl_seconds: int = DEFAULT_LOCK_TTL
|
|
71
|
+
):
|
|
72
|
+
"""Distributed lock decorator with proper defaults and configuration."""
|
|
73
|
+
|
|
74
|
+
def decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
75
|
+
@wraps(func)
|
|
76
|
+
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
77
|
+
# Use centralized configuration
|
|
78
|
+
actual_lock_name = lock_name or f"{func.__module__}.{func.__name__}"
|
|
79
|
+
|
|
80
|
+
# Store config in activity context using shared key
|
|
81
|
+
activity_info = activity.info()
|
|
82
|
+
activity_info.memo[LOCK_CONFIG_KEY] = {
|
|
83
|
+
"lock_name": actual_lock_name,
|
|
84
|
+
"max_locks": max_locks,
|
|
85
|
+
"ttl_seconds": ttl_seconds
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return await func(*args, **kwargs)
|
|
89
|
+
return wrapper
|
|
90
|
+
return decorator
|
|
91
|
+
|
|
92
|
+
# ❌ REJECT: Scattered constants and configuration
|
|
93
|
+
def bad_lock_decorator(max_locks=10): # Hardcoded default
|
|
94
|
+
LOCK_KEY = "my_lock_key" # Should be centralized
|
|
95
|
+
# Configuration scattered across files
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Phase 3: Decorator Testing Requirements
|
|
99
|
+
|
|
100
|
+
**Comprehensive Decorator Testing:**
|
|
101
|
+
|
|
102
|
+
- **Function preservation**: Test that decorators preserve original function behavior
|
|
103
|
+
- **Type safety**: Verify type hints are maintained after decoration
|
|
104
|
+
- **Error propagation**: Ensure exceptions are properly handled and propagated
|
|
105
|
+
- **Resource cleanup**: Test cleanup behavior in both success and failure cases
|
|
106
|
+
- **Configuration validation**: Test all configuration parameters and edge cases
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
# ✅ DO: Comprehensive decorator testing
|
|
110
|
+
@pytest.mark.asyncio
|
|
111
|
+
class TestDistributedLockDecorator:
|
|
112
|
+
"""Test suite for distributed lock decorator."""
|
|
113
|
+
|
|
114
|
+
async def test_function_signature_preservation(self):
|
|
115
|
+
"""Test that decorator preserves function signature and types."""
|
|
116
|
+
|
|
117
|
+
@distributed_lock("test_lock")
|
|
118
|
+
async def test_function(param1: str, param2: int = 10) -> dict:
|
|
119
|
+
"""Test function docstring."""
|
|
120
|
+
return {"param1": param1, "param2": param2}
|
|
121
|
+
|
|
122
|
+
# Verify signature preservation
|
|
123
|
+
assert test_function.__name__ == "test_function"
|
|
124
|
+
assert test_function.__doc__ == "Test function docstring."
|
|
125
|
+
|
|
126
|
+
# Verify function still works
|
|
127
|
+
result = await test_function("test", 20)
|
|
128
|
+
assert result == {"param1": "test", "param2": 20}
|
|
129
|
+
|
|
130
|
+
async def test_error_propagation(self):
|
|
131
|
+
"""Test that decorator properly propagates exceptions."""
|
|
132
|
+
|
|
133
|
+
@distributed_lock("error_lock")
|
|
134
|
+
async def failing_function():
|
|
135
|
+
raise ValueError("Test error")
|
|
136
|
+
|
|
137
|
+
# Verify exception is propagated, not swallowed
|
|
138
|
+
with pytest.raises(ValueError, match="Test error"):
|
|
139
|
+
await failing_function()
|
|
140
|
+
|
|
141
|
+
async def test_resource_cleanup_on_failure(self, mock_lock_manager):
|
|
142
|
+
"""Test that resources are cleaned up even when function fails."""
|
|
143
|
+
|
|
144
|
+
@distributed_lock("cleanup_test")
|
|
145
|
+
async def failing_function():
|
|
146
|
+
raise RuntimeError("Simulated failure")
|
|
147
|
+
|
|
148
|
+
mock_lock_manager.acquire_lock.return_value.__aenter__ = AsyncMock()
|
|
149
|
+
mock_lock_manager.acquire_lock.return_value.__aexit__ = AsyncMock()
|
|
150
|
+
|
|
151
|
+
with pytest.raises(RuntimeError):
|
|
152
|
+
await failing_function()
|
|
153
|
+
|
|
154
|
+
# Verify cleanup was called
|
|
155
|
+
mock_lock_manager.acquire_lock.return_value.__aexit__.assert_called_once()
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Phase 4: Performance and Integration
|
|
159
|
+
|
|
160
|
+
**Decorator Performance:**
|
|
161
|
+
|
|
162
|
+
- **Minimal overhead**: Decorators should add minimal performance overhead
|
|
163
|
+
- **Async compatibility**: All decorators must work correctly with async functions
|
|
164
|
+
- **Context manager efficiency**: Use efficient context managers for resource management
|
|
165
|
+
- **Caching**: Cache expensive decorator setup operations where appropriate
|
|
166
|
+
|
|
167
|
+
**Integration Patterns:**
|
|
168
|
+
|
|
169
|
+
- **Temporal integration**: Decorators must work correctly with Temporal activities and workflows
|
|
170
|
+
- **Observability integration**: Integrate with logging, metrics, and tracing systems
|
|
171
|
+
- **Error handling integration**: Work correctly with the SDK's error handling patterns
|
|
172
|
+
|
|
173
|
+
### Phase 5: Decorator Maintainability
|
|
174
|
+
|
|
175
|
+
**Code Organization:**
|
|
176
|
+
|
|
177
|
+
- **One decorator type per file**: Group decorators by type, with one file per type (e.g. all lock decorators in locks.py)
|
|
178
|
+
- **Clear naming**: Decorator files should clearly indicate their purpose
|
|
179
|
+
- **Consistent patterns**: All decorators should follow the same structural patterns
|
|
180
|
+
- **Documentation**: Each decorator must have comprehensive docstrings with usage examples
|
|
181
|
+
|
|
182
|
+
**Backwards Compatibility:**
|
|
183
|
+
|
|
184
|
+
- **API stability**: Decorator APIs should be stable across versions
|
|
185
|
+
- **Graceful deprecation**: Deprecated decorators should include migration guidance
|
|
186
|
+
- **Version compatibility**: Support existing usage patterns when adding new features
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Decorator-Specific Anti-Patterns
|
|
191
|
+
|
|
192
|
+
**Always Reject:**
|
|
193
|
+
|
|
194
|
+
- **Scattered decorators**: Decorators defined outside this directory
|
|
195
|
+
- **Missing type safety**: Decorators without proper type annotations
|
|
196
|
+
- **Resource leaks**: Decorators that don't clean up resources properly
|
|
197
|
+
- **Exception swallowing**: Decorators that hide exceptions unintentionally
|
|
198
|
+
- **Poor configuration**: Hardcoded values that should be configurable
|
|
199
|
+
- **No function preservation**: Decorators that don't preserve original function metadata
|
|
200
|
+
|
|
201
|
+
**Centralization Anti-Patterns:**
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
# ❌ REJECT: Decorators in wrong locations
|
|
205
|
+
# Found in application_sdk/lock/__init__.py
|
|
206
|
+
def needs_lock(max_locks=10):
|
|
207
|
+
"""Should be in decorators/locks.py instead"""
|
|
208
|
+
|
|
209
|
+
# Found in application_sdk/observability/some_module.py
|
|
210
|
+
def trace_activity(func):
|
|
211
|
+
"""Should be in decorators/observability_decorator.py"""
|
|
212
|
+
|
|
213
|
+
# ✅ REQUIRE: Centralized decorators
|
|
214
|
+
# In application_sdk/decorators/locks.py
|
|
215
|
+
def needs_lock(max_locks: int = DEFAULT_MAX_LOCKS):
|
|
216
|
+
"""Properly located distributed lock decorator"""
|
|
217
|
+
|
|
218
|
+
# In application_sdk/decorators/observability_decorator.py
|
|
219
|
+
def observability(logger=None, metrics=None, traces=None):
|
|
220
|
+
"""Properly located observability decorator"""
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Type Safety Anti-Patterns:**
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
# ❌ REJECT: Poor type safety
|
|
227
|
+
def bad_decorator(func): # No type annotations
|
|
228
|
+
def wrapper(*args, **kwargs): # No parameter specifications
|
|
229
|
+
return func(*args, **kwargs)
|
|
230
|
+
return wrapper # Missing @wraps, no return type
|
|
231
|
+
|
|
232
|
+
# ✅ REQUIRE: Proper type safety
|
|
233
|
+
from typing import Callable, TypeVar, ParamSpec
|
|
234
|
+
from functools import wraps
|
|
235
|
+
|
|
236
|
+
P = ParamSpec('P')
|
|
237
|
+
T = TypeVar('T')
|
|
238
|
+
|
|
239
|
+
def good_decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
240
|
+
@wraps(func)
|
|
241
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
242
|
+
return func(*args, **kwargs)
|
|
243
|
+
return wrapper
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
**Configuration Anti-Patterns:**
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
# ❌ REJECT: Scattered configuration
|
|
250
|
+
# Different files using different keys for same concept
|
|
251
|
+
LOCK_KEY_1 = "lock_config" # In locks.py
|
|
252
|
+
LOCK_KEY_2 = "distributed_lock" # In interceptors.py
|
|
253
|
+
DEFAULT_TTL = 300 # Hardcoded in decorator
|
|
254
|
+
|
|
255
|
+
# ✅ REQUIRE: Centralized configuration
|
|
256
|
+
# In application_sdk/constants.py
|
|
257
|
+
DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
|
|
258
|
+
DEFAULT_LOCK_TTL = 300
|
|
259
|
+
DEFAULT_MAX_LOCKS = 10
|
|
260
|
+
|
|
261
|
+
# In decorators using shared constants
|
|
262
|
+
from application_sdk.constants import DISTRIBUTED_LOCK_CONFIG_KEY, DEFAULT_LOCK_TTL
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Educational Context for Decorator Reviews
|
|
266
|
+
|
|
267
|
+
When reviewing decorator code, emphasize:
|
|
268
|
+
|
|
269
|
+
1. **Centralization Impact**: "Scattered decorators create maintenance nightmares. When the same decorator logic appears in multiple places, bugs get fixed in some places but not others. Centralization ensures consistency and reduces maintenance burden."
|
|
270
|
+
|
|
271
|
+
2. **Type Safety Impact**: "Decorators that don't preserve type information break IDE support, static analysis, and developer productivity. Proper type annotations are essential for maintaining code quality in large codebases."
|
|
272
|
+
|
|
273
|
+
3. **Resource Management Impact**: "Decorators often manage resources (locks, connections, contexts). Poor resource management in decorators can cause system-wide issues because they're used across many functions."
|
|
274
|
+
|
|
275
|
+
4. **Function Preservation Impact**: "Decorators that don't preserve original function metadata break debugging, introspection, and documentation tools. Using @functools.wraps is not optional."
|
|
276
|
+
|
|
277
|
+
5. **Testing Impact**: "Decorators are cross-cutting concerns that affect many functions. Bugs in decorators have amplified impact, making thorough testing especially critical."
|
|
278
|
+
|
|
279
|
+
6. **Performance Impact**: "Decorators add overhead to every function call they wrap. Inefficient decorators can degrade system performance across the entire application."
|
|
application_sdk/handlers/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any, Dict
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class HandlerInterface(ABC):
|
|
@@ -37,3 +37,10 @@ class HandlerInterface(ABC):
|
|
|
37
37
|
To be implemented by the subclass
|
|
38
38
|
"""
|
|
39
39
|
raise NotImplementedError("fetch_metadata method not implemented")
|
|
40
|
+
|
|
41
|
+
@staticmethod
|
|
42
|
+
async def get_configmap(config_map_id: str) -> Dict[str, Any]:
|
|
43
|
+
"""
|
|
44
|
+
Static method to get the configmap
|
|
45
|
+
"""
|
|
46
|
+
return {}
|
application_sdk/handlers/sql.py
CHANGED
|
@@ -56,9 +56,13 @@ class BaseSQLHandler(HandlerInterface):
|
|
|
56
56
|
schema_alias_key: str = SQLConstants.SCHEMA_ALIAS_KEY.value
|
|
57
57
|
database_result_key: str = SQLConstants.DATABASE_RESULT_KEY.value
|
|
58
58
|
schema_result_key: str = SQLConstants.SCHEMA_RESULT_KEY.value
|
|
59
|
+
multidb: bool = False
|
|
59
60
|
|
|
60
|
-
def __init__(
|
|
61
|
+
def __init__(
    self, sql_client: BaseSQLClient | None = None, multidb: Optional[bool] = False
):
    """
    Store the SQL client and the multi-database flag on the handler.

    Args:
        sql_client: Client used by the handler; may be None.
        multidb: Whether the handler runs in multidb mode. ``None`` is
            accepted and treated as False.
    """
    self.sql_client = sql_client
    # The class-level attribute is declared as ``bool``; coerce so an explicit
    # ``multidb=None`` cannot leave a None on the instance.
    self.multidb = bool(multidb)
|
|
62
66
|
|
|
63
67
|
async def load(self, credentials: Dict[str, Any]) -> None:
|
|
64
68
|
"""
|
|
@@ -294,35 +298,26 @@ class BaseSQLHandler(HandlerInterface):
|
|
|
294
298
|
return False, f"{db}.{sch} schema"
|
|
295
299
|
return True, ""
|
|
296
300
|
|
|
297
|
-
async def tables_check(
|
|
298
|
-
self,
|
|
299
|
-
payload: Dict[str, Any],
|
|
300
|
-
) -> Dict[str, Any]:
|
|
301
|
+
async def tables_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
301
302
|
"""
|
|
302
303
|
Method to check the count of tables
|
|
303
304
|
"""
|
|
304
305
|
logger.info("Starting tables check")
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
engine=self.sql_client.engine, query=query, chunk_size=None
|
|
314
|
-
)
|
|
315
|
-
sql_input = await sql_input.get_dataframe()
|
|
316
|
-
try:
|
|
317
|
-
result = 0
|
|
318
|
-
for row in sql_input.to_dict(orient="records"):
|
|
319
|
-
result += row["count"]
|
|
306
|
+
|
|
307
|
+
def _sum_counts_from_records(records_iter) -> int:
|
|
308
|
+
total = 0
|
|
309
|
+
for row in records_iter:
|
|
310
|
+
total += row["count"]
|
|
311
|
+
return total
|
|
312
|
+
|
|
313
|
+
def _build_success(total: int) -> Dict[str, Any]:
|
|
320
314
|
return {
|
|
321
315
|
"success": True,
|
|
322
|
-
"successMessage": f"Tables check successful. Table count: {
|
|
316
|
+
"successMessage": f"Tables check successful. Table count: {total}",
|
|
323
317
|
"failureMessage": "",
|
|
324
318
|
}
|
|
325
|
-
|
|
319
|
+
|
|
320
|
+
def _build_failure(exc: Exception) -> Dict[str, Any]:
|
|
326
321
|
logger.error("Error during tables check", exc_info=True)
|
|
327
322
|
return {
|
|
328
323
|
"success": False,
|
|
@@ -331,6 +326,52 @@ class BaseSQLHandler(HandlerInterface):
|
|
|
331
326
|
"error": str(exc),
|
|
332
327
|
}
|
|
333
328
|
|
|
329
|
+
if self.multidb:
|
|
330
|
+
try:
|
|
331
|
+
from application_sdk.activities.metadata_extraction.sql import (
|
|
332
|
+
BaseSQLMetadataExtractionActivities,
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# Use the base query executor in multidb mode to get concatenated df
|
|
336
|
+
activities = BaseSQLMetadataExtractionActivities()
|
|
337
|
+
activities.multidb = True
|
|
338
|
+
concatenated_df = await activities.query_executor(
|
|
339
|
+
sql_engine=self.sql_client.engine if self.sql_client else None,
|
|
340
|
+
sql_query=self.tables_check_sql,
|
|
341
|
+
workflow_args=payload,
|
|
342
|
+
output_suffix="raw/table",
|
|
343
|
+
typename="table",
|
|
344
|
+
write_to_file=False,
|
|
345
|
+
concatenate=True,
|
|
346
|
+
return_dataframe=True,
|
|
347
|
+
sql_client=self.sql_client,
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
if concatenated_df is None:
|
|
351
|
+
return _build_success(0)
|
|
352
|
+
|
|
353
|
+
total = int(concatenated_df["count"].sum()) # type: ignore[index]
|
|
354
|
+
return _build_success(total)
|
|
355
|
+
except Exception as exc:
|
|
356
|
+
return _build_failure(exc)
|
|
357
|
+
else:
|
|
358
|
+
query = prepare_query(
|
|
359
|
+
query=self.tables_check_sql,
|
|
360
|
+
workflow_args=payload,
|
|
361
|
+
temp_table_regex_sql=self.extract_temp_table_regex_table_sql,
|
|
362
|
+
)
|
|
363
|
+
if not query:
|
|
364
|
+
raise ValueError("tables_check_sql is not defined")
|
|
365
|
+
sql_input = SQLQueryInput(
|
|
366
|
+
engine=self.sql_client.engine, query=query, chunk_size=None
|
|
367
|
+
)
|
|
368
|
+
sql_input = await sql_input.get_dataframe()
|
|
369
|
+
try:
|
|
370
|
+
total = _sum_counts_from_records(sql_input.to_dict(orient="records"))
|
|
371
|
+
return _build_success(total)
|
|
372
|
+
except Exception as exc:
|
|
373
|
+
return _build_failure(exc)
|
|
374
|
+
|
|
334
375
|
async def check_client_version(self) -> Dict[str, Any]:
|
|
335
376
|
"""
|
|
336
377
|
Check if the client version meets the minimum required version.
|