atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. application_sdk/activities/.cursor/BUGBOT.md +424 -0
  2. application_sdk/activities/metadata_extraction/sql.py +400 -25
  3. application_sdk/application/__init__.py +2 -0
  4. application_sdk/application/metadata_extraction/sql.py +3 -0
  5. application_sdk/clients/.cursor/BUGBOT.md +280 -0
  6. application_sdk/clients/models.py +42 -0
  7. application_sdk/clients/sql.py +127 -87
  8. application_sdk/clients/temporal.py +3 -1
  9. application_sdk/common/.cursor/BUGBOT.md +316 -0
  10. application_sdk/common/aws_utils.py +259 -11
  11. application_sdk/common/utils.py +145 -9
  12. application_sdk/constants.py +8 -0
  13. application_sdk/decorators/.cursor/BUGBOT.md +279 -0
  14. application_sdk/handlers/__init__.py +8 -1
  15. application_sdk/handlers/sql.py +63 -22
  16. application_sdk/inputs/.cursor/BUGBOT.md +250 -0
  17. application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
  18. application_sdk/interceptors/cleanup.py +171 -0
  19. application_sdk/interceptors/events.py +6 -6
  20. application_sdk/observability/decorators/observability_decorator.py +36 -22
  21. application_sdk/outputs/.cursor/BUGBOT.md +295 -0
  22. application_sdk/outputs/iceberg.py +4 -0
  23. application_sdk/outputs/json.py +6 -0
  24. application_sdk/outputs/parquet.py +13 -3
  25. application_sdk/server/.cursor/BUGBOT.md +442 -0
  26. application_sdk/server/fastapi/__init__.py +59 -3
  27. application_sdk/server/fastapi/models.py +27 -0
  28. application_sdk/services/objectstore.py +16 -3
  29. application_sdk/version.py +1 -1
  30. application_sdk/workflows/.cursor/BUGBOT.md +218 -0
  31. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
  32. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
  33. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
  34. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
  35. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
@@ -17,8 +17,12 @@ from typing import (
     Union,
 )
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.error_codes import CommonError
+from application_sdk.constants import TEMPORARY_PATH
+from application_sdk.inputs.sql_query import SQLQueryInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 
@@ -106,10 +110,42 @@ def extract_database_names_from_regex_common(
     return empty_default
 
 
+def transform_posix_regex(regex_pattern: str) -> str:
+    r"""
+    Transform regex pattern for POSIX compatibility.
+
+    Rules:
+    1. Add ^ before each database name before \.
+    2. Add an additional . between \. and * if * follows \.
+
+    Example: 'dev\.public$|dev\.atlan_test_schema$|wide_world_importers\.*'
+    Becomes: '^dev\.public$|^dev\.atlan_test_schema$|^wide_world_importers\..*'
+    """
+    if not regex_pattern:
+        return regex_pattern
+
+    # Split by | to handle each pattern separately
+    patterns = regex_pattern.split("|")
+    transformed_patterns = []
+
+    for pattern in patterns:
+        # Add ^ at the beginning if it's not already there
+        if not pattern.startswith("^"):
+            pattern = "^" + pattern
+
+        # Add additional . between \. and * if * follows \.
+        pattern = re.sub(r"\\\.\*", r"\..*", pattern)
+
+        transformed_patterns.append(pattern)
+
+    return "|".join(transformed_patterns)
+
+
 def prepare_query(
     query: Optional[str],
     workflow_args: Dict[str, Any],
     temp_table_regex_sql: Optional[str] = "",
+    use_posix_regex: Optional[bool] = False,
 ) -> Optional[str]:
     """
     Prepares a SQL query by applying include and exclude filters, and optional
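For reference, a quick check of the rules the new helper documents; the import path is inferred from the `application_sdk/common/utils.py` entry in the file list above, so treat it as an assumption rather than documented API:

```python
# Standalone check of transform_posix_regex's documented rules: anchor each
# alternative with ^ and expand a trailing \.* into \..* (import path assumed).
from application_sdk.common.utils import transform_posix_regex

print(transform_posix_regex(r"dev\.public$|wide_world_importers\.*"))
# Expected, per the docstring: ^dev\.public$|^wide_world_importers\..*
```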
@@ -158,6 +194,14 @@ def prepare_query(
             include_filter, exclude_filter
         )
 
+        if use_posix_regex:
+            normalized_include_regex_posix = transform_posix_regex(
+                normalized_include_regex
+            )
+            normalized_exclude_regex_posix = transform_posix_regex(
+                normalized_exclude_regex
+            )
+
         # Extract database names from the normalized regex patterns
         include_databases = extract_database_names_from_regex_common(
             normalized_regex=normalized_include_regex,
@@ -176,15 +220,26 @@ def prepare_query(
         )
         exclude_views = workflow_args.get("metadata", {}).get("exclude_views", False)
 
-        return query.format(
-            include_databases=include_databases,
-            exclude_databases=exclude_databases,
-            normalized_include_regex=normalized_include_regex,
-            normalized_exclude_regex=normalized_exclude_regex,
-            temp_table_regex_sql=temp_table_regex_sql,
-            exclude_empty_tables=exclude_empty_tables,
-            exclude_views=exclude_views,
-        )
+        if use_posix_regex:
+            return query.format(
+                include_databases=include_databases,
+                exclude_databases=exclude_databases,
+                normalized_include_regex=normalized_include_regex_posix,
+                normalized_exclude_regex=normalized_exclude_regex_posix,
+                temp_table_regex_sql=temp_table_regex_sql,
+                exclude_empty_tables=exclude_empty_tables,
+                exclude_views=exclude_views,
+            )
+        else:
+            return query.format(
+                include_databases=include_databases,
+                exclude_databases=exclude_databases,
+                normalized_include_regex=normalized_include_regex,
+                normalized_exclude_regex=normalized_exclude_regex,
+                temp_table_regex_sql=temp_table_regex_sql,
+                exclude_empty_tables=exclude_empty_tables,
+                exclude_views=exclude_views,
+            )
     except CommonError as e:
         # Extract the original error message from the CommonError
         error_message = str(e).split(": ", 1)[-1] if ": " in str(e) else str(e)
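A hypothetical call illustrating the new flag: with `use_posix_regex=True`, the POSIX-transformed patterns are substituted into the template instead of the originals. The template below is made up for illustration, and the import path is assumed:

```python
# Hypothetical usage of prepare_query's new use_posix_regex flag.
# The query template is an illustrative assumption, not SDK-provided SQL.
from application_sdk.common.utils import prepare_query

template = (
    "SELECT table_catalog, table_schema FROM information_schema.tables "
    "WHERE concat(table_catalog, '.', table_schema) ~ '{normalized_include_regex}'"
)

sql = prepare_query(
    query=template,
    workflow_args={"metadata": {}},  # empty filters: defaults apply
    use_posix_regex=True,  # substitute the ^-anchored, \..* expanded patterns
)
```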
@@ -195,6 +250,47 @@ def prepare_query(
         return None
 
 
+async def get_database_names(
+    sql_client, workflow_args, fetch_database_sql
+) -> Optional[List[str]]:
+    """
+    Get the database names from the workflow args if include-filter is present
+    Args:
+        workflow_args: The workflow args
+    Returns:
+        List[str]: The database names
+    """
+    database_names = parse_filter_input(
+        workflow_args.get("metadata", {}).get("include-filter", {})
+    )
+
+    database_names = [
+        re.sub(r"^[^\w]+|[^\w]+$", "", database_name)
+        for database_name in database_names
+    ]
+    if not database_names:
+        # if database_names are not provided in the include-filter, we'll run the query to get all the database names
+        # because by default for an empty include-filter, we fetch details corresponding to all the databases.
+        temp_table_regex_sql = workflow_args.get("metadata", {}).get(
+            "temp-table-regex", ""
+        )
+        prepared_query = prepare_query(
+            query=fetch_database_sql,
+            workflow_args=workflow_args,
+            temp_table_regex_sql=temp_table_regex_sql,
+            use_posix_regex=True,
+        )
+        # We'll run the query to get all the database names
+        database_sql_input = SQLQueryInput(
+            engine=sql_client.engine,
+            query=prepared_query,  # type: ignore
+            chunk_size=None,
+        )
+        database_dataframe = await database_sql_input.get_dataframe()
+        database_names = list(database_dataframe["database_name"])
+    return database_names
+
+
 def parse_filter_input(
     filter_input: Union[str, Dict[str, Any], None],
 ) -> Dict[str, Any]:
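The `re.sub` in `get_database_names` trims non-word characters from both ends of each filter key, so anchored or quoted database names reduce to bare identifiers. A standalone illustration (the sample inputs are made up):

```python
# Standalone illustration of the name-stripping step in get_database_names.
import re

for raw in ["^dev$", '"analytics"', "wide_world_importers"]:
    print(re.sub(r"^[^\w]+|[^\w]+$", "", raw))
# -> dev / analytics / wide_world_importers
```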
@@ -416,6 +512,46 @@ def parse_credentials_extra(credentials: Dict[str, Any]) -> Dict[str, Any]:
     return extra  # We know it's a Dict[str, Any] due to the Union type and str check
 
 
+def has_custom_control_config(workflow_args: Dict[str, Any]) -> bool:
+    """
+    Check if custom control configuration is present in workflow arguments.
+
+    Args:
+        workflow_args: The workflow arguments
+
+    Returns:
+        bool: True if custom control configuration is present, False otherwise
+    """
+    return (
+        workflow_args.get("control-config-strategy") == "custom"
+        and workflow_args.get("control-config") is not None
+    )
+
+
+async def get_file_names(output_path: str, typename: str) -> List[str]:
+    """
+    Get file names for a specific asset type from the transformed directory.
+
+    Args:
+        output_path (str): The base output path
+        typename (str): The asset type (e.g., 'table', 'schema', 'column')
+
+    Returns:
+        List[str]: List of relative file paths for the asset type
+    """
+
+    source = get_object_store_prefix(os.path.join(output_path, typename))
+    await ObjectStore.download_prefix(source, TEMPORARY_PATH)
+
+    file_pattern = os.path.join(output_path, typename, "*.json")
+    file_names = glob.glob(file_pattern)
+    file_name_list = [
+        "/".join(file_name.rsplit("/", 2)[-2:]) for file_name in file_names
+    ]
+
+    return file_name_list
+
+
 def run_sync(func):
     """Run a function in a thread pool executor.
 
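The `rsplit` trick in `get_file_names` keeps only the last two path segments (the typename directory and the file name), producing paths relative to the output root. A standalone sketch with a made-up path:

```python
# Standalone sketch of the relative-path derivation in get_file_names.
file_name = "./local/tmp/output/transformed/table/chunk-0.json"  # made-up path
print("/".join(file_name.rsplit("/", 2)[-2:]))
# -> table/chunk-0.json
```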
@@ -59,6 +59,14 @@ WORKFLOW_OUTPUT_PATH_TEMPLATE = (
 # Temporary Path (used to store intermediate files)
 TEMPORARY_PATH = os.getenv("ATLAN_TEMPORARY_PATH", "./local/tmp/")
 
+# Cleanup Paths (custom paths for cleanup operations, supports multiple paths separated by comma)
+# If empty, cleanup activities will default to workflow-specific paths at runtime
+CLEANUP_BASE_PATHS = [
+    path.strip()
+    for path in os.getenv("ATLAN_CLEANUP_BASE_PATHS", "").split(",")
+    if path.strip()
+]
+
 # State Store Constants
 #: Path template for state store files (example: objectstore://bucket/persistent-artifacts/apps/{application_name}/{state_type}/{id}/config.json)
 STATE_STORE_PATH_TEMPLATE = (
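The new constant parses a comma-separated environment variable, trimming whitespace and dropping empty entries. A standalone sketch with a made-up value:

```python
# Standalone sketch of the ATLAN_CLEANUP_BASE_PATHS parsing (value is made up).
import os

os.environ["ATLAN_CLEANUP_BASE_PATHS"] = " ./local/tmp/a , ./local/tmp/b ,"
cleanup_base_paths = [
    path.strip()
    for path in os.getenv("ATLAN_CLEANUP_BASE_PATHS", "").split(",")
    if path.strip()
]
print(cleanup_base_paths)  # -> ['./local/tmp/a', './local/tmp/b']
```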
@@ -0,0 +1,279 @@
+# Decorator Code Review Guidelines - Centralized Function Decorators
+
+## Context-Specific Patterns
+
+This directory contains all decorator implementations for the Application SDK. Decorators must be centralized here to avoid scattered functionality and ensure consistent patterns.
+
+### Phase 1: Critical Decorator Safety Issues
+
+**Decorator Centralization:**
+
+- **ALL decorators must be in this directory**: No decorators should exist in other modules (lock/, observability/, etc.)
+- **Consolidate scattered decorators**: If decorators are found elsewhere, they must be moved here
+- **Single responsibility per file**: Each decorator type should have its own file (locks.py, observability_decorator.py)
+- **Proper imports**: Other modules should import decorators from here, not define their own
+
+**Type Safety and Function Signatures:**
+
+- All decorators must preserve function signatures and type hints
+- Use `functools.wraps` to maintain function metadata
+- Generic decorators must use proper type annotations
+- Return types must match the original function's return type
+
+```python
+# ✅ DO: Proper decorator type safety
+from typing import Callable, Any, TypeVar, ParamSpec
+from functools import wraps
+
+P = ParamSpec('P')
+T = TypeVar('T')
+
+def my_decorator(func: Callable[P, T]) -> Callable[P, T]:
+    @wraps(func)
+    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
+        # Decorator logic
+        return func(*args, **kwargs)
+    return wrapper
+
+# ❌ NEVER: Poor type annotations
+def bad_decorator(func):  # No type hints
+    def wrapper(*args, **kwargs):  # No type preservation
+        return func(*args, **kwargs)
+    return wrapper  # Missing @wraps
+```
+
+### Phase 2: Decorator Architecture Patterns
+
+**Proper Decorator Structure:**
+
+- **Parameterized decorators**: Support both `@decorator` and `@decorator(param=value)` usage patterns
+- **Error handling**: Decorators must not swallow exceptions unless explicitly designed to do so
+- **Resource cleanup**: Decorators that acquire resources must ensure cleanup in finally blocks
+- **Context preservation**: Maintain original function context and metadata
+
+**Configuration Management:**
+
+- **Centralized constants**: All decorator configuration should use constants from this directory
+- **Shared configuration**: Related decorators should share configuration patterns
+- **Environment awareness**: Decorators should work in both development and production environments
+
+```python
+# ✅ DO: Proper decorator configuration
+from application_sdk.constants import DEFAULT_LOCK_TTL, DEFAULT_MAX_LOCKS
+
+# Shared configuration for lock decorators
+LOCK_CONFIG_KEY = "distributed_lock_config"  # Centralized key
+
+def distributed_lock(
+    lock_name: Optional[str] = None,
+    max_locks: int = DEFAULT_MAX_LOCKS,
+    ttl_seconds: int = DEFAULT_LOCK_TTL
+):
+    """Distributed lock decorator with proper defaults and configuration."""
+
+    def decorator(func: Callable[P, T]) -> Callable[P, T]:
+        @wraps(func)
+        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
+            # Use centralized configuration
+            actual_lock_name = lock_name or f"{func.__module__}.{func.__name__}"
+
+            # Store config in activity context using shared key
+            activity_info = activity.info()
+            activity_info.memo[LOCK_CONFIG_KEY] = {
+                "lock_name": actual_lock_name,
+                "max_locks": max_locks,
+                "ttl_seconds": ttl_seconds
+            }
+
+            return await func(*args, **kwargs)
+        return wrapper
+    return decorator
+
+# ❌ REJECT: Scattered constants and configuration
+def bad_lock_decorator(max_locks=10):  # Hardcoded default
+    LOCK_KEY = "my_lock_key"  # Should be centralized
+    # Configuration scattered across files
+```
+
+### Phase 3: Decorator Testing Requirements
+
+**Comprehensive Decorator Testing:**
+
+- **Function preservation**: Test that decorators preserve original function behavior
+- **Type safety**: Verify type hints are maintained after decoration
+- **Error propagation**: Ensure exceptions are properly handled and propagated
+- **Resource cleanup**: Test cleanup behavior in both success and failure cases
+- **Configuration validation**: Test all configuration parameters and edge cases
+
+```python
+# ✅ DO: Comprehensive decorator testing
+@pytest.mark.asyncio
+class TestDistributedLockDecorator:
+    """Test suite for distributed lock decorator."""
+
+    async def test_function_signature_preservation(self):
+        """Test that decorator preserves function signature and types."""
+
+        @distributed_lock("test_lock")
+        async def test_function(param1: str, param2: int = 10) -> dict:
+            """Test function docstring."""
+            return {"param1": param1, "param2": param2}
+
+        # Verify signature preservation
+        assert test_function.__name__ == "test_function"
+        assert test_function.__doc__ == "Test function docstring."
+
+        # Verify function still works
+        result = await test_function("test", 20)
+        assert result == {"param1": "test", "param2": 20}
+
+    async def test_error_propagation(self):
+        """Test that decorator properly propagates exceptions."""
+
+        @distributed_lock("error_lock")
+        async def failing_function():
+            raise ValueError("Test error")
+
+        # Verify exception is propagated, not swallowed
+        with pytest.raises(ValueError, match="Test error"):
+            await failing_function()
+
+    async def test_resource_cleanup_on_failure(self, mock_lock_manager):
+        """Test that resources are cleaned up even when function fails."""
+
+        @distributed_lock("cleanup_test")
+        async def failing_function():
+            raise RuntimeError("Simulated failure")
+
+        mock_lock_manager.acquire_lock.return_value.__aenter__ = AsyncMock()
+        mock_lock_manager.acquire_lock.return_value.__aexit__ = AsyncMock()
+
+        with pytest.raises(RuntimeError):
+            await failing_function()
+
+        # Verify cleanup was called
+        mock_lock_manager.acquire_lock.return_value.__aexit__.assert_called_once()
+```
+
+### Phase 4: Performance and Integration
+
+**Decorator Performance:**
+
+- **Minimal overhead**: Decorators should add minimal performance overhead
+- **Async compatibility**: All decorators must work correctly with async functions
+- **Context manager efficiency**: Use efficient context managers for resource management
+- **Caching**: Cache expensive decorator setup operations where appropriate
+
+**Integration Patterns:**
+
+- **Temporal integration**: Decorators must work correctly with Temporal activities and workflows
+- **Observability integration**: Integrate with logging, metrics, and tracing systems
+- **Error handling integration**: Work correctly with the SDK's error handling patterns
+
+### Phase 5: Decorator Maintainability
+
+**Code Organization:**
+
+- **One decorator type per file**: Keep related decorators together (all lock decorators in locks.py)
+- **Clear naming**: Decorator files should clearly indicate their purpose
+- **Consistent patterns**: All decorators should follow the same structural patterns
+- **Documentation**: Each decorator must have comprehensive docstrings with usage examples
+
+**Backwards Compatibility:**
+
+- **API stability**: Decorator APIs should be stable across versions
+- **Graceful deprecation**: Deprecated decorators should include migration guidance
+- **Version compatibility**: Support existing usage patterns when adding new features
+
+---
+
+## Decorator-Specific Anti-Patterns
+
+**Always Reject:**
+
+- **Scattered decorators**: Decorators defined outside this directory
+- **Missing type safety**: Decorators without proper type annotations
+- **Resource leaks**: Decorators that don't clean up resources properly
+- **Exception swallowing**: Decorators that hide exceptions unintentionally
+- **Poor configuration**: Hardcoded values that should be configurable
+- **No function preservation**: Decorators that don't preserve original function metadata
+
+**Centralization Anti-Patterns:**
+
+```python
+# ❌ REJECT: Decorators in wrong locations
+# Found in application_sdk/lock/__init__.py
+def needs_lock(max_locks=10):
+    """Should be in decorators/locks.py instead"""
+
+# Found in application_sdk/observability/some_module.py
+def trace_activity(func):
+    """Should be in decorators/observability_decorator.py"""
+
+# ✅ REQUIRE: Centralized decorators
+# In application_sdk/decorators/locks.py
+def needs_lock(max_locks: int = DEFAULT_MAX_LOCKS):
+    """Properly located distributed lock decorator"""
+
+# In application_sdk/decorators/observability_decorator.py
+def observability(logger=None, metrics=None, traces=None):
+    """Properly located observability decorator"""
+```
+
+**Type Safety Anti-Patterns:**
+
+```python
+# ❌ REJECT: Poor type safety
+def bad_decorator(func):  # No type annotations
+    def wrapper(*args, **kwargs):  # No parameter specifications
+        return func(*args, **kwargs)
+    return wrapper  # Missing @wraps, no return type
+
+# ✅ REQUIRE: Proper type safety
+from typing import Callable, TypeVar, ParamSpec
+from functools import wraps
+
+P = ParamSpec('P')
+T = TypeVar('T')
+
+def good_decorator(func: Callable[P, T]) -> Callable[P, T]:
+    @wraps(func)
+    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
+        return func(*args, **kwargs)
+    return wrapper
+```
+
+**Configuration Anti-Patterns:**
+
+```python
+# ❌ REJECT: Scattered configuration
+# Different files using different keys for same concept
+LOCK_KEY_1 = "lock_config"  # In locks.py
+LOCK_KEY_2 = "distributed_lock"  # In interceptors.py
+DEFAULT_TTL = 300  # Hardcoded in decorator
+
+# ✅ REQUIRE: Centralized configuration
+# In application_sdk/constants.py
+DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
+DEFAULT_LOCK_TTL = 300
+DEFAULT_MAX_LOCKS = 10
+
+# In decorators using shared constants
+from application_sdk.constants import DISTRIBUTED_LOCK_CONFIG_KEY, DEFAULT_LOCK_TTL
+```
+
+## Educational Context for Decorator Reviews
+
+When reviewing decorator code, emphasize:
+
+1. **Centralization Impact**: "Scattered decorators create maintenance nightmares. When the same decorator logic appears in multiple places, bugs get fixed in some places but not others. Centralization ensures consistency and reduces maintenance burden."
+
+2. **Type Safety Impact**: "Decorators that don't preserve type information break IDE support, static analysis, and developer productivity. Proper type annotations are essential for maintaining code quality in large codebases."
+
+3. **Resource Management Impact**: "Decorators often manage resources (locks, connections, contexts). Poor resource management in decorators can cause system-wide issues because they're used across many functions."
+
+4. **Function Preservation Impact**: "Decorators that don't preserve original function metadata break debugging, introspection, and documentation tools. Using @functools.wraps is not optional."
+
+5. **Testing Impact**: "Decorators are cross-cutting concerns that affect many functions. Bugs in decorators have amplified impact, making thorough testing especially critical."
+
+6. **Performance Impact**: "Decorators add overhead to every function call they wrap. Inefficient decorators can degrade system performance across the entire application."
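The guidelines above require parameterized decorators to support both `@decorator` and `@decorator(param=value)`; here is a minimal sketch of that dual-mode pattern (an illustration of the guideline, not code from the SDK):

```python
# Minimal dual-mode decorator: works bare (@timed) and parameterized
# (@timed(label="...")). Illustrates the guideline; not SDK code.
import time
from functools import wraps
from typing import Any, Callable, Optional

def timed(func: Optional[Callable[..., Any]] = None, *, label: str = ""):
    def decorator(inner: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(inner)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start = time.perf_counter()
            try:
                return inner(*args, **kwargs)
            finally:
                print(f"{label or inner.__qualname__}: "
                      f"{time.perf_counter() - start:.6f}s")
        return wrapper
    # Bare form: @timed passes the function directly.
    if func is not None:
        return decorator(func)
    # Parameterized form: @timed(label=...) returns the real decorator.
    return decorator
```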
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Any, Dict
 
 
 class HandlerInterface(ABC):
@@ -37,3 +37,10 @@ class HandlerInterface(ABC):
         To be implemented by the subclass
         """
         raise NotImplementedError("fetch_metadata method not implemented")
+
+    @staticmethod
+    async def get_configmap(config_map_id: str) -> Dict[str, Any]:
+        """
+        Static method to get the configmap
+        """
+        return {}
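A hypothetical subclass showing how the new `get_configmap` hook might be overridden; the lookup body is illustrative only:

```python
# Hypothetical override of the new HandlerInterface.get_configmap hook.
from typing import Any, Dict

from application_sdk.handlers import HandlerInterface

class ExampleHandler(HandlerInterface):
    # (other HandlerInterface methods omitted for brevity)

    @staticmethod
    async def get_configmap(config_map_id: str) -> Dict[str, Any]:
        # Illustrative lookup; a real handler might read a state store here.
        return {"id": config_map_id, "source": "example"}
```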
@@ -56,9 +56,13 @@ class BaseSQLHandler(HandlerInterface):
     schema_alias_key: str = SQLConstants.SCHEMA_ALIAS_KEY.value
     database_result_key: str = SQLConstants.DATABASE_RESULT_KEY.value
     schema_result_key: str = SQLConstants.SCHEMA_RESULT_KEY.value
+    multidb: bool = False
 
-    def __init__(self, sql_client: BaseSQLClient | None = None):
+    def __init__(
+        self, sql_client: BaseSQLClient | None = None, multidb: Optional[bool] = False
+    ):
         self.sql_client = sql_client
+        self.multidb = multidb
 
     async def load(self, credentials: Dict[str, Any]) -> None:
         """
@@ -294,35 +298,26 @@ class BaseSQLHandler(HandlerInterface):
                 return False, f"{db}.{sch} schema"
         return True, ""
 
-    async def tables_check(
-        self,
-        payload: Dict[str, Any],
-    ) -> Dict[str, Any]:
+    async def tables_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
         """
         Method to check the count of tables
         """
         logger.info("Starting tables check")
-        query = prepare_query(
-            query=self.tables_check_sql,
-            workflow_args=payload,
-            temp_table_regex_sql=self.extract_temp_table_regex_table_sql,
-        )
-        if not query:
-            raise ValueError("tables_check_sql is not defined")
-        sql_input = SQLQueryInput(
-            engine=self.sql_client.engine, query=query, chunk_size=None
-        )
-        sql_input = await sql_input.get_dataframe()
-        try:
-            result = 0
-            for row in sql_input.to_dict(orient="records"):
-                result += row["count"]
+
+        def _sum_counts_from_records(records_iter) -> int:
+            total = 0
+            for row in records_iter:
+                total += row["count"]
+            return total
+
+        def _build_success(total: int) -> Dict[str, Any]:
             return {
                 "success": True,
-                "successMessage": f"Tables check successful. Table count: {result}",
+                "successMessage": f"Tables check successful. Table count: {total}",
                 "failureMessage": "",
             }
-        except Exception as exc:
+
+        def _build_failure(exc: Exception) -> Dict[str, Any]:
             logger.error("Error during tables check", exc_info=True)
             return {
                 "success": False,
@@ -331,6 +326,52 @@ class BaseSQLHandler(HandlerInterface):
                 "error": str(exc),
             }
 
+        if self.multidb:
+            try:
+                from application_sdk.activities.metadata_extraction.sql import (
+                    BaseSQLMetadataExtractionActivities,
+                )
+
+                # Use the base query executor in multidb mode to get concatenated df
+                activities = BaseSQLMetadataExtractionActivities()
+                activities.multidb = True
+                concatenated_df = await activities.query_executor(
+                    sql_engine=self.sql_client.engine if self.sql_client else None,
+                    sql_query=self.tables_check_sql,
+                    workflow_args=payload,
+                    output_suffix="raw/table",
+                    typename="table",
+                    write_to_file=False,
+                    concatenate=True,
+                    return_dataframe=True,
+                    sql_client=self.sql_client,
+                )
+
+                if concatenated_df is None:
+                    return _build_success(0)
+
+                total = int(concatenated_df["count"].sum())  # type: ignore[index]
+                return _build_success(total)
+            except Exception as exc:
+                return _build_failure(exc)
+        else:
+            query = prepare_query(
+                query=self.tables_check_sql,
+                workflow_args=payload,
+                temp_table_regex_sql=self.extract_temp_table_regex_table_sql,
+            )
+            if not query:
+                raise ValueError("tables_check_sql is not defined")
+            sql_input = SQLQueryInput(
+                engine=self.sql_client.engine, query=query, chunk_size=None
+            )
+            sql_input = await sql_input.get_dataframe()
+            try:
+                total = _sum_counts_from_records(sql_input.to_dict(orient="records"))
+                return _build_success(total)
+            except Exception as exc:
+                return _build_failure(exc)
+
     async def check_client_version(self) -> Dict[str, Any]:
         """
         Check if the client version meets the minimum required version.
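For intuition, the refactor routes both the multidb and single-database branches through the same local response builders; this standalone sketch mirrors `_sum_counts_from_records` and `_build_success` on made-up rows:

```python
# Standalone sketch mirroring tables_check's local helpers (rows are made up).
from typing import Any, Dict, Iterable

def _sum_counts_from_records(records_iter: Iterable[Dict[str, Any]]) -> int:
    total = 0
    for row in records_iter:
        total += row["count"]
    return total

def _build_success(total: int) -> Dict[str, Any]:
    return {
        "success": True,
        "successMessage": f"Tables check successful. Table count: {total}",
        "failureMessage": "",
    }

print(_build_success(_sum_counts_from_records([{"count": 3}, {"count": 7}])))
```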