atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/activities/metadata_extraction/sql.py +400 -25
- application_sdk/application/__init__.py +2 -0
- application_sdk/application/metadata_extraction/sql.py +3 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/models.py +42 -0
- application_sdk/clients/sql.py +127 -87
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/common/aws_utils.py +259 -11
- application_sdk/common/utils.py +145 -9
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/handlers/__init__.py +8 -1
- application_sdk/handlers/sql.py +63 -22
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/observability/decorators/observability_decorator.py +36 -22
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/server/fastapi/__init__.py +59 -3
- application_sdk/server/fastapi/models.py +27 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
application_sdk/clients/sql.py
CHANGED
|
@@ -7,13 +7,14 @@ database operations, supporting batch processing and server-side cursors.
|
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
9
|
from concurrent.futures import ThreadPoolExecutor
|
|
10
|
-
from typing import Any, Dict, List
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
11
|
from urllib.parse import quote_plus
|
|
12
12
|
|
|
13
13
|
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
|
|
14
14
|
from temporalio import activity
|
|
15
15
|
|
|
16
16
|
from application_sdk.clients import ClientInterface
|
|
17
|
+
from application_sdk.clients.models import DatabaseConfig
|
|
17
18
|
from application_sdk.common.aws_utils import (
|
|
18
19
|
generate_aws_rds_token_with_iam_role,
|
|
19
20
|
generate_aws_rds_token_with_iam_user,
|
|
@@ -48,7 +49,7 @@ class BaseSQLClient(ClientInterface):
|
|
|
48
49
|
credentials: Dict[str, Any] = {}
|
|
49
50
|
resolved_credentials: Dict[str, Any] = {}
|
|
50
51
|
use_server_side_cursor: bool = USE_SERVER_SIDE_CURSOR
|
|
51
|
-
DB_CONFIG:
|
|
52
|
+
DB_CONFIG: Optional[DatabaseConfig] = None
|
|
52
53
|
|
|
53
54
|
def __init__(
|
|
54
55
|
self,
|
|
@@ -71,25 +72,36 @@ class BaseSQLClient(ClientInterface):
|
|
|
71
72
|
self.sql_alchemy_connect_args = sql_alchemy_connect_args
|
|
72
73
|
|
|
73
74
|
async def load(self, credentials: Dict[str, Any]) -> None:
|
|
74
|
-
"""Load and
|
|
75
|
+
"""Load credentials and prepare engine for lazy connections.
|
|
76
|
+
|
|
77
|
+
This method now only stores credentials and creates the engine without
|
|
78
|
+
establishing a persistent connection. Connections are created on-demand.
|
|
75
79
|
|
|
76
80
|
Args:
|
|
77
81
|
credentials (Dict[str, Any]): Database connection credentials.
|
|
78
82
|
|
|
79
83
|
Raises:
|
|
80
|
-
ClientError: If
|
|
84
|
+
ClientError: If credentials are invalid or engine creation fails
|
|
81
85
|
"""
|
|
82
86
|
self.credentials = credentials # Update the instance credentials
|
|
83
87
|
try:
|
|
84
88
|
from sqlalchemy import create_engine
|
|
85
89
|
|
|
90
|
+
# Create engine but no persistent connection
|
|
86
91
|
self.engine = create_engine(
|
|
87
92
|
self.get_sqlalchemy_connection_string(),
|
|
88
93
|
connect_args=self.sql_alchemy_connect_args,
|
|
89
94
|
pool_pre_ping=True,
|
|
90
95
|
)
|
|
91
|
-
|
|
92
|
-
|
|
96
|
+
|
|
97
|
+
# Test connection briefly to validate credentials
|
|
98
|
+
with self.engine.connect() as _:
|
|
99
|
+
pass # Connection test successful
|
|
100
|
+
|
|
101
|
+
# Don't store persistent connection
|
|
102
|
+
self.connection = None
|
|
103
|
+
|
|
104
|
+
except Exception as e:
|
|
93
105
|
logger.error(
|
|
94
106
|
f"{ClientError.SQL_CLIENT_AUTH_ERROR}: Error loading SQL client: {str(e)}"
|
|
95
107
|
)
|
|
@@ -100,8 +112,10 @@ class BaseSQLClient(ClientInterface):
|
|
|
100
112
|
|
|
101
113
|
async def close(self) -> None:
|
|
102
114
|
"""Close the database connection."""
|
|
103
|
-
if self.
|
|
104
|
-
self.
|
|
115
|
+
if self.engine:
|
|
116
|
+
self.engine.dispose()
|
|
117
|
+
self.engine = None
|
|
118
|
+
self.connection = None # Should already be None, but ensure cleanup
|
|
105
119
|
|
|
106
120
|
def get_iam_user_token(self):
|
|
107
121
|
"""Get an IAM user token for AWS RDS database authentication.
|
|
@@ -249,7 +263,9 @@ class BaseSQLClient(ClientInterface):
|
|
|
249
263
|
Returns:
|
|
250
264
|
str: The updated URL with the dialect.
|
|
251
265
|
"""
|
|
252
|
-
|
|
266
|
+
if not self.DB_CONFIG:
|
|
267
|
+
raise ValueError("DB_CONFIG is not configured for this SQL client.")
|
|
268
|
+
installed_dialect = self.DB_CONFIG.template.split("://")[0]
|
|
253
269
|
url_dialect = sqlalchemy_url.split("://")[0]
|
|
254
270
|
if installed_dialect != url_dialect:
|
|
255
271
|
sqlalchemy_url = sqlalchemy_url.replace(url_dialect, installed_dialect)
|
|
@@ -268,6 +284,9 @@ class BaseSQLClient(ClientInterface):
|
|
|
268
284
|
Raises:
|
|
269
285
|
ValueError: If required connection parameters are missing.
|
|
270
286
|
"""
|
|
287
|
+
if not self.DB_CONFIG:
|
|
288
|
+
raise ValueError("DB_CONFIG is not configured for this SQL client.")
|
|
289
|
+
|
|
271
290
|
extra = parse_credentials_extra(self.credentials)
|
|
272
291
|
|
|
273
292
|
# TODO: Uncomment this when the native deployment is ready
|
|
@@ -280,7 +299,7 @@ class BaseSQLClient(ClientInterface):
|
|
|
280
299
|
|
|
281
300
|
# Prepare parameters
|
|
282
301
|
param_values = {}
|
|
283
|
-
for param in self.DB_CONFIG
|
|
302
|
+
for param in self.DB_CONFIG.required:
|
|
284
303
|
if param == "password":
|
|
285
304
|
param_values[param] = auth_token
|
|
286
305
|
else:
|
|
@@ -290,30 +309,28 @@ class BaseSQLClient(ClientInterface):
|
|
|
290
309
|
param_values[param] = value
|
|
291
310
|
|
|
292
311
|
# Fill in base template
|
|
293
|
-
conn_str = self.DB_CONFIG
|
|
312
|
+
conn_str = self.DB_CONFIG.template.format(**param_values)
|
|
294
313
|
|
|
295
314
|
# Append defaults if not already in the template
|
|
296
|
-
if self.DB_CONFIG.
|
|
297
|
-
conn_str = self.add_connection_params(conn_str, self.DB_CONFIG
|
|
315
|
+
if self.DB_CONFIG.defaults:
|
|
316
|
+
conn_str = self.add_connection_params(conn_str, self.DB_CONFIG.defaults)
|
|
298
317
|
|
|
299
|
-
if self.DB_CONFIG.
|
|
300
|
-
parameter_keys = self.DB_CONFIG
|
|
301
|
-
|
|
318
|
+
if self.DB_CONFIG.parameters:
|
|
319
|
+
parameter_keys = self.DB_CONFIG.parameters
|
|
320
|
+
parameter_values = {
|
|
302
321
|
key: self.credentials.get(key) or extra.get(key)
|
|
303
322
|
for key in parameter_keys
|
|
304
323
|
}
|
|
305
|
-
conn_str = self.add_connection_params(
|
|
306
|
-
conn_str, self.DB_CONFIG["parameters"]
|
|
307
|
-
)
|
|
324
|
+
conn_str = self.add_connection_params(conn_str, parameter_values)
|
|
308
325
|
|
|
309
326
|
return conn_str
|
|
310
327
|
|
|
311
328
|
async def run_query(self, query: str, batch_size: int = 100000):
|
|
312
|
-
"""Execute a SQL query and return results in batches.
|
|
329
|
+
"""Execute a SQL query and return results in batches using lazy connections.
|
|
313
330
|
|
|
314
|
-
This method
|
|
315
|
-
|
|
316
|
-
|
|
331
|
+
This method creates a connection on-demand, executes the query in batches,
|
|
332
|
+
and automatically closes the connection when done. This prevents memory
|
|
333
|
+
leaks from persistent connections.
|
|
317
334
|
|
|
318
335
|
Args:
|
|
319
336
|
query (str): SQL query to execute.
|
|
@@ -325,44 +342,47 @@ class BaseSQLClient(ClientInterface):
|
|
|
325
342
|
a dictionary mapping column names to values.
|
|
326
343
|
|
|
327
344
|
Raises:
|
|
328
|
-
ValueError: If
|
|
345
|
+
ValueError: If engine is not initialized.
|
|
329
346
|
Exception: If query execution fails.
|
|
330
347
|
"""
|
|
331
|
-
if not self.
|
|
332
|
-
raise ValueError("
|
|
333
|
-
loop = asyncio.get_running_loop()
|
|
334
|
-
|
|
335
|
-
if self.use_server_side_cursor:
|
|
336
|
-
self.connection.execution_options(yield_per=batch_size)
|
|
348
|
+
if not self.engine:
|
|
349
|
+
raise ValueError("Engine is not initialized. Call load() first.")
|
|
337
350
|
|
|
351
|
+
loop = asyncio.get_running_loop()
|
|
338
352
|
logger.info(f"Running query: {query}")
|
|
339
353
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
354
|
+
# Use context manager for automatic connection cleanup
|
|
355
|
+
with self.engine.connect() as connection:
|
|
356
|
+
if self.use_server_side_cursor:
|
|
357
|
+
connection = connection.execution_options(yield_per=batch_size)
|
|
343
358
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
if not cursor or not cursor.cursor:
|
|
348
|
-
raise ValueError("Cursor is not supported")
|
|
349
|
-
column_names: List[str] = [
|
|
350
|
-
description.name.lower()
|
|
351
|
-
for description in cursor.cursor.description
|
|
352
|
-
]
|
|
359
|
+
with ThreadPoolExecutor() as pool:
|
|
360
|
+
try:
|
|
361
|
+
from sqlalchemy import text
|
|
353
362
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
pool, cursor.fetchmany, batch_size
|
|
363
|
+
cursor = await loop.run_in_executor(
|
|
364
|
+
pool, connection.execute, text(query)
|
|
357
365
|
)
|
|
358
|
-
if not
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
+
if not cursor or not cursor.cursor:
|
|
367
|
+
raise ValueError("Cursor is not supported")
|
|
368
|
+
column_names: List[str] = [
|
|
369
|
+
description.name.lower()
|
|
370
|
+
for description in cursor.cursor.description
|
|
371
|
+
]
|
|
372
|
+
|
|
373
|
+
while True:
|
|
374
|
+
rows = await loop.run_in_executor(
|
|
375
|
+
pool, cursor.fetchmany, batch_size
|
|
376
|
+
)
|
|
377
|
+
if not rows:
|
|
378
|
+
break
|
|
379
|
+
|
|
380
|
+
results = [dict(zip(column_names, row)) for row in rows]
|
|
381
|
+
yield results
|
|
382
|
+
except Exception as e:
|
|
383
|
+
logger.error("Error running query in batch: {error}", error=str(e))
|
|
384
|
+
raise e
|
|
385
|
+
# Connection automatically closed by context manager
|
|
366
386
|
|
|
367
387
|
logger.info("Query execution completed")
|
|
368
388
|
|
|
@@ -386,22 +406,23 @@ class AsyncBaseSQLClient(BaseSQLClient):
|
|
|
386
406
|
engine: "AsyncEngine"
|
|
387
407
|
|
|
388
408
|
async def load(self, credentials: Dict[str, Any]) -> None:
|
|
389
|
-
"""Load and
|
|
409
|
+
"""Load credentials and prepare async engine for lazy connections.
|
|
390
410
|
|
|
391
|
-
This method creates an async
|
|
392
|
-
|
|
411
|
+
This method stores credentials and creates an async engine without establishing
|
|
412
|
+
a persistent connection. Connections are created on-demand for better memory efficiency.
|
|
393
413
|
|
|
394
414
|
Args:
|
|
395
415
|
credentials (Dict[str, Any]): Database connection credentials including
|
|
396
416
|
host, port, username, password, and other connection parameters.
|
|
397
417
|
|
|
398
418
|
Raises:
|
|
399
|
-
ValueError: If
|
|
419
|
+
ValueError: If credentials are invalid or engine creation fails.
|
|
400
420
|
"""
|
|
401
421
|
self.credentials = credentials
|
|
402
422
|
try:
|
|
403
423
|
from sqlalchemy.ext.asyncio import create_async_engine
|
|
404
424
|
|
|
425
|
+
# Create async engine but no persistent connection
|
|
405
426
|
self.engine = create_async_engine(
|
|
406
427
|
self.get_sqlalchemy_connection_string(),
|
|
407
428
|
connect_args=self.sql_alchemy_connect_args,
|
|
@@ -409,7 +430,14 @@ class AsyncBaseSQLClient(BaseSQLClient):
|
|
|
409
430
|
)
|
|
410
431
|
if not self.engine:
|
|
411
432
|
raise ValueError("Failed to create async engine")
|
|
412
|
-
|
|
433
|
+
|
|
434
|
+
# Test connection briefly to validate credentials
|
|
435
|
+
async with self.engine.connect() as _:
|
|
436
|
+
pass # Connection test successful
|
|
437
|
+
|
|
438
|
+
# Don't store persistent connection
|
|
439
|
+
self.connection = None
|
|
440
|
+
|
|
413
441
|
except Exception as e:
|
|
414
442
|
logger.error(f"Error establishing database connection: {str(e)}")
|
|
415
443
|
if self.engine:
|
|
@@ -417,11 +445,19 @@ class AsyncBaseSQLClient(BaseSQLClient):
|
|
|
417
445
|
self.engine = None
|
|
418
446
|
raise ValueError(str(e))
|
|
419
447
|
|
|
448
|
+
async def close(self) -> None:
|
|
449
|
+
"""Close the async database connection and dispose of the engine."""
|
|
450
|
+
if self.engine:
|
|
451
|
+
await self.engine.dispose()
|
|
452
|
+
self.engine = None
|
|
453
|
+
self.connection = None
|
|
454
|
+
|
|
420
455
|
async def run_query(self, query: str, batch_size: int = 100000):
|
|
421
|
-
"""Execute a SQL query asynchronously and return results in batches.
|
|
456
|
+
"""Execute a SQL query asynchronously and return results in batches using lazy connections.
|
|
422
457
|
|
|
423
|
-
This method
|
|
424
|
-
|
|
458
|
+
This method creates an async connection on-demand, executes the query in batches,
|
|
459
|
+
and automatically closes the connection when done. This prevents memory leaks
|
|
460
|
+
from persistent connections.
|
|
425
461
|
|
|
426
462
|
Args:
|
|
427
463
|
query (str): SQL query to execute.
|
|
@@ -433,42 +469,46 @@ class AsyncBaseSQLClient(BaseSQLClient):
|
|
|
433
469
|
a dictionary mapping column names to values.
|
|
434
470
|
|
|
435
471
|
Raises:
|
|
472
|
+
ValueError: If engine is not initialized.
|
|
436
473
|
Exception: If query execution fails.
|
|
437
474
|
"""
|
|
438
|
-
if not self.
|
|
439
|
-
raise ValueError("
|
|
475
|
+
if not self.engine:
|
|
476
|
+
raise ValueError("Engine is not initialized. Call load() first.")
|
|
440
477
|
|
|
441
478
|
logger.info(f"Running query: {query}")
|
|
442
479
|
use_server_side_cursor = self.use_server_side_cursor
|
|
443
480
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
await self.connection.execution_options(yield_per=batch_size)
|
|
449
|
-
|
|
450
|
-
result = (
|
|
451
|
-
await self.connection.stream(text(query))
|
|
452
|
-
if use_server_side_cursor
|
|
453
|
-
else await self.connection.execute(text(query))
|
|
454
|
-
)
|
|
481
|
+
# Use async context manager for automatic connection cleanup
|
|
482
|
+
async with self.engine.connect() as connection:
|
|
483
|
+
try:
|
|
484
|
+
from sqlalchemy import text
|
|
455
485
|
|
|
456
|
-
|
|
486
|
+
if use_server_side_cursor:
|
|
487
|
+
connection = connection.execution_options(yield_per=batch_size)
|
|
457
488
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
await result.fetchmany(batch_size)
|
|
489
|
+
result = (
|
|
490
|
+
await connection.stream(text(query))
|
|
461
491
|
if use_server_side_cursor
|
|
462
|
-
else
|
|
463
|
-
if result.cursor
|
|
464
|
-
else None
|
|
492
|
+
else await connection.execute(text(query))
|
|
465
493
|
)
|
|
466
|
-
if not rows:
|
|
467
|
-
break
|
|
468
|
-
yield [dict(zip(column_names, row)) for row in rows]
|
|
469
494
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
495
|
+
column_names = list(result.keys())
|
|
496
|
+
|
|
497
|
+
while True:
|
|
498
|
+
rows = (
|
|
499
|
+
await result.fetchmany(batch_size)
|
|
500
|
+
if use_server_side_cursor
|
|
501
|
+
else result.cursor.fetchmany(batch_size)
|
|
502
|
+
if result.cursor
|
|
503
|
+
else None
|
|
504
|
+
)
|
|
505
|
+
if not rows:
|
|
506
|
+
break
|
|
507
|
+
yield [dict(zip(column_names, row)) for row in rows]
|
|
508
|
+
|
|
509
|
+
except Exception as e:
|
|
510
|
+
logger.error(f"Error executing query: {str(e)}")
|
|
511
|
+
raise
|
|
512
|
+
# Async connection automatically closed by context manager
|
|
473
513
|
|
|
474
514
|
logger.info("Query execution completed")
|
|
@@ -26,6 +26,7 @@ from application_sdk.constants import (
|
|
|
26
26
|
WORKFLOW_PORT,
|
|
27
27
|
WORKFLOW_TLS_ENABLED_KEY,
|
|
28
28
|
)
|
|
29
|
+
from application_sdk.interceptors.cleanup import CleanupInterceptor, cleanup
|
|
29
30
|
from application_sdk.interceptors.events import EventInterceptor, publish_event
|
|
30
31
|
from application_sdk.interceptors.lock import RedisLockInterceptor
|
|
31
32
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
@@ -359,7 +360,7 @@ class TemporalWorkflowClient(WorkflowClient):
|
|
|
359
360
|
)
|
|
360
361
|
|
|
361
362
|
# Start with provided activities and add system activities
|
|
362
|
-
final_activities = list(activities) + [publish_event]
|
|
363
|
+
final_activities = list(activities) + [publish_event, cleanup]
|
|
363
364
|
|
|
364
365
|
# Add lock management activities if needed
|
|
365
366
|
if not IS_LOCKING_DISABLED:
|
|
@@ -395,6 +396,7 @@ class TemporalWorkflowClient(WorkflowClient):
|
|
|
395
396
|
activity_executor=activity_executor,
|
|
396
397
|
interceptors=[
|
|
397
398
|
EventInterceptor(),
|
|
399
|
+
CleanupInterceptor(),
|
|
398
400
|
RedisLockInterceptor(activities_dict),
|
|
399
401
|
],
|
|
400
402
|
)
|