atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (35)
  1. application_sdk/activities/.cursor/BUGBOT.md +424 -0
  2. application_sdk/activities/metadata_extraction/sql.py +400 -25
  3. application_sdk/application/__init__.py +2 -0
  4. application_sdk/application/metadata_extraction/sql.py +3 -0
  5. application_sdk/clients/.cursor/BUGBOT.md +280 -0
  6. application_sdk/clients/models.py +42 -0
  7. application_sdk/clients/sql.py +127 -87
  8. application_sdk/clients/temporal.py +3 -1
  9. application_sdk/common/.cursor/BUGBOT.md +316 -0
  10. application_sdk/common/aws_utils.py +259 -11
  11. application_sdk/common/utils.py +145 -9
  12. application_sdk/constants.py +8 -0
  13. application_sdk/decorators/.cursor/BUGBOT.md +279 -0
  14. application_sdk/handlers/__init__.py +8 -1
  15. application_sdk/handlers/sql.py +63 -22
  16. application_sdk/inputs/.cursor/BUGBOT.md +250 -0
  17. application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
  18. application_sdk/interceptors/cleanup.py +171 -0
  19. application_sdk/interceptors/events.py +6 -6
  20. application_sdk/observability/decorators/observability_decorator.py +36 -22
  21. application_sdk/outputs/.cursor/BUGBOT.md +295 -0
  22. application_sdk/outputs/iceberg.py +4 -0
  23. application_sdk/outputs/json.py +6 -0
  24. application_sdk/outputs/parquet.py +13 -3
  25. application_sdk/server/.cursor/BUGBOT.md +442 -0
  26. application_sdk/server/fastapi/__init__.py +59 -3
  27. application_sdk/server/fastapi/models.py +27 -0
  28. application_sdk/services/objectstore.py +16 -3
  29. application_sdk/version.py +1 -1
  30. application_sdk/workflows/.cursor/BUGBOT.md +218 -0
  31. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
  32. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
  33. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
  34. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
  35. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
@@ -7,13 +7,14 @@ database operations, supporting batch processing and server-side cursors.
7
7
 
8
8
  import asyncio
9
9
  from concurrent.futures import ThreadPoolExecutor
10
- from typing import Any, Dict, List
10
+ from typing import Any, Dict, List, Optional
11
11
  from urllib.parse import quote_plus
12
12
 
13
13
  from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
14
14
  from temporalio import activity
15
15
 
16
16
  from application_sdk.clients import ClientInterface
17
+ from application_sdk.clients.models import DatabaseConfig
17
18
  from application_sdk.common.aws_utils import (
18
19
  generate_aws_rds_token_with_iam_role,
19
20
  generate_aws_rds_token_with_iam_user,
@@ -48,7 +49,7 @@ class BaseSQLClient(ClientInterface):
48
49
  credentials: Dict[str, Any] = {}
49
50
  resolved_credentials: Dict[str, Any] = {}
50
51
  use_server_side_cursor: bool = USE_SERVER_SIDE_CURSOR
51
- DB_CONFIG: Dict[str, Any] = {}
52
+ DB_CONFIG: Optional[DatabaseConfig] = None
52
53
 
53
54
  def __init__(
54
55
  self,
@@ -71,25 +72,36 @@ class BaseSQLClient(ClientInterface):
71
72
  self.sql_alchemy_connect_args = sql_alchemy_connect_args
72
73
 
73
74
  async def load(self, credentials: Dict[str, Any]) -> None:
74
- """Load and establish the database connection.
75
+ """Load credentials and prepare engine for lazy connections.
76
+
77
+ This method now only stores credentials and creates the engine without
78
+ establishing a persistent connection. Connections are created on-demand.
75
79
 
76
80
  Args:
77
81
  credentials (Dict[str, Any]): Database connection credentials.
78
82
 
79
83
  Raises:
80
- ClientError: If connection fails due to authentication or connection issues
84
+ ClientError: If credentials are invalid or engine creation fails
81
85
  """
82
86
  self.credentials = credentials # Update the instance credentials
83
87
  try:
84
88
  from sqlalchemy import create_engine
85
89
 
90
+ # Create engine but no persistent connection
86
91
  self.engine = create_engine(
87
92
  self.get_sqlalchemy_connection_string(),
88
93
  connect_args=self.sql_alchemy_connect_args,
89
94
  pool_pre_ping=True,
90
95
  )
91
- self.connection = self.engine.connect()
92
- except ClientError as e:
96
+
97
+ # Test connection briefly to validate credentials
98
+ with self.engine.connect() as _:
99
+ pass # Connection test successful
100
+
101
+ # Don't store persistent connection
102
+ self.connection = None
103
+
104
+ except Exception as e:
93
105
  logger.error(
94
106
  f"{ClientError.SQL_CLIENT_AUTH_ERROR}: Error loading SQL client: {str(e)}"
95
107
  )
@@ -100,8 +112,10 @@ class BaseSQLClient(ClientInterface):
100
112
 
101
113
  async def close(self) -> None:
102
114
  """Close the database connection."""
103
- if self.connection:
104
- self.connection.close()
115
+ if self.engine:
116
+ self.engine.dispose()
117
+ self.engine = None
118
+ self.connection = None # Should already be None, but ensure cleanup
105
119
 
106
120
  def get_iam_user_token(self):
107
121
  """Get an IAM user token for AWS RDS database authentication.
@@ -249,7 +263,9 @@ class BaseSQLClient(ClientInterface):
249
263
  Returns:
250
264
  str: The updated URL with the dialect.
251
265
  """
252
- installed_dialect = self.DB_CONFIG["template"].split("://")[0]
266
+ if not self.DB_CONFIG:
267
+ raise ValueError("DB_CONFIG is not configured for this SQL client.")
268
+ installed_dialect = self.DB_CONFIG.template.split("://")[0]
253
269
  url_dialect = sqlalchemy_url.split("://")[0]
254
270
  if installed_dialect != url_dialect:
255
271
  sqlalchemy_url = sqlalchemy_url.replace(url_dialect, installed_dialect)
@@ -268,6 +284,9 @@ class BaseSQLClient(ClientInterface):
268
284
  Raises:
269
285
  ValueError: If required connection parameters are missing.
270
286
  """
287
+ if not self.DB_CONFIG:
288
+ raise ValueError("DB_CONFIG is not configured for this SQL client.")
289
+
271
290
  extra = parse_credentials_extra(self.credentials)
272
291
 
273
292
  # TODO: Uncomment this when the native deployment is ready
@@ -280,7 +299,7 @@ class BaseSQLClient(ClientInterface):
280
299
 
281
300
  # Prepare parameters
282
301
  param_values = {}
283
- for param in self.DB_CONFIG["required"]:
302
+ for param in self.DB_CONFIG.required:
284
303
  if param == "password":
285
304
  param_values[param] = auth_token
286
305
  else:
@@ -290,30 +309,28 @@ class BaseSQLClient(ClientInterface):
290
309
  param_values[param] = value
291
310
 
292
311
  # Fill in base template
293
- conn_str = self.DB_CONFIG["template"].format(**param_values)
312
+ conn_str = self.DB_CONFIG.template.format(**param_values)
294
313
 
295
314
  # Append defaults if not already in the template
296
- if self.DB_CONFIG.get("defaults"):
297
- conn_str = self.add_connection_params(conn_str, self.DB_CONFIG["defaults"])
315
+ if self.DB_CONFIG.defaults:
316
+ conn_str = self.add_connection_params(conn_str, self.DB_CONFIG.defaults)
298
317
 
299
- if self.DB_CONFIG.get("parameters"):
300
- parameter_keys = self.DB_CONFIG["parameters"]
301
- self.DB_CONFIG["parameters"] = {
318
+ if self.DB_CONFIG.parameters:
319
+ parameter_keys = self.DB_CONFIG.parameters
320
+ parameter_values = {
302
321
  key: self.credentials.get(key) or extra.get(key)
303
322
  for key in parameter_keys
304
323
  }
305
- conn_str = self.add_connection_params(
306
- conn_str, self.DB_CONFIG["parameters"]
307
- )
324
+ conn_str = self.add_connection_params(conn_str, parameter_values)
308
325
 
309
326
  return conn_str
310
327
 
311
328
  async def run_query(self, query: str, batch_size: int = 100000):
312
- """Execute a SQL query and return results in batches.
329
+ """Execute a SQL query and return results in batches using lazy connections.
313
330
 
314
- This method executes the provided SQL query and yields results in batches
315
- to efficiently manage memory usage for large result sets. It supports both
316
- server-side and client-side cursors based on configuration.
331
+ This method creates a connection on-demand, executes the query in batches,
332
+ and automatically closes the connection when done. This prevents memory
333
+ leaks from persistent connections.
317
334
 
318
335
  Args:
319
336
  query (str): SQL query to execute.
@@ -325,44 +342,47 @@ class BaseSQLClient(ClientInterface):
325
342
  a dictionary mapping column names to values.
326
343
 
327
344
  Raises:
328
- ValueError: If database connection is not established.
345
+ ValueError: If engine is not initialized.
329
346
  Exception: If query execution fails.
330
347
  """
331
- if not self.connection:
332
- raise ValueError("Connection is not established")
333
- loop = asyncio.get_running_loop()
334
-
335
- if self.use_server_side_cursor:
336
- self.connection.execution_options(yield_per=batch_size)
348
+ if not self.engine:
349
+ raise ValueError("Engine is not initialized. Call load() first.")
337
350
 
351
+ loop = asyncio.get_running_loop()
338
352
  logger.info(f"Running query: {query}")
339
353
 
340
- with ThreadPoolExecutor() as pool:
341
- try:
342
- from sqlalchemy import text
354
+ # Use context manager for automatic connection cleanup
355
+ with self.engine.connect() as connection:
356
+ if self.use_server_side_cursor:
357
+ connection = connection.execution_options(yield_per=batch_size)
343
358
 
344
- cursor = await loop.run_in_executor(
345
- pool, self.connection.execute, text(query)
346
- )
347
- if not cursor or not cursor.cursor:
348
- raise ValueError("Cursor is not supported")
349
- column_names: List[str] = [
350
- description.name.lower()
351
- for description in cursor.cursor.description
352
- ]
359
+ with ThreadPoolExecutor() as pool:
360
+ try:
361
+ from sqlalchemy import text
353
362
 
354
- while True:
355
- rows = await loop.run_in_executor(
356
- pool, cursor.fetchmany, batch_size
363
+ cursor = await loop.run_in_executor(
364
+ pool, connection.execute, text(query)
357
365
  )
358
- if not rows:
359
- break
360
-
361
- results = [dict(zip(column_names, row)) for row in rows]
362
- yield results
363
- except Exception as e:
364
- logger.error("Error running query in batch: {error}", error=str(e))
365
- raise e
366
+ if not cursor or not cursor.cursor:
367
+ raise ValueError("Cursor is not supported")
368
+ column_names: List[str] = [
369
+ description.name.lower()
370
+ for description in cursor.cursor.description
371
+ ]
372
+
373
+ while True:
374
+ rows = await loop.run_in_executor(
375
+ pool, cursor.fetchmany, batch_size
376
+ )
377
+ if not rows:
378
+ break
379
+
380
+ results = [dict(zip(column_names, row)) for row in rows]
381
+ yield results
382
+ except Exception as e:
383
+ logger.error("Error running query in batch: {error}", error=str(e))
384
+ raise e
385
+ # Connection automatically closed by context manager
366
386
 
367
387
  logger.info("Query execution completed")
368
388
 
@@ -386,22 +406,23 @@ class AsyncBaseSQLClient(BaseSQLClient):
386
406
  engine: "AsyncEngine"
387
407
 
388
408
  async def load(self, credentials: Dict[str, Any]) -> None:
389
- """Load and establish an asynchronous database connection.
409
+ """Load credentials and prepare async engine for lazy connections.
390
410
 
391
- This method creates an async SQLAlchemy engine and establishes a connection
392
- to the database using the provided credentials.
411
+ This method stores credentials and creates an async engine without establishing
412
+ a persistent connection. Connections are created on-demand for better memory efficiency.
393
413
 
394
414
  Args:
395
415
  credentials (Dict[str, Any]): Database connection credentials including
396
416
  host, port, username, password, and other connection parameters.
397
417
 
398
418
  Raises:
399
- ValueError: If connection fails due to invalid credentials or connection issues.
419
+ ValueError: If credentials are invalid or engine creation fails.
400
420
  """
401
421
  self.credentials = credentials
402
422
  try:
403
423
  from sqlalchemy.ext.asyncio import create_async_engine
404
424
 
425
+ # Create async engine but no persistent connection
405
426
  self.engine = create_async_engine(
406
427
  self.get_sqlalchemy_connection_string(),
407
428
  connect_args=self.sql_alchemy_connect_args,
@@ -409,7 +430,14 @@ class AsyncBaseSQLClient(BaseSQLClient):
409
430
  )
410
431
  if not self.engine:
411
432
  raise ValueError("Failed to create async engine")
412
- self.connection = await self.engine.connect()
433
+
434
+ # Test connection briefly to validate credentials
435
+ async with self.engine.connect() as _:
436
+ pass # Connection test successful
437
+
438
+ # Don't store persistent connection
439
+ self.connection = None
440
+
413
441
  except Exception as e:
414
442
  logger.error(f"Error establishing database connection: {str(e)}")
415
443
  if self.engine:
@@ -417,11 +445,19 @@ class AsyncBaseSQLClient(BaseSQLClient):
417
445
  self.engine = None
418
446
  raise ValueError(str(e))
419
447
 
448
+ async def close(self) -> None:
449
+ """Close the async database connection and dispose of the engine."""
450
+ if self.engine:
451
+ await self.engine.dispose()
452
+ self.engine = None
453
+ self.connection = None
454
+
420
455
  async def run_query(self, query: str, batch_size: int = 100000):
421
- """Execute a SQL query asynchronously and return results in batches.
456
+ """Execute a SQL query asynchronously and return results in batches using lazy connections.
422
457
 
423
- This method executes the provided SQL query using an async connection and
424
- yields results in batches to manage memory usage for large result sets.
458
+ This method creates an async connection on-demand, executes the query in batches,
459
+ and automatically closes the connection when done. This prevents memory leaks
460
+ from persistent connections.
425
461
 
426
462
  Args:
427
463
  query (str): SQL query to execute.
@@ -433,42 +469,46 @@ class AsyncBaseSQLClient(BaseSQLClient):
433
469
  a dictionary mapping column names to values.
434
470
 
435
471
  Raises:
472
+ ValueError: If engine is not initialized.
436
473
  Exception: If query execution fails.
437
474
  """
438
- if not self.connection:
439
- raise ValueError("Connection is not established")
475
+ if not self.engine:
476
+ raise ValueError("Engine is not initialized. Call load() first.")
440
477
 
441
478
  logger.info(f"Running query: {query}")
442
479
  use_server_side_cursor = self.use_server_side_cursor
443
480
 
444
- try:
445
- from sqlalchemy import text
446
-
447
- if use_server_side_cursor:
448
- await self.connection.execution_options(yield_per=batch_size)
449
-
450
- result = (
451
- await self.connection.stream(text(query))
452
- if use_server_side_cursor
453
- else await self.connection.execute(text(query))
454
- )
481
+ # Use async context manager for automatic connection cleanup
482
+ async with self.engine.connect() as connection:
483
+ try:
484
+ from sqlalchemy import text
455
485
 
456
- column_names = list(result.keys())
486
+ if use_server_side_cursor:
487
+ connection = connection.execution_options(yield_per=batch_size)
457
488
 
458
- while True:
459
- rows = (
460
- await result.fetchmany(batch_size)
489
+ result = (
490
+ await connection.stream(text(query))
461
491
  if use_server_side_cursor
462
- else result.cursor.fetchmany(batch_size)
463
- if result.cursor
464
- else None
492
+ else await connection.execute(text(query))
465
493
  )
466
- if not rows:
467
- break
468
- yield [dict(zip(column_names, row)) for row in rows]
469
494
 
470
- except Exception as e:
471
- logger.error(f"Error executing query: {str(e)}")
472
- raise
495
+ column_names = list(result.keys())
496
+
497
+ while True:
498
+ rows = (
499
+ await result.fetchmany(batch_size)
500
+ if use_server_side_cursor
501
+ else result.cursor.fetchmany(batch_size)
502
+ if result.cursor
503
+ else None
504
+ )
505
+ if not rows:
506
+ break
507
+ yield [dict(zip(column_names, row)) for row in rows]
508
+
509
+ except Exception as e:
510
+ logger.error(f"Error executing query: {str(e)}")
511
+ raise
512
+ # Async connection automatically closed by context manager
473
513
 
474
514
  logger.info("Query execution completed")
@@ -26,6 +26,7 @@ from application_sdk.constants import (
26
26
  WORKFLOW_PORT,
27
27
  WORKFLOW_TLS_ENABLED_KEY,
28
28
  )
29
+ from application_sdk.interceptors.cleanup import CleanupInterceptor, cleanup
29
30
  from application_sdk.interceptors.events import EventInterceptor, publish_event
30
31
  from application_sdk.interceptors.lock import RedisLockInterceptor
31
32
  from application_sdk.observability.logger_adaptor import get_logger
@@ -359,7 +360,7 @@ class TemporalWorkflowClient(WorkflowClient):
359
360
  )
360
361
 
361
362
  # Start with provided activities and add system activities
362
- final_activities = list(activities) + [publish_event]
363
+ final_activities = list(activities) + [publish_event, cleanup]
363
364
 
364
365
  # Add lock management activities if needed
365
366
  if not IS_LOCKING_DISABLED:
@@ -395,6 +396,7 @@ class TemporalWorkflowClient(WorkflowClient):
395
396
  activity_executor=activity_executor,
396
397
  interceptors=[
397
398
  EventInterceptor(),
399
+ CleanupInterceptor(),
398
400
  RedisLockInterceptor(activities_dict),
399
401
  ],
400
402
  )