atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. application_sdk/activities/.cursor/BUGBOT.md +424 -0
  2. application_sdk/activities/metadata_extraction/sql.py +400 -25
  3. application_sdk/application/__init__.py +2 -0
  4. application_sdk/application/metadata_extraction/sql.py +3 -0
  5. application_sdk/clients/.cursor/BUGBOT.md +280 -0
  6. application_sdk/clients/models.py +42 -0
  7. application_sdk/clients/sql.py +127 -87
  8. application_sdk/clients/temporal.py +3 -1
  9. application_sdk/common/.cursor/BUGBOT.md +316 -0
  10. application_sdk/common/aws_utils.py +259 -11
  11. application_sdk/common/utils.py +145 -9
  12. application_sdk/constants.py +8 -0
  13. application_sdk/decorators/.cursor/BUGBOT.md +279 -0
  14. application_sdk/handlers/__init__.py +8 -1
  15. application_sdk/handlers/sql.py +63 -22
  16. application_sdk/inputs/.cursor/BUGBOT.md +250 -0
  17. application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
  18. application_sdk/interceptors/cleanup.py +171 -0
  19. application_sdk/interceptors/events.py +6 -6
  20. application_sdk/observability/decorators/observability_decorator.py +36 -22
  21. application_sdk/outputs/.cursor/BUGBOT.md +295 -0
  22. application_sdk/outputs/iceberg.py +4 -0
  23. application_sdk/outputs/json.py +6 -0
  24. application_sdk/outputs/parquet.py +13 -3
  25. application_sdk/server/.cursor/BUGBOT.md +442 -0
  26. application_sdk/server/fastapi/__init__.py +59 -3
  27. application_sdk/server/fastapi/models.py +27 -0
  28. application_sdk/services/objectstore.py +16 -3
  29. application_sdk/version.py +1 -1
  30. application_sdk/workflows/.cursor/BUGBOT.md +218 -0
  31. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
  32. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
  33. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
  34. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
  35. {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,280 @@
1
+ # Client Code Review Guidelines - Database and External Services
2
+
3
+ ## Context-Specific Patterns
4
+
5
+ This directory contains database clients, external service clients, and connection management code. These components are critical for data integrity, performance, and security.
6
+
7
+ ### Phase 1: Critical Client Safety Issues
8
+
9
+ **Database Connection Security:**
10
+
11
+ - SQL injection prevention through parameterized queries ONLY
12
+ - Connection strings must never contain hardcoded credentials
13
+ - Database passwords must be retrieved from secure credential stores
14
+ - SSL/TLS required for all external database connections
15
+ - Connection timeouts must be explicitly configured
16
+
17
+ **Example SQL Injection Prevention:**
18
+
19
+ ```python
20
+ # ✅ DO: Parameterized queries
21
+ cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
22
+
23
+ # ❌ NEVER: String concatenation
24
+ cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
25
+ ```
26
+
27
+ ### Phase 2: Client Architecture Patterns
28
+
29
+ **Connection Pooling Requirements:**
30
+
31
+ - All database clients MUST use connection pooling
32
+ - Pool size must be configurable via environment variables
33
+ - Connection validation on checkout required
34
+ - Proper connection cleanup in finally blocks
35
+ - Connection leak detection in development/testing
36
+
37
+ **Class Responsibility Separation:**
38
+
39
+ - **Always flag multi-responsibility classes**: Classes handling both client functionality and domain-specific logic must be separated
40
+ - **Client vs Logic separation**: Database clients should handle connections, not business rules
41
+ - **Extract domain logic**: Lock management, caching, or processing logic should be in separate classes
42
+ - **Single purpose interfaces**: Each client class should have one clear responsibility
43
+
44
+ ```python
45
+ # ❌ REJECT: Mixed responsibilities
46
+ class RedisClient:
47
+ def connect(self):
48
+ """Handle connection setup"""
49
+ def acquire_lock(self, lock_name):
50
+ """Lock functionality - should be separate"""
51
+ def get_data(self, key):
52
+ """Client functionality"""
53
+
54
+ # ✅ REQUIRE: Separated responsibilities
55
+ class RedisClient:
56
+ def connect(self):
57
+ """Handle connection setup"""
58
+ def get_data(self, key):
59
+ """Client functionality"""
60
+
61
+ class RedisLockManager: # Separate class for lock functionality
62
+ def __init__(self, client: RedisClient):
63
+ self.client = client
64
+ def acquire_lock(self, lock_name):
65
+ """Lock-specific logic"""
66
+ ```
67
+
68
+ **Async Client Patterns:**
69
+
70
+ - Use async/await for all I/O operations
71
+ - Implement proper connection context managers
72
+ - Handle connection failures gracefully with retries
73
+ - Use asyncio connection pools, not synchronous pools
74
+
75
+ ```python
76
+ # ✅ DO: Proper async connection management
77
+ async def execute_query(self, query: str, params: tuple):
78
+ async with self.pool.acquire() as conn:
79
+ try:
80
+ return await conn.fetch(query, *params)
81
+ except Exception as e:
82
+ logger.error(f"Query failed: {query[:100]}...", exc_info=True)
83
+ raise
84
+ ```
85
+
86
+ **Configuration Management for Clients:**
87
+
88
+ - **Environment-specific settings**: All connection parameters must be externalized to environment variables
89
+ - **Default value validation**: Every configuration parameter must have a sensible default and validation
90
+ - **Development vs Production**: Client configurations must work in both environments
91
+ - **Configuration consolidation**: Related configuration should be grouped together
92
+
93
+ ```python
94
+ # ✅ DO: Proper client configuration
95
+ class DatabaseClientConfig:
96
+ def __init__(self):
97
+ self.host = os.getenv("DB_HOST", "localhost")
98
+ self.port = int(os.getenv("DB_PORT", "5432"))
99
+ self.max_connections = int(os.getenv("DB_MAX_CONNECTIONS", "20"))
100
+ self.timeout = int(os.getenv("DB_TIMEOUT_SECONDS", "30"))
101
+ self.ssl_required = os.getenv("DB_SSL_REQUIRED", "true").lower() == "true"
102
+ self._validate()
103
+
104
+ def _validate(self):
105
+ if self.max_connections <= 0:
106
+ raise ValueError("DB_MAX_CONNECTIONS must be positive")
107
+ if self.timeout <= 0:
108
+ raise ValueError("DB_TIMEOUT_SECONDS must be positive")
109
+
110
+ # ❌ REJECT: Poor configuration management
111
+ class BadDatabaseClient:
112
+ def __init__(self):
113
+ self.host = "localhost" # Hardcoded
114
+ self.connections = os.getenv("MAX_CONN") # No default, no validation
115
+ ```
116
+
117
+ ### Phase 3: Client Testing Requirements
118
+
119
+ **Database Client Testing:**
120
+
121
+ - Mock database connections in unit tests
122
+ - Use test databases for integration tests
123
+ - Test connection failure scenarios
124
+ - Verify connection pool behavior
125
+ - Test query parameter sanitization
126
+ - Include performance tests for connection pooling
127
+
128
+ **External Service Client Testing:**
129
+
130
+ - Mock external APIs in unit tests
131
+ - Test timeout and retry behaviors
132
+ - Test authentication failure scenarios
133
+ - Include circuit breaker tests
134
+ - Verify proper error handling and logging
135
+
136
+ ### Phase 4: Performance and Scalability
137
+
138
+ **Query Performance:**
139
+
140
+ - Flag SELECT \* queries without LIMIT
141
+ - Require WHERE clauses to filter on indexed columns
142
+ - Batch operations when possible
143
+ - Use prepared statements for repeated queries
144
+ - Monitor and limit query execution time
145
+
146
+ **Connection Management Performance:**
147
+
148
+ - Connection pool size must match expected concurrency
149
+ - Connection validation queries must be lightweight
150
+ - Implement connection health checks
151
+ - Use connection keepalive for long-running connections
152
+ - Monitor connection pool metrics
153
+
154
+ **Resource Limit Validation:**
155
+
156
+ - **Key length constraints**: Validate Redis key lengths against the server limit (Redis allows keys up to 512 MB)
157
+ - **Connection limits**: Ensure connection pool sizes don't exceed database limits
158
+ - **Query complexity**: Monitor and limit expensive query execution time
159
+ - **Memory constraints**: Validate result set sizes for large queries
160
+
161
+ ```python
162
+ # ✅ DO: Resource validation
163
+ def validate_redis_key(key: str) -> str:
164
+ if len(key.encode('utf-8')) > 512 * 1024 * 1024: # 512MB limit
165
+ raise ValueError(f"Redis key too long: {len(key)} bytes")
166
+ return key
167
+
168
+ def create_lock_key(application: str, resource: str, run_id: str) -> str:
169
+ key = f"{application}:{resource}:{run_id}"
170
+ return validate_redis_key(key)
171
+ ```
172
+
173
+ ### Phase 5: Client Maintainability
174
+
175
+ **Code Organization:**
176
+
177
+ - Separate client interface from implementation
178
+ - Use dependency injection for client configuration
179
+ - Implement proper logging with connection context
180
+ - Document connection parameters and requirements
181
+ - Follow consistent error handling patterns
182
+
183
+ **Error Handling Improvements:**
184
+
185
+ - **Comprehensive try/except blocks**: All client operations that can fail must be wrapped in try/except blocks
186
+ - **SDK-specific exceptions**: Use `ClientError` from `application_sdk/common/error_codes.py` instead of generic exceptions
187
+ - **Operation context**: Include operation details (query, connection info) in error messages
188
+ - **Retry vs fail-fast**: Distinguish between retryable connection errors and permanent failures
189
+
190
+ ```python
191
+ # ✅ DO: Comprehensive error handling
192
+ from application_sdk.common.error_codes import ClientError
193
+
194
+ async def execute_query(self, query: str, params: tuple = ()) -> list:
195
+ try:
196
+ async with self.pool.acquire() as conn:
197
+ return await conn.fetch(query, *params)
198
+ except ConnectionRefusedError as e:
199
+ # Retryable error
200
+ logger.warning(f"Database connection refused, will retry: {e}")
201
+ raise ClientError(f"Database temporarily unavailable: {e}")
202
+ except ValidationError as e:
203
+ # Non-retryable error
204
+ logger.error(f"Query validation failed: {query[:50]}...")
205
+ raise ClientError(f"Invalid query: {e}")
206
+ except Exception as e:
207
+ logger.error(f"Unexpected database error: {e}", exc_info=True)
208
+ raise ClientError(f"Database operation failed: {e}")
209
+ ```
210
+
211
+ **Configuration Management:**
212
+
213
+ - Externalize all connection parameters
214
+ - Support multiple environment configurations
215
+ - Implement configuration validation
216
+ - Use secure credential management
217
+ - Document all configuration options
218
+
219
+ ---
220
+
221
+ ## Client-Specific Anti-Patterns
222
+
223
+ **Always Reject:**
224
+
225
+ - Hardcoded connection strings or credentials
226
+ - Missing connection timeouts
227
+ - Synchronous database calls in async contexts
228
+ - SQL queries built through string concatenation
229
+ - Connection objects stored as instance variables
230
+ - Missing connection pool cleanup
231
+ - Generic exception handling without context
232
+ - Direct database connections without pooling
233
+
234
+ **Configuration Anti-Patterns:**
235
+
236
+ - **Missing environment variables**: Parameters that should be configurable but are hardcoded
237
+ - **No validation**: Environment variables used without type checking or range validation
238
+ - **Missing defaults**: Required configuration without sensible fallback values
239
+ - **Environment inconsistency**: Features that work in development but fail in production
240
+
241
+ **Connection Management Anti-Patterns:**
242
+
243
+ ```python
244
+ # ❌ REJECT: Poor connection management
245
+ class BadSQLClient:
246
+ def __init__(self):
247
+ self.conn = psycopg2.connect("host=localhost...") # No pooling
248
+
249
+ def query(self, sql):
250
+ cursor = self.conn.cursor()
251
+ cursor.execute(sql) # No parameterization
252
+ return cursor.fetchall() # No cleanup
253
+
254
+ # ✅ REQUIRE: Proper connection management
255
+ class GoodSQLClient:
256
+ def __init__(self, pool: ConnectionPool):
257
+ self.pool = pool
258
+
259
+ async def query(self, sql: str, params: tuple = ()):
260
+ async with self.pool.acquire() as conn:
261
+ try:
262
+ return await conn.fetch(sql, *params)
263
+ finally:
264
+ # Connection automatically returned to pool
265
+ pass
266
+ ```
267
+
268
+ ## Educational Context for Client Reviews
269
+
270
+ When reviewing client code, emphasize:
271
+
272
+ 1. **Security Impact**: "Database clients are the primary attack vector for SQL injection. Parameterized queries aren't just best practice - they're essential for protecting enterprise customer data."
273
+
274
+ 2. **Performance Impact**: "Connection pooling isn't optional at enterprise scale. Creating new connections for each query can overwhelm database servers and create bottlenecks that affect all users."
275
+
276
+ 3. **Reliability Impact**: "Proper error handling in clients determines whether temporary network issues cause cascading failures or graceful degradation."
277
+
278
+ 4. **Maintainability Impact**: "Client abstraction layers allow us to change databases or connection strategies without affecting business logic throughout the application."
279
+
280
+ 5. **Configuration Impact**: "Externalized configuration enables the same code to work across development, staging, and production environments. Missing this leads to environment-specific bugs that are hard to reproduce and fix."
@@ -0,0 +1,42 @@
1
+ """
2
+ Pydantic models for database client configurations.
3
+ This module provides Pydantic models for database connection configurations,
4
+ ensuring type safety and validation for database client settings.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
+ class DatabaseConfig(BaseModel):
13
+ """
14
+ Pydantic model for database connection configuration.
15
+ This model defines the structure for database connection configurations,
16
+ including connection templates, required parameters, defaults, and additional
17
+ connection parameters.
18
+ """
19
+
20
+ template: str = Field(
21
+ ...,
22
+ description="SQLAlchemy connection string template with placeholders for connection parameters",
23
+ )
24
+ required: List[str] = Field(
25
+ default=[],
26
+ description="List of required connection parameters that must be provided",
27
+ )
28
+ defaults: Optional[Dict[str, Any]] = Field(
29
+ default=None,
30
+ description="Default connection parameters to be added to the connection string",
31
+ )
32
+ parameters: Optional[List[str]] = Field(
33
+ default=None,
34
+ description="List of additional connection parameter names that can be dynamically added from credentials",
35
+ )
36
+
37
+ class Config:
38
+ """Pydantic configuration for the DatabaseConfig model."""
39
+
40
+ extra = "forbid" # Prevent additional fields
41
+ validate_assignment = True # Validate on assignment
42
+ use_enum_values = True # Use enum values instead of enum objects