atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/activities/metadata_extraction/sql.py +400 -25
- application_sdk/application/__init__.py +2 -0
- application_sdk/application/metadata_extraction/sql.py +3 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/models.py +42 -0
- application_sdk/clients/sql.py +127 -87
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/common/aws_utils.py +259 -11
- application_sdk/common/utils.py +145 -9
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/handlers/__init__.py +8 -1
- application_sdk/handlers/sql.py +63 -22
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/observability/decorators/observability_decorator.py +36 -22
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/server/fastapi/__init__.py +59 -3
- application_sdk/server/fastapi/models.py +27 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# Client Code Review Guidelines - Database and External Services
|
|
2
|
+
|
|
3
|
+
## Context-Specific Patterns
|
|
4
|
+
|
|
5
|
+
This directory contains database clients, external service clients, and connection management code. These components are critical for data integrity, performance, and security.
|
|
6
|
+
|
|
7
|
+
### Phase 1: Critical Client Safety Issues
|
|
8
|
+
|
|
9
|
+
**Database Connection Security:**
|
|
10
|
+
|
|
11
|
+
- SQL injection prevention through parameterized queries ONLY
|
|
12
|
+
- Connection strings must never contain hardcoded credentials
|
|
13
|
+
- Database passwords must be retrieved from secure credential stores
|
|
14
|
+
- SSL/TLS required for all external database connections
|
|
15
|
+
- Connection timeouts must be explicitly configured
|
|
16
|
+
|
|
17
|
+
**Example SQL Injection Prevention:**
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# ✅ DO: Parameterized queries
|
|
21
|
+
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
|
|
22
|
+
|
|
23
|
+
# ❌ NEVER: String interpolation or concatenation
|
|
24
|
+
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Phase 2: Client Architecture Patterns
|
|
28
|
+
|
|
29
|
+
**Connection Pooling Requirements:**
|
|
30
|
+
|
|
31
|
+
- All database clients MUST use connection pooling
|
|
32
|
+
- Pool size must be configurable via environment variables
|
|
33
|
+
- Connection validation on checkout required
|
|
34
|
+
- Proper connection cleanup in finally blocks
|
|
35
|
+
- Connection leak detection in development/testing
|
|
36
|
+
|
|
37
|
+
**Class Responsibility Separation:**
|
|
38
|
+
|
|
39
|
+
- **Always flag multi-responsibility classes**: Classes handling both client functionality and domain-specific logic must be separated
|
|
40
|
+
- **Client vs Logic separation**: Database clients should handle connections, not business rules
|
|
41
|
+
- **Extract domain logic**: Lock management, caching, or processing logic should be in separate classes
|
|
42
|
+
- **Single purpose interfaces**: Each client class should have one clear responsibility
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
# ❌ REJECT: Mixed responsibilities
|
|
46
|
+
class RedisClient:
|
|
47
|
+
def connect(self):
|
|
48
|
+
"""Handle connection setup"""
|
|
49
|
+
def acquire_lock(self, lock_name):
|
|
50
|
+
"""Lock functionality - should be separate"""
|
|
51
|
+
def get_data(self, key):
|
|
52
|
+
"""Client functionality"""
|
|
53
|
+
|
|
54
|
+
# ✅ REQUIRE: Separated responsibilities
|
|
55
|
+
class RedisClient:
|
|
56
|
+
def connect(self):
|
|
57
|
+
"""Handle connection setup"""
|
|
58
|
+
def get_data(self, key):
|
|
59
|
+
"""Client functionality"""
|
|
60
|
+
|
|
61
|
+
class RedisLockManager: # Separate class for lock functionality
|
|
62
|
+
def __init__(self, client: RedisClient):
|
|
63
|
+
self.client = client
|
|
64
|
+
def acquire_lock(self, lock_name):
|
|
65
|
+
"""Lock-specific logic"""
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Async Client Patterns:**
|
|
69
|
+
|
|
70
|
+
- Use async/await for all I/O operations
|
|
71
|
+
- Implement proper connection context managers
|
|
72
|
+
- Handle connection failures gracefully with retries
|
|
73
|
+
- Use asyncio connection pools, not synchronous pools
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
# ✅ DO: Proper async connection management
|
|
77
|
+
async def execute_query(self, query: str, params: tuple):
|
|
78
|
+
async with self.pool.acquire() as conn:
|
|
79
|
+
try:
|
|
80
|
+
return await conn.fetch(query, *params)
|
|
81
|
+
except Exception as e:
|
|
82
|
+
logger.error(f"Query failed: {query[:100]}...", exc_info=True)
|
|
83
|
+
raise
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Configuration Management for Clients:**
|
|
87
|
+
|
|
88
|
+
- **Environment-specific settings**: All connection parameters must be externalized to environment variables
|
|
89
|
+
- **Default value validation**: Every configuration parameter must have a sensible default and validation
|
|
90
|
+
- **Development vs Production**: Client configurations must work in both environments
|
|
91
|
+
- **Configuration consolidation**: Related configuration should be grouped together
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
# ✅ DO: Proper client configuration
|
|
95
|
+
class DatabaseClientConfig:
|
|
96
|
+
def __init__(self):
|
|
97
|
+
self.host = os.getenv("DB_HOST", "localhost")
|
|
98
|
+
self.port = int(os.getenv("DB_PORT", "5432"))
|
|
99
|
+
self.max_connections = int(os.getenv("DB_MAX_CONNECTIONS", "20"))
|
|
100
|
+
self.timeout = int(os.getenv("DB_TIMEOUT_SECONDS", "30"))
|
|
101
|
+
self.ssl_required = os.getenv("DB_SSL_REQUIRED", "true").lower() == "true"
|
|
102
|
+
self._validate()
|
|
103
|
+
|
|
104
|
+
def _validate(self):
|
|
105
|
+
if self.max_connections <= 0:
|
|
106
|
+
raise ValueError("DB_MAX_CONNECTIONS must be positive")
|
|
107
|
+
if self.timeout <= 0:
|
|
108
|
+
raise ValueError("DB_TIMEOUT_SECONDS must be positive")
|
|
109
|
+
|
|
110
|
+
# ❌ REJECT: Poor configuration management
|
|
111
|
+
class BadDatabaseClient:
|
|
112
|
+
def __init__(self):
|
|
113
|
+
self.host = "localhost" # Hardcoded
|
|
114
|
+
self.connections = os.getenv("MAX_CONN") # No default, no validation
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Phase 3: Client Testing Requirements
|
|
118
|
+
|
|
119
|
+
**Database Client Testing:**
|
|
120
|
+
|
|
121
|
+
- Mock database connections in unit tests
|
|
122
|
+
- Use test databases for integration tests
|
|
123
|
+
- Test connection failure scenarios
|
|
124
|
+
- Verify connection pool behavior
|
|
125
|
+
- Test query parameter sanitization
|
|
126
|
+
- Include performance tests for connection pooling
|
|
127
|
+
|
|
128
|
+
**External Service Client Testing:**
|
|
129
|
+
|
|
130
|
+
- Mock external APIs in unit tests
|
|
131
|
+
- Test timeout and retry behaviors
|
|
132
|
+
- Test authentication failure scenarios
|
|
133
|
+
- Include circuit breaker tests
|
|
134
|
+
- Verify proper error handling and logging
|
|
135
|
+
|
|
136
|
+
### Phase 4: Performance and Scalability
|
|
137
|
+
|
|
138
|
+
**Query Performance:**
|
|
139
|
+
|
|
140
|
+
- Flag SELECT \* queries without LIMIT
|
|
141
|
+
- Require WHERE clauses on indexed columns
|
|
142
|
+
- Batch operations when possible
|
|
143
|
+
- Use prepared statements for repeated queries
|
|
144
|
+
- Monitor and limit query execution time
|
|
145
|
+
|
|
146
|
+
**Connection Management Performance:**
|
|
147
|
+
|
|
148
|
+
- Connection pool size must match expected concurrency
|
|
149
|
+
- Connection validation queries must be lightweight
|
|
150
|
+
- Implement connection health checks
|
|
151
|
+
- Use connection keepalive for long-running connections
|
|
152
|
+
- Monitor connection pool metrics
|
|
153
|
+
|
|
154
|
+
**Resource Limit Validation:**
|
|
155
|
+
|
|
156
|
+
- **Key length constraints**: Validate Redis key lengths against the server limit (Redis allows keys up to 512 MB, though keys should be kept far shorter in practice)
|
|
157
|
+
- **Connection limits**: Ensure connection pool sizes don't exceed database limits
|
|
158
|
+
- **Query complexity**: Monitor and limit expensive query execution time
|
|
159
|
+
- **Memory constraints**: Validate result set sizes for large queries
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# ✅ DO: Resource validation
|
|
163
|
+
def validate_redis_key(key: str) -> str:
|
|
164
|
+
if len(key.encode('utf-8')) > 512 * 1024 * 1024: # 512MB limit
|
|
165
|
+
raise ValueError(f"Redis key too long: {len(key.encode('utf-8'))} bytes")
|
|
166
|
+
return key
|
|
167
|
+
|
|
168
|
+
def create_lock_key(application: str, resource: str, run_id: str) -> str:
|
|
169
|
+
key = f"{application}:{resource}:{run_id}"
|
|
170
|
+
return validate_redis_key(key)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Phase 5: Client Maintainability
|
|
174
|
+
|
|
175
|
+
**Code Organization:**
|
|
176
|
+
|
|
177
|
+
- Separate client interface from implementation
|
|
178
|
+
- Use dependency injection for client configuration
|
|
179
|
+
- Implement proper logging with connection context
|
|
180
|
+
- Document connection parameters and requirements
|
|
181
|
+
- Follow consistent error handling patterns
|
|
182
|
+
|
|
183
|
+
**Error Handling Improvements:**
|
|
184
|
+
|
|
185
|
+
- **Comprehensive try/except blocks**: All client operations that can fail must be wrapped in `try`/`except` blocks
|
|
186
|
+
- **SDK-specific exceptions**: Use `ClientError` from `application_sdk/common/error_codes.py` instead of generic exceptions
|
|
187
|
+
- **Operation context**: Include operation details (query, connection info) in error messages
|
|
188
|
+
- **Retry vs fail-fast**: Distinguish between retryable connection errors and permanent failures
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# ✅ DO: Comprehensive error handling
|
|
192
|
+
from application_sdk.common.error_codes import ClientError
|
|
193
|
+
|
|
194
|
+
async def execute_query(self, query: str, params: tuple = ()) -> list:
|
|
195
|
+
try:
|
|
196
|
+
async with self.pool.acquire() as conn:
|
|
197
|
+
return await conn.fetch(query, *params)
|
|
198
|
+
except ConnectionRefusedError as e:
|
|
199
|
+
# Retryable error
|
|
200
|
+
logger.warning(f"Database connection refused, will retry: {e}")
|
|
201
|
+
raise ClientError(f"Database temporarily unavailable: {e}")
|
|
202
|
+
except ValidationError as e:
|
|
203
|
+
# Non-retryable error
|
|
204
|
+
logger.error(f"Query validation failed: {query[:50]}...")
|
|
205
|
+
raise ClientError(f"Invalid query: {e}")
|
|
206
|
+
except Exception as e:
|
|
207
|
+
logger.error(f"Unexpected database error: {e}", exc_info=True)
|
|
208
|
+
raise ClientError(f"Database operation failed: {e}")
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
**Configuration Management:**
|
|
212
|
+
|
|
213
|
+
- Externalize all connection parameters
|
|
214
|
+
- Support multiple environment configurations
|
|
215
|
+
- Implement configuration validation
|
|
216
|
+
- Use secure credential management
|
|
217
|
+
- Document all configuration options
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## Client-Specific Anti-Patterns
|
|
222
|
+
|
|
223
|
+
**Always Reject:**
|
|
224
|
+
|
|
225
|
+
- Hardcoded connection strings or credentials
|
|
226
|
+
- Missing connection timeouts
|
|
227
|
+
- Synchronous database calls in async contexts
|
|
228
|
+
- SQL queries built through string concatenation or interpolation (f-strings, `%`, `.format()`)
|
|
229
|
+
- Raw connection objects (as opposed to connection pools) stored as long-lived instance variables
|
|
230
|
+
- Missing connection pool cleanup
|
|
231
|
+
- Generic exception handling without context
|
|
232
|
+
- Direct database connections without pooling
|
|
233
|
+
|
|
234
|
+
**Configuration Anti-Patterns:**
|
|
235
|
+
|
|
236
|
+
- **Missing environment variables**: Parameters that should be configurable but are hardcoded
|
|
237
|
+
- **No validation**: Environment variables used without type checking or range validation
|
|
238
|
+
- **Missing defaults**: Required configuration without sensible fallback values
|
|
239
|
+
- **Environment inconsistency**: Features that work in development but fail in production
|
|
240
|
+
|
|
241
|
+
**Connection Management Anti-Patterns:**
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
# ❌ REJECT: Poor connection management
|
|
245
|
+
class BadSQLClient:
|
|
246
|
+
def __init__(self):
|
|
247
|
+
self.conn = psycopg2.connect("host=localhost...") # No pooling
|
|
248
|
+
|
|
249
|
+
def query(self, sql):
|
|
250
|
+
cursor = self.conn.cursor()
|
|
251
|
+
cursor.execute(sql) # No parameterization
|
|
252
|
+
return cursor.fetchall() # No cleanup
|
|
253
|
+
|
|
254
|
+
# ✅ REQUIRE: Proper connection management
|
|
255
|
+
class GoodSQLClient:
|
|
256
|
+
def __init__(self, pool: ConnectionPool):
|
|
257
|
+
self.pool = pool
|
|
258
|
+
|
|
259
|
+
async def query(self, sql: str, params: tuple = ()):
|
|
260
|
+
async with self.pool.acquire() as conn:
|
|
261
|
+
try:
|
|
262
|
+
return await conn.fetch(sql, *params)
|
|
263
|
+
finally:
|
|
264
|
+
# Connection automatically returned to pool
|
|
265
|
+
pass
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Educational Context for Client Reviews
|
|
269
|
+
|
|
270
|
+
When reviewing client code, emphasize:
|
|
271
|
+
|
|
272
|
+
1. **Security Impact**: "Database clients are the primary attack vector for SQL injection. Parameterized queries aren't just best practice - they're essential for protecting enterprise customer data."
|
|
273
|
+
|
|
274
|
+
2. **Performance Impact**: "Connection pooling isn't optional at enterprise scale. Creating new connections for each query can overwhelm database servers and create bottlenecks that affect all users."
|
|
275
|
+
|
|
276
|
+
3. **Reliability Impact**: "Proper error handling in clients determines whether temporary network issues cause cascading failures or graceful degradation."
|
|
277
|
+
|
|
278
|
+
4. **Maintainability Impact**: "Client abstraction layers allow us to change databases or connection strategies without affecting business logic throughout the application."
|
|
279
|
+
|
|
280
|
+
5. **Configuration Impact**: "Externalized configuration enables the same code to work across development, staging, and production environments. Missing this leads to environment-specific bugs that are hard to reproduce and fix."
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic models for database client configurations.
|
|
3
|
+
This module provides Pydantic models for database connection configurations,
|
|
4
|
+
ensuring type safety and validation for database client settings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DatabaseConfig(BaseModel):
|
|
13
|
+
"""
|
|
14
|
+
Pydantic model for database connection configuration.
|
|
15
|
+
This model defines the structure for database connection configurations,
|
|
16
|
+
including connection templates, required parameters, defaults, and additional
|
|
17
|
+
connection parameters.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
template: str = Field(
|
|
21
|
+
...,
|
|
22
|
+
description="SQLAlchemy connection string template with placeholders for connection parameters",
|
|
23
|
+
)
|
|
24
|
+
required: List[str] = Field(
|
|
25
|
+
default=[],
|
|
26
|
+
description="List of required connection parameters that must be provided",
|
|
27
|
+
)
|
|
28
|
+
defaults: Optional[Dict[str, Any]] = Field(
|
|
29
|
+
default=None,
|
|
30
|
+
description="Default connection parameters to be added to the connection string",
|
|
31
|
+
)
|
|
32
|
+
parameters: Optional[List[str]] = Field(
|
|
33
|
+
default=None,
|
|
34
|
+
description="List of additional connection parameter names that can be dynamically added from credentials",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
class Config:
|
|
38
|
+
"""Pydantic configuration for the DatabaseConfig model."""
|
|
39
|
+
|
|
40
|
+
extra = "forbid" # Prevent additional fields
|
|
41
|
+
validate_assignment = True # Validate on assignment
|
|
42
|
+
use_enum_values = True # Use enum values instead of enum objects
|