atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/sql.py +110 -74
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/RECORD +24 -14
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/NOTICE +0 -0

application_sdk/activities/.cursor/BUGBOT.md
@@ -0,0 +1,424 @@

# Activity Code Review Guidelines - Temporal Activities

## Context-Specific Patterns

This directory contains Temporal activity implementations that perform the actual work of workflows. Activities handle external I/O, database operations, and non-deterministic tasks.

### Phase 1: Critical Activity Safety Issues

**External Resource Safety:**

- All external connections (database, API, file) must have explicit timeouts (see the sketch after this list)
- Connection failures must be handled gracefully with proper retry logic
- Resource cleanup must happen in finally blocks or context managers
- Sensitive data must not be logged or exposed in error messages
- All user inputs must be validated before processing
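
A minimal sketch of the timeout-and-cleanup pattern; the `fetch_source_client` helper, the timeout constant, and the exception choice are illustrative stand-ins, not SDK APIs:

```python
import asyncio

SOURCE_TIMEOUT_SECONDS = 30  # explicit timeout; never rely on library defaults

async def fetch_with_timeout(config: dict) -> dict:
    client = await fetch_source_client(config)  # hypothetical connection helper
    try:
        # Bound the external call so a hung connection cannot stall the activity
        return await asyncio.wait_for(client.fetch(), timeout=SOURCE_TIMEOUT_SECONDS)
    except asyncio.TimeoutError:
        # Message is safe to log: no credentials or payload data included
        raise ConnectionError(f"Source fetch timed out after {SOURCE_TIMEOUT_SECONDS}s")
    finally:
        await client.close()  # cleanup runs on success, timeout, and cancellation
```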

**Activity Timeout Management:**

- Activities must respect Temporal heartbeat timeouts for long-running operations
- Progress should be reported via heartbeat for operations > 30 seconds
- Activities should check for cancellation requests periodically
- Timeout values must be realistic for the operation being performed

```python
# ✅ DO: Proper activity with heartbeat and cancellation
@activity.defn
async def process_large_dataset_activity(dataset_config: dict) -> dict:
    total_records = await get_record_count(dataset_config)
    processed = 0

    async for batch in process_in_batches(dataset_config):
        # Report progress; heartbeating also surfaces cancellation requests
        activity.heartbeat({"progress": processed, "total": total_records})

        try:
            await process_batch(batch)
            processed += len(batch)
        except Exception as e:
            activity.logger.error(f"Batch processing failed: {e}", exc_info=True)
            raise

    return {"processed_records": processed}

# ❌ NEVER: Long-running activity without heartbeat
@activity.defn
async def bad_process_activity(data):
    # No heartbeat, no cancellation check, no progress reporting
    return await process_all_data_at_once(data)
```

### Phase 2: Activity Architecture Patterns

**Resource Management:**

- Use connection pooling for database operations
- Implement proper connection context managers
- Clean up temporary files and resources
- Handle partial failures gracefully
- Implement idempotent operations where possible

**Default Value Management:**

- **Always define sensible defaults**: Activity parameters should have reasonable default values where appropriate
- **Avoid required parameters for inferable values**: Values like `owner_id` that can be derived (e.g., from `application_name:run_id`) should not be required parameters
- **Default TTL values**: Lock operations, cache entries, and timeouts should have documented default values (e.g., 300 seconds for locks)
- **Environment-based defaults**: Different environments (dev/prod) may need different defaults

```python
# ✅ DO: Proper default value management
from typing import Optional

@activity.defn
async def acquire_distributed_lock_activity(
    lock_name: str,
    max_locks: int = 10,  # Sensible default
    ttl_seconds: int = 300,  # 5 minutes default
    owner_id: Optional[str] = None  # Will be inferred
) -> dict:
    """Acquire a distributed lock with proper defaults."""

    # Infer owner_id from the activity's workflow context if not provided
    if owner_id is None:
        info = activity.info()
        owner_id = f"{info.workflow_type}:{info.workflow_id}"

    # Validate parameters
    if max_locks <= 0:
        raise ValueError(f"max_locks must be positive, got: {max_locks}")

    return await lock_manager.acquire_lock(lock_name, max_locks, ttl_seconds, owner_id)

# ❌ REJECT: Poor parameter management
@activity.defn
async def bad_acquire_lock_activity(
    lock_name: str,
    max_locks: int,  # No default
    ttl_seconds: int,  # No default
    owner_id: str,  # Required but could be inferred
    application_name: str,  # Redundant - should be inferred
    run_id: str  # Redundant - should be inferred
) -> dict:
    # Forces users to pass values that could be automatically determined
    pass
```

**Error Handling and Retries:**

- Distinguish between retryable and non-retryable errors
- Use specific exception types for different error conditions
- Log errors with sufficient context for debugging
- Implement exponential backoff for retryable operations
- Preserve error context across retries

```python
# ✅ DO: Proper error handling with context
@activity.defn
async def extract_metadata_activity(connection_config: dict) -> dict:
    client = None
    try:
        client = await create_database_client(connection_config)
        await client.validate_connection()

        metadata = await client.extract_metadata()

        activity.logger.info(
            f"Extracted metadata for {len(metadata)} objects",
            extra={"database": connection_config.get("database", "unknown")}
        )

        return metadata

    except ConnectionError as e:
        # Retryable error
        activity.logger.warning(f"Connection failed, will retry: {e}")
        raise  # Let Temporal handle retry

    except ValidationError as e:
        # Non-retryable error
        activity.logger.error(f"Invalid connection config: {e}")
        raise ApplicationError(f"Configuration validation failed: {e}", non_retryable=True)

    finally:
        if client:
            await client.close()
```

**Resource Validation and Limits:**

- **Key length validation**: Ensure generated keys (Redis, cache) don't exceed system limits
- **Memory constraints**: Validate that operations won't exceed available memory
- **Connection limits**: Check that concurrent operations stay within connection pool limits
- **Processing time estimates**: Validate that operations can complete within activity timeouts

```python
# ✅ DO: Resource validation
@activity.defn
async def process_with_validation_activity(
    resource_name: str,
    data_size_mb: int,
    max_processing_time_minutes: int = 30
) -> dict:
    """Process data with proper resource validation."""

    # Validate resource constraints
    if len(resource_name.encode('utf-8')) > 512 * 1024 * 1024:  # 512MB Redis key limit
        raise ValueError(f"Resource name too long: {len(resource_name)} bytes")

    if data_size_mb > 1000:  # 1GB memory limit
        raise ValueError(f"Data size {data_size_mb}MB exceeds 1GB limit")

    # Validate processing time against the activity timeout (may be None
    # when only schedule-to-close is configured)
    activity_timeout = activity.info().start_to_close_timeout
    if activity_timeout and max_processing_time_minutes * 60 > activity_timeout.total_seconds():
        raise ValueError(f"Processing time {max_processing_time_minutes}m exceeds timeout")

    return await process_data(resource_name, data_size_mb)
```

### Phase 3: Activity Testing Requirements

**Activity Testing Standards:**

- Test activities independently from workflows (see the sketch after this list)
- Mock external dependencies (databases, APIs, file systems)
- Test timeout and cancellation behaviors
- Test retry scenarios with different error types
- Include performance tests for long-running activities
- Test heartbeat and progress reporting
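
With the Temporal Python SDK, activities can be exercised in isolation via `temporalio.testing.ActivityEnvironment`. A minimal sketch, assuming pytest with pytest-asyncio and a hypothetical import path; in a real test the database/API dependencies would be patched out:

```python
import pytest
from temporalio.testing import ActivityEnvironment

from my_app.activities import process_large_dataset_activity  # hypothetical path

@pytest.mark.asyncio
async def test_activity_reports_heartbeats():
    heartbeats = []
    env = ActivityEnvironment()
    # Capture heartbeat payloads instead of sending them to a server
    env.on_heartbeat = lambda *details: heartbeats.append(details)

    result = await env.run(process_large_dataset_activity, {"source": "unit-test"})

    assert result["processed_records"] >= 0
    assert heartbeats, "long-running activities must report progress"
```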

**Integration Testing:**

- Use test databases/services for integration tests
- Test real connection failures and recovery
- Verify proper resource cleanup
- Test activity behavior under load
- Include end-to-end tests with real workflows (see the sketch below)
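
For the end-to-end case, the SDK's time-skipping test server lets a real workflow-plus-activity run finish in milliseconds. A hedged sketch, with hypothetical `MyWorkflow` and `my_activity` definitions:

```python
from temporalio.testing import WorkflowEnvironment
from temporalio.worker import Worker

async def test_workflow_end_to_end():
    async with await WorkflowEnvironment.start_time_skipping() as env:
        async with Worker(
            env.client,
            task_queue="test-queue",
            workflows=[MyWorkflow],    # hypothetical workflow class
            activities=[my_activity],  # hypothetical activity function
        ):
            result = await env.client.execute_workflow(
                MyWorkflow.run,
                "test-input",
                id="wf-test-1",
                task_queue="test-queue",
            )
            assert result is not None
```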

### Phase 4: Performance and Scalability

**Activity Performance:**

- Use async/await for all I/O operations
- Implement proper batching for bulk operations
- Use streaming for large datasets
- Monitor activity execution time and resource usage
- Optimize database queries and API calls

**Memory Management:**

- Process large datasets in chunks, not all at once
- Use generators for memory-efficient iteration
- Clean up large objects explicitly
- Monitor memory usage in long-running activities
- Use appropriate data types and structures

```python
# ✅ DO: Memory-efficient processing
import aiofiles

@activity.defn
async def process_large_file_activity(file_path: str, chunk_size: int = 1000) -> dict:
    processed_count = 0
    chunk = []

    async with aiofiles.open(file_path, 'r') as file:
        async for line in file:
            chunk.append(line.strip())

            if len(chunk) >= chunk_size:
                await process_chunk(chunk)
                processed_count += len(chunk)
                chunk = []

                # Report progress and check for cancellation
                activity.heartbeat({"processed": processed_count})

    # Process remaining items
    if chunk:
        await process_chunk(chunk)
        processed_count += len(chunk)

    return {"total_processed": processed_count}

# ❌ NEVER: Load entire file into memory
@activity.defn
async def bad_file_activity(file_path: str):
    with open(file_path, 'r') as file:
        all_lines = file.readlines()  # Memory intensive!
        return process_all_lines(all_lines)
```

**Parallelization Opportunities:**

- **Flag sequential operations**: When processing multiple files or resources, suggest parallel processing (see the sketch after this list)
- **Batch operations**: Group related operations to reduce overhead
- **Connection reuse**: Optimize connection usage across operations
- **Async patterns**: Ensure I/O operations don't block other processing
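
One common shape for the first bullet is bounded fan-out: `asyncio.gather` for parallelism plus a semaphore so concurrency stays within connection and memory limits. `process_file` is a hypothetical per-item coroutine:

```python
import asyncio

async def process_files_in_parallel(paths: list, max_concurrency: int = 8) -> list:
    # Semaphore keeps us within connection-pool and memory limits
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_one(path):
        async with semaphore:
            return await process_file(path)  # hypothetical per-file coroutine

    # gather preserves input order and fails fast on the first error
    return await asyncio.gather(*(process_one(p) for p in paths))
```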

### Phase 5: Activity Maintainability

**Code Organization:**

- Keep activities focused on a single responsibility
- Use dependency injection for external services
- Implement proper logging with activity context
- Document activity parameters and return values
- Follow consistent naming conventions

**Configuration and Environment:**

- Externalize all configuration parameters
- Use environment-specific settings appropriately
- Validate configuration before using it (see the sketch after this list)
- Support development and production configurations
- Document all required configuration options
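
A minimal sketch of startup-time validation; the environment variable names here are illustrative, not the SDK's actual settings (the SDK keeps its own in `application_sdk/constants.py`):

```python
import os

MAX_BATCH_SIZE = int(os.getenv("APP_MAX_BATCH_SIZE", "1000"))                # illustrative name
HEARTBEAT_INTERVAL = int(os.getenv("APP_HEARTBEAT_INTERVAL_SECONDS", "10"))  # illustrative name

def validate_config() -> None:
    # Fail fast at worker startup rather than mid-activity
    if MAX_BATCH_SIZE <= 0:
        raise ValueError(f"APP_MAX_BATCH_SIZE must be positive, got {MAX_BATCH_SIZE}")
    if HEARTBEAT_INTERVAL <= 0:
        raise ValueError(f"APP_HEARTBEAT_INTERVAL_SECONDS must be positive, got {HEARTBEAT_INTERVAL}")
```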

**Error Context Enhancement:**

- **Operation identification**: Include the specific operation that failed in error messages
- **Parameter context**: Log relevant parameters (sanitized) when operations fail
- **Resource state**: Include information about resource availability/state in errors
- **Recovery suggestions**: Where possible, include suggestions for resolving errors

```python
# ✅ DO: Enhanced error context
from typing import List

@activity.defn
async def enhanced_error_activity(
    database_name: str,
    table_names: List[str],
    timeout_seconds: int = 300
) -> dict:
    """Activity with comprehensive error context."""

    try:
        result = await extract_table_metadata(database_name, table_names, timeout_seconds)
        return result

    except ConnectionTimeout:
        activity.logger.error(
            "Database connection timeout during metadata extraction",
            extra={
                "database": database_name,
                "tables_requested": len(table_names),
                "timeout_used": timeout_seconds,
                "suggestion": "Consider increasing timeout or reducing table count"
            }
        )
        raise ApplicationError(
            f"Metadata extraction timed out after {timeout_seconds}s for database '{database_name}' "
            f"with {len(table_names)} tables. Consider reducing scope or increasing timeout.",
            non_retryable=True
        )

    except InsufficientPrivileges:
        activity.logger.error(
            "Insufficient database privileges for metadata extraction",
            extra={
                "database": database_name,
                "required_privileges": ["SELECT", "INFORMATION_SCHEMA_READ"],
                "suggestion": "Grant required database privileges to connection user"
            }
        )
        raise ApplicationError(
            f"Missing database privileges for '{database_name}'. "
            f"Ensure connection user has SELECT and INFORMATION_SCHEMA access.",
            non_retryable=True
        )
```

---

## Activity-Specific Anti-Patterns

**Always Reject:**

- Activities without proper timeout handling
- Long-running activities without heartbeat reporting
- Missing resource cleanup (connections, files, etc.)
- Generic exception handling without specific error types
- Activities that don't handle cancellation
- Synchronous I/O operations in async activities
- Missing logging for error conditions
- Activities without proper input validation

**Parameter Management Anti-Patterns:**

- **Over-parameterization**: Requiring parameters that can be inferred from context
- **Missing defaults**: Parameters without reasonable default values
- **No validation**: Accepting parameters without validating constraints
- **Redundant parameters**: Multiple parameters representing the same concept

**Resource Management Anti-Patterns:**

```python
# ❌ REJECT: Poor resource management
@activity.defn
async def bad_database_activity(query: str):
    # No connection pooling, no cleanup, no error handling
    conn = await psycopg.connect("host=localhost...")
    result = await conn.execute(query)  # No timeout
    return result.fetchall()  # Connection never closed

# ✅ REQUIRE: Proper resource management
@activity.defn
async def good_database_activity(query: str, params: tuple = ()) -> list:
    async with get_connection_pool().acquire() as conn:
        try:
            # Parameterized execution on a pooled connection
            async with conn.cursor() as cursor:
                await cursor.execute(query, params)
                return await cursor.fetchall()
        except Exception:
            activity.logger.error(f"Database query failed: {query[:100]}...", exc_info=True)
            raise
        # Connection automatically returned to pool
```

**Heartbeat and Cancellation Anti-Patterns:**

```python
# ❌ REJECT: No heartbeat or cancellation handling
@activity.defn
async def bad_long_running_activity(data_list: list):
    results = []
    for item in data_list:  # Could take hours
        result = await expensive_operation(item)
        results.append(result)
    return results

# ✅ REQUIRE: Proper heartbeat and cancellation
@activity.defn
async def good_long_running_activity(data_list: list) -> list:
    results = []
    total_items = len(data_list)

    for i, item in enumerate(data_list):
        # Check for cancellation and report progress
        activity.heartbeat({
            "processed": i,
            "total": total_items,
            "percent_complete": (i / total_items) * 100
        })

        try:
            result = await expensive_operation(item)
            results.append(result)
        except Exception as e:
            activity.logger.error(f"Processing failed for item {i}: {e}")
            raise

    return results
```

## Educational Context for Activity Reviews

When reviewing activity code, emphasize:

1. **Reliability Impact**: "Activities are where the real work happens. Proper error handling and resource management in activities determines whether workflows succeed or fail under real-world conditions."

2. **Performance Impact**: "Activity performance directly affects workflow execution time. Inefficient activities create bottlenecks that slow down entire business processes."

3. **Observability Impact**: "Activity logging and heartbeat reporting are essential for monitoring long-running processes. Without proper observability, debugging workflow issues becomes nearly impossible."

4. **Resource Impact**: "Activities consume actual system resources. Poor resource management in activities can cause memory leaks, connection pool exhaustion, and system instability."

5. **Cancellation Impact**: "Activities that don't handle cancellation properly can continue consuming resources even after workflows are cancelled, leading to resource waste and potential system overload."

6. **Parameter Design Impact**: "Well-designed activity parameters with sensible defaults make activities easier to use and less error-prone. Over-parameterization creates maintenance burden and increases the chance of configuration errors."

application_sdk/clients/.cursor/BUGBOT.md
@@ -0,0 +1,280 @@

# Client Code Review Guidelines - Database and External Services

## Context-Specific Patterns

This directory contains database clients, external service clients, and connection management code. These components are critical for data integrity, performance, and security.

### Phase 1: Critical Client Safety Issues

**Database Connection Security:**

- SQL injection prevention through parameterized queries ONLY
- Connection strings must never contain hardcoded credentials
- Database passwords must be retrieved from secure credential stores
- SSL/TLS required for all external database connections
- Connection timeouts must be explicitly configured

**Example SQL Injection Prevention:**

```python
# ✅ DO: Parameterized queries
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))

# ❌ NEVER: String concatenation
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
```

### Phase 2: Client Architecture Patterns

**Connection Pooling Requirements:**

- All database clients MUST use connection pooling (see the sketch after this list)
- Pool size must be configurable via environment variables
- Connection validation on checkout required
- Proper connection cleanup in finally blocks
- Connection leak detection in development/testing
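
A sketch of env-configured pooling using `asyncpg` as one possible driver; the variable names are illustrative:

```python
import os
import asyncpg

async def create_pool() -> asyncpg.Pool:
    # Credentials come from the environment, never from source code
    return await asyncpg.create_pool(
        dsn=os.environ["DATABASE_DSN"],
        min_size=int(os.getenv("DB_POOL_MIN", "2")),
        max_size=int(os.getenv("DB_POOL_MAX", "20")),
        command_timeout=float(os.getenv("DB_TIMEOUT_SECONDS", "30")),
    )
```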

**Class Responsibility Separation:**

- **Always flag multi-responsibility classes**: Classes handling both client functionality and domain-specific logic must be separated
- **Client vs Logic separation**: Database clients should handle connections, not business rules
- **Extract domain logic**: Lock management, caching, or processing logic should be in separate classes
- **Single purpose interfaces**: Each client class should have one clear responsibility

```python
# ❌ REJECT: Mixed responsibilities
class RedisClient:
    def connect(self):
        """Handle connection setup"""
    def acquire_lock(self, lock_name):
        """Lock functionality - should be separate"""
    def get_data(self, key):
        """Client functionality"""

# ✅ REQUIRE: Separated responsibilities
class RedisClient:
    def connect(self):
        """Handle connection setup"""
    def get_data(self, key):
        """Client functionality"""

class RedisLockManager:  # Separate class for lock functionality
    def __init__(self, client: RedisClient):
        self.client = client
    def acquire_lock(self, lock_name):
        """Lock-specific logic"""
```

**Async Client Patterns:**

- Use async/await for all I/O operations
- Implement proper connection context managers
- Handle connection failures gracefully with retries
- Use asyncio connection pools, not synchronous pools

```python
# ✅ DO: Proper async connection management
async def execute_query(self, query: str, params: tuple):
    async with self.pool.acquire() as conn:
        try:
            return await conn.fetch(query, *params)
        except Exception:
            logger.error(f"Query failed: {query[:100]}...", exc_info=True)
            raise
```

**Configuration Management for Clients:**

- **Environment-specific settings**: All connection parameters must be externalized to environment variables
- **Default value validation**: Every configuration parameter must have a sensible default and validation
- **Development vs Production**: Client configurations must work in both environments
- **Configuration consolidation**: Related configuration should be grouped together

```python
# ✅ DO: Proper client configuration
import os

class DatabaseClientConfig:
    def __init__(self):
        self.host = os.getenv("DB_HOST", "localhost")
        self.port = int(os.getenv("DB_PORT", "5432"))
        self.max_connections = int(os.getenv("DB_MAX_CONNECTIONS", "20"))
        self.timeout = int(os.getenv("DB_TIMEOUT_SECONDS", "30"))
        self.ssl_required = os.getenv("DB_SSL_REQUIRED", "true").lower() == "true"
        self._validate()

    def _validate(self):
        if self.max_connections <= 0:
            raise ValueError("DB_MAX_CONNECTIONS must be positive")
        if self.timeout <= 0:
            raise ValueError("DB_TIMEOUT_SECONDS must be positive")

# ❌ REJECT: Poor configuration management
class BadDatabaseClient:
    def __init__(self):
        self.host = "localhost"  # Hardcoded
        self.connections = os.getenv("MAX_CONN")  # No default, no validation
```

### Phase 3: Client Testing Requirements

**Database Client Testing:**

- Mock database connections in unit tests
- Use test databases for integration tests
- Test connection failure scenarios
- Verify connection pool behavior
- Test query parameter sanitization
- Include performance tests for connection pooling

**External Service Client Testing:**

- Mock external APIs in unit tests
- Test timeout and retry behaviors (see the sketch after this list)
- Test authentication failure scenarios
- Include circuit breaker tests
- Verify proper error handling and logging
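
A sketch of the mocked-timeout case, assuming pytest with pytest-asyncio; the client under test follows the pool-injected `GoodSQLClient` shape shown later in this document:

```python
from unittest.mock import AsyncMock, MagicMock

import pytest

@pytest.mark.asyncio
async def test_query_surfaces_timeouts():
    # Mock a pooled connection whose fetch times out
    conn = AsyncMock()
    conn.fetch.side_effect = TimeoutError("statement timeout")

    # pool.acquire() must return an async context manager yielding the connection
    acquire_cm = MagicMock()
    acquire_cm.__aenter__ = AsyncMock(return_value=conn)
    acquire_cm.__aexit__ = AsyncMock(return_value=False)
    pool = MagicMock()
    pool.acquire.return_value = acquire_cm

    client = GoodSQLClient(pool)
    with pytest.raises(TimeoutError):
        await client.query("SELECT 1")
    conn.fetch.assert_called_once()
```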

### Phase 4: Performance and Scalability

**Query Performance:**

- Flag `SELECT *` queries without LIMIT
- Require WHERE clauses on indexed columns
- Batch operations when possible
- Use prepared statements for repeated queries (see the sketch after this list)
- Monitor and limit query execution time
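
A sketch of the batching and prepared-statement bullets, again using `asyncpg` syntax; the table and column names are illustrative:

```python
async def insert_assets(pool, rows: list) -> None:
    async with pool.acquire() as conn:
        # One round trip per batch instead of one per row
        await conn.executemany(
            "INSERT INTO assets (name, qualified_name) VALUES ($1, $2)",
            rows,
        )

async def fetch_asset(pool, name: str):
    async with pool.acquire() as conn:
        # Prepared once, then executed repeatedly with different parameters
        statement = await conn.prepare(
            "SELECT name, qualified_name FROM assets WHERE name = $1 LIMIT 1"
        )
        return await statement.fetchrow(name)
```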

**Connection Management Performance:**

- Connection pool size must match expected concurrency
- Connection validation queries must be lightweight
- Implement connection health checks
- Use connection keepalive for long-running connections
- Monitor connection pool metrics

**Resource Limit Validation:**

- **Key length constraints**: Validate Redis key lengths against limits (typically 512MB max)
- **Connection limits**: Ensure connection pool sizes don't exceed database limits
- **Query complexity**: Monitor and limit expensive query execution time
- **Memory constraints**: Validate result set sizes for large queries

```python
# ✅ DO: Resource validation
def validate_redis_key(key: str) -> str:
    if len(key.encode('utf-8')) > 512 * 1024 * 1024:  # 512MB limit
        raise ValueError(f"Redis key too long: {len(key)} bytes")
    return key

def create_lock_key(application: str, resource: str, run_id: str) -> str:
    key = f"{application}:{resource}:{run_id}"
    return validate_redis_key(key)
```

### Phase 5: Client Maintainability

**Code Organization:**

- Separate client interface from implementation
- Use dependency injection for client configuration
- Implement proper logging with connection context
- Document connection parameters and requirements
- Follow consistent error handling patterns

**Error Handling Improvements:**

- **Comprehensive try/except blocks**: All client operations that can fail must be wrapped in try/except blocks
- **SDK-specific exceptions**: Use `ClientError` from `application_sdk/common/error_codes.py` instead of generic exceptions
- **Operation context**: Include operation details (query, connection info) in error messages
- **Retry vs fail-fast**: Distinguish between retryable connection errors and permanent failures

```python
# ✅ DO: Comprehensive error handling
from application_sdk.common.error_codes import ClientError

async def execute_query(self, query: str, params: tuple = ()) -> list:
    try:
        async with self.pool.acquire() as conn:
            return await conn.fetch(query, *params)
    except ConnectionRefusedError as e:
        # Retryable error
        logger.warning(f"Database connection refused, will retry: {e}")
        raise ClientError(f"Database temporarily unavailable: {e}")
    except ValidationError as e:
        # Non-retryable error
        logger.error(f"Query validation failed: {query[:50]}...")
        raise ClientError(f"Invalid query: {e}")
    except Exception as e:
        logger.error(f"Unexpected database error: {e}", exc_info=True)
        raise ClientError(f"Database operation failed: {e}")
```

**Configuration Management:**

- Externalize all connection parameters
- Support multiple environment configurations
- Implement configuration validation
- Use secure credential management
- Document all configuration options

---

## Client-Specific Anti-Patterns

**Always Reject:**

- Hardcoded connection strings or credentials
- Missing connection timeouts
- Synchronous database calls in async contexts
- SQL queries built through string concatenation
- Connection objects stored as instance variables
- Missing connection pool cleanup
- Generic exception handling without context
- Direct database connections without pooling

**Configuration Anti-Patterns:**

- **Missing environment variables**: Parameters that should be configurable but are hardcoded
- **No validation**: Environment variables used without type checking or range validation
- **Missing defaults**: Required configuration without sensible fallback values
- **Environment inconsistency**: Features that work in development but fail in production

**Connection Management Anti-Patterns:**

```python
# ❌ REJECT: Poor connection management
class BadSQLClient:
    def __init__(self):
        self.conn = psycopg2.connect("host=localhost...")  # No pooling

    def query(self, sql):
        cursor = self.conn.cursor()
        cursor.execute(sql)  # No parameterization
        return cursor.fetchall()  # No cleanup

# ✅ REQUIRE: Proper connection management
class GoodSQLClient:
    def __init__(self, pool: ConnectionPool):
        self.pool = pool

    async def query(self, sql: str, params: tuple = ()):
        async with self.pool.acquire() as conn:
            # Connection automatically returned to pool on exit
            return await conn.fetch(sql, *params)
```

## Educational Context for Client Reviews

When reviewing client code, emphasize:

1. **Security Impact**: "Database clients are the primary attack vector for SQL injection. Parameterized queries aren't just best practice - they're essential for protecting enterprise customer data."

2. **Performance Impact**: "Connection pooling isn't optional at enterprise scale. Creating new connections for each query can overwhelm database servers and create bottlenecks that affect all users."

3. **Reliability Impact**: "Proper error handling in clients determines whether temporary network issues cause cascading failures or graceful degradation."

4. **Maintainability Impact**: "Client abstraction layers allow us to change databases or connection strategies without affecting business logic throughout the application."

5. **Configuration Impact**: "Externalized configuration enables the same code to work across development, staging, and production environments. Missing this leads to environment-specific bugs that are hard to reproduce and fix."