atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/activities/metadata_extraction/sql.py +400 -25
- application_sdk/application/__init__.py +2 -0
- application_sdk/application/metadata_extraction/sql.py +3 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/models.py +42 -0
- application_sdk/clients/sql.py +127 -87
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/common/aws_utils.py +259 -11
- application_sdk/common/utils.py +145 -9
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/handlers/__init__.py +8 -1
- application_sdk/handlers/sql.py +63 -22
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/observability/decorators/observability_decorator.py +36 -22
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/server/fastapi/__init__.py +59 -3
- application_sdk/server/fastapi/models.py +27 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,250 @@ application_sdk/inputs/.cursor/BUGBOT.md

# Input Code Review Guidelines - Data Input Processing

## Context-Specific Patterns

This directory contains input processing implementations for various data formats (JSON, Parquet, SQL). Input processors must handle data efficiently while maintaining data integrity and performance.

### Phase 1: Critical Input Safety Issues

**Object Store Path Management:**

- **Correct path calculation**: Source paths must use the actual object store prefix, not derived local paths
- **Path validation**: Verify that object store keys are valid and within constraints
- **User-provided prefixes**: Respect user-configured input prefixes and download paths
- **Path consistency**: Ensure downloaded files match the expected object store locations

**Data Validation and Security:**

- All input data must be validated before processing
- File size limits must be enforced to prevent resource exhaustion
- File type validation required for uploaded/downloaded files
- Malicious file content detection for executable or script files
- Input path traversal prevention (a traversal-guard sketch follows the code block below)

```python
# ✅ DO: Proper object store path handling
class JsonInput:
    async def download_from_object_store(
        self,
        input_prefix: str,  # User-provided prefix
        local_destination: str
    ) -> List[str]:
        """Download files with correct path handling."""

        # Use the actual input prefix, not a derived local path
        object_store_source = input_prefix  # Keep the user's intended source

        downloaded_files = await self.object_store.download_files(
            source=object_store_source,
            destination=local_destination
        )

        return downloaded_files

# ❌ REJECT: Incorrect path handling
class BadJsonInput:
    async def download_from_object_store(
        self,
        input_prefix: str,
        local_destination: str
    ) -> List[str]:
        # Wrong: derives the object store path from the local path.
        # This ignores the user's actual input_prefix!
        object_store_source = get_object_store_prefix(local_destination)

        return await self.object_store.download_files(
            source=object_store_source,  # Wrong source!
            destination=local_destination
        )
```
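
The traversal-prevention bullet above is the one item in that list with no example in this file. A minimal sketch of the idea, built around a hypothetical `resolve_safe_path` helper that is not part of the SDK:

```python
# Hypothetical traversal guard - illustrative only, not an SDK API
import os

def resolve_safe_path(base_dir: str, relative_path: str) -> str:
    """Join a user-supplied relative path to base_dir, rejecting traversal."""
    base = os.path.realpath(base_dir)
    candidate = os.path.realpath(os.path.join(base, relative_path))

    # realpath collapses ".." segments, so any candidate that escapes
    # base_dir indicates an attempted traversal
    if os.path.commonpath([base, candidate]) != base:
        raise ValueError(f"Path traversal detected: {relative_path!r}")
    return candidate
```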

### Phase 2: Input Architecture Patterns

**Performance Optimization Requirements:**

- **Parallelization opportunities**: Flag sequential file operations that could be parallelized
- **Batch processing**: Group related operations to reduce overhead
- **Memory efficiency**: Process large files in chunks, not all at once
- **Connection reuse**: Optimize object store connections across operations

**Resource Management:**

- Use proper connection pooling for object store operations
- Implement timeout handling for download operations (a timeout-and-cleanup sketch follows the example below)
- Clean up temporary files after processing
- Handle partial download failures gracefully
- Monitor memory usage during large file processing

```python
# ✅ DO: Parallelized file processing
async def download_multiple_files_parallel(
    self,
    file_paths: List[str],
    destination_dir: str
) -> List[str]:
    """Download multiple files in parallel for better performance."""

    async def download_single_file(file_path: str) -> str:
        """Download a single file with error handling."""
        try:
            return await self.object_store.download_file(
                source=file_path,
                destination=os.path.join(destination_dir, os.path.basename(file_path))
            )
        except Exception as e:
            logger.error(f"Failed to download {file_path}: {e}")
            raise

    # Parallel processing with controlled concurrency
    semaphore = asyncio.Semaphore(10)  # Limit concurrent downloads

    async def download_with_semaphore(file_path: str) -> str:
        async with semaphore:
            return await download_single_file(file_path)

    tasks = [download_with_semaphore(path) for path in file_paths]
    return await asyncio.gather(*tasks)

# ❌ REJECT: Sequential processing
async def download_multiple_files_sequential(self, file_paths: List[str]) -> List[str]:
    """Sequential download - should be flagged for parallelization."""
    downloaded = []
    for file_path in file_paths:  # FLAG: Could be parallelized
        result = await self.object_store.download_file(file_path)
        downloaded.append(result)
    return downloaded
```
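
The timeout and cleanup bullets from the Resource Management list are not covered by the example above. A minimal sketch, assuming the same hypothetical `self.object_store` client used throughout this file:

```python
# Sketch: bound the wait on a single download and clean up partial output
import asyncio
import os

async def download_with_timeout(
    self, source: str, destination: str, timeout_seconds: float = 120.0
) -> str:
    """Download one file, failing fast and removing partial files on timeout."""
    try:
        return await asyncio.wait_for(
            self.object_store.download_file(source=source, destination=destination),
            timeout=timeout_seconds,
        )
    except asyncio.TimeoutError:
        # Remove the partially written file so retries start clean
        if os.path.exists(destination):
            os.remove(destination)
        raise
```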

### Phase 3: Input Testing Requirements

**Data Input Testing:**

- Test with various file formats and sizes
- Test malformed data handling
- Test partial download/upload scenarios
- Mock object store operations in unit tests (a test sketch follows this section)
- Include integration tests with a real object store
- Test error recovery and retry logic

**Performance Testing:**

- Include tests for large file processing
- Test memory usage with different chunk sizes
- Test concurrent download/upload operations
- Verify timeout handling works correctly
- Test connection pool behavior
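
A minimal unit-test sketch for the mocking bullet above. It assumes `pytest-asyncio` is installed and reuses the `JsonInput` class from the Phase 1 example; the zero-argument constructor and direct `object_store` attribute are assumptions:

```python
from unittest.mock import AsyncMock

import pytest

@pytest.mark.asyncio
async def test_download_uses_user_prefix(tmp_path):
    json_input = JsonInput()  # assumed zero-argument constructor
    json_input.object_store = AsyncMock()
    json_input.object_store.download_files.return_value = ["file1.json"]

    result = await json_input.download_from_object_store(
        input_prefix="datasets/raw/", local_destination=str(tmp_path)
    )

    # The user-provided prefix must be passed through unchanged
    json_input.object_store.download_files.assert_awaited_once_with(
        source="datasets/raw/", destination=str(tmp_path)
    )
    assert result == ["file1.json"]
```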

### Phase 4: Performance and Scalability

**Data Processing Efficiency:**

- Use streaming for large files instead of loading them entirely into memory
- Implement proper chunking for batch operations
- Use async generators for memory-efficient data processing (see the streaming sketch after this list)
- Monitor memory usage and processing time
- Optimize file I/O operations
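
A minimal sketch of the async-generator streaming pattern from the list above; the `aiofiles` dependency is an assumption, and the SDK may use a different async file API:

```python
# Sketch: yield fixed-size chunks so large files never sit in memory whole
from typing import AsyncIterator

import aiofiles  # assumed dependency

async def stream_file_chunks(
    path: str, chunk_size: int = 1024 * 1024
) -> AsyncIterator[bytes]:
    """Yield a file's contents one chunk at a time."""
    async with aiofiles.open(path, "rb") as f:
        while True:
            chunk = await f.read(chunk_size)
            if not chunk:
                break
            yield chunk

# Usage: process each chunk as it arrives
# async for chunk in stream_file_chunks("large_file.parquet"):
#     handle(chunk)
```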

**Object Store Optimization:**

- Use connection pooling for object store clients
- Implement proper retry logic for transient failures (see the retry sketch after this list)
- Use parallel operations where appropriate
- Cache frequently accessed metadata
- Monitor object store operation metrics
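
A minimal retry sketch for the transient-failure bullet above; the retryable exception set is an assumption and should be matched to the real object store client:

```python
import asyncio
import random
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")

async def with_retries(
    operation: Callable[[], Awaitable[T]],
    max_attempts: int = 5,
    max_delay: float = 30.0,
) -> T:
    """Run an async operation, retrying assumed-transient failures."""
    for attempt in range(max_attempts):
        try:
            return await operation()
        except (ConnectionError, TimeoutError):
            if attempt >= max_attempts - 1:
                raise
            # Exponential backoff with jitter, capped at max_delay
            delay = min(2 ** attempt + random.uniform(0, 1), max_delay)
            await asyncio.sleep(delay)
    raise RuntimeError("unreachable: loop always returns or raises")

# Usage:
# files = await with_retries(
#     lambda: object_store.download_files(source=prefix, destination=dest)
# )
```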

### Phase 5: Input Data Maintainability

**Error Handling and Recovery:**

- Implement comprehensive error handling for all input operations
- Provide meaningful error messages with context
- Handle partial failures gracefully (some files fail, others succeed; see the sketch after this list)
- Implement proper retry logic for transient failures
- Log all input operations with sufficient context
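
A minimal sketch of graceful partial-failure handling, assuming a per-file download method like the `download_single_file` helper from Phase 2: gather with `return_exceptions=True`, then partition successes from failures instead of aborting the batch on the first error.

```python
import asyncio
import logging
from typing import List, Tuple

logger = logging.getLogger(__name__)

async def download_all(self, paths: List[str]) -> Tuple[List[str], List[str]]:
    """Download every path, returning (succeeded, failed) instead of raising."""
    tasks = [self.download_single_file(p) for p in paths]  # assumed helper
    results = await asyncio.gather(*tasks, return_exceptions=True)

    succeeded, failed = [], []
    for path, result in zip(paths, results):
        if isinstance(result, Exception):
            logger.error(f"Download failed for {path}: {result}")
            failed.append(path)
        else:
            succeeded.append(result)
    return succeeded, failed
```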

**Configuration Management:**

- Externalize all input-related configuration
- Support different input sources and formats
- Validate input configuration before processing (a validation sketch follows this list)
- Document all supported input parameters
- Handle environment-specific input requirements
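
A minimal sketch of validating input configuration before processing; the field names are illustrative, not the SDK's actual configuration schema:

```python
from dataclasses import dataclass
from typing import Tuple

@dataclass
class InputConfig:
    # Illustrative fields - not the SDK's real schema
    input_prefix: str
    max_file_size_mb: int = 512
    allowed_extensions: Tuple[str, ...] = (".json", ".parquet")

    def __post_init__(self) -> None:
        # Fail fast: reject bad configuration before any download starts
        if not self.input_prefix:
            raise ValueError("input_prefix must not be empty")
        if self.max_file_size_mb <= 0:
            raise ValueError(
                f"max_file_size_mb must be positive, got: {self.max_file_size_mb}"
            )
```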

---

## Input-Specific Anti-Patterns

**Always Reject:**

- **Path calculation errors**: Using local paths to derive object store paths
- **Sequential processing**: Processing multiple files sequentially when parallel processing is possible
- **Memory inefficiency**: Loading large files entirely into memory
- **Missing error handling**: Input operations without proper try/except blocks
- **Poor path validation**: Not validating object store keys or file paths
- **Resource leaks**: Not cleaning up temporary files or connections

**Object Store Anti-Patterns:**

```python
# ❌ REJECT: Incorrect object store usage
class BadInputProcessor:
    async def process_files(self, local_files: List[str]):
        # Wrong: derives the object store path from the local path
        for local_file in local_files:
            object_store_key = get_object_store_prefix(local_file)  # Incorrect!
            await self.object_store.download_file(object_store_key, local_file)

# ✅ REQUIRE: Correct object store usage
class GoodInputProcessor:
    async def process_files(
        self,
        object_store_paths: List[str],  # Actual object store paths
        local_destination_dir: str
    ):
        # Use actual object store paths, not derived ones
        for object_store_path in object_store_paths:
            local_file = os.path.join(
                local_destination_dir,
                os.path.basename(object_store_path)
            )
            await self.object_store.download_file(object_store_path, local_file)
```

**Performance Anti-Patterns:**

```python
# ❌ REJECT: Sequential file processing
async def process_files_sequential(file_list: List[str]):
    results = []
    for file_path in file_list:  # Should be parallelized
        result = await process_single_file(file_path)
        results.append(result)
    return results

# ✅ REQUIRE: Parallel file processing
async def process_files_parallel(file_list: List[str], max_concurrency: int = 10):
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_with_semaphore(file_path: str):
        async with semaphore:
            return await process_single_file(file_path)

    tasks = [process_with_semaphore(path) for path in file_list]
    return await asyncio.gather(*tasks, return_exceptions=True)
```

## Educational Context for Input Reviews

When reviewing input code, emphasize:

1. **Data Integrity Impact**: "Incorrect object store path handling can cause data loss or corruption. Files uploaded to wrong locations become inaccessible, breaking data processing pipelines."

2. **Performance Impact**: "Sequential file processing creates unnecessary bottlenecks. For enterprise datasets with hundreds of files, parallelization can reduce processing time from hours to minutes."

3. **Resource Impact**: "Poor memory management in input processing can cause out-of-memory errors with large datasets. Streaming and chunking are essential for enterprise-scale data processing."

4. **User Experience Impact**: "Input path handling errors are often silent until runtime, causing difficult-to-debug failures. Proper validation and clear error messages save hours of troubleshooting."

5. **Scalability Impact**: "Input processing patterns that work for small datasets can fail catastrophically at enterprise scale. Always design for the largest expected dataset size."

6. **Reliability Impact**: "Input operations are often the first point of failure in data pipelines. Robust error handling and retry logic in input processing prevents entire workflows from failing due to transient issues."

@@ -0,0 +1,320 @@ application_sdk/interceptors/.cursor/BUGBOT.md

# Interceptor Code Review Guidelines - Temporal Interceptors

## Context-Specific Patterns

This directory contains Temporal interceptor implementations that provide cross-cutting functionality like distributed locking, observability, and event handling. Interceptors must be robust and must not interfere with normal workflow/activity execution.

### Phase 1: Critical Interceptor Safety Issues

**Infinite Loop Prevention:**

- **Lock acquisition loops must have termination conditions**: No `while True` loops without max retries or timeouts
- **Bounded retry logic**: All retry mechanisms must have explicit limits
- **Timeout enforcement**: Operations must respect activity and workflow timeouts
- **Resource exhaustion prevention**: Prevent scenarios that could consume all available resources

**Resource Management in Interceptors:**

- **Context manager handling**: Ensure proper cleanup when context managers fail
- **Connection lifecycle**: Don't hold connections longer than necessary
- **Lock timing**: Acquire locks as late as possible, release them as early as possible
- **Error state cleanup**: Clean up resources even when intercepted operations fail

```python
# ✅ DO: Bounded lock acquisition with proper cleanup
class GoodLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        """Intercept with bounded retry and proper cleanup."""
        max_retries = 10
        retry_count = 0

        while retry_count < max_retries:  # Bounded loop
            try:
                # Acquire lock with timeout
                async with self.lock_manager.acquire_lock(
                    lock_name=self.lock_name,
                    timeout=30  # Maximum lock wait time
                ) as lock:
                    # Execute activity while holding lock
                    return await next_fn(input)

            except LockUnavailableError:
                retry_count += 1
                if retry_count >= max_retries:
                    raise LockAcquisitionError(
                        f"Failed to acquire lock after {max_retries} attempts"
                    )

                # Randomized backoff with jitter, bounded at 60 seconds
                delay = min(10 + random.randint(0, 10), 60)
                await asyncio.sleep(delay)  # Non-blocking sleep

        raise LockAcquisitionError("Lock acquisition retries exhausted")

# ❌ NEVER: Infinite loops and poor resource management
class BadLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        while True:  # INFINITE LOOP!
            try:
                with self.dapr_client.try_lock(lock_name) as response:
                    if response.success:
                        result = await next_fn(input)
                        return result
                    else:
                        time.sleep(10)  # BLOCKING SLEEP IN ASYNC!
                        # SLEEP INSIDE CONTEXT MANAGER - RESOURCE LEAK!

            except Exception as e:
                raise  # No retry for transient errors
```

**Parameter Validation and Edge Cases:**

- **Range validation**: Validate parameters that are used in range operations (`random.randint`, array slicing)
- **Zero and negative handling**: Explicitly handle edge cases like zero values and negative numbers
- **Type validation**: Ensure interceptor parameters match expected types
- **Configuration validation**: Validate interceptor configuration before use

```python
# ✅ DO: Comprehensive parameter validation
def validate_lock_configuration(max_locks: int, ttl_seconds: int) -> None:
    """Validate lock configuration parameters."""
    if max_locks <= 0:
        raise ValueError(f"max_locks must be positive, got: {max_locks}")

    if max_locks > 1000:  # Reasonable upper bound
        raise ValueError(f"max_locks too high (max 1000), got: {max_locks}")

    if ttl_seconds <= 0:
        raise ValueError(f"ttl_seconds must be positive, got: {ttl_seconds}")

    if ttl_seconds > 3600:  # 1 hour max
        raise ValueError(f"ttl_seconds too high (max 3600), got: {ttl_seconds}")

def get_random_lock_slot(max_locks: int) -> int:
    """Get a random lock slot with proper validation."""
    validate_lock_configuration(max_locks, 1)  # Quick validation
    return random.randint(0, max_locks - 1)  # Safe after validation

# ❌ REJECT: No validation, leading to crashes
def bad_random_slot(max_locks: int) -> int:
    # Crashes if max_locks is 0 or negative
    return random.randint(0, max_locks - 1)  # ValueError!
```

### Phase 2: Interceptor Architecture Patterns

**Lock Acquisition and Release Timing:**

- **Late acquisition**: Acquire locks as close to the protected operation as possible
- **Early release**: Release locks immediately after the protected operation completes
- **Timeout alignment**: Lock TTL should align with activity execution time
- **Race condition prevention**: Ensure locks are held for the entire duration of the protected operation

**Error Handling in Interceptors:**

- **Transient error retry**: Distinguish between retryable errors (connection failures) and permanent errors (invalid configuration)
- **Interceptor isolation**: Interceptor failures should not prevent other interceptors from running
- **Activity result preservation**: Don't modify or lose activity results due to interceptor errors
- **Cleanup on failure**: Ensure resources are cleaned up even when intercepted operations fail

```python
# ✅ DO: Proper lock timing and error handling
class ProperLockInterceptor:
    async def intercept_activity(self, next_fn, input) -> Any:
        """Intercept with proper timing and error handling."""
        lock_acquired = False
        lock_handle = None

        try:
            # Validate configuration first
            if self.max_locks <= 0:
                raise ConfigurationError(f"Invalid max_locks: {self.max_locks}")

            # Bounded retry with exponential backoff
            for attempt in range(self.max_retries):
                try:
                    # Late acquisition - right before the protected operation
                    lock_handle = await self.lock_manager.acquire_lock(
                        lock_name=self.lock_name,
                        ttl_seconds=self.ttl_seconds,
                        timeout=self.lock_timeout
                    )
                    lock_acquired = True
                    break

                except LockUnavailableError:
                    if attempt >= self.max_retries - 1:
                        raise LockAcquisitionError(
                            f"Could not acquire lock after {self.max_retries} attempts"
                        )

                    # Exponential backoff with jitter
                    delay = min(2 ** attempt + random.uniform(0, 1), self.max_delay)
                    await asyncio.sleep(delay)
                    continue

                except (ConnectionError, TimeoutError) as e:
                    # Transient errors - retry
                    logger.warning(f"Transient lock error on attempt {attempt + 1}: {e}")
                    if attempt >= self.max_retries - 1:
                        raise LockAcquisitionError(
                            f"Lock service unavailable after {self.max_retries} attempts"
                        )
                    await asyncio.sleep(1)  # Brief delay for transient errors
                    continue

            # Execute the protected operation while holding the lock
            result = await next_fn(input)

            # Early release - immediately after the operation
            if lock_handle:
                await self.lock_manager.release_lock(lock_handle)
                lock_acquired = False

            return result

        finally:
            # Cleanup: ensure the lock is released even on failure
            if lock_acquired and lock_handle:
                try:
                    await self.lock_manager.release_lock(lock_handle)
                except Exception as e:
                    logger.warning(f"Failed to release lock during cleanup: {e}")

# ❌ REJECT: Poor timing and error handling
class BadLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        # No parameter validation
        while True:  # Infinite loop
            lock = await self.acquire_lock()
            if lock:
                result = await next_fn(input)
                # Lock released too early - before the result is processed
                await self.release_lock(lock)
                return result
            time.sleep(10)  # Blocking sleep
```

### Phase 3: Interceptor Testing Requirements

**Interceptor Testing Standards:**

- **Test failure scenarios**: Verify behavior when locks can't be acquired, connections fail, etc.
- **Test retry logic**: Ensure bounded retries work correctly with various failure patterns (a test sketch follows this section)
- **Test resource cleanup**: Verify proper cleanup in both success and failure cases
- **Test timing**: Ensure locks are acquired and released at the correct times
- **Test integration**: Verify interceptors work correctly with actual activities/workflows

**Edge Case Testing:**

- **Zero and negative parameters**: Test with edge-case values like `max_locks=0`
- **Timeout scenarios**: Test behavior when operations exceed configured timeouts
- **Concurrent access**: Test interceptor behavior under high concurrency
- **Resource exhaustion**: Test behavior when external resources are unavailable
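
A minimal test sketch for the bounded-retry bullet above. It assumes `pytest-asyncio` and reuses the `WellImplementedInterceptor` pattern shown later in this file; the `lock_name` and `lock_client` attributes are set by hand because that sketch's constructor does not take them:

```python
import asyncio
from unittest.mock import AsyncMock, MagicMock

import pytest

@pytest.mark.asyncio
async def test_lock_retries_are_bounded(monkeypatch):
    interceptor = WellImplementedInterceptor(max_locks=10, max_retries=3)
    interceptor.lock_name = "test-lock"  # assumed attribute

    # The lock is never granted: response.success is always False
    lock_ctx = MagicMock()
    lock_ctx.__aenter__.return_value = MagicMock(success=False)
    interceptor.lock_client = MagicMock(try_lock=MagicMock(return_value=lock_ctx))

    # Skip real backoff delays to keep the test fast
    monkeypatch.setattr(asyncio, "sleep", AsyncMock())

    next_fn = AsyncMock()
    with pytest.raises(LockAcquisitionError):
        await interceptor.intercept(next_fn, input={})

    # Exactly max_retries attempts were made, and the activity never ran
    assert interceptor.lock_client.try_lock.call_count == 3
    next_fn.assert_not_awaited()
```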

### Phase 4: Performance and Scalability

**Interceptor Performance:**

- **Minimal overhead**: Interceptors should add minimal latency to operation execution
- **Efficient lock management**: Use optimal strategies for lock acquisition and release
- **Connection pooling**: Reuse connections to external services (Dapr, Redis, etc.)
- **Async efficiency**: Never block the event loop in async interceptors

**Scalability Patterns:**

- **Bounded resource usage**: Prevent interceptors from consuming unbounded resources
- **Graceful degradation**: Handle scenarios where external services are unavailable
- **Circuit breaker patterns**: Implement circuit breakers for external service dependencies (a sketch follows this list)
- **Monitoring and metrics**: Include appropriate metrics for interceptor performance
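
A minimal circuit-breaker sketch for an external dependency such as a lock service; the thresholds, half-open behavior, and monotonic-clock cooldown are illustrative choices, not a prescribed SDK API:

```python
import time
from typing import Optional

class CircuitBreaker:
    """Stop calling a failing dependency until a cooldown has elapsed."""

    def __init__(self, failure_threshold: int = 5, reset_seconds: float = 30.0):
        self.failure_threshold = failure_threshold
        self.reset_seconds = reset_seconds
        self.failures = 0
        self.opened_at: Optional[float] = None

    def allow(self) -> bool:
        """Return True if a call to the dependency may proceed."""
        if self.opened_at is None:
            return True
        if time.monotonic() - self.opened_at >= self.reset_seconds:
            return True  # half-open: permit one trial call after cooldown
        return False

    def record_success(self) -> None:
        self.failures = 0
        self.opened_at = None

    def record_failure(self) -> None:
        self.failures += 1
        if self.failures >= self.failure_threshold:
            self.opened_at = time.monotonic()
```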

### Phase 5: Interceptor Maintainability

**Code Organization:**

- **Single responsibility**: Each interceptor should handle one cross-cutting concern
- **Clear interfaces**: Interceptor interfaces should be well-defined and documented
- **Configuration externalization**: All interceptor behavior should be configurable
- **Error reporting**: Provide clear error messages when interceptors fail

**Integration Safety:**

- **Non-interference**: Interceptors should not interfere with each other
- **Order independence**: Interceptor order should not affect correctness (when possible)
- **Backwards compatibility**: Changes to interceptors should maintain API compatibility
- **Graceful failure**: Interceptor failures should not prevent core functionality

---

## Interceptor-Specific Anti-Patterns

**Always Reject:**

- **Infinite retry loops**: `while True` without bounded conditions
- **Resource leaks in context managers**: Sleeping or blocking inside context managers
- **Parameter validation gaps**: Not validating inputs that are used in range operations
- **Blocking operations in async code**: Using synchronous operations that block the event loop
- **Generic error handling**: Not distinguishing between retryable and permanent errors
- **Lock timing issues**: Releasing locks before operations complete
- **Missing cleanup**: Not cleaning up resources in failure scenarios

**Lock Acquisition Anti-Patterns:**

```python
# ❌ REJECT: Multiple critical issues
class CriticallyFlawedInterceptor:
    async def intercept(self, next_fn, input):
        while True:  # 1. Infinite loop
            async with dapr_client.try_lock(lock_name) as response:
                if response.success:
                    return await next_fn(input)
                else:
                    time.sleep(10)  # 2. Blocking sleep in async
                    # 3. Sleep inside context manager - resource leak

# 4. No parameter validation for max_locks
slot = random.randint(0, max_locks - 1)  # Crashes if max_locks <= 0

# ✅ REQUIRE: Proper implementation
class WellImplementedInterceptor:
    def __init__(self, max_locks: int = 10, max_retries: int = 5):
        # Validate configuration at initialization
        if max_locks <= 0:
            raise ValueError(f"max_locks must be positive: {max_locks}")
        self.max_locks = max_locks
        self.max_retries = max_retries

    async def intercept(self, next_fn, input) -> Any:
        for attempt in range(self.max_retries):  # Bounded retries
            try:
                # Context manager properly exits before the sleep
                async with self.lock_client.try_lock(self.lock_name) as response:
                    if response.success:
                        return await next_fn(input)

                # Sleep outside the context manager
                if attempt < self.max_retries - 1:
                    await asyncio.sleep(min(2 ** attempt, 30))  # Non-blocking sleep

            except ConnectionError:
                # Retry transient errors
                if attempt >= self.max_retries - 1:
                    raise
                await asyncio.sleep(1)

        raise LockAcquisitionError("Could not acquire lock within retry limit")
```

## Educational Context for Interceptor Reviews

When reviewing interceptor code, emphasize:

1. **System Stability Impact**: "Interceptors run for every activity/workflow execution. Infinite loops or resource leaks in interceptors can bring down the entire system by exhausting resources."

2. **Performance Impact**: "Interceptor overhead affects every operation. Blocking operations in async interceptors can degrade performance for all concurrent executions."

3. **Reliability Impact**: "Poor error handling in interceptors can mask or cause cascading failures. Proper error distinction and recovery logic are essential for system reliability."

4. **Resource Impact**: "Interceptors often manage external resources (locks, connections). Resource leaks in interceptors compound quickly under load and can cause system-wide failures."

5. **Debugging Impact**: "Interceptor issues are often hard to debug because they affect multiple operations. Clear error messages and proper logging are critical for troubleshooting."

6. **Scalability Impact**: "Interceptor patterns that work under light load can fail catastrophically under heavy load. Always design for high-concurrency scenarios with proper resource bounds."
|