atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/.cursor/BUGBOT.md +424 -0
- application_sdk/clients/.cursor/BUGBOT.md +280 -0
- application_sdk/clients/sql.py +110 -74
- application_sdk/clients/temporal.py +3 -1
- application_sdk/common/.cursor/BUGBOT.md +316 -0
- application_sdk/constants.py +8 -0
- application_sdk/decorators/.cursor/BUGBOT.md +279 -0
- application_sdk/inputs/.cursor/BUGBOT.md +250 -0
- application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
- application_sdk/interceptors/cleanup.py +171 -0
- application_sdk/interceptors/events.py +6 -6
- application_sdk/outputs/.cursor/BUGBOT.md +295 -0
- application_sdk/outputs/iceberg.py +4 -0
- application_sdk/outputs/json.py +6 -0
- application_sdk/outputs/parquet.py +13 -3
- application_sdk/server/.cursor/BUGBOT.md +442 -0
- application_sdk/services/objectstore.py +16 -3
- application_sdk/version.py +1 -1
- application_sdk/workflows/.cursor/BUGBOT.md +218 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/RECORD +24 -14
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc40.dist-info}/licenses/NOTICE +0 -0
**application_sdk/interceptors/.cursor/BUGBOT.md** (new file)

@@ -0,0 +1,320 @@

# Interceptor Code Review Guidelines - Temporal Interceptors

## Context-Specific Patterns

This directory contains Temporal interceptor implementations that provide cross-cutting functionality like distributed locking, observability, and event handling. Interceptors must be robust and not interfere with normal workflow/activity execution.

### Phase 1: Critical Interceptor Safety Issues

**Infinite Loop Prevention:**

- **Lock acquisition loops must have termination conditions**: No `while True` loops without max retries or timeouts
- **Bounded retry logic**: All retry mechanisms must have explicit limits
- **Timeout enforcement**: Operations must respect activity and workflow timeouts
- **Resource exhaustion prevention**: Prevent scenarios that could consume all available resources

**Resource Management in Interceptors:**

- **Context manager handling**: Ensure proper cleanup when context managers fail
- **Connection lifecycle**: Don't hold connections longer than necessary
- **Lock timing**: Acquire locks as late as possible, release as early as possible
- **Error state cleanup**: Clean up resources even when intercepted operations fail

```python
# ✅ DO: Bounded lock acquisition with proper cleanup
class GoodLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        """Intercept with bounded retry and proper cleanup."""
        max_retries = 10
        retry_count = 0

        while retry_count < max_retries:  # Bounded loop
            try:
                # Acquire lock with timeout
                async with self.lock_manager.acquire_lock(
                    lock_name=self.lock_name,
                    timeout=30  # Maximum lock wait time
                ) as lock:
                    # Execute activity while holding lock
                    return await next_fn(input)

            except LockUnavailableError:
                retry_count += 1
                if retry_count >= max_retries:
                    raise LockAcquisitionError(
                        f"Failed to acquire lock after {max_retries} attempts"
                    )

                # Exponential backoff, but bounded
                delay = min(10 + random.randint(0, 10), 60)  # Max 60 seconds
                await asyncio.sleep(delay)  # Non-blocking sleep

        raise LockAcquisitionError("Lock acquisition retries exhausted")

# ❌ NEVER: Infinite loops and poor resource management
class BadLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        while True:  # INFINITE LOOP!
            try:
                with self.dapr_client.try_lock(lock_name) as response:
                    if response.success:
                        result = await next_fn(input)
                        return result
                    else:
                        time.sleep(10)  # BLOCKING SLEEP IN ASYNC!
                        # SLEEP INSIDE CONTEXT MANAGER - RESOURCE LEAK!

            except Exception as e:
                raise  # No retry for transient errors
```

**Parameter Validation and Edge Cases:**

- **Range validation**: Validate parameters that are used in range operations (random.randint, array slicing)
- **Zero and negative handling**: Explicitly handle edge cases like 0 values, negative numbers
- **Type validation**: Ensure interceptor parameters match expected types
- **Configuration validation**: Validate interceptor configuration before use

```python
# ✅ DO: Comprehensive parameter validation
def validate_lock_configuration(max_locks: int, ttl_seconds: int) -> None:
    """Validate lock configuration parameters."""

    if max_locks <= 0:
        raise ValueError(f"max_locks must be positive, got: {max_locks}")

    if max_locks > 1000:  # Reasonable upper bound
        raise ValueError(f"max_locks too high (max 1000), got: {max_locks}")

    if ttl_seconds <= 0:
        raise ValueError(f"ttl_seconds must be positive, got: {ttl_seconds}")

    if ttl_seconds > 3600:  # 1 hour max
        raise ValueError(f"ttl_seconds too high (max 3600), got: {ttl_seconds}")

def get_random_lock_slot(max_locks: int) -> int:
    """Get random lock slot with proper validation."""
    validate_lock_configuration(max_locks, 1)  # Quick validation
    return random.randint(0, max_locks - 1)  # Safe after validation

# ❌ REJECT: No validation leading to crashes
def bad_random_slot(max_locks: int) -> int:
    # Crashes if max_locks is 0 or negative
    return random.randint(0, max_locks - 1)  # ValueError!
```

### Phase 2: Interceptor Architecture Patterns

**Lock Acquisition and Release Timing:**

- **Late acquisition**: Acquire locks as close to the protected operation as possible
- **Early release**: Release locks immediately after the protected operation completes
- **Timeout alignment**: Lock TTL should align with activity execution time
- **Race condition prevention**: Ensure locks are held for the entire duration of the protected operation

**Error Handling in Interceptors:**

- **Transient error retry**: Distinguish between retryable errors (connection failures) and permanent errors (invalid configuration)
- **Interceptor isolation**: Interceptor failures should not prevent other interceptors from running
- **Activity result preservation**: Don't modify or lose activity results due to interceptor errors
- **Cleanup on failure**: Ensure resources are cleaned up even when intercepted operations fail

```python
# ✅ DO: Proper lock timing and error handling
class ProperLockInterceptor:
    async def intercept_activity(self, next_fn, input) -> Any:
        """Intercept with proper timing and error handling."""
        lock_acquired = False
        lock_handle = None

        try:
            # Validate configuration first
            if self.max_locks <= 0:
                raise ConfigurationError(f"Invalid max_locks: {self.max_locks}")

            # Bounded retry with exponential backoff
            for attempt in range(self.max_retries):
                try:
                    # Late acquisition - right before protected operation
                    lock_handle = await self.lock_manager.acquire_lock(
                        lock_name=self.lock_name,
                        ttl_seconds=self.ttl_seconds,
                        timeout=self.lock_timeout
                    )
                    lock_acquired = True
                    break

                except LockUnavailableError:
                    if attempt >= self.max_retries - 1:
                        raise LockAcquisitionError(f"Could not acquire lock after {self.max_retries} attempts")

                    # Exponential backoff with jitter
                    delay = min(2 ** attempt + random.uniform(0, 1), self.max_delay)
                    await asyncio.sleep(delay)
                    continue

                except (ConnectionError, TimeoutError) as e:
                    # Transient errors - retry
                    logger.warning(f"Transient lock error on attempt {attempt + 1}: {e}")
                    if attempt >= self.max_retries - 1:
                        raise LockAcquisitionError(f"Lock service unavailable after {self.max_retries} attempts")
                    await asyncio.sleep(1)  # Brief delay for transient errors
                    continue

            # Execute protected operation while holding lock
            result = await next_fn(input)

            # Early release - immediately after operation
            if lock_handle:
                await self.lock_manager.release_lock(lock_handle)
                lock_acquired = False

            return result

        finally:
            # Cleanup: ensure lock is released even on failure
            if lock_acquired and lock_handle:
                try:
                    await self.lock_manager.release_lock(lock_handle)
                except Exception as e:
                    logger.warning(f"Failed to release lock during cleanup: {e}")

# ❌ REJECT: Poor timing and error handling
class BadLockInterceptor:
    async def intercept_activity(self, next_fn, input):
        # No parameter validation
        while True:  # Infinite loop
            lock = await self.acquire_lock()
            if lock:
                result = await next_fn(input)
                # Lock released too early - before result is processed
                await self.release_lock(lock)
                return result
            time.sleep(10)  # Blocking sleep
```

### Phase 3: Interceptor Testing Requirements

**Interceptor Testing Standards:**

- **Test failure scenarios**: Verify behavior when locks can't be acquired, connections fail, etc.
- **Test retry logic**: Ensure bounded retries work correctly with various failure patterns
- **Test resource cleanup**: Verify proper cleanup in both success and failure cases
- **Test timing**: Ensure locks are acquired/released at correct times
- **Test integration**: Verify interceptors work correctly with actual activities/workflows

**Edge Case Testing:**

- **Zero and negative parameters**: Test with edge case values like `max_locks=0`
- **Timeout scenarios**: Test behavior when operations exceed configured timeouts
- **Concurrent access**: Test interceptor behavior under high concurrency
- **Resource exhaustion**: Test behavior when external resources are unavailable
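
The bounded-retry rule above is straightforward to regression-test. Below is a minimal pytest sketch (illustrative only, not part of the packaged BUGBOT.md) against the `GoodLockInterceptor` example from Phase 1; the lock-manager stub is hypothetical, the hard-coded retry count of 10 comes from that example, and `pytest-asyncio` is assumed to be installed:

```python
import asyncio
from unittest.mock import AsyncMock, MagicMock

import pytest  # the asyncio marker assumes pytest-asyncio


@pytest.mark.asyncio
async def test_lock_retries_are_bounded(monkeypatch):
    """The interceptor must stop after exactly max_retries failed acquisitions."""
    interceptor = GoodLockInterceptor()
    interceptor.lock_name = "demo-lock"
    # Stub a lock manager whose acquire_lock always fails synchronously.
    interceptor.lock_manager = MagicMock()
    interceptor.lock_manager.acquire_lock.side_effect = LockUnavailableError()
    # Skip real backoff delays so the test runs instantly.
    monkeypatch.setattr(asyncio, "sleep", AsyncMock())

    with pytest.raises(LockAcquisitionError):
        await interceptor.intercept_activity(AsyncMock(), input={})

    # Exactly max_retries (10 in the example) acquisition attempts, no more.
    assert interceptor.lock_manager.acquire_lock.call_count == 10
```
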
### Phase 4: Performance and Scalability

**Interceptor Performance:**

- **Minimal overhead**: Interceptors should add minimal latency to operation execution
- **Efficient lock management**: Use optimal strategies for lock acquisition and release
- **Connection pooling**: Reuse connections to external services (Dapr, Redis, etc.)
- **Async efficiency**: Never block the event loop in async interceptors

**Scalability Patterns:**

- **Bounded resource usage**: Prevent interceptors from consuming unbounded resources
- **Graceful degradation**: Handle scenarios where external services are unavailable
- **Circuit breaker patterns**: Implement circuit breakers for external service dependencies
- **Monitoring and metrics**: Include appropriate metrics for interceptor performance
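
Of the patterns above, the circuit breaker is the one that usually needs dedicated scaffolding. A minimal, framework-agnostic sketch (illustrative only; all names are hypothetical) that fails fast after repeated lock-service errors:

```python
import time
from typing import Awaitable, Callable, Optional


class CircuitOpenError(Exception):
    """Raised when the breaker is open and calls are being shed."""


class CircuitBreaker:
    """Open after `threshold` consecutive failures; probe again after `reset_after` seconds."""

    def __init__(self, threshold: int = 5, reset_after: float = 30.0) -> None:
        self.threshold = threshold
        self.reset_after = reset_after
        self.failures = 0
        self.opened_at: Optional[float] = None

    def allow(self) -> bool:
        # Closed, or open long enough that one probe call is allowed (half-open).
        if self.opened_at is None:
            return True
        return time.monotonic() - self.opened_at >= self.reset_after

    def record_success(self) -> None:
        self.failures = 0
        self.opened_at = None

    def record_failure(self) -> None:
        self.failures += 1
        if self.failures >= self.threshold:
            self.opened_at = time.monotonic()


async def guarded_call(breaker: CircuitBreaker, fn: Callable[..., Awaitable], *args):
    """Route an external-service call through the breaker, shedding load while open."""
    if not breaker.allow():
        raise CircuitOpenError("lock service breaker is open")
    try:
        result = await fn(*args)
    except (ConnectionError, TimeoutError):
        breaker.record_failure()
        raise
    breaker.record_success()
    return result
```
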
### Phase 5: Interceptor Maintainability

**Code Organization:**

- **Single responsibility**: Each interceptor should handle one cross-cutting concern
- **Clear interfaces**: Interceptor interfaces should be well-defined and documented
- **Configuration externalization**: All interceptor behavior should be configurable
- **Error reporting**: Provide clear error messages when interceptors fail

**Integration Safety:**

- **Non-interference**: Interceptors should not interfere with each other
- **Order independence**: Interceptor order should not affect correctness (when possible)
- **Backwards compatibility**: Changes to interceptors should maintain API compatibility
- **Graceful failure**: Interceptor failures should not prevent core functionality
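
To make non-interference and graceful failure concrete, here is a short sketch (illustrative only, not from the packaged file; the injected `metrics` and `logger` objects are assumed) of an interceptor that guards its own cross-cutting work but never guards away the intercepted operation:

```python
class SafeMetricsInterceptor:
    """Hypothetical interceptor whose own failures never break the activity."""

    def __init__(self, metrics, logger):
        self.metrics = metrics
        self.logger = logger

    async def intercept_activity(self, next_fn, input):
        try:
            self.metrics.increment("activity.started")  # interceptor's own concern
        except Exception as e:
            self.logger.warning(f"Metrics emission failed, continuing: {e}")

        # The intercepted operation itself is never swallowed or modified.
        result = await next_fn(input)

        try:
            self.metrics.increment("activity.completed")
        except Exception as e:
            self.logger.warning(f"Metrics emission failed, continuing: {e}")

        return result
```
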
---

## Interceptor-Specific Anti-Patterns

**Always Reject:**

- **Infinite retry loops**: `while True` without bounded conditions
- **Resource leaks in context managers**: Sleeping or blocking inside context managers
- **Parameter validation gaps**: Not validating inputs that are used in range operations
- **Blocking operations in async**: Using synchronous operations that block the event loop
- **Generic error handling**: Not distinguishing between retryable and permanent errors
- **Lock timing issues**: Releasing locks before operations complete
- **Missing cleanup**: Not cleaning up resources in failure scenarios

**Lock Acquisition Anti-Patterns:**

```python
# ❌ REJECT: Multiple critical issues
class CriticallyFlawedInterceptor:
    async def intercept(self, next_fn, input):
        while True:  # 1. Infinite loop
            async with dapr_client.try_lock(lock_name) as response:
                if response.success:
                    return await next_fn(input)
                else:
                    time.sleep(10)  # 2. Blocking sleep in async
                    # 3. Sleep inside context manager - resource leak

# 4. No parameter validation for max_locks
slot = random.randint(0, max_locks - 1)  # Crashes if max_locks <= 0

# ✅ REQUIRE: Proper implementation
class WellImplementedInterceptor:
    def __init__(self, max_locks: int = 10, max_retries: int = 5):
        # Validate configuration at initialization
        if max_locks <= 0:
            raise ValueError(f"max_locks must be positive: {max_locks}")
        self.max_locks = max_locks
        self.max_retries = max_retries

    async def intercept(self, next_fn, input) -> Any:
        for attempt in range(self.max_retries):  # Bounded retries
            try:
                # Context manager properly exits before sleep
                async with self.lock_client.try_lock(self.lock_name) as response:
                    if response.success:
                        return await next_fn(input)

                # Sleep outside context manager
                if attempt < self.max_retries - 1:
                    await asyncio.sleep(min(2 ** attempt, 30))  # Non-blocking sleep

            except ConnectionError:
                # Retry transient errors
                if attempt >= self.max_retries - 1:
                    raise
                await asyncio.sleep(1)

        raise LockAcquisitionError("Could not acquire lock within retry limit")
```

## Educational Context for Interceptor Reviews

When reviewing interceptor code, emphasize:

1. **System Stability Impact**: "Interceptors run for every activity/workflow execution. Infinite loops or resource leaks in interceptors can bring down the entire system by exhausting resources."

2. **Performance Impact**: "Interceptor overhead affects every operation. Blocking operations in async interceptors can degrade performance for all concurrent executions."

3. **Reliability Impact**: "Poor error handling in interceptors can mask or cause cascading failures. Proper error distinction and recovery logic are essential for system reliability."

4. **Resource Impact**: "Interceptors often manage external resources (locks, connections). Resource leaks in interceptors compound quickly under load and can cause system-wide failures."

5. **Debugging Impact**: "Interceptor issues are often hard to debug because they affect multiple operations. Clear error messages and proper logging are critical for troubleshooting."

6. **Scalability Impact**: "Interceptor patterns that work under light load can fail catastrophically under heavy load. Always design for high-concurrency scenarios with proper resource bounds."

**application_sdk/interceptors/cleanup.py** (new file)

@@ -0,0 +1,171 @@

```python
import os
import shutil
from datetime import timedelta
from typing import Any, Dict, List, Optional, Type

from pydantic import BaseModel
from temporalio import activity, workflow
from temporalio.common import RetryPolicy
from temporalio.worker import (
    ExecuteWorkflowInput,
    Interceptor,
    WorkflowInboundInterceptor,
    WorkflowInterceptorClassInput,
)

from application_sdk.activities.common.utils import build_output_path
from application_sdk.constants import CLEANUP_BASE_PATHS, TEMPORARY_PATH
from application_sdk.observability.logger_adaptor import get_logger

logger = get_logger(__name__)
activity.logger = logger
workflow.logger = logger


class CleanupResult(BaseModel):
    """Result model for cleanup operations.

    Attributes:
        path_results (Dict[str, bool]): Cleanup results for each path (True=success, False=failure)
    """

    path_results: Dict[str, bool]


@activity.defn
async def cleanup() -> CleanupResult:
    """Clean up temporary artifacts and activity state for the current workflow.

    Performs two types of cleanup:
    1. File cleanup: Removes all contents from configured base paths or default workflow directory
    2. State cleanup: Clears activity state for the current workflow (includes resource cleanup)

    Uses CLEANUP_BASE_PATHS constant or defaults to workflow-specific artifacts directory.

    Returns:
        CleanupResult: Structured cleanup results with path results and summary statistics.
    """
    path_results: Dict[str, bool] = {}
    base_paths: List[str] = [os.path.join(TEMPORARY_PATH, build_output_path())]

    # Use configured paths or default to workflow-specific artifacts directory
    if CLEANUP_BASE_PATHS:
        base_paths = CLEANUP_BASE_PATHS
        logger.info(f"Using CLEANUP_BASE_PATHS: {base_paths} for cleanup")

    logger.info(f"Cleaning up all contents from base paths: {base_paths}")

    for base_path in base_paths:
        try:
            if os.path.exists(base_path):
                if os.path.isdir(base_path):
                    # Remove entire directory and recreate it empty
                    shutil.rmtree(base_path)
                    logger.info(f"Cleaned up all contents from: {base_path}")
                    path_results[base_path] = True
                else:
                    logger.warning(f"Path is not a directory: {base_path}")
                    path_results[base_path] = False
            else:
                logger.debug(f"Directory doesn't exist: {base_path}")
                path_results[base_path] = True

        except Exception as e:
            logger.error(f"Unexpected error cleaning up {base_path}: {e}")
            path_results[base_path] = False

    return CleanupResult(
        path_results=path_results,
    )


class CleanupWorkflowInboundInterceptor(WorkflowInboundInterceptor):
    """Interceptor for workflow-level app artifacts cleanup.

    This interceptor cleans up the entire app directory structure when the workflow
    completes or fails, following the pattern: base_path/appname/workflow_id/run_id
    Supports multiple base paths for comprehensive cleanup.
    """

    async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any:
        """Execute a workflow with app artifacts cleanup.

        Args:
            input (ExecuteWorkflowInput): The workflow execution input

        Returns:
            Any: The result of the workflow execution

        Raises:
            Exception: Re-raises any exceptions from workflow execution
        """
        output = None
        try:
            output = await super().execute_workflow(input)
        except Exception:
            raise

        finally:
            # Always attempt cleanup regardless of workflow success/failure
            try:
                await workflow.execute_activity(
                    cleanup,
                    schedule_to_close_timeout=timedelta(minutes=5),
                    retry_policy=RetryPolicy(
                        maximum_attempts=3,
                    ),
                    summary="This activity is used to cleanup the local artifacts and the activity state after the workflow is completed.",
                )

                logger.info("Cleanup completed successfully")

            except Exception as e:
                logger.warning(f"Failed to cleanup artifacts: {e}")
                # Don't re-raise - cleanup failures shouldn't fail the workflow

        return output


class CleanupInterceptor(Interceptor):
    """Temporal interceptor for automatic app artifacts cleanup.

    This interceptor provides cleanup capabilities for application artifacts
    across multiple base paths following the pattern: base_path/appname/workflow_id/run_id

    Features:
    - Automatic cleanup of app-specific artifact directories
    - Cleanup on workflow completion or failure
    - Supports multiple cleanup paths via ATLAN_CLEANUP_BASE_PATHS env var
    - Simple activity-based cleanup logic
    - Comprehensive error handling and logging

    Example:
        >>> # Register the interceptor with Temporal worker
        >>> worker = Worker(
        ...     client,
        ...     task_queue="my-task-queue",
        ...     workflows=[MyWorkflow],
        ...     activities=[my_activity, cleanup],
        ...     interceptors=[CleanupInterceptor()]
        ... )

    Environment Configuration:
        >>> # Single path (default)
        >>> ATLAN_CLEANUP_BASE_PATHS="./local/tmp/artifacts/apps"

        >>> # Multiple paths (comma-separated)
        >>> ATLAN_CLEANUP_BASE_PATHS="./local/tmp/artifacts/apps,/storage/temp/apps,/shared/cleanup/apps"
    """

    def workflow_interceptor_class(
        self, input: WorkflowInterceptorClassInput
    ) -> Optional[Type[WorkflowInboundInterceptor]]:
        """Get the workflow interceptor class for cleanup.

        Args:
            input (WorkflowInterceptorClassInput): The interceptor input

        Returns:
            Optional[Type[WorkflowInboundInterceptor]]: The workflow interceptor class
        """
        return CleanupWorkflowInboundInterceptor
```
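
The module above imports `CLEANUP_BASE_PATHS` and `TEMPORARY_PATH` from `application_sdk.constants`, which this release also changes (+8 lines, not shown in this diff). A hypothetical sketch of how the comma-separated `ATLAN_CLEANUP_BASE_PATHS` value documented in the docstring could be parsed into that list:

```python
import os

# Hypothetical reconstruction; the actual constants.py change is not shown here.
CLEANUP_BASE_PATHS = [
    path.strip()
    for path in os.getenv("ATLAN_CLEANUP_BASE_PATHS", "").split(",")
    if path.strip()
]
```
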
**application_sdk/interceptors/events.py** (+6 -6)

```diff
@@ -23,6 +23,8 @@ from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.services.eventstore import EventStore
 
 logger = get_logger(__name__)
+activity.logger = logger
+workflow.logger = logger
 
 TEMPORAL_NOT_FOUND_FAILURE = (
     "type.googleapis.com/temporal.api.errordetails.v1.NotFoundFailure"
@@ -41,9 +43,9 @@ async def publish_event(event_data: dict) -> None:
     try:
         event = Event(**event_data)
         await EventStore.publish_event(event)
-
+        logger.info(f"Published event: {event_data.get('event_name','')}")
     except Exception as e:
-
+        logger.error(f"Failed to publish event: {e}")
         raise
 
 
@@ -123,7 +125,7 @@ class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
                 retry_policy=RetryPolicy(maximum_attempts=3),
             )
         except Exception as e:
-
+            logger.warning(f"Failed to publish workflow start event: {e}")
             # Don't fail the workflow if event publishing fails
 
         output = None
@@ -152,9 +154,7 @@ class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
                 retry_policy=RetryPolicy(maximum_attempts=3),
             )
         except Exception as publish_error:
-            logger.warning(
-                f"Failed to publish workflow end event: {publish_error}"
-            )
+            logger.warning(f"Failed to publish workflow end event: {publish_error}")
 
         return output
 
```