atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,320 @@
+ # Interceptor Code Review Guidelines - Temporal Interceptors
+
+ ## Context-Specific Patterns
+
+ This directory contains Temporal interceptor implementations that provide cross-cutting functionality like distributed locking, observability, and event handling. Interceptors must be robust and must not interfere with normal workflow/activity execution.
+
+ ### Phase 1: Critical Interceptor Safety Issues
+
+ **Infinite Loop Prevention:**
+
+ - **Lock acquisition loops must have termination conditions**: No `while True` loops without max retries or timeouts
+ - **Bounded retry logic**: All retry mechanisms must have explicit limits
+ - **Timeout enforcement**: Operations must respect activity and workflow timeouts
+ - **Resource exhaustion prevention**: Prevent scenarios that could consume all available resources
+
+ **Resource Management in Interceptors:**
+
+ - **Context manager handling**: Ensure proper cleanup when context managers fail
+ - **Connection lifecycle**: Don't hold connections longer than necessary
+ - **Lock timing**: Acquire locks as late as possible, release them as early as possible
+ - **Error state cleanup**: Clean up resources even when intercepted operations fail
+
+ ```python
+ import asyncio
+ import random
+
+ # ✅ DO: Bounded lock acquisition with proper cleanup
+ class GoodLockInterceptor:
+     async def intercept_activity(self, next_fn, input):
+         """Intercept with bounded retry and proper cleanup."""
+         max_retries = 10
+         retry_count = 0
+
+         while retry_count < max_retries:  # Bounded loop
+             try:
+                 # Acquire lock with timeout
+                 async with self.lock_manager.acquire_lock(
+                     lock_name=self.lock_name,
+                     timeout=30,  # Maximum lock wait time
+                 ):
+                     # Execute activity while holding lock
+                     return await next_fn(input)
+
+             except LockUnavailableError:
+                 retry_count += 1
+                 if retry_count >= max_retries:
+                     raise LockAcquisitionError(
+                         f"Failed to acquire lock after {max_retries} attempts"
+                     )
+
+                 # Exponential backoff with jitter, capped at 60 seconds
+                 delay = min(2 ** retry_count + random.uniform(0, 1), 60)
+                 await asyncio.sleep(delay)  # Non-blocking sleep
+
+         raise LockAcquisitionError("Lock acquisition retries exhausted")
+
+ # ❌ NEVER: Infinite loops and poor resource management
+ class BadLockInterceptor:
+     async def intercept_activity(self, next_fn, input):
+         while True:  # INFINITE LOOP!
+             try:
+                 with self.dapr_client.try_lock(lock_name) as response:
+                     if response.success:
+                         result = await next_fn(input)
+                         return result
+                     else:
+                         time.sleep(10)  # BLOCKING SLEEP IN ASYNC!
+                         # SLEEP INSIDE CONTEXT MANAGER - RESOURCE LEAK!
+
+             except Exception as e:
+                 raise  # No retry for transient errors
+ ```
+
+ **Parameter Validation and Edge Cases:**
+
+ - **Range validation**: Validate parameters that are used in range operations (`random.randint`, array slicing)
+ - **Zero and negative handling**: Explicitly handle edge cases such as 0 values and negative numbers
+ - **Type validation**: Ensure interceptor parameters match expected types
+ - **Configuration validation**: Validate interceptor configuration before use
+
+ ```python
+ import random
+
+ # ✅ DO: Comprehensive parameter validation
+ def validate_lock_configuration(max_locks: int, ttl_seconds: int) -> None:
+     """Validate lock configuration parameters."""
+     if max_locks <= 0:
+         raise ValueError(f"max_locks must be positive, got: {max_locks}")
+
+     if max_locks > 1000:  # Reasonable upper bound
+         raise ValueError(f"max_locks too high (max 1000), got: {max_locks}")
+
+     if ttl_seconds <= 0:
+         raise ValueError(f"ttl_seconds must be positive, got: {ttl_seconds}")
+
+     if ttl_seconds > 3600:  # 1 hour max
+         raise ValueError(f"ttl_seconds too high (max 3600), got: {ttl_seconds}")
+
+ def get_random_lock_slot(max_locks: int) -> int:
+     """Get a random lock slot with proper validation."""
+     validate_lock_configuration(max_locks, 1)  # Quick validation
+     return random.randint(0, max_locks - 1)  # Safe after validation
+
+ # ❌ REJECT: No validation leading to crashes
+ def bad_random_slot(max_locks: int) -> int:
+     # Crashes if max_locks is 0 or negative
+     return random.randint(0, max_locks - 1)  # ValueError!
+ ```
+
+ ### Phase 2: Interceptor Architecture Patterns
+
+ **Lock Acquisition and Release Timing:**
+
+ - **Late acquisition**: Acquire locks as close to the protected operation as possible
+ - **Early release**: Release locks immediately after the protected operation completes
+ - **Timeout alignment**: Lock TTL should align with activity execution time
+ - **Race condition prevention**: Ensure locks are held for the entire duration of the protected operation
+
+ **Error Handling in Interceptors:**
+
+ - **Transient error retry**: Distinguish between retryable errors (connection failures) and permanent errors (invalid configuration)
+ - **Interceptor isolation**: Interceptor failures should not prevent other interceptors from running
+ - **Activity result preservation**: Don't modify or lose activity results due to interceptor errors
+ - **Cleanup on failure**: Ensure resources are cleaned up even when intercepted operations fail
+
+ ```python
+ import asyncio
+ import logging
+ import random
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+ # ✅ DO: Proper lock timing and error handling
+ class ProperLockInterceptor:
+     async def intercept_activity(self, next_fn, input) -> Any:
+         """Intercept with proper timing and error handling."""
+         lock_acquired = False
+         lock_handle = None
+
+         try:
+             # Validate configuration first
+             if self.max_locks <= 0:
+                 raise ConfigurationError(f"Invalid max_locks: {self.max_locks}")
+
+             # Bounded retry with exponential backoff
+             for attempt in range(self.max_retries):
+                 try:
+                     # Late acquisition - right before the protected operation
+                     lock_handle = await self.lock_manager.acquire_lock(
+                         lock_name=self.lock_name,
+                         ttl_seconds=self.ttl_seconds,
+                         timeout=self.lock_timeout,
+                     )
+                     lock_acquired = True
+                     break
+
+                 except LockUnavailableError:
+                     if attempt >= self.max_retries - 1:
+                         raise LockAcquisitionError(
+                             f"Could not acquire lock after {self.max_retries} attempts"
+                         )
+
+                     # Exponential backoff with jitter
+                     delay = min(2 ** attempt + random.uniform(0, 1), self.max_delay)
+                     await asyncio.sleep(delay)
+                     continue
+
+                 except (ConnectionError, TimeoutError) as e:
+                     # Transient errors - retry
+                     logger.warning(f"Transient lock error on attempt {attempt + 1}: {e}")
+                     if attempt >= self.max_retries - 1:
+                         raise LockAcquisitionError(
+                             f"Lock service unavailable after {self.max_retries} attempts"
+                         )
+                     await asyncio.sleep(1)  # Brief delay for transient errors
+                     continue
+
+             # Execute the protected operation while holding the lock
+             result = await next_fn(input)
+
+             # Early release - immediately after the operation
+             if lock_handle:
+                 await self.lock_manager.release_lock(lock_handle)
+                 lock_acquired = False
+
+             return result
+
+         finally:
+             # Cleanup: ensure the lock is released even on failure
+             if lock_acquired and lock_handle:
+                 try:
+                     await self.lock_manager.release_lock(lock_handle)
+                 except Exception as e:
+                     logger.warning(f"Failed to release lock during cleanup: {e}")
+
+ # ❌ REJECT: Poor timing and error handling
+ class BadLockInterceptor:
+     async def intercept_activity(self, next_fn, input):
+         # No parameter validation
+         while True:  # Infinite loop
+             lock = await self.acquire_lock()
+             if lock:
+                 result = await next_fn(input)
+                 # No try/finally: if next_fn raises, the lock is never released
+                 await self.release_lock(lock)
+                 return result
+             time.sleep(10)  # Blocking sleep
+ ```
+
+ ### Phase 3: Interceptor Testing Requirements
+
+ **Interceptor Testing Standards:**
+
+ - **Test failure scenarios**: Verify behavior when locks can't be acquired, connections fail, etc.
+ - **Test retry logic**: Ensure bounded retries work correctly with various failure patterns
+ - **Test resource cleanup**: Verify proper cleanup in both success and failure cases
+ - **Test timing**: Ensure locks are acquired/released at correct times
+ - **Test integration**: Verify interceptors work correctly with actual activities/workflows
+
+ **Edge Case Testing:**
+
+ - **Zero and negative parameters**: Test with edge case values like `max_locks=0`
+ - **Timeout scenarios**: Test behavior when operations exceed configured timeouts
+ - **Concurrent access**: Test interceptor behavior under high concurrency
+ - **Resource exhaustion**: Test behavior when external resources are unavailable (see the sketch after this list)
+
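+ To make these requirements concrete, here is a minimal test sketch (an editor's illustration rather than SDK code; it assumes pytest with pytest-asyncio, that `ProperLockInterceptor` accepts `lock_manager` and `max_retries` in its constructor, and that `FlakyLockManager` is a hypothetical stub whose `acquire_lock` always raises `LockUnavailableError`):
+
+ ```python
+ import pytest
+
+ @pytest.mark.asyncio
+ async def test_lock_retries_are_bounded():
+     """The interceptor must give up after max_retries instead of spinning forever."""
+     lock_manager = FlakyLockManager()  # hypothetical stub: every acquire fails
+     interceptor = ProperLockInterceptor(lock_manager=lock_manager, max_retries=3)
+
+     async def activity(_input):
+         return "never reached"
+
+     with pytest.raises(LockAcquisitionError):
+         await interceptor.intercept_activity(activity, None)
+
+     # Exactly max_retries acquisition attempts, then a clear failure
+     assert lock_manager.acquire_attempts == 3
+
+ def test_zero_max_locks_is_rejected():
+     """Edge case: max_locks=0 must fail fast, not crash inside random.randint."""
+     with pytest.raises(ValueError):
+         validate_lock_configuration(max_locks=0, ttl_seconds=60)
+ ```
+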
+ ### Phase 4: Performance and Scalability
+
+ **Interceptor Performance:**
+
+ - **Minimal overhead**: Interceptors should add minimal latency to operation execution
+ - **Efficient lock management**: Use optimal strategies for lock acquisition and release
+ - **Connection pooling**: Reuse connections to external services (Dapr, Redis, etc.)
+ - **Async efficiency**: Never block the event loop in async interceptors
+
+ **Scalability Patterns:**
+
+ - **Bounded resource usage**: Prevent interceptors from consuming unbounded resources
+ - **Graceful degradation**: Handle scenarios where external services are unavailable
+ - **Circuit breaker patterns**: Implement circuit breakers for external service dependencies (see the sketch after this list)
+ - **Monitoring and metrics**: Include appropriate metrics for interceptor performance
+
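+ As an illustration of the circuit-breaker bullet (a minimal sketch, not SDK code; the thresholds and class names are assumptions), a breaker that fails fast after repeated external-service errors might look like:
+
+ ```python
+ import time
+ from typing import Optional
+
+ class CircuitOpenError(Exception):
+     """Raised when the breaker is open and calls are rejected without trying."""
+
+ class CircuitBreaker:
+     def __init__(self, failure_threshold: int = 5, reset_timeout: float = 30.0):
+         self.failure_threshold = failure_threshold
+         self.reset_timeout = reset_timeout
+         self.failure_count = 0
+         self.opened_at: Optional[float] = None
+
+     async def call(self, fn, *args, **kwargs):
+         # Fail fast while the circuit is open; allow one trial call after reset_timeout
+         if self.opened_at is not None:
+             if time.monotonic() - self.opened_at < self.reset_timeout:
+                 raise CircuitOpenError("External service circuit is open")
+             self.opened_at = None  # half-open: let one call through
+
+         try:
+             result = await fn(*args, **kwargs)
+         except Exception:
+             self.failure_count += 1
+             if self.failure_count >= self.failure_threshold:
+                 self.opened_at = time.monotonic()
+             raise
+         else:
+             self.failure_count = 0  # success closes the circuit
+             return result
+ ```
+
+ An interceptor would then route lock-service or event-store calls through `breaker.call(...)` so that a struggling dependency degrades gracefully instead of stalling every workflow.
+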
+ ### Phase 5: Interceptor Maintainability
+
+ **Code Organization:**
+
+ - **Single responsibility**: Each interceptor should handle one cross-cutting concern
+ - **Clear interfaces**: Interceptor interfaces should be well-defined and documented
+ - **Configuration externalization**: All interceptor behavior should be configurable (see the sketch below)
+ - **Error reporting**: Provide clear error messages when interceptors fail
+
+ **Integration Safety:**
+
+ - **Non-interference**: Interceptors should not interfere with each other
+ - **Order independence**: Interceptor order should not affect correctness (when possible)
+ - **Backwards compatibility**: Changes to interceptors should maintain API compatibility
+ - **Graceful failure**: Interceptor failures should not prevent core functionality
+
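+ One way to satisfy both configuration externalization and clear error reporting (a minimal sketch; the env-var names here are illustrative, not ones this SDK defines) is to validate a frozen config object once at startup:
+
+ ```python
+ import os
+ from dataclasses import dataclass
+
+ @dataclass(frozen=True)
+ class LockInterceptorConfig:
+     max_locks: int
+     max_retries: int
+     ttl_seconds: int
+
+     @classmethod
+     def from_env(cls) -> "LockInterceptorConfig":
+         config = cls(
+             max_locks=int(os.getenv("LOCK_MAX_LOCKS", "10")),
+             max_retries=int(os.getenv("LOCK_MAX_RETRIES", "5")),
+             ttl_seconds=int(os.getenv("LOCK_TTL_SECONDS", "300")),
+         )
+         # Fail at startup with a clear message, not mid-workflow
+         if config.max_locks <= 0:
+             raise ValueError(f"LOCK_MAX_LOCKS must be positive, got {config.max_locks}")
+         if config.ttl_seconds <= 0:
+             raise ValueError(f"LOCK_TTL_SECONDS must be positive, got {config.ttl_seconds}")
+         return config
+ ```
+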
+ ---
+
+ ## Interceptor-Specific Anti-Patterns
+
+ **Always Reject:**
+
+ - **Infinite retry loops**: `while True` without bounded conditions
+ - **Resource leaks in context managers**: Sleeping or blocking inside context managers
+ - **Parameter validation gaps**: Not validating inputs that are used in range operations
+ - **Blocking operations in async**: Using synchronous operations that block the event loop
+ - **Generic error handling**: Not distinguishing between retryable and permanent errors
+ - **Lock timing issues**: Releasing locks before operations complete
+ - **Missing cleanup**: Not cleaning up resources in failure scenarios
+
+ **Lock Acquisition Anti-Patterns:**
+
+ ```python
+ # ❌ REJECT: Multiple critical issues
+ class CriticallyFlawedInterceptor:
+     async def intercept(self, next_fn, input):
+         while True:  # 1. Infinite loop
+             async with dapr_client.try_lock(lock_name) as response:
+                 if response.success:
+                     return await next_fn(input)
+                 else:
+                     time.sleep(10)  # 2. Blocking sleep in async
+                     # 3. Sleep inside context manager - resource leak
+
+ # 4. No parameter validation for max_locks
+ slot = random.randint(0, max_locks - 1)  # Crashes if max_locks <= 0
+
+ # ✅ REQUIRE: Proper implementation
+ class WellImplementedInterceptor:
+     def __init__(self, max_locks: int = 10, max_retries: int = 5):
+         # Validate configuration at initialization
+         if max_locks <= 0:
+             raise ValueError(f"max_locks must be positive: {max_locks}")
+         self.max_locks = max_locks
+         self.max_retries = max_retries
+
+     async def intercept(self, next_fn, input) -> Any:
+         for attempt in range(self.max_retries):  # Bounded retries
+             try:
+                 # Context manager properly exits before sleep
+                 async with self.lock_client.try_lock(self.lock_name) as response:
+                     if response.success:
+                         return await next_fn(input)
+
+                 # Sleep outside context manager
+                 if attempt < self.max_retries - 1:
+                     await asyncio.sleep(min(2 ** attempt, 30))  # Non-blocking sleep
+
+             except ConnectionError:
+                 # Retry transient errors
+                 if attempt >= self.max_retries - 1:
+                     raise
+                 await asyncio.sleep(1)
+
+         raise LockAcquisitionError("Could not acquire lock within retry limit")
+ ```
+
+ ## Educational Context for Interceptor Reviews
+
+ When reviewing interceptor code, emphasize:
+
+ 1. **System Stability Impact**: "Interceptors run for every activity/workflow execution. Infinite loops or resource leaks in interceptors can bring down the entire system by exhausting resources."
+
+ 2. **Performance Impact**: "Interceptor overhead affects every operation. Blocking operations in async interceptors can degrade performance for all concurrent executions."
+
+ 3. **Reliability Impact**: "Poor error handling in interceptors can mask or cause cascading failures. Proper error distinction and recovery logic are essential for system reliability."
+
+ 4. **Resource Impact**: "Interceptors often manage external resources (locks, connections). Resource leaks in interceptors compound quickly under load and can cause system-wide failures."
+
+ 5. **Debugging Impact**: "Interceptor issues are often hard to debug because they affect multiple operations. Clear error messages and proper logging are critical for troubleshooting."
+
+ 6. **Scalability Impact**: "Interceptor patterns that work under light load can fail catastrophically under heavy load. Always design for high-concurrency scenarios with proper resource bounds."
@@ -0,0 +1,171 @@
+ import os
+ import shutil
+ from datetime import timedelta
+ from typing import Any, Dict, List, Optional, Type
+
+ from pydantic import BaseModel
+ from temporalio import activity, workflow
+ from temporalio.common import RetryPolicy
+ from temporalio.worker import (
+     ExecuteWorkflowInput,
+     Interceptor,
+     WorkflowInboundInterceptor,
+     WorkflowInterceptorClassInput,
+ )
+
+ from application_sdk.activities.common.utils import build_output_path
+ from application_sdk.constants import CLEANUP_BASE_PATHS, TEMPORARY_PATH
+ from application_sdk.observability.logger_adaptor import get_logger
+
+ logger = get_logger(__name__)
+ activity.logger = logger
+ workflow.logger = logger
+
+
+ class CleanupResult(BaseModel):
+     """Result model for cleanup operations.
+
+     Attributes:
+         path_results (Dict[str, bool]): Cleanup results for each path (True=success, False=failure)
+     """
+
+     path_results: Dict[str, bool]
+
+
+ @activity.defn
+ async def cleanup() -> CleanupResult:
+     """Clean up temporary artifacts for the current workflow.
+
+     Removes all contents from the configured base paths, or from the default
+     workflow-specific artifacts directory when no paths are configured.
+
+     Uses the CLEANUP_BASE_PATHS constant or defaults to the workflow-specific
+     artifacts directory.
+
+     Returns:
+         CleanupResult: Per-path cleanup results (True=success, False=failure).
+     """
+     path_results: Dict[str, bool] = {}
+     base_paths: List[str] = [os.path.join(TEMPORARY_PATH, build_output_path())]
+
+     # Use configured paths or default to the workflow-specific artifacts directory
+     if CLEANUP_BASE_PATHS:
+         base_paths = CLEANUP_BASE_PATHS
+         logger.info(f"Using CLEANUP_BASE_PATHS: {base_paths} for cleanup")
+
+     logger.info(f"Cleaning up all contents from base paths: {base_paths}")
+
+     for base_path in base_paths:
+         try:
+             if os.path.exists(base_path):
+                 if os.path.isdir(base_path):
+                     # Remove the directory and all of its contents
+                     shutil.rmtree(base_path)
+                     logger.info(f"Cleaned up all contents from: {base_path}")
+                     path_results[base_path] = True
+                 else:
+                     logger.warning(f"Path is not a directory: {base_path}")
+                     path_results[base_path] = False
+             else:
+                 logger.debug(f"Directory doesn't exist: {base_path}")
+                 path_results[base_path] = True
+
+         except Exception as e:
+             logger.error(f"Unexpected error cleaning up {base_path}: {e}")
+             path_results[base_path] = False
+
+     return CleanupResult(
+         path_results=path_results,
+     )
+
+
+ class CleanupWorkflowInboundInterceptor(WorkflowInboundInterceptor):
+     """Interceptor for workflow-level app artifacts cleanup.
+
+     This interceptor cleans up the entire app directory structure when the workflow
+     completes or fails, following the pattern: base_path/appname/workflow_id/run_id.
+     Supports multiple base paths for comprehensive cleanup.
+     """
+
+     async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any:
+         """Execute a workflow with app artifacts cleanup.
+
+         Args:
+             input (ExecuteWorkflowInput): The workflow execution input
+
+         Returns:
+             Any: The result of the workflow execution
+
+         Raises:
+             Exception: Re-raises any exceptions from workflow execution
+         """
+         output = None
+         try:
+             output = await super().execute_workflow(input)
+         finally:
+             # Always attempt cleanup regardless of workflow success/failure
+             try:
+                 await workflow.execute_activity(
+                     cleanup,
+                     schedule_to_close_timeout=timedelta(minutes=5),
+                     retry_policy=RetryPolicy(
+                         maximum_attempts=3,
+                     ),
+                     summary="This activity is used to cleanup the local artifacts and the activity state after the workflow is completed.",
+                 )
+
+                 logger.info("Cleanup completed successfully")
+
+             except Exception as e:
+                 logger.warning(f"Failed to cleanup artifacts: {e}")
+                 # Don't re-raise - cleanup failures shouldn't fail the workflow
+
+         return output
+
+
+ class CleanupInterceptor(Interceptor):
+     """Temporal interceptor for automatic app artifacts cleanup.
+
+     This interceptor provides cleanup capabilities for application artifacts
+     across multiple base paths following the pattern: base_path/appname/workflow_id/run_id
+
+     Features:
+     - Automatic cleanup of app-specific artifact directories
+     - Cleanup on workflow completion or failure
+     - Supports multiple cleanup paths via the ATLAN_CLEANUP_BASE_PATHS env var
+     - Simple activity-based cleanup logic
+     - Comprehensive error handling and logging
+
+     Example:
+         >>> # Register the interceptor with the Temporal worker
+         >>> worker = Worker(
+         ...     client,
+         ...     task_queue="my-task-queue",
+         ...     workflows=[MyWorkflow],
+         ...     activities=[my_activity, cleanup],
+         ...     interceptors=[CleanupInterceptor()],
+         ... )
+
+     Environment Configuration:
+         >>> # Single path (default)
+         >>> ATLAN_CLEANUP_BASE_PATHS="./local/tmp/artifacts/apps"
+
+         >>> # Multiple paths (comma-separated)
+         >>> ATLAN_CLEANUP_BASE_PATHS="./local/tmp/artifacts/apps,/storage/temp/apps,/shared/cleanup/apps"
+     """
+
+     def workflow_interceptor_class(
+         self, input: WorkflowInterceptorClassInput
+     ) -> Optional[Type[WorkflowInboundInterceptor]]:
+         """Get the workflow interceptor class for cleanup.
+
+         Args:
+             input (WorkflowInterceptorClassInput): The interceptor input
+
+         Returns:
+             Optional[Type[WorkflowInboundInterceptor]]: The workflow interceptor class
+         """
+         return CleanupWorkflowInboundInterceptor
@@ -23,6 +23,8 @@ from application_sdk.observability.logger_adaptor import get_logger
  from application_sdk.services.eventstore import EventStore

  logger = get_logger(__name__)
+ activity.logger = logger
+ workflow.logger = logger

  TEMPORAL_NOT_FOUND_FAILURE = (
      "type.googleapis.com/temporal.api.errordetails.v1.NotFoundFailure"
@@ -41,9 +43,9 @@ async def publish_event(event_data: dict) -> None:
      try:
          event = Event(**event_data)
          await EventStore.publish_event(event)
-         activity.logger.info(f"Published event: {event_data.get('event_name','')}")
+         logger.info(f"Published event: {event_data.get('event_name','')}")
      except Exception as e:
-         activity.logger.error(f"Failed to publish event: {e}")
+         logger.error(f"Failed to publish event: {e}")
          raise


@@ -123,7 +125,7 @@ class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
                  retry_policy=RetryPolicy(maximum_attempts=3),
              )
          except Exception as e:
-             workflow.logger.warning(f"Failed to publish workflow start event: {e}")
+             logger.warning(f"Failed to publish workflow start event: {e}")
              # Don't fail the workflow if event publishing fails

          output = None
@@ -152,9 +154,7 @@ class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
                  retry_policy=RetryPolicy(maximum_attempts=3),
              )
          except Exception as publish_error:
-             workflow.logger.warning(
-                 f"Failed to publish workflow end event: {publish_error}"
-             )
+             logger.warning(f"Failed to publish workflow end event: {publish_error}")

          return output
