kailash 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,382 @@
1
+ """Circuit Breaker pattern implementation for connection management.
2
+
3
+ This module implements the Circuit Breaker pattern to prevent cascading failures
4
+ in connection pools and database operations. It provides automatic failure detection,
5
+ recovery testing, and graceful degradation.
6
+
7
+ The circuit breaker has three states:
8
+ - CLOSED: Normal operation, requests pass through
9
+ - OPEN: Failures detected, requests fail fast
10
+ - HALF_OPEN: Testing recovery, limited requests allowed
11
+
12
+ Example:
13
+ >>> breaker = ConnectionCircuitBreaker(
14
+ ... CircuitBreakerConfig(
15
+ ... failure_threshold=5, recovery_timeout=60, half_open_requests=3
16
+ ... )
17
+ ... )
18
+ >>>
19
+ >>> # Wrap connection operations: call() takes the async callable and
20
+ >>> # its arguments (it is a coroutine, not a context manager)
21
+ >>> result = await breaker.call(connection.execute, query)
22
+ """
23
+
24
+ import asyncio
25
+ import logging
26
+ import time
27
+ from collections import deque
28
+ from dataclasses import dataclass, field
29
+ from datetime import datetime, timedelta
30
+ from enum import Enum
31
+ from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ T = TypeVar("T")
36
+
37
+
38
class CircuitState(Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation: requests pass through.
    CLOSED = "closed"
    # Failing fast: requests are rejected without being executed.
    OPEN = "open"
    # Testing recovery: only a limited number of requests are let through.
    HALF_OPEN = "half_open"
44
+
45
+
46
class CircuitBreakerError(Exception):
    """Signals that the circuit breaker refused to run a call.

    Raised by the breaker when it is open, or when it is half-open and the
    allowance of trial requests has already been used up.
    """
50
+
51
+
52
@dataclass
class CircuitBreakerConfig:
    """Tunable thresholds and timings that drive a circuit breaker."""

    # Consecutive failures that open the circuit.
    failure_threshold: int = 5
    # Consecutive successes needed to close the circuit again from half-open.
    success_threshold: int = 3
    # Seconds the circuit stays open before a half-open trial is attempted.
    recovery_timeout: int = 60
    # Number of requests admitted while half-open.
    half_open_requests: int = 3
    # Fraction of failed calls in the rolling window that opens the circuit.
    error_rate_threshold: float = 0.5
    # Number of most recent call outcomes kept in the rolling window.
    window_size: int = 100
    # Exception types that are re-raised but not counted as failures.
    excluded_exceptions: List[type] = field(default_factory=list)
63
+
64
+
65
@dataclass
class CircuitBreakerMetrics:
    """Running counters describing the call history of a circuit breaker."""

    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    rejected_calls: int = 0
    state_transitions: List[Dict[str, Any]] = field(default_factory=list)
    last_failure_time: Optional[float] = None
    consecutive_failures: int = 0
    consecutive_successes: int = 0

    def record_success(self):
        """Count one successful call and end any running failure streak."""
        self.consecutive_failures = 0
        self.consecutive_successes += 1
        self.successful_calls += 1
        self.total_calls += 1

    def record_failure(self):
        """Count one failed call, end the success streak, and stamp the time."""
        self.last_failure_time = time.time()
        self.consecutive_successes = 0
        self.consecutive_failures += 1
        self.failed_calls += 1
        self.total_calls += 1

    def record_rejection(self):
        """Count one call that was rejected without being executed.

        Rejections do not contribute to ``total_calls``.
        """
        self.rejected_calls += 1

    def get_error_rate(self) -> float:
        """Return failed calls divided by total calls, or 0.0 with no calls."""
        return self.failed_calls / self.total_calls if self.total_calls else 0.0
102
+
103
+
104
class ConnectionCircuitBreaker(Generic[T]):
    """Circuit breaker for database connections and operations.

    Monitors failures and prevents cascading failures by failing fast
    when error threshold is reached. Automatically tests recovery
    after timeout period.

    State is guarded by a single ``asyncio.Lock``. The protected function
    itself runs outside the lock; only admission checks and outcome
    recording are serialized.

    NOTE(review): a half-open admission slot is consumed when a call is
    admitted and is never handed back. If ``half_open_requests`` is smaller
    than ``success_threshold``, or admitted calls end with an excluded
    exception, the breaker can stay HALF_OPEN and reject every call until
    ``force_open``/``force_close``/``reset`` is used. Confirm whether that
    is intended.
    """

    def __init__(self, config: Optional[CircuitBreakerConfig] = None):
        """Initialize circuit breaker with configuration.

        Args:
            config: Thresholds and timings; a default ``CircuitBreakerConfig``
                is used when omitted.
        """
        self.config = config or CircuitBreakerConfig()
        self.state = CircuitState.CLOSED
        self.metrics = CircuitBreakerMetrics()
        self._lock = asyncio.Lock()
        # Number of calls admitted since the breaker last became half-open.
        self._half_open_requests = 0
        self._last_state_change = time.time()
        # Outcomes of the most recent calls: True = success, False = failure.
        self._rolling_window = deque(maxlen=self.config.window_size)
        self._listeners: List[Callable] = []

    async def call(self, func: Callable[..., T], *args, **kwargs) -> T:
        """Execute function with circuit breaker protection.

        Args:
            func: Async function to protect
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerError: If circuit is open, or half-open with the
                request limit already reached
            Exception: If function fails (re-raised unchanged)
        """
        async with self._lock:
            # Check if we should transition states
            await self._check_state_transition()

            if self.state == CircuitState.OPEN:
                self.metrics.record_rejection()
                raise CircuitBreakerError(
                    f"Circuit breaker is OPEN. "
                    f"Rejected after {self.metrics.consecutive_failures} failures. "
                    f"Will retry in {self._time_until_recovery():.1f}s"
                )

            if self.state == CircuitState.HALF_OPEN:
                if self._half_open_requests >= self.config.half_open_requests:
                    self.metrics.record_rejection()
                    raise CircuitBreakerError(
                        "Circuit breaker is HALF_OPEN but request limit reached"
                    )
                self._half_open_requests += 1

        # Execute the function outside the lock: the recording helpers below
        # acquire the same (non-reentrant) lock themselves.
        try:
            result = await func(*args, **kwargs)
            await self._record_success()
            return result
        except Exception as e:
            # Excluded exception types propagate but are not counted.
            if not isinstance(e, tuple(self.config.excluded_exceptions)):
                await self._record_failure(e)
            raise

    async def _check_state_transition(self):
        """Check if state should transition based on metrics.

        Must be called with ``self._lock`` held.
        """
        if self.state == CircuitState.CLOSED:
            # Check if we should open
            if self._should_open():
                await self._transition_to(CircuitState.OPEN)

        elif self.state == CircuitState.OPEN:
            # Check if we should try recovery
            time_since_open = time.time() - self._last_state_change
            if time_since_open >= self.config.recovery_timeout:
                # The half-open request counter is reset by _transition_to.
                await self._transition_to(CircuitState.HALF_OPEN)

        # HALF_OPEN is resolved after request execution, in
        # _record_success / _record_failure.

    def _should_open(self) -> bool:
        """Determine if circuit should open based on failures.

        Returns:
            True when the consecutive-failure threshold is reached, or when
            the rolling window is at least half full and its error rate is at
            or above ``error_rate_threshold``.
        """
        # Check consecutive failures
        if self.metrics.consecutive_failures >= self.config.failure_threshold:
            return True

        # Check error rate in rolling window. The emptiness guard avoids a
        # division by zero when window_size is 0 (0 >= 0 would pass).
        window = self._rolling_window
        if window and len(window) >= self.config.window_size / 2:
            error_count = sum(1 for success in window if not success)
            if error_count / len(window) >= self.config.error_rate_threshold:
                return True

        return False

    async def _record_success(self):
        """Record successful execution and close the circuit if recovered."""
        async with self._lock:
            self.metrics.record_success()
            self._rolling_window.append(True)

            if self.state == CircuitState.HALF_OPEN:
                if self.metrics.consecutive_successes >= self.config.success_threshold:
                    await self._transition_to(CircuitState.CLOSED)

    async def _record_failure(self, error: Exception):
        """Record failed execution and open the circuit when warranted.

        Args:
            error: The exception raised by the protected function.
        """
        async with self._lock:
            self.metrics.record_failure()
            self._rolling_window.append(False)

            if self.state == CircuitState.HALF_OPEN:
                # Single failure in half-open goes back to open
                await self._transition_to(CircuitState.OPEN)
            elif self.state == CircuitState.CLOSED:
                # Check if we should open the circuit
                if self._should_open():
                    await self._transition_to(CircuitState.OPEN)

        logger.warning(
            f"Circuit breaker recorded failure: {type(error).__name__}: {error}"
        )

    async def _transition_to(self, new_state: CircuitState):
        """Transition to new state and notify listeners.

        Must be called with ``self._lock`` held. Listeners therefore run while
        the lock is held and must not call back into lock-taking methods of
        this breaker.

        Args:
            new_state: The state to switch to.
        """
        import inspect

        old_state = self.state
        self.state = new_state
        self._last_state_change = time.time()

        # Reset counters on state change
        if new_state == CircuitState.CLOSED:
            self.metrics.consecutive_failures = 0
            self._half_open_requests = 0
            # Drop stale outcomes; otherwise the failures that opened the
            # circuit would still dominate the window and _should_open()
            # could re-open it on the very next call.
            self._rolling_window.clear()
        elif new_state == CircuitState.OPEN:
            self.metrics.consecutive_successes = 0
        elif new_state == CircuitState.HALF_OPEN:
            # Start every recovery test with a fresh admission allowance.
            self._half_open_requests = 0

        # Record transition
        self.metrics.state_transitions.append(
            {
                "from": old_state.value,
                "to": new_state.value,
                "timestamp": datetime.now().isoformat(),
                "reason": self._get_transition_reason(old_state, new_state),
            }
        )

        logger.info(
            f"Circuit breaker transitioned from {old_state.value} to {new_state.value}"
        )

        # Notify listeners; both plain callables and coroutine functions are
        # supported. A failing listener is logged and does not affect others.
        for listener in self._listeners:
            try:
                outcome = listener(old_state, new_state, self.metrics)
                if inspect.isawaitable(outcome):
                    await outcome
            except Exception as e:
                logger.error(f"Error notifying circuit breaker listener: {e}")

    def _get_transition_reason(
        self, old_state: CircuitState, new_state: CircuitState
    ) -> str:
        """Get human-readable reason for state transition.

        The reason is derived from the state pair only, so a CLOSED -> OPEN
        transition caused by the error rate or by ``force_open`` is also
        reported as a failure-threshold transition.
        """
        if old_state == CircuitState.CLOSED and new_state == CircuitState.OPEN:
            return f"Failure threshold reached ({self.metrics.consecutive_failures} failures)"
        elif old_state == CircuitState.OPEN and new_state == CircuitState.HALF_OPEN:
            return f"Recovery timeout elapsed ({self.config.recovery_timeout}s)"
        elif old_state == CircuitState.HALF_OPEN and new_state == CircuitState.CLOSED:
            return f"Success threshold reached ({self.metrics.consecutive_successes} successes)"
        elif old_state == CircuitState.HALF_OPEN and new_state == CircuitState.OPEN:
            return "Failure during recovery test"
        return "Unknown reason"

    def _time_until_recovery(self) -> float:
        """Calculate seconds until recovery attempt (0.0 unless OPEN)."""
        if self.state != CircuitState.OPEN:
            return 0.0
        elapsed = time.time() - self._last_state_change
        remaining = self.config.recovery_timeout - elapsed
        return max(0.0, remaining)

    async def force_open(self, reason: str = "Manual override"):
        """Manually open the circuit breaker.

        Args:
            reason: Text included in the warning log entry.
        """
        async with self._lock:
            if self.state != CircuitState.OPEN:
                logger.warning(f"Manually opening circuit breaker: {reason}")
                await self._transition_to(CircuitState.OPEN)

    async def force_close(self, reason: str = "Manual override"):
        """Manually close the circuit breaker.

        Args:
            reason: Text included in the warning log entry.
        """
        async with self._lock:
            if self.state != CircuitState.CLOSED:
                logger.warning(f"Manually closing circuit breaker: {reason}")
                self.metrics.consecutive_failures = 0
                self.metrics.consecutive_successes = 0
                await self._transition_to(CircuitState.CLOSED)

    async def reset(self):
        """Reset circuit breaker to initial state.

        Replaces the metrics object and clears the rolling window. No
        transition is recorded and listeners are not notified.
        """
        async with self._lock:
            self.state = CircuitState.CLOSED
            self.metrics = CircuitBreakerMetrics()
            self._rolling_window.clear()
            self._half_open_requests = 0
            self._last_state_change = time.time()
            logger.info("Circuit breaker reset to initial state")

    def add_listener(self, listener: Callable):
        """Add state change listener.

        Args:
            listener: Called as ``listener(old_state, new_state, metrics)`` on
                every transition; may be a plain callable or a coroutine
                function. It runs while the breaker's lock is held.
        """
        self._listeners.append(listener)

    def remove_listener(self, listener: Callable):
        """Remove state change listener (no-op if it is not registered)."""
        if listener in self._listeners:
            self._listeners.remove(listener)

    def get_status(self) -> Dict[str, Any]:
        """Get current circuit breaker status.

        Returns:
            A dict with the state value, metric counters, selected config
            values, seconds until recovery (``None`` unless OPEN) and the
            last five recorded state transitions.
        """
        return {
            "state": self.state.value,
            "metrics": {
                "total_calls": self.metrics.total_calls,
                "successful_calls": self.metrics.successful_calls,
                "failed_calls": self.metrics.failed_calls,
                "rejected_calls": self.metrics.rejected_calls,
                "error_rate": self.metrics.get_error_rate(),
                "consecutive_failures": self.metrics.consecutive_failures,
                "consecutive_successes": self.metrics.consecutive_successes,
            },
            "config": {
                "failure_threshold": self.config.failure_threshold,
                "success_threshold": self.config.success_threshold,
                "recovery_timeout": self.config.recovery_timeout,
                "error_rate_threshold": self.config.error_rate_threshold,
            },
            "time_until_recovery": (
                self._time_until_recovery() if self.state == CircuitState.OPEN else None
            ),
            "state_transitions": self.metrics.state_transitions[
                -5:
            ],  # Last 5 transitions
        }
351
+
352
+
353
class CircuitBreakerManager:
    """Registry of named circuit breakers, one per protected resource."""

    def __init__(self):
        """Start with no breakers and a default ``CircuitBreakerConfig``."""
        self._default_config = CircuitBreakerConfig()
        self._breakers: Dict[str, ConnectionCircuitBreaker] = {}

    def get_or_create(
        self, name: str, config: Optional[CircuitBreakerConfig] = None
    ) -> ConnectionCircuitBreaker:
        """Return the breaker registered under ``name``, creating it if absent.

        ``config`` is only used when a new breaker is created; without it the
        manager's current default configuration is used.
        """
        existing = self._breakers.get(name)
        if existing is not None:
            return existing

        created = ConnectionCircuitBreaker(config or self._default_config)
        self._breakers[name] = created
        return created

    def get_all_status(self) -> Dict[str, Dict[str, Any]]:
        """Return ``get_status()`` of every registered breaker, keyed by name."""
        report: Dict[str, Dict[str, Any]] = {}
        for name, breaker in self._breakers.items():
            report[name] = breaker.get_status()
        return report

    async def reset_all(self):
        """Reset every registered breaker, one after another."""
        for breaker in self._breakers.values():
            await breaker.reset()

    def set_default_config(self, config: CircuitBreakerConfig):
        """Replace the configuration used for breakers created from now on."""
        self._default_config = config
@@ -72,7 +72,7 @@ class MiddlewareAccessControlManager:
72
72
  """Check if user can access a specific session."""
73
73
 
74
74
  # Use Kailash permission check node
75
- result = self.permission_check_node.process(
75
+ result = self.permission_check_node.execute(
76
76
  {
77
77
  "user_context": user_context,
78
78
  "resource_type": "session",
@@ -114,7 +114,7 @@ class MiddlewareAccessControlManager:
114
114
 
115
115
  # Audit logging using Kailash audit node
116
116
  if self.enable_audit and self.audit_node:
117
- self.audit_node.process(
117
+ self.audit_node.execute(
118
118
  {
119
119
  "event_type": "workflow_access_check",
120
120
  "user_id": user_context.user_id,
@@ -192,7 +192,7 @@ class MiddlewareAccessControlManager:
192
192
  ) -> Dict[str, Any]:
193
193
  """Assign role to user using Kailash role management node."""
194
194
 
195
- result = self.role_mgmt_node.process(
195
+ result = self.role_mgmt_node.execute(
196
196
  {
197
197
  "action": "assign_role",
198
198
  "user_id": user_id,
@@ -239,7 +239,7 @@ class MiddlewareAccessControlManager:
239
239
 
240
240
  # Audit the rule creation
241
241
  if self.enable_audit and self.audit_node:
242
- self.audit_node.process(
242
+ self.audit_node.execute(
243
243
  {
244
244
  "event_type": "permission_rule_created",
245
245
  "rule_data": rule_data,
@@ -366,7 +366,7 @@ class MiddlewareAuthenticationMiddleware:
366
366
  try:
367
367
  # This would typically validate JWT token
368
368
  # For now, simulating with credential manager
369
- cred_result = self.credential_manager.process(
369
+ cred_result = self.credential_manager.execute(
370
370
  {"action": "validate_token", "token": token}
371
371
  )
372
372
 
@@ -388,7 +388,7 @@ class MiddlewareAuthenticationMiddleware:
388
388
 
389
389
  except Exception as e:
390
390
  # Log security event using Kailash security event node
391
- self.access_manager.security_event_node.process(
391
+ self.access_manager.security_event_node.execute(
392
392
  {
393
393
  "event_type": "authentication_failure",
394
394
  "error": str(e),
@@ -371,7 +371,7 @@ EXPLANATION:
371
371
 
372
372
  try:
373
373
  result = await asyncio.to_thread(
374
- self.llm_node.process, messages=[{"role": "user", "content": prompt}]
374
+ self.llm_node.execute, messages=[{"role": "user", "content": prompt}]
375
375
  )
376
376
 
377
377
  # Extract content from response
@@ -847,10 +847,10 @@ What would you like to work on? Just describe what you want to accomplish and I'
847
847
  """Store chat message with embedding in vector database."""
848
848
  try:
849
849
  # Generate embedding
850
- embedding_result = await self.embedding_node.process({"text": content})
850
+ embedding_result = self.embedding_node.execute(text=content)
851
851
 
852
852
  # Store in database (simplified for now)
853
- await self.vector_db.process(
853
+ self.vector_db.execute(
854
854
  {
855
855
  "query": "INSERT INTO chat_messages (id, session_id, user_id, content, role, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
856
856
  "parameters": [
@@ -875,10 +875,10 @@ What would you like to work on? Just describe what you want to accomplish and I'
875
875
  """Find similar past conversations using vector search."""
876
876
  try:
877
877
  # Generate query embedding
878
- query_embedding = await self.embedding_node.process({"text": query})
878
+ query_embedding = self.embedding_node.execute(text=query)
879
879
 
880
880
  # Search for similar messages (simplified for now)
881
- search_result = await self.vector_db.process(
881
+ search_result = self.vector_db.execute(
882
882
  {
883
883
  "query": "SELECT * FROM chat_messages WHERE role = 'user' ORDER BY timestamp DESC LIMIT ?",
884
884
  "parameters": [limit * 2],
@@ -930,7 +930,7 @@ What would you like to work on? Just describe what you want to accomplish and I'
930
930
 
931
931
  try:
932
932
  # Generate query embedding
933
- query_embedding = await self.embedding_node.process({"text": query})
933
+ query_embedding = self.embedding_node.execute(text=query)
934
934
 
935
935
  # Prepare filters
936
936
  filters = {}
@@ -948,7 +948,7 @@ What would you like to work on? Just describe what you want to accomplish and I'
948
948
  query_parts.append("ORDER BY timestamp DESC LIMIT ?")
949
949
  params.append(limit)
950
950
 
951
- search_result = await self.vector_db.process(
951
+ search_result = self.vector_db.execute(
952
952
  {"query": " ".join(query_parts), "parameters": params}
953
953
  )
954
954
 
@@ -225,10 +225,8 @@ class APIGateway:
225
225
  # Data transformer for request/response formatting
226
226
  self.data_transformer = DataTransformer(
227
227
  name="gateway_transformer",
228
- transformations=[
229
- {"type": "validate", "schema": "api_response"},
230
- {"type": "add_field", "field": "timestamp", "value": "now()"},
231
- ],
228
+ # Transformations will be provided at runtime
229
+ transformations=[],
232
230
  )
233
231
 
234
232
  # Credential manager for gateway security
@@ -362,17 +360,9 @@ class APIGateway:
362
360
  "active": session.active,
363
361
  }
364
362
 
365
- transformed = await self.data_transformer.process(
366
- {
367
- "data": response_data,
368
- "transformations": [
369
- {
370
- "type": "add_field",
371
- "field": "api_version",
372
- "value": self.version,
373
- }
374
- ],
375
- }
363
+ transformed = self.data_transformer.execute(
364
+ data=response_data,
365
+ transformations=[f"{{**data, 'api_version': '{self.version}'}}"],
376
366
  )
377
367
 
378
368
  return SessionResponse(**transformed["result"])
@@ -13,7 +13,7 @@ import logging
13
13
  import time
14
14
  import uuid
15
15
  from dataclasses import dataclass, field
16
- from datetime import datetime
16
+ from datetime import UTC, datetime
17
17
  from enum import Enum
18
18
  from typing import Any, AsyncIterator, Callable, Dict, List, Optional
19
19
 
@@ -55,7 +55,7 @@ class RequestEvent:
55
55
  event_id: str = field(default_factory=lambda: f"evt_{uuid.uuid4().hex[:12]}")
56
56
  event_type: EventType = EventType.REQUEST_CREATED
57
57
  request_id: str = ""
58
- timestamp: datetime = field(default_factory=datetime.utcnow)
58
+ timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
59
59
  sequence_number: int = 0
60
60
  data: Dict[str, Any] = field(default_factory=dict)
61
61
  metadata: Dict[str, Any] = field(default_factory=dict)
@@ -103,6 +103,7 @@ class EventStore:
103
103
  # In-memory buffer
104
104
  self._buffer: List[RequestEvent] = []
105
105
  self._buffer_lock = asyncio.Lock()
106
+ self._flush_in_progress = False
106
107
 
107
108
  # Event stream
108
109
  self._event_stream: List[RequestEvent] = []
@@ -120,7 +121,16 @@ class EventStore:
120
121
  self.flush_count = 0
121
122
 
122
123
  # Start flush task
123
- self._flush_task = asyncio.create_task(self._flush_loop())
124
+ try:
125
+ self._flush_task = asyncio.create_task(self._flush_loop())
126
+ except RuntimeError:
127
+ # If no event loop is running, defer task creation
128
+ self._flush_task = None
129
+
130
async def _ensure_flush_task(self):
    """Create the background flush task if none has been created yet.

    Does nothing when ``self._flush_task`` is already set; it does not check
    whether an existing task is still running.
    """
    if self._flush_task is not None:
        return
    self._flush_task = asyncio.create_task(self._flush_loop())
124
134
 
125
135
  async def append(
126
136
  self,
@@ -130,6 +140,9 @@ class EventStore:
130
140
  metadata: Optional[Dict[str, Any]] = None,
131
141
  ) -> RequestEvent:
132
142
  """Append an event to the store."""
143
+ # Ensure flush task is running
144
+ await self._ensure_flush_task()
145
+
133
146
  async with self._buffer_lock:
134
147
  # Get next sequence number
135
148
  sequence = self._sequences.get(request_id, 0)
@@ -148,19 +161,27 @@ class EventStore:
148
161
  self._buffer.append(event)
149
162
  self.event_count += 1
150
163
 
151
- # Flush if buffer is full
152
- if len(self._buffer) >= self.batch_size:
153
- await self._flush_buffer()
164
+ # Check if we need to flush (but don't flush inside the lock)
165
+ needs_flush = len(self._buffer) >= self.batch_size
154
166
 
155
- # Apply projections
156
- await self._apply_projections(event)
167
+ # Apply projections outside the lock
168
+ await self._apply_projections(event)
157
169
 
158
- logger.debug(
159
- f"Appended event {event.event_type.value} for request {request_id} "
160
- f"(seq: {sequence})"
161
- )
170
+ # Flush if needed (outside the lock to avoid deadlock)
171
+ if needs_flush and not self._flush_in_progress:
172
+ # Set flag to prevent concurrent flushes
173
+ self._flush_in_progress = True
174
+ try:
175
+ await self._flush_buffer()
176
+ finally:
177
+ self._flush_in_progress = False
162
178
 
163
- return event
179
+ logger.debug(
180
+ f"Appended event {event.event_type.value} for request {request_id} "
181
+ f"(seq: {sequence})"
182
+ )
183
+
184
+ return event
164
185
 
165
186
  async def get_events(
166
187
  self,
@@ -233,6 +254,9 @@ class EventStore:
233
254
  follow: bool = False,
234
255
  ) -> AsyncIterator[RequestEvent]:
235
256
  """Stream events as they occur."""
257
+ # Ensure buffer is flushed before streaming
258
+ await self._flush_buffer()
259
+
236
260
  last_index = 0
237
261
 
238
262
  while True:
@@ -294,12 +318,19 @@ class EventStore:
294
318
 
295
319
  async def _flush_buffer(self) -> None:
296
320
  """Flush event buffer to storage."""
297
- async with self._buffer_lock:
298
- if not self._buffer:
299
- return
300
-
301
- events_to_flush = self._buffer.copy()
302
- self._buffer.clear()
321
+ # Acquire lock with timeout to prevent deadlock
322
+ try:
323
+ # Use wait_for to add timeout on lock acquisition
324
+ async with asyncio.timeout(1.0): # 1 second timeout
325
+ async with self._buffer_lock:
326
+ if not self._buffer:
327
+ return
328
+
329
+ events_to_flush = self._buffer.copy()
330
+ self._buffer.clear()
331
+ except asyncio.TimeoutError:
332
+ logger.warning("Timeout acquiring buffer lock during flush")
333
+ return
303
334
 
304
335
  # Add to in-memory stream
305
336
  async with self._stream_lock:
@@ -317,10 +348,16 @@ class EventStore:
317
348
  while True:
318
349
  try:
319
350
  await asyncio.sleep(self.flush_interval)
320
- await self._flush_buffer()
351
+ if not self._flush_in_progress:
352
+ self._flush_in_progress = True
353
+ try:
354
+ await self._flush_buffer()
355
+ finally:
356
+ self._flush_in_progress = False
321
357
  except asyncio.CancelledError:
322
358
  # Final flush before shutdown
323
- await self._flush_buffer()
359
+ if not self._flush_in_progress:
360
+ await self._flush_buffer()
324
361
  break
325
362
  except Exception as e:
326
363
  logger.error(f"Flush error: {e}")
@@ -388,11 +425,14 @@ class EventStore:
388
425
 
389
426
  async def close(self) -> None:
390
427
  """Close event store and flush remaining events."""
391
- self._flush_task.cancel()
392
- try:
393
- await self._flush_task
394
- except asyncio.CancelledError:
395
- pass
428
+ if self._flush_task is not None:
429
+ self._flush_task.cancel()
430
+ try:
431
+ await self._flush_task
432
+ except asyncio.CancelledError:
433
+ pass
434
+ # Final flush
435
+ await self._flush_buffer()
396
436
 
397
437
 
398
438
  # Example projection handlers