ccproxy-api 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/openai/adapter.py +1 -1
- ccproxy/adapters/openai/streaming.py +1 -0
- ccproxy/api/app.py +134 -224
- ccproxy/api/dependencies.py +22 -2
- ccproxy/api/middleware/errors.py +27 -3
- ccproxy/api/middleware/logging.py +4 -0
- ccproxy/api/responses.py +6 -1
- ccproxy/api/routes/claude.py +222 -17
- ccproxy/api/routes/proxy.py +25 -6
- ccproxy/api/services/permission_service.py +2 -2
- ccproxy/claude_sdk/__init__.py +4 -8
- ccproxy/claude_sdk/client.py +661 -131
- ccproxy/claude_sdk/exceptions.py +16 -0
- ccproxy/claude_sdk/manager.py +219 -0
- ccproxy/claude_sdk/message_queue.py +342 -0
- ccproxy/claude_sdk/options.py +5 -0
- ccproxy/claude_sdk/session_client.py +546 -0
- ccproxy/claude_sdk/session_pool.py +550 -0
- ccproxy/claude_sdk/stream_handle.py +538 -0
- ccproxy/claude_sdk/stream_worker.py +392 -0
- ccproxy/claude_sdk/streaming.py +53 -11
- ccproxy/cli/commands/serve.py +96 -0
- ccproxy/cli/options/claude_options.py +47 -0
- ccproxy/config/__init__.py +0 -3
- ccproxy/config/claude.py +171 -23
- ccproxy/config/discovery.py +10 -1
- ccproxy/config/scheduler.py +4 -4
- ccproxy/config/settings.py +19 -1
- ccproxy/core/http_transformers.py +305 -73
- ccproxy/core/logging.py +108 -12
- ccproxy/core/transformers.py +5 -0
- ccproxy/models/claude_sdk.py +57 -0
- ccproxy/models/detection.py +126 -0
- ccproxy/observability/access_logger.py +72 -14
- ccproxy/observability/metrics.py +151 -0
- ccproxy/observability/storage/duckdb_simple.py +12 -0
- ccproxy/observability/storage/models.py +16 -0
- ccproxy/observability/streaming_response.py +107 -0
- ccproxy/scheduler/manager.py +31 -6
- ccproxy/scheduler/tasks.py +122 -0
- ccproxy/services/claude_detection_service.py +269 -0
- ccproxy/services/claude_sdk_service.py +333 -130
- ccproxy/services/proxy_service.py +91 -200
- ccproxy/utils/__init__.py +9 -1
- ccproxy/utils/disconnection_monitor.py +83 -0
- ccproxy/utils/id_generator.py +12 -0
- ccproxy/utils/startup_helpers.py +408 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/METADATA +29 -2
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/RECORD +53 -41
- ccproxy/config/loader.py +0 -105
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
"""Session client for persistent Claude SDK connections."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import time
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import structlog
|
|
11
|
+
from claude_code_sdk import ClaudeCodeOptions
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
from ccproxy.core.async_utils import patched_typing
|
|
15
|
+
from ccproxy.utils.id_generator import generate_client_id
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
with patched_typing():
|
|
19
|
+
from claude_code_sdk import ClaudeSDKClient as ImportedClaudeSDKClient
|
|
20
|
+
|
|
21
|
+
logger = structlog.get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SessionStatus(str, Enum):
    """Lifecycle states a session can be in.

    The ``str`` mixin means each member compares equal to its plain string
    value, so members can be used directly where a string is expected.
    """

    # Declaration order is preserved because it defines iteration order.
    ACTIVE = "active"
    IDLE = "idle"
    CONNECTING = "connecting"
    INTERRUPTING = "interrupting"
    DISCONNECTED = "disconnected"
    ERROR = "error"
    EXPIRED = "expired"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SessionMetrics(BaseModel):
    """Usage counters and timestamps recorded for one session.

    ``created_at`` and ``last_used`` are compared against ``time.time()``,
    so they are expected to be wall-clock timestamps in seconds.
    """

    created_at: float
    last_used: float
    message_count: int = 0
    error_count: int = 0
    total_duration_ms: float = 0.0

    @property
    def age_seconds(self) -> float:
        """Return the seconds elapsed since ``created_at``."""
        now = time.time()
        return now - self.created_at

    @property
    def idle_seconds(self) -> float:
        """Return the seconds elapsed since ``last_used``."""
        now = time.time()
        return now - self.last_used
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class SessionClient:
    """Manages a persistent Claude SDK connection with session state.

    The instance owns one SDK client (``claude_client``), tracks its lifecycle
    in ``status``, records usage in ``metrics`` and keeps a reference to the
    currently active stream handle so the stream can be interrupted or drained.
    """

    def __init__(
        self,
        session_id: str,
        options: ClaudeCodeOptions,
        client_id: str | None = None,
        ttl_seconds: int = 3600,
    ):
        """Initialise session state without opening a connection.

        Args:
            session_id: Identifier of this session.
            options: Options handed to the SDK client when connecting.
            client_id: Identifier for the client; generated when omitted.
            ttl_seconds: Maximum session age before ``is_expired`` is true.
        """
        self.session_id = session_id
        self.client_id = client_id or generate_client_id()
        self.options = options
        self.ttl_seconds = ttl_seconds

        # SDK client and connection state
        self.claude_client: ImportedClaudeSDKClient | None = None
        self.sdk_session_id: str | None = None

        # Session management
        self.status = SessionStatus.IDLE
        self.lock = asyncio.Lock()  # Prevent concurrent access
        # Use a single timestamp so a fresh session starts with zero idle time.
        now = time.time()
        self.metrics = SessionMetrics(created_at=now, last_used=now)

        # Error handling
        self.last_error: Exception | None = None
        self.connection_attempts = 0
        self.max_connection_attempts = 3

        # Background connection task
        self._connection_task: asyncio.Task[bool] | None = None

        # Active stream tracking
        self.active_stream_task: asyncio.Task[None] | None = None
        self.has_active_stream: bool = False
        # StreamHandle when using queue-based approach
        self.active_stream_handle: Any = None

        # Interrupt synchronization
        self._interrupt_complete_event = asyncio.Event()
        self._interrupt_complete_event.set()  # Initially set (not interrupting)

        # Session reuse tracking
        self.is_newly_created = True  # Flag to track if this is a new session

    async def connect(self) -> bool:
        """Establish connection to Claude SDK.

        Holds ``self.lock`` for the whole attempt. Failures are recorded in
        ``status``, ``last_error`` and ``metrics.error_count`` rather than
        raised.

        Returns:
            True when the session is (or already was) connected, False when
            the attempt failed.
        """
        async with self.lock:
            if self.status == SessionStatus.ACTIVE and self.claude_client:
                return True

            try:
                self.status = SessionStatus.CONNECTING
                self.connection_attempts += 1

                logger.debug(
                    "session_connecting",
                    session_id=self.session_id,
                    client_id=self.client_id,
                    attempt=self.connection_attempts,
                )

                self.claude_client = ImportedClaudeSDKClient(self.options)
                await self.claude_client.connect()

                self.status = SessionStatus.ACTIVE
                self.last_error = None

                logger.debug(
                    "session_connected",
                    session_id=self.session_id,
                    client_id=self.client_id,
                    attempt=self.connection_attempts,
                )

                return True

            except Exception as e:
                self.status = SessionStatus.ERROR
                self.last_error = e
                self.metrics.error_count += 1

                logger.error(
                    "session_connection_failed",
                    session_id=self.session_id,
                    attempt=self.connection_attempts,
                    error=str(e),
                    exc_info=True,
                )

                # NOTE(review): the attempt cap is only reported here; nothing
                # in this method refuses further attempts once it is reached.
                if self.connection_attempts >= self.max_connection_attempts:
                    logger.error(
                        "session_connection_exhausted",
                        session_id=self.session_id,
                        max_attempts=self.max_connection_attempts,
                    )

                return False

    def connect_background(self) -> asyncio.Task[bool]:
        """Start connection in background without blocking.

        A new task is created only when none exists or the previous one has
        finished; otherwise the in-flight task is returned.

        Returns:
            Task that completes when connection is established
        """
        if self._connection_task is None or self._connection_task.done():
            self._connection_task = asyncio.create_task(self._connect_async())
            logger.debug(
                "session_background_connection_started",
                session_id=self.session_id,
            )
        return self._connection_task

    async def _connect_async(self) -> bool:
        """Internal async connection method for background task.

        Returns:
            The result of ``connect()``, or False if it raised.
        """
        try:
            return await self.connect()
        except Exception as e:
            logger.error(
                "session_background_connection_failed",
                session_id=self.session_id,
                error=str(e),
            )
            return False

    async def ensure_connected(self) -> bool:
        """Ensure connection is established, waiting for background task if needed.

        Returns:
            The outcome of the pending background task when one is running,
            otherwise the outcome of a direct ``connect()`` call.
        """
        if self._connection_task and not self._connection_task.done():
            # Wait for background connection to complete
            return await self._connection_task
        return await self.connect()

    async def disconnect(self) -> None:
        """Gracefully disconnect from Claude SDK.

        Errors from the SDK are logged and swallowed; the client reference is
        always dropped and the status set to DISCONNECTED when a client existed.
        """
        async with self.lock:
            if self.claude_client:
                try:
                    await self.claude_client.disconnect()
                    logger.debug("session_disconnected", session_id=self.session_id)
                except Exception as e:
                    logger.warning(
                        "session_disconnect_error",
                        session_id=self.session_id,
                        error=str(e),
                    )
                finally:
                    self.claude_client = None
                    self.status = SessionStatus.DISCONNECTED

    async def interrupt(self) -> None:
        """Interrupt any ongoing operations with timeout and force disconnect fallback.

        Does nothing when there is no client or an interrupt is already in
        progress. Otherwise the active stream handle (if any) is interrupted
        first, then the SDK client. A timeout or any other error falls back to
        ``_force_disconnect``. On exit the status is never left as
        INTERRUPTING, ``has_active_stream`` is cleared and the completion event
        is set.
        """
        if not self.claude_client:
            logger.debug(
                "session_interrupt_no_client",
                session_id=self.session_id,
            )
            return

        # Check if already interrupting to prevent duplicate interrupt calls
        if self.status == SessionStatus.INTERRUPTING:
            logger.debug(
                "session_interrupt_already_in_progress",
                session_id=self.session_id,
                message="Interrupt already in progress, skipping duplicate call",
            )
            return

        # Set status to INTERRUPTING to prevent reuse during interrupt
        self.status = SessionStatus.INTERRUPTING

        # Clear the interrupt completion event to signal that interrupt is starting
        self._interrupt_complete_event.clear()

        logger.debug(
            "session_interrupting",
            session_id=self.session_id,
            status=self.status.value,
        )

        # Time budget for the entire interrupt operation.
        # NOTE(review): the SDK call below is allowed 30 s, so this 15 s budget
        # is only reported in the ``finally`` block, never enforced - confirm
        # which limit is intended.
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        max_interrupt_time = 15.0  # Maximum 15 seconds for entire interrupt

        try:
            # First, interrupt the stream handle if available. Keep a local
            # reference: another coroutine (e.g. drain_active_stream) may reset
            # self.active_stream_handle to None while we are awaiting.
            handle = self.active_stream_handle
            if handle:
                logger.debug(
                    "session_interrupt_via_stream_handle",
                    session_id=self.session_id,
                    handle_id=handle.handle_id,
                    message="Interrupting via stream handle first",
                )

                try:
                    # Interrupt the stream handle - this stops the worker
                    interrupted = await handle.interrupt()
                    if interrupted:
                        logger.debug(
                            "session_stream_handle_interrupted",
                            session_id=self.session_id,
                            handle_id=handle.handle_id,
                        )
                        # Clear the handle reference
                        self.active_stream_handle = None
                except Exception as e:
                    logger.warning(
                        "session_stream_handle_interrupt_error",
                        session_id=self.session_id,
                        error=str(e),
                        message="Failed to interrupt stream handle, continuing with SDK interrupt",
                    )

            # Now call SDK interrupt - should complete quickly since worker is stopped
            logger.debug(
                "session_interrupt_calling_sdk",
                session_id=self.session_id,
                message="Calling SDK interrupt method",
            )

            try:
                # Call interrupt directly with timeout - avoid creating separate tasks
                await asyncio.wait_for(self.claude_client.interrupt(), timeout=30.0)
                logger.debug(
                    "session_interrupted_gracefully", session_id=self.session_id
                )
                # Reset status after successful interrupt
                self.status = SessionStatus.DISCONNECTED

            except TimeoutError:
                # Interrupt timed out
                logger.warning(
                    "session_interrupt_sdk_timeout",
                    session_id=self.session_id,
                    message="SDK interrupt timed out after 30 seconds",
                )
                # Re-raised so the outer handler performs the force disconnect.
                raise TimeoutError("Interrupt timed out") from None

        except TimeoutError:
            logger.warning(
                "session_interrupt_timeout",
                session_id=self.session_id,
                message="Graceful interrupt timed out, forcing disconnect",
            )

            # Force disconnect if interrupt hangs
            await self._force_disconnect()

        except Exception as e:
            logger.warning(
                "session_interrupt_error",
                session_id=self.session_id,
                error=str(e),
                error_type=type(e).__name__,
            )

            # If interrupt fails, try force disconnect as fallback
            try:
                logger.debug(
                    "session_interrupt_fallback_disconnect",
                    session_id=self.session_id,
                )
                await self._force_disconnect()
            except Exception as disconnect_error:
                logger.error(
                    "session_force_disconnect_failed",
                    session_id=self.session_id,
                    error=str(disconnect_error),
                    error_type=type(disconnect_error).__name__,
                )
        finally:
            # Report (but do not enforce) when the time budget was exceeded
            total_elapsed = loop.time() - start_time
            if total_elapsed > max_interrupt_time:
                logger.error(
                    "session_interrupt_max_time_exceeded",
                    session_id=self.session_id,
                    elapsed_seconds=total_elapsed,
                    max_seconds=max_interrupt_time,
                    message="Interrupt operation exceeded maximum time limit",
                )

            # Always reset status from INTERRUPTING
            if self.status == SessionStatus.INTERRUPTING:
                # Force mark as disconnected
                self.status = SessionStatus.DISCONNECTED
                self.claude_client = None

            # Mark stream as no longer active
            self.has_active_stream = False

            # Signal that interrupt has completed (success or failure)
            self._interrupt_complete_event.set()

    async def _force_disconnect(self) -> None:
        """Force disconnect the session when interrupt fails or times out.

        Drains the active stream for at most 5 seconds, then disconnects the
        SDK client with a 3 second timeout. Whatever happens, the client
        reference is dropped, the status becomes DISCONNECTED and
        ``last_error`` records that the session was force disconnected.
        """
        logger.warning(
            "session_force_disconnecting",
            session_id=self.session_id,
            message="Force disconnecting stuck session",
        )

        # Try to drain any active stream first with timeout
        try:
            await asyncio.wait_for(
                self.drain_active_stream(),
                timeout=5.0,  # 5 second timeout for draining in force disconnect
            )
        except TimeoutError:
            logger.warning(
                "session_force_drain_timeout",
                session_id=self.session_id,
                message="Force disconnect stream draining timed out after 5 seconds",
            )

        try:
            if self.claude_client:
                # Try to disconnect with timeout
                await asyncio.wait_for(
                    self.claude_client.disconnect(),
                    timeout=3.0,  # 3 second timeout for disconnect
                )
        except Exception as e:
            logger.warning(
                "session_force_disconnect_error",
                session_id=self.session_id,
                error=str(e),
            )
        finally:
            # Always clean up the client reference and mark as disconnected
            self.claude_client = None
            self.status = SessionStatus.DISCONNECTED
            self.last_error = Exception(
                "Session force disconnected due to hanging operation"
            )

            logger.warning(
                "session_force_disconnected",
                session_id=self.session_id,
                message="Session forcibly disconnected and marked for cleanup",
            )

    async def drain_active_stream(self) -> None:
        """Drain any active stream to prevent stale messages on reconnection.

        Returns immediately when no stream is marked active. With a stream
        handle present, waits up to 30 seconds for it to complete and then
        clears all stream-tracking attributes. Without a handle, only the
        tracking flags are reset.
        """
        if not self.has_active_stream:
            logger.debug(
                "session_no_active_stream_to_drain",
                session_id=self.session_id,
            )
            return

        logger.debug(
            "session_draining_active_stream",
            session_id=self.session_id,
            message="Draining active stream after client disconnection",
        )

        # With queue-based architecture, we use the stream handle. Keep a local
        # reference: interrupt() may reset self.active_stream_handle to None
        # while we await, and the log calls below still need the handle id.
        handle = self.active_stream_handle
        if handle:
            logger.debug(
                "session_draining_via_handle",
                session_id=self.session_id,
                handle_id=handle.handle_id,
                message="Using stream handle to drain messages",
            )

            try:
                # Wait for the worker to complete
                completed = await handle.wait_for_completion(timeout=30.0)
                if completed:
                    logger.debug(
                        "session_stream_drained_via_handle",
                        session_id=self.session_id,
                        handle_id=handle.handle_id,
                    )
                else:
                    logger.warning(
                        "session_stream_drain_timeout_via_handle",
                        session_id=self.session_id,
                        handle_id=handle.handle_id,
                        message="Stream drain timed out after 30 seconds",
                    )
            except Exception as e:
                logger.error(
                    "session_stream_drain_error_via_handle",
                    session_id=self.session_id,
                    handle_id=handle.handle_id,
                    error=str(e),
                    error_type=type(e).__name__,
                )
            finally:
                self.active_stream_handle = None
                self.has_active_stream = False
                self.active_stream_task = None

            return

        # Legacy path - should not happen with queue-based architecture
        logger.warning(
            "session_no_handle_for_drain",
            session_id=self.session_id,
            message="No stream handle available for draining",
        )
        self.has_active_stream = False
        self.active_stream_task = None

    async def wait_for_interrupt_complete(self, timeout: float = 5.0) -> bool:
        """Wait for any in-progress interrupt to complete.

        Args:
            timeout: Maximum time to wait in seconds

        Returns:
            True if interrupt completed within timeout, False if timed out
        """
        try:
            await asyncio.wait_for(
                self._interrupt_complete_event.wait(), timeout=timeout
            )
            logger.debug(
                "session_interrupt_wait_completed",
                session_id=self.session_id,
                message="Interrupt completion event signaled",
            )
            return True
        except TimeoutError:
            logger.warning(
                "session_interrupt_wait_timeout",
                session_id=self.session_id,
                timeout=timeout,
                message="Timeout waiting for interrupt to complete",
            )
            return False

    async def is_healthy(self) -> bool:
        """Check if the session connection is healthy.

        Returns:
            True when a client exists and the status is ACTIVE.
        """
        # Add health check logic here if Claude SDK provides it
        # For now, assume active status means healthy
        return bool(self.claude_client and self.status == SessionStatus.ACTIVE)

    def is_expired(self) -> bool:
        """Check if session has exceeded TTL.

        Returns:
            True when the session age is strictly greater than ``ttl_seconds``.
        """
        return self.metrics.age_seconds > self.ttl_seconds

    def update_usage(self) -> None:
        """Update session usage metrics.

        Refreshes ``last_used``, increments ``message_count`` and clears
        ``is_newly_created`` once more than one message has been counted.
        """
        old_message_count = self.metrics.message_count
        self.metrics.last_used = time.time()
        self.metrics.message_count += 1

        # Mark session as reused after first usage
        if self.is_newly_created and self.metrics.message_count > 1:
            self.is_newly_created = False

        logger.debug(
            "session_usage_updated",
            session_id=self.session_id,
            message_count=self.metrics.message_count,
            previous_message_count=old_message_count,
            age_seconds=self.metrics.age_seconds,
            idle_seconds=self.metrics.idle_seconds,
            is_newly_created=self.is_newly_created,
        )

    def mark_as_reused(self) -> None:
        """Mark this session as being reused (not newly created)."""
        self.is_newly_created = False

    def should_cleanup(
        self, idle_threshold: int = 300, stuck_threshold: int = 900
    ) -> bool:
        """Determine if session should be cleaned up.

        Args:
            idle_threshold: Max idle time in seconds before cleanup
            stuck_threshold: Max time a session can be ACTIVE without going idle (indicating stuck)

        Returns:
            True when the session is expired, idle for longer than
            ``idle_threshold``, in ERROR or DISCONNECTED status, or looks stuck.
        """
        # Check if session has been stuck in ACTIVE state too long
        is_potentially_stuck = (
            self.status == SessionStatus.ACTIVE
            and self.metrics.idle_seconds < 10  # Still being used but...
            and self.metrics.age_seconds
            > stuck_threshold  # ...has been active way too long
        )

        return (
            self.is_expired()
            or self.metrics.idle_seconds > idle_threshold
            or self.status in (SessionStatus.ERROR, SessionStatus.DISCONNECTED)
            or is_potentially_stuck
        )
|