ccproxy-api 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/openai/adapter.py +1 -1
- ccproxy/adapters/openai/streaming.py +1 -0
- ccproxy/api/app.py +134 -224
- ccproxy/api/dependencies.py +22 -2
- ccproxy/api/middleware/errors.py +27 -3
- ccproxy/api/middleware/logging.py +4 -0
- ccproxy/api/responses.py +6 -1
- ccproxy/api/routes/claude.py +222 -17
- ccproxy/api/routes/proxy.py +25 -6
- ccproxy/api/services/permission_service.py +2 -2
- ccproxy/claude_sdk/__init__.py +4 -8
- ccproxy/claude_sdk/client.py +661 -131
- ccproxy/claude_sdk/exceptions.py +16 -0
- ccproxy/claude_sdk/manager.py +219 -0
- ccproxy/claude_sdk/message_queue.py +342 -0
- ccproxy/claude_sdk/options.py +5 -0
- ccproxy/claude_sdk/session_client.py +546 -0
- ccproxy/claude_sdk/session_pool.py +550 -0
- ccproxy/claude_sdk/stream_handle.py +538 -0
- ccproxy/claude_sdk/stream_worker.py +392 -0
- ccproxy/claude_sdk/streaming.py +53 -11
- ccproxy/cli/commands/serve.py +96 -0
- ccproxy/cli/options/claude_options.py +47 -0
- ccproxy/config/__init__.py +0 -3
- ccproxy/config/claude.py +171 -23
- ccproxy/config/discovery.py +10 -1
- ccproxy/config/scheduler.py +4 -4
- ccproxy/config/settings.py +19 -1
- ccproxy/core/http_transformers.py +305 -73
- ccproxy/core/logging.py +108 -12
- ccproxy/core/transformers.py +5 -0
- ccproxy/models/claude_sdk.py +57 -0
- ccproxy/models/detection.py +126 -0
- ccproxy/observability/access_logger.py +72 -14
- ccproxy/observability/metrics.py +151 -0
- ccproxy/observability/storage/duckdb_simple.py +12 -0
- ccproxy/observability/storage/models.py +16 -0
- ccproxy/observability/streaming_response.py +107 -0
- ccproxy/scheduler/manager.py +31 -6
- ccproxy/scheduler/tasks.py +122 -0
- ccproxy/services/claude_detection_service.py +269 -0
- ccproxy/services/claude_sdk_service.py +333 -130
- ccproxy/services/proxy_service.py +91 -200
- ccproxy/utils/__init__.py +9 -1
- ccproxy/utils/disconnection_monitor.py +83 -0
- ccproxy/utils/id_generator.py +12 -0
- ccproxy/utils/startup_helpers.py +408 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/METADATA +29 -2
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/RECORD +53 -41
- ccproxy/config/loader.py +0 -105
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
"""Session-aware connection pool for persistent Claude SDK connections."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import contextlib
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import structlog
|
|
10
|
+
from claude_code_sdk import ClaudeCodeOptions
|
|
11
|
+
|
|
12
|
+
from ccproxy.claude_sdk.session_client import SessionClient, SessionStatus
|
|
13
|
+
from ccproxy.config.claude import SessionPoolSettings
|
|
14
|
+
from ccproxy.core.errors import ClaudeProxyError, ServiceUnavailableError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Module-level structlog logger; every event in this file is emitted as a snake_case event name plus key/value context.
logger = structlog.get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SessionPool:
|
|
25
|
+
"""Manages persistent Claude SDK connections by session."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, config: SessionPoolSettings | None = None):
|
|
28
|
+
self.config = config or SessionPoolSettings()
|
|
29
|
+
self.sessions: dict[str, SessionClient] = {}
|
|
30
|
+
self.cleanup_task: asyncio.Task[None] | None = None
|
|
31
|
+
self._shutdown = False
|
|
32
|
+
self._lock = asyncio.Lock()
|
|
33
|
+
|
|
34
|
+
async def start(self) -> None:
|
|
35
|
+
"""Start the session pool and cleanup task."""
|
|
36
|
+
if not self.config.enabled:
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
logger.debug(
|
|
40
|
+
"session_pool_starting",
|
|
41
|
+
max_sessions=self.config.max_sessions,
|
|
42
|
+
ttl=self.config.session_ttl,
|
|
43
|
+
cleanup_interval=self.config.cleanup_interval,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
self.cleanup_task = asyncio.create_task(self._cleanup_loop())
|
|
47
|
+
|
|
48
|
+
async def stop(self) -> None:
|
|
49
|
+
"""Stop the session pool and cleanup all sessions."""
|
|
50
|
+
self._shutdown = True
|
|
51
|
+
|
|
52
|
+
if self.cleanup_task:
|
|
53
|
+
self.cleanup_task.cancel()
|
|
54
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
55
|
+
await self.cleanup_task
|
|
56
|
+
|
|
57
|
+
# Disconnect all active sessions
|
|
58
|
+
async with self._lock:
|
|
59
|
+
disconnect_tasks = [
|
|
60
|
+
session_client.disconnect() for session_client in self.sessions.values()
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
if disconnect_tasks:
|
|
64
|
+
await asyncio.gather(*disconnect_tasks, return_exceptions=True)
|
|
65
|
+
|
|
66
|
+
self.sessions.clear()
|
|
67
|
+
|
|
68
|
+
logger.debug("session_pool_stopped")
|
|
69
|
+
|
|
70
|
+
async def get_session_client(
|
|
71
|
+
self, session_id: str, options: ClaudeCodeOptions
|
|
72
|
+
) -> SessionClient:
|
|
73
|
+
"""Get or create a session context for the given session_id."""
|
|
74
|
+
logger.debug(
|
|
75
|
+
"session_pool_get_client_start",
|
|
76
|
+
session_id=session_id,
|
|
77
|
+
pool_enabled=self.config.enabled,
|
|
78
|
+
current_sessions=len(self.sessions),
|
|
79
|
+
max_sessions=self.config.max_sessions,
|
|
80
|
+
session_exists=session_id in self.sessions,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if not self.config.enabled:
|
|
84
|
+
logger.error("session_pool_disabled", session_id=session_id)
|
|
85
|
+
raise ClaudeProxyError(
|
|
86
|
+
message="Session pool is disabled",
|
|
87
|
+
error_type="configuration_error",
|
|
88
|
+
status_code=500,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Check session limit and get/create session
|
|
92
|
+
async with self._lock:
|
|
93
|
+
if (
|
|
94
|
+
session_id not in self.sessions
|
|
95
|
+
and len(self.sessions) >= self.config.max_sessions
|
|
96
|
+
):
|
|
97
|
+
logger.error(
|
|
98
|
+
"session_pool_at_capacity",
|
|
99
|
+
session_id=session_id,
|
|
100
|
+
current_sessions=len(self.sessions),
|
|
101
|
+
max_sessions=self.config.max_sessions,
|
|
102
|
+
)
|
|
103
|
+
raise ServiceUnavailableError(
|
|
104
|
+
f"Session pool at capacity: {self.config.max_sessions}"
|
|
105
|
+
)
|
|
106
|
+
options.continue_conversation = True
|
|
107
|
+
# Get existing session or create new one
|
|
108
|
+
if session_id in self.sessions:
|
|
109
|
+
session_client = self.sessions[session_id]
|
|
110
|
+
logger.debug(
|
|
111
|
+
"session_pool_existing_session_found",
|
|
112
|
+
session_id=session_id,
|
|
113
|
+
client_id=session_client.client_id,
|
|
114
|
+
session_status=session_client.status.value,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Check if session is currently being interrupted
|
|
118
|
+
if session_client.status.value == "interrupting":
|
|
119
|
+
logger.warning(
|
|
120
|
+
"session_pool_interrupting_session",
|
|
121
|
+
session_id=session_id,
|
|
122
|
+
client_id=session_client.client_id,
|
|
123
|
+
message="Session is currently being interrupted, waiting for completion then creating new session",
|
|
124
|
+
)
|
|
125
|
+
# Wait for the interrupt process to complete properly
|
|
126
|
+
interrupt_completed = (
|
|
127
|
+
await session_client.wait_for_interrupt_complete(timeout=5.0)
|
|
128
|
+
)
|
|
129
|
+
if interrupt_completed:
|
|
130
|
+
logger.debug(
|
|
131
|
+
"session_pool_interrupt_completed",
|
|
132
|
+
session_id=session_id,
|
|
133
|
+
client_id=session_client.client_id,
|
|
134
|
+
message="Interrupt completed successfully, proceeding with session replacement",
|
|
135
|
+
)
|
|
136
|
+
else:
|
|
137
|
+
logger.warning(
|
|
138
|
+
"session_pool_interrupt_timeout",
|
|
139
|
+
session_id=session_id,
|
|
140
|
+
client_id=session_client.client_id,
|
|
141
|
+
message="Interrupt did not complete within 5 seconds, proceeding anyway",
|
|
142
|
+
)
|
|
143
|
+
# Don't try to reuse a session that was being interrupted
|
|
144
|
+
await self._remove_session_unlocked(session_id)
|
|
145
|
+
session_client = await self._create_session_unlocked(
|
|
146
|
+
session_id, options
|
|
147
|
+
)
|
|
148
|
+
# Check if session has an active stream that needs cleanup
|
|
149
|
+
elif (
|
|
150
|
+
session_client.has_active_stream
|
|
151
|
+
or session_client.active_stream_handle
|
|
152
|
+
):
|
|
153
|
+
logger.debug(
|
|
154
|
+
"session_pool_active_stream_detected",
|
|
155
|
+
session_id=session_id,
|
|
156
|
+
client_id=session_client.client_id,
|
|
157
|
+
has_stream=session_client.has_active_stream,
|
|
158
|
+
has_handle=bool(session_client.active_stream_handle),
|
|
159
|
+
idle_seconds=session_client.metrics.idle_seconds,
|
|
160
|
+
message="Session has active stream/handle, checking if cleanup needed",
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# Check timeout types based on proper message lifecycle timing
|
|
164
|
+
# - No SystemMessage received within configured timeout (first chunk timeout) -> terminate session
|
|
165
|
+
# - SystemMessage received but no activity for configured timeout (ongoing timeout) -> interrupt stream
|
|
166
|
+
# - Never check for completed streams (ResultMessage received)
|
|
167
|
+
handle = session_client.active_stream_handle
|
|
168
|
+
if handle is not None:
|
|
169
|
+
is_first_chunk_timeout = handle.is_first_chunk_timeout()
|
|
170
|
+
is_ongoing_timeout = handle.is_ongoing_timeout()
|
|
171
|
+
else:
|
|
172
|
+
# Handle was cleared by another thread, no timeout checks needed
|
|
173
|
+
is_first_chunk_timeout = False
|
|
174
|
+
is_ongoing_timeout = False
|
|
175
|
+
|
|
176
|
+
if session_client.active_stream_handle and (
|
|
177
|
+
is_first_chunk_timeout or is_ongoing_timeout
|
|
178
|
+
):
|
|
179
|
+
old_handle_id = session_client.active_stream_handle.handle_id
|
|
180
|
+
|
|
181
|
+
if is_first_chunk_timeout:
|
|
182
|
+
# First chunk timeout indicates connection issue - terminate session client
|
|
183
|
+
logger.warning(
|
|
184
|
+
"session_pool_first_chunk_timeout",
|
|
185
|
+
session_id=session_id,
|
|
186
|
+
old_handle_id=old_handle_id,
|
|
187
|
+
idle_seconds=session_client.active_stream_handle.idle_seconds,
|
|
188
|
+
message=f"No first chunk received within {self.config.stream_first_chunk_timeout} seconds, terminating session client",
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# Remove the entire session - connection is likely broken
|
|
192
|
+
await self._remove_session_unlocked(session_id)
|
|
193
|
+
session_client = await self._create_session_unlocked(
|
|
194
|
+
session_id, options
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
elif is_ongoing_timeout:
|
|
198
|
+
# Ongoing timeout - interrupt the stream but keep session
|
|
199
|
+
logger.info(
|
|
200
|
+
"session_pool_interrupting_ongoing_timeout",
|
|
201
|
+
session_id=session_id,
|
|
202
|
+
old_handle_id=old_handle_id,
|
|
203
|
+
idle_seconds=session_client.active_stream_handle.idle_seconds,
|
|
204
|
+
has_first_chunk=session_client.active_stream_handle.has_first_chunk,
|
|
205
|
+
is_completed=session_client.active_stream_handle.is_completed,
|
|
206
|
+
message=f"Stream idle for {self.config.stream_ongoing_timeout}+ seconds, interrupting stream but keeping session",
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
try:
|
|
210
|
+
# Interrupt the old stream handle to stop its worker
|
|
211
|
+
interrupted = await session_client.active_stream_handle.interrupt()
|
|
212
|
+
if interrupted:
|
|
213
|
+
logger.info(
|
|
214
|
+
"session_pool_interrupted_ongoing_timeout",
|
|
215
|
+
session_id=session_id,
|
|
216
|
+
old_handle_id=old_handle_id,
|
|
217
|
+
message="Successfully interrupted ongoing timeout stream",
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
logger.debug(
|
|
221
|
+
"session_pool_interrupt_ongoing_not_needed",
|
|
222
|
+
session_id=session_id,
|
|
223
|
+
old_handle_id=old_handle_id,
|
|
224
|
+
message="Ongoing timeout stream was already completed",
|
|
225
|
+
)
|
|
226
|
+
except Exception as e:
|
|
227
|
+
logger.warning(
|
|
228
|
+
"session_pool_interrupt_ongoing_failed",
|
|
229
|
+
session_id=session_id,
|
|
230
|
+
old_handle_id=old_handle_id,
|
|
231
|
+
error=str(e),
|
|
232
|
+
error_type=type(e).__name__,
|
|
233
|
+
message="Failed to interrupt ongoing timeout stream, clearing anyway",
|
|
234
|
+
)
|
|
235
|
+
finally:
|
|
236
|
+
# Always clear the handle after interrupt attempt
|
|
237
|
+
session_client.active_stream_handle = None
|
|
238
|
+
session_client.has_active_stream = False
|
|
239
|
+
elif session_client.active_stream_handle and not (
|
|
240
|
+
is_first_chunk_timeout or is_ongoing_timeout
|
|
241
|
+
):
|
|
242
|
+
# Stream is recent, likely from a previous request that just finished
|
|
243
|
+
# Just clear the handle without interrupting to allow immediate reuse
|
|
244
|
+
logger.debug(
|
|
245
|
+
"session_pool_clearing_recent_stream",
|
|
246
|
+
session_id=session_id,
|
|
247
|
+
old_handle_id=session_client.active_stream_handle.handle_id,
|
|
248
|
+
idle_seconds=session_client.active_stream_handle.idle_seconds,
|
|
249
|
+
has_first_chunk=session_client.active_stream_handle.has_first_chunk,
|
|
250
|
+
is_completed=session_client.active_stream_handle.is_completed,
|
|
251
|
+
message="Clearing recent stream handle for immediate reuse",
|
|
252
|
+
)
|
|
253
|
+
session_client.active_stream_handle = None
|
|
254
|
+
session_client.has_active_stream = False
|
|
255
|
+
else:
|
|
256
|
+
# No handle but has_active_stream flag is set, just clear the flag
|
|
257
|
+
session_client.has_active_stream = False
|
|
258
|
+
|
|
259
|
+
logger.debug(
|
|
260
|
+
"session_pool_stream_cleared",
|
|
261
|
+
session_id=session_id,
|
|
262
|
+
client_id=session_client.client_id,
|
|
263
|
+
was_interrupted=(is_first_chunk_timeout or is_ongoing_timeout),
|
|
264
|
+
was_recent=not (is_first_chunk_timeout or is_ongoing_timeout),
|
|
265
|
+
was_first_chunk_timeout=is_first_chunk_timeout,
|
|
266
|
+
was_ongoing_timeout=is_ongoing_timeout,
|
|
267
|
+
message="Stream state cleared, session ready for reuse",
|
|
268
|
+
)
|
|
269
|
+
# Check if session is still valid
|
|
270
|
+
elif session_client.is_expired():
|
|
271
|
+
logger.debug("session_expired", session_id=session_id)
|
|
272
|
+
await self._remove_session_unlocked(session_id)
|
|
273
|
+
session_client = await self._create_session_unlocked(
|
|
274
|
+
session_id, options
|
|
275
|
+
)
|
|
276
|
+
elif (
|
|
277
|
+
not await session_client.is_healthy()
|
|
278
|
+
and self.config.connection_recovery
|
|
279
|
+
):
|
|
280
|
+
logger.debug("session_unhealthy_recovering", session_id=session_id)
|
|
281
|
+
await session_client.connect()
|
|
282
|
+
# Mark session as reused since we're recovering an existing session
|
|
283
|
+
session_client.mark_as_reused()
|
|
284
|
+
else:
|
|
285
|
+
logger.debug(
|
|
286
|
+
"session_pool_reusing_healthy_session",
|
|
287
|
+
session_id=session_id,
|
|
288
|
+
client_id=session_client.client_id,
|
|
289
|
+
)
|
|
290
|
+
# Mark session as reused
|
|
291
|
+
session_client.mark_as_reused()
|
|
292
|
+
else:
|
|
293
|
+
logger.debug("session_pool_creating_new_session", session_id=session_id)
|
|
294
|
+
session_client = await self._create_session_unlocked(
|
|
295
|
+
session_id, options
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
# Ensure session is connected before returning (inside lock to prevent race conditions)
|
|
299
|
+
if not await session_client.ensure_connected():
|
|
300
|
+
logger.error(
|
|
301
|
+
"session_pool_connection_failed",
|
|
302
|
+
session_id=session_id,
|
|
303
|
+
)
|
|
304
|
+
raise ServiceUnavailableError(
|
|
305
|
+
f"Failed to establish session connection: {session_id}"
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
logger.debug(
|
|
309
|
+
"session_pool_get_client_complete",
|
|
310
|
+
session_id=session_id,
|
|
311
|
+
client_id=session_client.client_id,
|
|
312
|
+
session_status=session_client.status,
|
|
313
|
+
session_age_seconds=session_client.metrics.age_seconds,
|
|
314
|
+
session_message_count=session_client.metrics.message_count,
|
|
315
|
+
)
|
|
316
|
+
return session_client
|
|
317
|
+
|
|
318
|
+
async def _create_session(
|
|
319
|
+
self, session_id: str, options: ClaudeCodeOptions
|
|
320
|
+
) -> SessionClient:
|
|
321
|
+
"""Create a new session context (acquires lock)."""
|
|
322
|
+
async with self._lock:
|
|
323
|
+
return await self._create_session_unlocked(session_id, options)
|
|
324
|
+
|
|
325
|
+
async def _create_session_unlocked(
|
|
326
|
+
self, session_id: str, options: ClaudeCodeOptions
|
|
327
|
+
) -> SessionClient:
|
|
328
|
+
"""Create a new session context (requires lock to be held)."""
|
|
329
|
+
session_client = SessionClient(
|
|
330
|
+
session_id=session_id, options=options, ttl_seconds=self.config.session_ttl
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
# Start connection in background
|
|
334
|
+
connection_task = session_client.connect_background()
|
|
335
|
+
|
|
336
|
+
# Add to sessions immediately (will connect in background)
|
|
337
|
+
self.sessions[session_id] = session_client
|
|
338
|
+
|
|
339
|
+
# Optionally wait for connection to verify it works
|
|
340
|
+
# For now, we'll let it connect in background and check on first use
|
|
341
|
+
logger.debug(
|
|
342
|
+
"session_connecting_background",
|
|
343
|
+
session_id=session_id,
|
|
344
|
+
client_id=session_client.client_id,
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
logger.debug(
|
|
348
|
+
"session_created",
|
|
349
|
+
session_id=session_id,
|
|
350
|
+
client_id=session_client.client_id,
|
|
351
|
+
total_sessions=len(self.sessions),
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
return session_client
|
|
355
|
+
|
|
356
|
+
async def _remove_session(self, session_id: str) -> None:
|
|
357
|
+
"""Remove and cleanup a session (acquires lock)."""
|
|
358
|
+
async with self._lock:
|
|
359
|
+
await self._remove_session_unlocked(session_id)
|
|
360
|
+
|
|
361
|
+
async def _remove_session_unlocked(self, session_id: str) -> None:
|
|
362
|
+
"""Remove and cleanup a session (requires lock to be held)."""
|
|
363
|
+
if session_id not in self.sessions:
|
|
364
|
+
return
|
|
365
|
+
|
|
366
|
+
session_client = self.sessions.pop(session_id)
|
|
367
|
+
await session_client.disconnect()
|
|
368
|
+
|
|
369
|
+
logger.debug(
|
|
370
|
+
"session_removed",
|
|
371
|
+
session_id=session_id,
|
|
372
|
+
total_sessions=len(self.sessions),
|
|
373
|
+
age_seconds=session_client.metrics.age_seconds,
|
|
374
|
+
message_count=session_client.metrics.message_count,
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
async def _cleanup_loop(self) -> None:
|
|
378
|
+
"""Background task to cleanup expired sessions."""
|
|
379
|
+
while not self._shutdown:
|
|
380
|
+
try:
|
|
381
|
+
await asyncio.sleep(self.config.cleanup_interval)
|
|
382
|
+
await self._cleanup_sessions()
|
|
383
|
+
except asyncio.CancelledError:
|
|
384
|
+
break
|
|
385
|
+
except Exception as e:
|
|
386
|
+
logger.error("session_cleanup_error", error=str(e), exc_info=True)
|
|
387
|
+
|
|
388
|
+
async def _cleanup_sessions(self) -> None:
|
|
389
|
+
"""Remove expired, idle, and stuck sessions."""
|
|
390
|
+
sessions_to_remove = []
|
|
391
|
+
stuck_sessions = []
|
|
392
|
+
|
|
393
|
+
# Get a snapshot of sessions to check
|
|
394
|
+
async with self._lock:
|
|
395
|
+
sessions_snapshot = list(self.sessions.items())
|
|
396
|
+
|
|
397
|
+
# Check sessions outside the lock to avoid holding it too long
|
|
398
|
+
for session_id, session_client in sessions_snapshot:
|
|
399
|
+
# Check if session is potentially stuck (active too long)
|
|
400
|
+
is_stuck = (
|
|
401
|
+
session_client.status.value == "active"
|
|
402
|
+
and session_client.metrics.idle_seconds < 10
|
|
403
|
+
and session_client.metrics.age_seconds > 900 # 15 minutes
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
if is_stuck:
|
|
407
|
+
stuck_sessions.append(session_id)
|
|
408
|
+
logger.warning(
|
|
409
|
+
"session_stuck_detected",
|
|
410
|
+
session_id=session_id,
|
|
411
|
+
age_seconds=session_client.metrics.age_seconds,
|
|
412
|
+
idle_seconds=session_client.metrics.idle_seconds,
|
|
413
|
+
message_count=session_client.metrics.message_count,
|
|
414
|
+
message="Session appears stuck, will interrupt and cleanup",
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
# Try to interrupt stuck session before cleanup
|
|
418
|
+
try:
|
|
419
|
+
await session_client.interrupt()
|
|
420
|
+
except Exception as e:
|
|
421
|
+
logger.warning(
|
|
422
|
+
"session_stuck_interrupt_failed",
|
|
423
|
+
session_id=session_id,
|
|
424
|
+
error=str(e),
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
# Check normal cleanup criteria (including stuck sessions)
|
|
428
|
+
if session_client.should_cleanup(
|
|
429
|
+
self.config.idle_threshold, stuck_threshold=900
|
|
430
|
+
):
|
|
431
|
+
sessions_to_remove.append(session_id)
|
|
432
|
+
|
|
433
|
+
if sessions_to_remove:
|
|
434
|
+
logger.debug(
|
|
435
|
+
"session_cleanup_starting",
|
|
436
|
+
sessions_to_remove=len(sessions_to_remove),
|
|
437
|
+
stuck_sessions=len(stuck_sessions),
|
|
438
|
+
total_sessions=len(self.sessions),
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
for session_id in sessions_to_remove:
|
|
442
|
+
await self._remove_session(session_id)
|
|
443
|
+
|
|
444
|
+
async def interrupt_session(self, session_id: str) -> bool:
|
|
445
|
+
"""Interrupt a specific session due to client disconnection.
|
|
446
|
+
|
|
447
|
+
Args:
|
|
448
|
+
session_id: The session ID to interrupt
|
|
449
|
+
|
|
450
|
+
Returns:
|
|
451
|
+
True if session was found and interrupted, False otherwise
|
|
452
|
+
"""
|
|
453
|
+
async with self._lock:
|
|
454
|
+
if session_id not in self.sessions:
|
|
455
|
+
logger.warning("session_not_found", session_id=session_id)
|
|
456
|
+
return False
|
|
457
|
+
|
|
458
|
+
session_client = self.sessions[session_id]
|
|
459
|
+
|
|
460
|
+
try:
|
|
461
|
+
# Interrupt the session with 30-second timeout (allows for longer SDK response times)
|
|
462
|
+
await asyncio.wait_for(session_client.interrupt(), timeout=30.0)
|
|
463
|
+
logger.debug("session_interrupted", session_id=session_id)
|
|
464
|
+
|
|
465
|
+
# Remove the session to prevent reuse
|
|
466
|
+
await self._remove_session(session_id)
|
|
467
|
+
return True
|
|
468
|
+
|
|
469
|
+
except (TimeoutError, Exception) as e:
|
|
470
|
+
logger.error(
|
|
471
|
+
"session_interrupt_failed",
|
|
472
|
+
session_id=session_id,
|
|
473
|
+
error=str(e)
|
|
474
|
+
if not isinstance(e, TimeoutError)
|
|
475
|
+
else "Timeout after 30s",
|
|
476
|
+
)
|
|
477
|
+
# Always remove the session on failure
|
|
478
|
+
with contextlib.suppress(Exception):
|
|
479
|
+
await self._remove_session(session_id)
|
|
480
|
+
return False
|
|
481
|
+
|
|
482
|
+
async def interrupt_all_sessions(self) -> int:
|
|
483
|
+
"""Interrupt all active sessions (stops ongoing operations).
|
|
484
|
+
|
|
485
|
+
Returns:
|
|
486
|
+
Number of sessions that were interrupted
|
|
487
|
+
"""
|
|
488
|
+
# Get snapshot of all sessions
|
|
489
|
+
async with self._lock:
|
|
490
|
+
session_items = list(self.sessions.items())
|
|
491
|
+
|
|
492
|
+
interrupted_count = 0
|
|
493
|
+
|
|
494
|
+
logger.debug(
|
|
495
|
+
"session_interrupt_all_requested",
|
|
496
|
+
total_sessions=len(session_items),
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
for session_id, session_client in session_items:
|
|
500
|
+
try:
|
|
501
|
+
await session_client.interrupt()
|
|
502
|
+
interrupted_count += 1
|
|
503
|
+
except Exception as e:
|
|
504
|
+
logger.error(
|
|
505
|
+
"session_interrupt_failed_during_all",
|
|
506
|
+
session_id=session_id,
|
|
507
|
+
error=str(e),
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
logger.debug(
|
|
511
|
+
"session_interrupt_all_completed",
|
|
512
|
+
interrupted_count=interrupted_count,
|
|
513
|
+
total_requested=len(session_items),
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
return interrupted_count
|
|
517
|
+
|
|
518
|
+
async def has_session(self, session_id: str) -> bool:
|
|
519
|
+
"""Check if a session exists in the pool.
|
|
520
|
+
|
|
521
|
+
Args:
|
|
522
|
+
session_id: The session ID to check
|
|
523
|
+
|
|
524
|
+
Returns:
|
|
525
|
+
True if session exists, False otherwise
|
|
526
|
+
"""
|
|
527
|
+
async with self._lock:
|
|
528
|
+
return session_id in self.sessions
|
|
529
|
+
|
|
530
|
+
async def get_stats(self) -> dict[str, Any]:
|
|
531
|
+
"""Get session pool statistics."""
|
|
532
|
+
async with self._lock:
|
|
533
|
+
sessions_list = list(self.sessions.values())
|
|
534
|
+
total_sessions = len(self.sessions)
|
|
535
|
+
|
|
536
|
+
active_sessions = sum(
|
|
537
|
+
1 for s in sessions_list if s.status == SessionStatus.ACTIVE
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
total_messages = sum(s.metrics.message_count for s in sessions_list)
|
|
541
|
+
|
|
542
|
+
return {
|
|
543
|
+
"enabled": self.config.enabled,
|
|
544
|
+
"total_sessions": total_sessions,
|
|
545
|
+
"active_sessions": active_sessions,
|
|
546
|
+
"max_sessions": self.config.max_sessions,
|
|
547
|
+
"total_messages": total_messages,
|
|
548
|
+
"session_ttl": self.config.session_ttl,
|
|
549
|
+
"cleanup_interval": self.config.cleanup_interval,
|
|
550
|
+
}
|