ripperdoc 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. ripperdoc/__init__.py +1 -1
  2. ripperdoc/cli/cli.py +33 -115
  3. ripperdoc/cli/commands/__init__.py +70 -6
  4. ripperdoc/cli/commands/agents_cmd.py +6 -3
  5. ripperdoc/cli/commands/clear_cmd.py +1 -4
  6. ripperdoc/cli/commands/config_cmd.py +1 -1
  7. ripperdoc/cli/commands/context_cmd.py +3 -2
  8. ripperdoc/cli/commands/doctor_cmd.py +18 -4
  9. ripperdoc/cli/commands/help_cmd.py +11 -1
  10. ripperdoc/cli/commands/hooks_cmd.py +610 -0
  11. ripperdoc/cli/commands/models_cmd.py +26 -9
  12. ripperdoc/cli/commands/permissions_cmd.py +57 -37
  13. ripperdoc/cli/commands/resume_cmd.py +6 -4
  14. ripperdoc/cli/commands/status_cmd.py +4 -4
  15. ripperdoc/cli/commands/tasks_cmd.py +8 -4
  16. ripperdoc/cli/ui/file_mention_completer.py +64 -8
  17. ripperdoc/cli/ui/interrupt_handler.py +3 -4
  18. ripperdoc/cli/ui/message_display.py +5 -3
  19. ripperdoc/cli/ui/panels.py +13 -10
  20. ripperdoc/cli/ui/provider_options.py +247 -0
  21. ripperdoc/cli/ui/rich_ui.py +196 -77
  22. ripperdoc/cli/ui/spinner.py +25 -1
  23. ripperdoc/cli/ui/tool_renderers.py +8 -2
  24. ripperdoc/cli/ui/wizard.py +215 -0
  25. ripperdoc/core/agents.py +9 -3
  26. ripperdoc/core/config.py +49 -12
  27. ripperdoc/core/custom_commands.py +412 -0
  28. ripperdoc/core/default_tools.py +11 -2
  29. ripperdoc/core/hooks/__init__.py +99 -0
  30. ripperdoc/core/hooks/config.py +301 -0
  31. ripperdoc/core/hooks/events.py +535 -0
  32. ripperdoc/core/hooks/executor.py +496 -0
  33. ripperdoc/core/hooks/integration.py +344 -0
  34. ripperdoc/core/hooks/manager.py +745 -0
  35. ripperdoc/core/permissions.py +40 -8
  36. ripperdoc/core/providers/anthropic.py +548 -68
  37. ripperdoc/core/providers/gemini.py +70 -5
  38. ripperdoc/core/providers/openai.py +60 -5
  39. ripperdoc/core/query.py +140 -39
  40. ripperdoc/core/query_utils.py +2 -0
  41. ripperdoc/core/skills.py +9 -3
  42. ripperdoc/core/system_prompt.py +4 -2
  43. ripperdoc/core/tool.py +9 -5
  44. ripperdoc/sdk/client.py +2 -2
  45. ripperdoc/tools/ask_user_question_tool.py +5 -3
  46. ripperdoc/tools/background_shell.py +2 -1
  47. ripperdoc/tools/bash_output_tool.py +1 -1
  48. ripperdoc/tools/bash_tool.py +30 -20
  49. ripperdoc/tools/dynamic_mcp_tool.py +29 -8
  50. ripperdoc/tools/enter_plan_mode_tool.py +1 -1
  51. ripperdoc/tools/exit_plan_mode_tool.py +1 -1
  52. ripperdoc/tools/file_edit_tool.py +8 -4
  53. ripperdoc/tools/file_read_tool.py +9 -5
  54. ripperdoc/tools/file_write_tool.py +9 -5
  55. ripperdoc/tools/glob_tool.py +3 -2
  56. ripperdoc/tools/grep_tool.py +3 -2
  57. ripperdoc/tools/kill_bash_tool.py +1 -1
  58. ripperdoc/tools/ls_tool.py +1 -1
  59. ripperdoc/tools/mcp_tools.py +13 -10
  60. ripperdoc/tools/multi_edit_tool.py +8 -7
  61. ripperdoc/tools/notebook_edit_tool.py +7 -4
  62. ripperdoc/tools/skill_tool.py +1 -1
  63. ripperdoc/tools/task_tool.py +5 -4
  64. ripperdoc/tools/todo_tool.py +2 -2
  65. ripperdoc/tools/tool_search_tool.py +3 -2
  66. ripperdoc/utils/conversation_compaction.py +11 -7
  67. ripperdoc/utils/file_watch.py +8 -2
  68. ripperdoc/utils/json_utils.py +2 -1
  69. ripperdoc/utils/mcp.py +11 -3
  70. ripperdoc/utils/memory.py +4 -2
  71. ripperdoc/utils/message_compaction.py +21 -7
  72. ripperdoc/utils/message_formatting.py +11 -7
  73. ripperdoc/utils/messages.py +105 -66
  74. ripperdoc/utils/path_ignore.py +38 -12
  75. ripperdoc/utils/permissions/path_validation_utils.py +2 -1
  76. ripperdoc/utils/permissions/shell_command_validation.py +427 -91
  77. ripperdoc/utils/safe_get_cwd.py +2 -1
  78. ripperdoc/utils/session_history.py +13 -6
  79. ripperdoc/utils/todo.py +2 -1
  80. ripperdoc/utils/token_estimation.py +6 -1
  81. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/METADATA +24 -3
  82. ripperdoc-0.2.9.dist-info/RECORD +123 -0
  83. ripperdoc-0.2.7.dist-info/RECORD +0 -113
  84. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/WHEEL +0 -0
  85. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/entry_points.txt +0 -0
  86. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/licenses/LICENSE +0 -0
  87. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/top_level.txt +0 -0
ripperdoc/core/providers/anthropic.py
@@ -3,10 +3,13 @@
 from __future__ import annotations
 
 import asyncio
+import json
 import time
 from typing import Any, Awaitable, Callable, Dict, List, Optional
+from uuid import uuid4
 
 import anthropic
+import httpx
 from anthropic import AsyncAnthropic
 
 from ripperdoc.core.config import ModelProfile
@@ -15,13 +18,11 @@ from ripperdoc.core.providers.base import (
     ProviderClient,
     ProviderResponse,
     call_with_timeout_and_retries,
-    iter_with_timeout,
     sanitize_tool_history,
 )
 from ripperdoc.core.query_utils import (
     anthropic_usage_tokens,
     build_anthropic_tool_schemas,
-    content_blocks_from_anthropic_response,
     estimate_cost_usd,
 )
 from ripperdoc.core.tool import Tool
@@ -63,8 +64,99 @@ def _classify_anthropic_error(exc: Exception) -> tuple[str, str]:
     return "unknown_error", f"Unexpected error ({exc_type}): {exc_msg}"
 
 
+def _content_blocks_from_stream_state(
+    collected_text: List[str],
+    collected_thinking: List[str],
+    collected_tool_calls: Dict[int, Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Build content blocks from accumulated stream state."""
+    blocks: List[Dict[str, Any]] = []
+
+    # Add thinking block if present
+    if collected_thinking:
+        blocks.append(
+            {
+                "type": "thinking",
+                "thinking": "".join(collected_thinking),
+            }
+        )
+
+    # Add text block if present
+    if collected_text:
+        blocks.append(
+            {
+                "type": "text",
+                "text": "".join(collected_text),
+            }
+        )
+
+    # Add tool_use blocks
+    for idx in sorted(collected_tool_calls.keys()):
+        call = collected_tool_calls[idx]
+        name = call.get("name")
+        if not name:
+            continue
+        tool_use_id = call.get("id") or str(uuid4())
+        blocks.append(
+            {
+                "type": "tool_use",
+                "tool_use_id": tool_use_id,
+                "name": name,
+                "input": call.get("input", {}),
+            }
+        )
+
+    return blocks
+
+
+def _content_blocks_from_response(response: Any) -> List[Dict[str, Any]]:
+    """Normalize Anthropic response content to our internal block format."""
+    blocks: List[Dict[str, Any]] = []
+    for block in getattr(response, "content", []) or []:
+        btype = getattr(block, "type", None)
+        if btype == "text":
+            blocks.append({"type": "text", "text": getattr(block, "text", "")})
+        elif btype == "thinking":
+            blocks.append(
+                {
+                    "type": "thinking",
+                    "thinking": getattr(block, "thinking", None) or "",
+                    "signature": getattr(block, "signature", None),
+                }
+            )
+        elif btype == "redacted_thinking":
+            blocks.append(
+                {
+                    "type": "redacted_thinking",
+                    "data": getattr(block, "data", None),
+                    "signature": getattr(block, "signature", None),
+                }
+            )
+        elif btype == "tool_use":
+            raw_input = getattr(block, "input", {}) or {}
+            blocks.append(
+                {
+                    "type": "tool_use",
+                    "tool_use_id": getattr(block, "id", None) or str(uuid4()),
+                    "name": getattr(block, "name", None),
+                    "input": raw_input if isinstance(raw_input, dict) else {},
+                }
+            )
+    return blocks
+
+
 class AnthropicClient(ProviderClient):
-    """Anthropic client with streaming and non-streaming support."""
+    """Anthropic client with streaming and non-streaming support.
+
+    Streaming mode (default):
+    - Uses event-based streaming to capture both thinking and text tokens
+    - Timeout applies per-token (chunk), not to the entire request
+    - Thinking tokens are streamed in real-time via progress_callback
+
+    Non-streaming mode:
+    - Makes a single blocking request
+    - Timeout applies to the entire request
+    """
 
     def __init__(self, client_factory: Optional[Callable[[], Awaitable[AsyncAnthropic]]] = None):
         self._client_factory = client_factory
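
Note: the two helpers added above normalize provider output into a single internal block shape. An illustrative sketch of the resulting structure (values are hypothetical, not taken from the package):

blocks = [
    {"type": "thinking", "thinking": "chain of thought...", "signature": None},
    {"type": "text", "text": "final answer"},
    {"type": "tool_use", "tool_use_id": "toolu_123", "name": "bash", "input": {"command": "ls"}},
]

A tool_use block falls back to an empty input dict when its accumulated JSON cannot be parsed, and a tool_use_id is minted via uuid4() when the provider supplies none.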
@@ -109,6 +201,15 @@ class AnthropicClient(ProviderClient):
         except Exception as exc:
             duration_ms = (time.time() - start_time) * 1000
             error_code, error_message = _classify_anthropic_error(exc)
+            logger.debug(
+                "[anthropic_client] Exception details",
+                extra={
+                    "model": model_profile.model,
+                    "exception_type": type(exc).__name__,
+                    "exception_str": str(exc),
+                    "error_code": error_code,
+                },
+            )
             logger.error(
                 "[anthropic_client] API call failed",
                 extra={
@@ -141,95 +242,115 @@ class AnthropicClient(ProviderClient):
     ) -> ProviderResponse:
         """Internal implementation of call, may raise exceptions."""
         tool_schemas = await build_anthropic_tool_schemas(tools)
-        collected_text: List[str] = []
-        reasoning_parts: List[str] = []
         response_metadata: Dict[str, Any] = {}
 
-        anthropic_kwargs = {"base_url": model_profile.api_base}
+        logger.debug(
+            "[anthropic_client] Preparing request",
+            extra={
+                "model": model_profile.model,
+                "tool_mode": tool_mode,
+                "stream": stream,
+                "max_thinking_tokens": max_thinking_tokens,
+                "num_tools": len(tool_schemas),
+            },
+        )
+
+        anthropic_kwargs: Dict[str, Any] = {}
+        if model_profile.api_base:
+            anthropic_kwargs["base_url"] = model_profile.api_base
         if model_profile.api_key:
             anthropic_kwargs["api_key"] = model_profile.api_key
         auth_token = getattr(model_profile, "auth_token", None)
         if auth_token:
             anthropic_kwargs["auth_token"] = auth_token
 
+        # Set timeout for the Anthropic SDK client
+        # For streaming, we want a long timeout since models may take time to start responding
+        # httpx.Timeout: (connect, read, write, pool)
+        if stream:
+            # For streaming: long read timeout, reasonable connect timeout
+            # The read timeout applies to waiting for each chunk from the server
+            timeout_config = httpx.Timeout(
+                connect=60.0,  # 60 seconds to establish connection
+                read=600.0,  # 10 minutes to wait for each chunk (model may be thinking)
+                write=60.0,  # 60 seconds to send request
+                pool=60.0,  # 60 seconds to get connection from pool
+            )
+            anthropic_kwargs["timeout"] = timeout_config
+        elif request_timeout and request_timeout > 0:
+            # For non-streaming: use the provided timeout
+            anthropic_kwargs["timeout"] = request_timeout
+
         normalized_messages = sanitize_tool_history(list(normalized_messages))
 
         thinking_payload: Optional[Dict[str, Any]] = None
         if max_thinking_tokens > 0:
             thinking_payload = {"type": "enabled", "budget_tokens": max_thinking_tokens}
 
-        async with await self._client(anthropic_kwargs) as client:
+        # Build common request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": model_profile.model,
+            "max_tokens": model_profile.max_tokens,
+            "system": system_prompt,
+            "messages": normalized_messages,
+            "temperature": model_profile.temperature,
+        }
+        if tool_schemas:
+            request_kwargs["tools"] = tool_schemas
+        if thinking_payload:
+            request_kwargs["thinking"] = thinking_payload
 
-            async def _stream_request() -> Any:
-                stream_cm = client.messages.stream(
-                    model=model_profile.model,
-                    max_tokens=model_profile.max_tokens,
-                    system=system_prompt,
-                    messages=normalized_messages,  # type: ignore[arg-type]
-                    tools=tool_schemas if tool_schemas else None,  # type: ignore
-                    temperature=model_profile.temperature,
-                    thinking=thinking_payload,  # type: ignore[arg-type]
+        logger.debug(
+            "[anthropic_client] Request parameters",
+            extra={
+                "model": model_profile.model,
+                "request_kwargs": json.dumps(
+                    {k: v for k, v in request_kwargs.items() if k != "messages"},
+                    ensure_ascii=False,
+                    default=str,
+                )[:1000],
+                "thinking_payload": json.dumps(thinking_payload, ensure_ascii=False)
+                if thinking_payload
+                else None,
+            },
+        )
+
+        async with await self._client(anthropic_kwargs) as client:
+            if stream:
+                # Streaming mode: use event-based streaming with per-token timeout
+                content_blocks, usage_tokens = await self._stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    progress_callback=progress_callback,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-                stream_resp = (
-                    await asyncio.wait_for(stream_cm.__aenter__(), timeout=request_timeout)
-                    if request_timeout and request_timeout > 0
-                    else await stream_cm.__aenter__()
+            else:
+                # Non-streaming mode: single request with overall timeout
+                content_blocks, usage_tokens = await self._non_stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-                try:
-                    async for text in iter_with_timeout(stream_resp.text_stream, request_timeout):
-                        if text:
-                            collected_text.append(text)
-                        if progress_callback:
-                            try:
-                                await progress_callback(text)
-                            except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
-                                logger.warning(
-                                    "[anthropic_client] Stream callback failed: %s: %s",
-                                    type(cb_exc).__name__, cb_exc,
-                                )
-                    getter = getattr(stream_resp, "get_final_response", None) or getattr(
-                        stream_resp, "get_final_message", None
-                    )
-                    if getter:
-                        return await getter()
-                    return None
-                finally:
-                    await stream_cm.__aexit__(None, None, None)
-
-            async def _non_stream_request() -> Any:
-                return await client.messages.create(
-                    model=model_profile.model,
-                    max_tokens=model_profile.max_tokens,
-                    system=system_prompt,
-                    messages=normalized_messages,  # type: ignore[arg-type]
-                    tools=tool_schemas if tool_schemas else None,  # type: ignore
-                    temperature=model_profile.temperature,
-                    thinking=thinking_payload,  # type: ignore[arg-type]
-                )
-
-            timeout_for_call = None if stream else request_timeout
-            response = await call_with_timeout_and_retries(
-                _stream_request if stream else _non_stream_request,
-                timeout_for_call,
-                max_retries,
-            )
 
         duration_ms = (time.time() - start_time) * 1000
-        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
         cost_usd = estimate_cost_usd(model_profile, usage_tokens)
         record_usage(
             model_profile.model, duration_ms=duration_ms, cost_usd=cost_usd, **usage_tokens
         )
 
-        content_blocks = content_blocks_from_anthropic_response(response, tool_mode)
-        for blk in content_blocks:
-            if blk.get("type") == "thinking":
-                thinking_text = blk.get("thinking") or blk.get("text") or ""
-                if thinking_text:
-                    reasoning_parts.append(str(thinking_text))
-        if reasoning_parts:
-            response_metadata["reasoning_content"] = "\n".join(reasoning_parts)
-        # Streaming progress is handled via text_stream; final content retains thinking blocks.
+        logger.debug(
+            "[anthropic_client] Response content blocks",
+            extra={
+                "model": model_profile.model,
+                "content_blocks": json.dumps(content_blocks, ensure_ascii=False)[:1000],
+                "usage_tokens": json.dumps(usage_tokens, ensure_ascii=False),
+                "metadata": json.dumps(response_metadata, ensure_ascii=False)[:500],
+            },
+        )
 
         logger.info(
             "[anthropic_client] Response received",
@@ -238,6 +359,8 @@ class AnthropicClient(ProviderClient):
                 "duration_ms": round(duration_ms, 2),
                 "tool_mode": tool_mode,
                 "tool_schemas": len(tool_schemas),
+                "stream": stream,
+                "content_blocks": len(content_blocks),
             },
         )
 
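
Note: the timeout strategy introduced in _call_impl above can be reproduced standalone. A minimal sketch, assuming only the anthropic SDK and httpx (the api_key value is a placeholder):

import httpx
from anthropic import AsyncAnthropic

# connect/write/pool stay short; read is long because, for SSE streaming,
# the read timeout bounds the wait for each individual chunk.
client = AsyncAnthropic(
    api_key="sk-placeholder",  # hypothetical key
    timeout=httpx.Timeout(connect=60.0, read=600.0, write=60.0, pool=60.0),
)

Passing an httpx.Timeout to the client constructor applies it to every request the SDK makes, which is why the diff sets it once in anthropic_kwargs rather than per call.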
@@ -248,3 +371,360 @@ class AnthropicClient(ProviderClient):
             duration_ms=duration_ms,
             metadata=response_metadata,
         )
+
+    async def _stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        progress_callback: Optional[ProgressCallback],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a streaming request with per-token timeout.
+
+        Uses Anthropic's event-based streaming API to capture:
+        - thinking tokens (streamed in real-time)
+        - text tokens (streamed in real-time)
+        - tool_use blocks
+
+        In streaming mode:
+        - Connection timeout uses request_timeout
+        - Per-event timeout is disabled (None) because the model may take
+          a long time to generate the first token (especially during thinking)
+        - Once streaming starts, events should flow continuously
+        """
+        collected_text: List[str] = []
+        collected_thinking: List[str] = []
+        collected_tool_calls: Dict[int, Dict[str, Any]] = {}
+        usage_tokens: Dict[str, int] = {}
+
+        # Use mutable containers to track state across event handling
+        current_block_index_ref: List[int] = [-1]
+        current_block_type_ref: List[Optional[str]] = [None]
+
+        event_count = 0
+        message_stop_received = False
+
+        async def _do_stream() -> None:
+            nonlocal event_count, message_stop_received
+            event_count = 0
+            message_stop_received = False
+
+            logger.debug(
+                "[anthropic_client] Initiating stream request",
+                extra={
+                    "model": request_kwargs.get("model"),
+                },
+            )
+
+            # Create the stream - this initiates the connection
+            stream = client.messages.stream(**request_kwargs)
+
+            # Enter the stream context
+            stream_manager = await stream.__aenter__()
+
+            try:
+                # Iterate over events
+                # Some API proxies don't properly close the stream after message_stop,
+                # so we break out of the loop when we receive message_stop
+                async for event in stream_manager:
+                    event_count += 1
+                    event_type = getattr(event, "type", "unknown")
+
+                    await self._handle_stream_event(
+                        event=event,
+                        collected_text=collected_text,
+                        collected_thinking=collected_thinking,
+                        collected_tool_calls=collected_tool_calls,
+                        usage_tokens=usage_tokens,
+                        progress_callback=progress_callback,
+                        current_block_index_ref=current_block_index_ref,
+                        current_block_type_ref=current_block_type_ref,
+                    )
+
+                    # Check if we received message_stop - break out of loop
+                    # Some API proxies don't properly close the SSE stream
+                    if event_type == "message_stop":
+                        message_stop_received = True
+                        break
+
+            except Exception:
+                raise
+            finally:
+                try:
+                    # Use timeout for __aexit__ in case the stream doesn't close properly
+                    await asyncio.wait_for(stream.__aexit__(None, None, None), timeout=5.0)
+                except asyncio.TimeoutError:
+                    pass  # Stream didn't close properly, continue anyway
+                except Exception:
+                    pass  # Ignore __aexit__ errors
+
+        # For streaming, we don't use call_with_timeout_and_retries on the whole operation
+        # Instead, timeout is applied per-event inside _iter_events_with_timeout
+        # But we still want retries for connection failures
+        attempts = max(0, int(max_retries)) + 1
+        last_error: Optional[Exception] = None
+
+        for attempt in range(1, attempts + 1):
+            try:
+                # Reset state for retry
+                collected_text.clear()
+                collected_thinking.clear()
+                collected_tool_calls.clear()
+                usage_tokens.clear()
+                current_block_index_ref[0] = -1
+                current_block_type_ref[0] = None
+
+                await _do_stream()
+                break  # Success
+            except asyncio.TimeoutError as exc:
+                last_error = exc
+                if attempt == attempts:
+                    break
+                delay = 0.5 * (2 ** (attempt - 1))  # Exponential backoff
+                logger.warning(
+                    "[anthropic_client] Stream timed out; retrying",
+                    extra={
+                        "attempt": attempt,
+                        "max_retries": max_retries,
+                        "delay_seconds": delay,
+                    },
+                )
+                await asyncio.sleep(delay)
+            except asyncio.CancelledError:
+                raise
+            except (RuntimeError, ValueError, TypeError, OSError, ConnectionError) as exc:
+                # Non-timeout errors: retry for connection errors only
+                if isinstance(exc, (OSError, ConnectionError)):
+                    last_error = exc
+                    if attempt == attempts:
+                        raise
+                    delay = 0.5 * (2 ** (attempt - 1))
+                    logger.warning(
+                        "[anthropic_client] Connection error; retrying",
+                        extra={
+                            "attempt": attempt,
+                            "error": str(exc),
+                        },
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    raise
+
+        if (
+            last_error
+            and not collected_text
+            and not collected_thinking
+            and not collected_tool_calls
+        ):
+            raise RuntimeError(f"Stream failed after {attempts} attempts") from last_error
+
+        # Store reasoning content in metadata
+        if collected_thinking:
+            response_metadata["reasoning_content"] = "".join(collected_thinking)
+
+        content_blocks = _content_blocks_from_stream_state(
+            collected_text, collected_thinking, collected_tool_calls
+        )
+
+        return content_blocks, usage_tokens
+
+    async def _handle_stream_event(
+        self,
+        *,
+        event: Any,
+        collected_text: List[str],
+        collected_thinking: List[str],
+        collected_tool_calls: Dict[int, Dict[str, Any]],
+        usage_tokens: Dict[str, int],
+        progress_callback: Optional[ProgressCallback],
+        current_block_index_ref: List[int],
+        current_block_type_ref: List[Optional[str]],
+    ) -> None:
+        """Handle a single stream event.
+
+        Supports both standard Anthropic API events and non-standard formats
+        from API proxies like aiping.cn.
+
+        Standard Anthropic events:
+        - message_start, content_block_start, content_block_delta, content_block_stop
+        - message_delta, message_stop
+
+        Non-standard events (aiping.cn style):
+        - thinking (direct thinking content)
+        - text (direct text content)
+        - signature (thinking signature)
+        """
+        event_type = getattr(event, "type", None)
+
+        if event_type == "message_start":
+            # Extract initial usage info if available
+            message = getattr(event, "message", None)
+            if message:
+                usage = getattr(message, "usage", None)
+                if usage:
+                    usage_tokens.update(anthropic_usage_tokens(usage))
+
+        elif event_type == "content_block_start":
+            # New content block starting
+            index = getattr(event, "index", 0)
+            content_block = getattr(event, "content_block", None)
+            if content_block:
+                block_type = getattr(content_block, "type", None)
+                current_block_index_ref[0] = index
+                current_block_type_ref[0] = block_type
+
+                if block_type == "tool_use":
+                    # Initialize tool call state
+                    collected_tool_calls[index] = {
+                        "id": getattr(content_block, "id", None),
+                        "name": getattr(content_block, "name", None),
+                        "input_json": "",
+                        "input": {},
+                    }
+                    # Announce tool start
+                    if progress_callback:
+                        tool_name = getattr(content_block, "name", "unknown")
+                        try:
+                            await progress_callback(f"[tool:{tool_name}]")
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        elif event_type == "content_block_delta":
+            # Content delta within a block
+            index = getattr(event, "index", current_block_index_ref[0])
+            delta = getattr(event, "delta", None)
+            if not delta:
+                return
+
+            delta_type = getattr(delta, "type", None)
+
+            if delta_type == "thinking_delta":
+                # Thinking content delta
+                thinking_text = getattr(delta, "thinking", "")
+                if thinking_text:
+                    collected_thinking.append(thinking_text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(thinking_text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__,
+                                cb_exc,
+                            )
+
+            elif delta_type == "text_delta":
+                # Text content delta
+                text = getattr(delta, "text", "")
+                if text:
+                    collected_text.append(text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__,
+                                cb_exc,
+                            )
+
+            elif delta_type == "input_json_delta":
+                # Tool input JSON delta
+                partial_json = getattr(delta, "partial_json", "")
+                if partial_json and index in collected_tool_calls:
+                    collected_tool_calls[index]["input_json"] += partial_json
+                    if progress_callback:
+                        try:
+                            await progress_callback(partial_json)
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        # ===== Non-standard events from aiping.cn and similar proxies =====
+        # NOTE: aiping.cn sends BOTH standard (content_block_delta) and non-standard
+        # (text, thinking) events. We only process the non-standard events if we
+        # haven't already collected content from standard events in this block.
+        # This is controlled by checking if the standard delta was processed.
+
+        elif event_type == "thinking":
+            # Direct thinking content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "text":
+            # Direct text content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "signature":
+            # Thinking signature (non-standard, aiping.cn style)
+            pass
+
+        # ===== Standard events continued =====
+
+        elif event_type == "content_block_stop":
+            # Content block finished
+            index = getattr(event, "index", current_block_index_ref[0])
+
+            # Parse accumulated JSON for tool calls
+            if index in collected_tool_calls:
+                import json
+
+                json_str = collected_tool_calls[index].get("input_json", "")
+                if json_str:
+                    try:
+                        collected_tool_calls[index]["input"] = json.loads(json_str)
+                    except json.JSONDecodeError:
+                        logger.warning(
+                            "[anthropic_client] Failed to parse tool input JSON",
+                            extra={"json": json_str[:200]},
+                        )
+                        collected_tool_calls[index]["input"] = {}
+
+        elif event_type == "message_delta":
+            # Message-level delta (usually contains usage info at the end)
+            usage = getattr(event, "usage", None)
+            if usage:
+                # Update with final usage - output_tokens comes here
+                usage_tokens["output_tokens"] = getattr(usage, "output_tokens", 0)
+
+        elif event_type == "message_stop":
+            # Message complete
+            pass
+
+        # Unknown event types are silently ignored
+
+    async def _non_stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a non-streaming request with overall timeout."""
+
+        async def _do_request() -> Any:
+            return await client.messages.create(**request_kwargs)
+
+        response = await call_with_timeout_and_retries(
+            _do_request,
+            request_timeout,
+            max_retries,
+        )
+
+        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
+        content_blocks = _content_blocks_from_response(response)
+
+        # Extract reasoning content for metadata
+        for block in content_blocks:
+            if block.get("type") == "thinking":
+                thinking_text = block.get("thinking") or ""
+                if thinking_text:
+                    response_metadata["reasoning_content"] = thinking_text
+                    break
+
+        return content_blocks, usage_tokens
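
Note: for context on the event-based API that _stream_request consumes, a minimal standalone sketch against the public anthropic SDK (model id and prompt are illustrative; assumes ANTHROPIC_API_KEY is set in the environment):

import asyncio
from anthropic import AsyncAnthropic

async def main() -> None:
    client = AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
    async with client.messages.stream(
        model="claude-sonnet-4-20250514",  # illustrative model id
        max_tokens=256,
        messages=[{"role": "user", "content": "Say hello."}],
    ) as stream:
        async for event in stream:
            # The same event types _handle_stream_event dispatches on:
            # message_start, content_block_start, content_block_delta, ...
            if event.type == "content_block_delta" and event.delta.type == "text_delta":
                print(event.delta.text, end="", flush=True)
    print()

asyncio.run(main())

The read timeout configured on the client bounds each await of the next event, which is the per-token behavior the new class docstring describes.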