ripperdoc 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. ripperdoc/__init__.py +1 -1
  2. ripperdoc/cli/cli.py +5 -0
  3. ripperdoc/cli/commands/__init__.py +71 -6
  4. ripperdoc/cli/commands/clear_cmd.py +1 -0
  5. ripperdoc/cli/commands/exit_cmd.py +1 -1
  6. ripperdoc/cli/commands/help_cmd.py +11 -1
  7. ripperdoc/cli/commands/hooks_cmd.py +636 -0
  8. ripperdoc/cli/commands/permissions_cmd.py +36 -34
  9. ripperdoc/cli/commands/resume_cmd.py +71 -37
  10. ripperdoc/cli/ui/file_mention_completer.py +276 -0
  11. ripperdoc/cli/ui/helpers.py +100 -3
  12. ripperdoc/cli/ui/interrupt_handler.py +175 -0
  13. ripperdoc/cli/ui/message_display.py +249 -0
  14. ripperdoc/cli/ui/panels.py +63 -0
  15. ripperdoc/cli/ui/rich_ui.py +233 -648
  16. ripperdoc/cli/ui/tool_renderers.py +2 -2
  17. ripperdoc/core/agents.py +4 -4
  18. ripperdoc/core/custom_commands.py +411 -0
  19. ripperdoc/core/hooks/__init__.py +99 -0
  20. ripperdoc/core/hooks/config.py +303 -0
  21. ripperdoc/core/hooks/events.py +540 -0
  22. ripperdoc/core/hooks/executor.py +498 -0
  23. ripperdoc/core/hooks/integration.py +353 -0
  24. ripperdoc/core/hooks/manager.py +720 -0
  25. ripperdoc/core/providers/anthropic.py +476 -69
  26. ripperdoc/core/query.py +61 -4
  27. ripperdoc/core/query_utils.py +1 -1
  28. ripperdoc/core/tool.py +1 -1
  29. ripperdoc/tools/bash_tool.py +5 -5
  30. ripperdoc/tools/file_edit_tool.py +2 -2
  31. ripperdoc/tools/file_read_tool.py +2 -2
  32. ripperdoc/tools/multi_edit_tool.py +1 -1
  33. ripperdoc/utils/conversation_compaction.py +476 -0
  34. ripperdoc/utils/message_compaction.py +109 -154
  35. ripperdoc/utils/message_formatting.py +216 -0
  36. ripperdoc/utils/messages.py +31 -9
  37. ripperdoc/utils/path_ignore.py +3 -4
  38. ripperdoc/utils/session_history.py +19 -7
  39. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/METADATA +24 -3
  40. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/RECORD +44 -30
  41. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/WHEEL +0 -0
  42. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/entry_points.txt +0 -0
  43. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/licenses/LICENSE +0 -0
  44. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/top_level.txt +0 -0
@@ -5,8 +5,10 @@ from __future__ import annotations
 import asyncio
 import time
 from typing import Any, Awaitable, Callable, Dict, List, Optional
+from uuid import uuid4
 
 import anthropic
+import httpx
 from anthropic import AsyncAnthropic
 
 from ripperdoc.core.config import ModelProfile
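
Note: both new imports serve changes further down in this diff. httpx supplies the fine-grained timeout object handed to the SDK client, and uuid4 backfills missing tool_use ids. A minimal illustration of the uuid4 fallback pattern as it appears below (block_id is a hypothetical stand-in for the SDK block's id attribute):

    from uuid import uuid4

    # block_id is hypothetical; the diff reads it via getattr(block, "id", None)
    tool_use_id = block_id or str(uuid4())  # never emit a tool_use block without an id
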
@@ -15,13 +17,11 @@ from ripperdoc.core.providers.base import (
     ProviderClient,
     ProviderResponse,
     call_with_timeout_and_retries,
-    iter_with_timeout,
     sanitize_tool_history,
 )
 from ripperdoc.core.query_utils import (
     anthropic_usage_tokens,
     build_anthropic_tool_schemas,
-    content_blocks_from_anthropic_response,
     estimate_cost_usd,
 )
 from ripperdoc.core.tool import Tool
@@ -63,8 +63,87 @@ def _classify_anthropic_error(exc: Exception) -> tuple[str, str]:
     return "unknown_error", f"Unexpected error ({exc_type}): {exc_msg}"
 
 
+def _content_blocks_from_stream_state(
+    collected_text: List[str],
+    collected_thinking: List[str],
+    collected_tool_calls: Dict[int, Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Build content blocks from accumulated stream state."""
+    blocks: List[Dict[str, Any]] = []
+
+    # Add thinking block if present
+    if collected_thinking:
+        blocks.append({
+            "type": "thinking",
+            "thinking": "".join(collected_thinking),
+        })
+
+    # Add text block if present
+    if collected_text:
+        blocks.append({
+            "type": "text",
+            "text": "".join(collected_text),
+        })
+
+    # Add tool_use blocks
+    for idx in sorted(collected_tool_calls.keys()):
+        call = collected_tool_calls[idx]
+        name = call.get("name")
+        if not name:
+            continue
+        tool_use_id = call.get("id") or str(uuid4())
+        blocks.append({
+            "type": "tool_use",
+            "tool_use_id": tool_use_id,
+            "name": name,
+            "input": call.get("input", {}),
+        })
+
+    return blocks
+
+
+def _content_blocks_from_response(response: Any) -> List[Dict[str, Any]]:
+    """Normalize Anthropic response content to our internal block format."""
+    blocks: List[Dict[str, Any]] = []
+    for block in getattr(response, "content", []) or []:
+        btype = getattr(block, "type", None)
+        if btype == "text":
+            blocks.append({"type": "text", "text": getattr(block, "text", "")})
+        elif btype == "thinking":
+            blocks.append({
+                "type": "thinking",
+                "thinking": getattr(block, "thinking", None) or "",
+                "signature": getattr(block, "signature", None),
+            })
+        elif btype == "redacted_thinking":
+            blocks.append({
+                "type": "redacted_thinking",
+                "data": getattr(block, "data", None),
+                "signature": getattr(block, "signature", None),
+            })
+        elif btype == "tool_use":
+            raw_input = getattr(block, "input", {}) or {}
+            blocks.append({
+                "type": "tool_use",
+                "tool_use_id": getattr(block, "id", None) or str(uuid4()),
+                "name": getattr(block, "name", None),
+                "input": raw_input if isinstance(raw_input, dict) else {},
+            })
+    return blocks
+
+
 class AnthropicClient(ProviderClient):
-    """Anthropic client with streaming and non-streaming support."""
+    """Anthropic client with streaming and non-streaming support.
+
+    Streaming mode (default):
+    - Uses event-based streaming to capture both thinking and text tokens
+    - Timeout applies per-token (chunk), not to the entire request
+    - Thinking tokens are streamed in real-time via progress_callback
+
+    Non-streaming mode:
+    - Makes a single blocking request
+    - Timeout applies to the entire request
+    """
 
     def __init__(self, client_factory: Optional[Callable[[], Awaitable[AsyncAnthropic]]] = None):
         self._client_factory = client_factory
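
Note: the two helpers added above give streamed and non-streamed responses one internal block shape (thinking first, then text, then tool_use blocks in index order). A quick sketch of the expected output, with hypothetical values, assuming the helpers behave as written:

    # Hypothetical inputs; shapes follow _content_blocks_from_stream_state above.
    blocks = _content_blocks_from_stream_state(
        collected_text=["Hello, ", "world."],
        collected_thinking=["Let me check the files."],
        collected_tool_calls={0: {"id": "toolu_01", "name": "bash", "input": {"command": "ls"}}},
    )
    # blocks ==
    # [
    #     {"type": "thinking", "thinking": "Let me check the files."},
    #     {"type": "text", "text": "Hello, world."},
    #     {"type": "tool_use", "tool_use_id": "toolu_01", "name": "bash", "input": {"command": "ls"}},
    # ]
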
@@ -141,96 +220,80 @@ class AnthropicClient(ProviderClient):
     ) -> ProviderResponse:
         """Internal implementation of call, may raise exceptions."""
         tool_schemas = await build_anthropic_tool_schemas(tools)
-        collected_text: List[str] = []
-        reasoning_parts: List[str] = []
         response_metadata: Dict[str, Any] = {}
 
-        anthropic_kwargs = {"base_url": model_profile.api_base}
+        anthropic_kwargs: Dict[str, Any] = {}
+        if model_profile.api_base:
+            anthropic_kwargs["base_url"] = model_profile.api_base
         if model_profile.api_key:
             anthropic_kwargs["api_key"] = model_profile.api_key
         auth_token = getattr(model_profile, "auth_token", None)
         if auth_token:
             anthropic_kwargs["auth_token"] = auth_token
 
+        # Set timeout for the Anthropic SDK client
+        # For streaming, we want a long timeout since models may take time to start responding
+        # httpx.Timeout: (connect, read, write, pool)
+        if stream:
+            # For streaming: long read timeout, reasonable connect timeout
+            # The read timeout applies to waiting for each chunk from the server
+            timeout_config = httpx.Timeout(
+                connect=60.0,  # 60 seconds to establish connection
+                read=600.0,  # 10 minutes to wait for each chunk (model may be thinking)
+                write=60.0,  # 60 seconds to send request
+                pool=60.0,  # 60 seconds to get connection from pool
+            )
+            anthropic_kwargs["timeout"] = timeout_config
+        elif request_timeout and request_timeout > 0:
+            # For non-streaming: use the provided timeout
+            anthropic_kwargs["timeout"] = request_timeout
+
         normalized_messages = sanitize_tool_history(list(normalized_messages))
 
         thinking_payload: Optional[Dict[str, Any]] = None
         if max_thinking_tokens > 0:
             thinking_payload = {"type": "enabled", "budget_tokens": max_thinking_tokens}
 
-        async with await self._client(anthropic_kwargs) as client:
+        # Build common request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": model_profile.model,
+            "max_tokens": model_profile.max_tokens,
+            "system": system_prompt,
+            "messages": normalized_messages,
+            "temperature": model_profile.temperature,
+        }
+        if tool_schemas:
+            request_kwargs["tools"] = tool_schemas
+        if thinking_payload:
+            request_kwargs["thinking"] = thinking_payload
 
-            async def _stream_request() -> Any:
-                stream_cm = client.messages.stream(
-                    model=model_profile.model,
-                    max_tokens=model_profile.max_tokens,
-                    system=system_prompt,
-                    messages=normalized_messages,  # type: ignore[arg-type]
-                    tools=tool_schemas if tool_schemas else None,  # type: ignore
-                    temperature=model_profile.temperature,
-                    thinking=thinking_payload,  # type: ignore[arg-type]
-                )
-                stream_resp = (
-                    await asyncio.wait_for(stream_cm.__aenter__(), timeout=request_timeout)
-                    if request_timeout and request_timeout > 0
-                    else await stream_cm.__aenter__()
+        async with await self._client(anthropic_kwargs) as client:
+            if stream:
+                # Streaming mode: use event-based streaming with per-token timeout
+                content_blocks, usage_tokens = await self._stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    progress_callback=progress_callback,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-                try:
-                    async for text in iter_with_timeout(stream_resp.text_stream, request_timeout):
-                        if text:
-                            collected_text.append(text)
-                        if progress_callback:
-                            try:
-                                await progress_callback(text)
-                            except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
-                                logger.warning(
-                                    "[anthropic_client] Stream callback failed: %s: %s",
-                                    type(cb_exc).__name__, cb_exc,
-                                )
-                    getter = getattr(stream_resp, "get_final_response", None) or getattr(
-                        stream_resp, "get_final_message", None
-                    )
-                    if getter:
-                        return await getter()
-                    return None
-                finally:
-                    await stream_cm.__aexit__(None, None, None)
-
-            async def _non_stream_request() -> Any:
-                return await client.messages.create(
-                    model=model_profile.model,
-                    max_tokens=model_profile.max_tokens,
-                    system=system_prompt,
-                    messages=normalized_messages,  # type: ignore[arg-type]
-                    tools=tool_schemas if tool_schemas else None,  # type: ignore
-                    temperature=model_profile.temperature,
-                    thinking=thinking_payload,  # type: ignore[arg-type]
+            else:
+                # Non-streaming mode: single request with overall timeout
+                content_blocks, usage_tokens = await self._non_stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
 
-            timeout_for_call = None if stream else request_timeout
-            response = await call_with_timeout_and_retries(
-                _stream_request if stream else _non_stream_request,
-                timeout_for_call,
-                max_retries,
-            )
-
         duration_ms = (time.time() - start_time) * 1000
-        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
         cost_usd = estimate_cost_usd(model_profile, usage_tokens)
         record_usage(
             model_profile.model, duration_ms=duration_ms, cost_usd=cost_usd, **usage_tokens
         )
 
-        content_blocks = content_blocks_from_anthropic_response(response, tool_mode)
-        for blk in content_blocks:
-            if blk.get("type") == "thinking":
-                thinking_text = blk.get("thinking") or blk.get("text") or ""
-                if thinking_text:
-                    reasoning_parts.append(str(thinking_text))
-        if reasoning_parts:
-            response_metadata["reasoning_content"] = "\n".join(reasoning_parts)
-        # Streaming progress is handled via text_stream; final content retains thinking blocks.
-
         logger.info(
             "[anthropic_client] Response received",
             extra={
@@ -238,6 +301,8 @@ class AnthropicClient(ProviderClient):
                 "duration_ms": round(duration_ms, 2),
                 "tool_mode": tool_mode,
                 "tool_schemas": len(tool_schemas),
+                "stream": stream,
+                "content_blocks": len(content_blocks),
             },
         )
 
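Note on the timeout change in the hunks above: httpx.Timeout bounds each phase of the request separately, so the 600-second read timeout caps the silence between received chunks rather than the total response time. A self-contained sketch of the same configuration (a standalone httpx client is shown purely for illustration; the diff passes the object to the Anthropic SDK client instead):

    import httpx

    timeout = httpx.Timeout(
        connect=60.0,  # establish the TCP/TLS connection
        read=600.0,    # max gap between received chunks, not total duration
        write=60.0,    # send the request body
        pool=60.0,     # wait for a free connection from the pool
    )
    client = httpx.AsyncClient(timeout=timeout)
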
@@ -248,3 +313,345 @@
             duration_ms=duration_ms,
             metadata=response_metadata,
         )
+
+    async def _stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        progress_callback: Optional[ProgressCallback],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a streaming request with per-token timeout.
+
+        Uses Anthropic's event-based streaming API to capture:
+        - thinking tokens (streamed in real-time)
+        - text tokens (streamed in real-time)
+        - tool_use blocks
+
+        In streaming mode:
+        - Connection timeout uses request_timeout
+        - Per-event timeout is disabled (None) because the model may take
+          a long time to generate the first token (especially during thinking)
+        - Once streaming starts, events should flow continuously
+        """
+        collected_text: List[str] = []
+        collected_thinking: List[str] = []
+        collected_tool_calls: Dict[int, Dict[str, Any]] = {}
+        usage_tokens: Dict[str, int] = {}
+
+        # Use mutable containers to track state across event handling
+        current_block_index_ref: List[int] = [-1]
+        current_block_type_ref: List[Optional[str]] = [None]
+
+        event_count = 0
+        message_stop_received = False
+
+        async def _do_stream() -> None:
+            nonlocal event_count, message_stop_received
+            event_count = 0
+            message_stop_received = False
+
+            # Create the stream - this initiates the connection
+            stream = client.messages.stream(**request_kwargs)
+
+            # Enter the stream context
+            stream_manager = await stream.__aenter__()
+
+            try:
+                # Iterate over events
+                # Some API proxies don't properly close the stream after message_stop,
+                # so we break out of the loop when we receive message_stop
+                async for event in stream_manager:
+                    event_count += 1
+                    event_type = getattr(event, "type", "unknown")
+
+                    await self._handle_stream_event(
+                        event=event,
+                        collected_text=collected_text,
+                        collected_thinking=collected_thinking,
+                        collected_tool_calls=collected_tool_calls,
+                        usage_tokens=usage_tokens,
+                        progress_callback=progress_callback,
+                        current_block_index_ref=current_block_index_ref,
+                        current_block_type_ref=current_block_type_ref,
+                    )
+
+                    # Check if we received message_stop - break out of loop
+                    # Some API proxies don't properly close the SSE stream
+                    if event_type == "message_stop":
+                        message_stop_received = True
+                        break
+
+            except Exception:
+                raise
+            finally:
+                try:
+                    # Use timeout for __aexit__ in case the stream doesn't close properly
+                    await asyncio.wait_for(stream.__aexit__(None, None, None), timeout=5.0)
+                except asyncio.TimeoutError:
+                    pass  # Stream didn't close properly, continue anyway
+                except Exception:
+                    pass  # Ignore __aexit__ errors
+
+        # For streaming, we don't use call_with_timeout_and_retries on the whole operation
+        # Instead, timeout is applied per-event inside _iter_events_with_timeout
+        # But we still want retries for connection failures
+        attempts = max(0, int(max_retries)) + 1
+        last_error: Optional[Exception] = None
+
+        for attempt in range(1, attempts + 1):
+            try:
+                # Reset state for retry
+                collected_text.clear()
+                collected_thinking.clear()
+                collected_tool_calls.clear()
+                usage_tokens.clear()
+                current_block_index_ref[0] = -1
+                current_block_type_ref[0] = None
+
+                await _do_stream()
+                break  # Success
+            except asyncio.TimeoutError as exc:
+                last_error = exc
+                if attempt == attempts:
+                    break
+                delay = 0.5 * (2 ** (attempt - 1))  # Exponential backoff
+                logger.warning(
+                    "[anthropic_client] Stream timed out; retrying",
+                    extra={
+                        "attempt": attempt,
+                        "max_retries": max_retries,
+                        "delay_seconds": delay,
+                    },
+                )
+                await asyncio.sleep(delay)
+            except asyncio.CancelledError:
+                raise
+            except (RuntimeError, ValueError, TypeError, OSError, ConnectionError) as exc:
+                # Non-timeout errors: retry for connection errors only
+                if isinstance(exc, (OSError, ConnectionError)):
+                    last_error = exc
+                    if attempt == attempts:
+                        raise
+                    delay = 0.5 * (2 ** (attempt - 1))
+                    logger.warning(
+                        "[anthropic_client] Connection error; retrying",
+                        extra={
+                            "attempt": attempt,
+                            "error": str(exc),
+                        },
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    raise
+
+        if last_error and not collected_text and not collected_thinking and not collected_tool_calls:
+            raise RuntimeError(f"Stream failed after {attempts} attempts") from last_error
+
+        # Store reasoning content in metadata
+        if collected_thinking:
+            response_metadata["reasoning_content"] = "".join(collected_thinking)
+
+        content_blocks = _content_blocks_from_stream_state(
+            collected_text, collected_thinking, collected_tool_calls
+        )
+
+        return content_blocks, usage_tokens
+
+    async def _handle_stream_event(
+        self,
+        *,
+        event: Any,
+        collected_text: List[str],
+        collected_thinking: List[str],
+        collected_tool_calls: Dict[int, Dict[str, Any]],
+        usage_tokens: Dict[str, int],
+        progress_callback: Optional[ProgressCallback],
+        current_block_index_ref: List[int],
+        current_block_type_ref: List[Optional[str]],
+    ) -> None:
+        """Handle a single stream event.
+
+        Supports both standard Anthropic API events and non-standard formats
+        from API proxies like aiping.cn.
+
+        Standard Anthropic events:
+        - message_start, content_block_start, content_block_delta, content_block_stop
+        - message_delta, message_stop
+
+        Non-standard events (aiping.cn style):
+        - thinking (direct thinking content)
+        - text (direct text content)
+        - signature (thinking signature)
+        """
+        event_type = getattr(event, "type", None)
+
+        if event_type == "message_start":
+            # Extract initial usage info if available
+            message = getattr(event, "message", None)
+            if message:
+                usage = getattr(message, "usage", None)
+                if usage:
+                    usage_tokens.update(anthropic_usage_tokens(usage))
+
+        elif event_type == "content_block_start":
+            # New content block starting
+            index = getattr(event, "index", 0)
+            content_block = getattr(event, "content_block", None)
+            if content_block:
+                block_type = getattr(content_block, "type", None)
+                current_block_index_ref[0] = index
+                current_block_type_ref[0] = block_type
+
+                if block_type == "tool_use":
+                    # Initialize tool call state
+                    collected_tool_calls[index] = {
+                        "id": getattr(content_block, "id", None),
+                        "name": getattr(content_block, "name", None),
+                        "input_json": "",
+                        "input": {},
+                    }
+                    # Announce tool start
+                    if progress_callback:
+                        tool_name = getattr(content_block, "name", "unknown")
+                        try:
+                            await progress_callback(f"[tool:{tool_name}]")
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        elif event_type == "content_block_delta":
+            # Content delta within a block
+            index = getattr(event, "index", current_block_index_ref[0])
+            delta = getattr(event, "delta", None)
+            if not delta:
+                return
+
+            delta_type = getattr(delta, "type", None)
+
+            if delta_type == "thinking_delta":
+                # Thinking content delta
+                thinking_text = getattr(delta, "thinking", "")
+                if thinking_text:
+                    collected_thinking.append(thinking_text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(thinking_text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__, cb_exc,
+                            )
+
+            elif delta_type == "text_delta":
+                # Text content delta
+                text = getattr(delta, "text", "")
+                if text:
+                    collected_text.append(text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__, cb_exc,
+                            )
+
+            elif delta_type == "input_json_delta":
+                # Tool input JSON delta
+                partial_json = getattr(delta, "partial_json", "")
+                if partial_json and index in collected_tool_calls:
+                    collected_tool_calls[index]["input_json"] += partial_json
+                    if progress_callback:
+                        try:
+                            await progress_callback(partial_json)
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        # ===== Non-standard events from aiping.cn and similar proxies =====
+        # NOTE: aiping.cn sends BOTH standard (content_block_delta) and non-standard
+        # (text, thinking) events. We only process the non-standard events if we
+        # haven't already collected content from standard events in this block.
+        # This is controlled by checking if the standard delta was processed.
+
+        elif event_type == "thinking":
+            # Direct thinking content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "text":
+            # Direct text content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "signature":
+            # Thinking signature (non-standard, aiping.cn style)
+            pass
+
+        # ===== Standard events continued =====
+
+        elif event_type == "content_block_stop":
+            # Content block finished
+            index = getattr(event, "index", current_block_index_ref[0])
+
+            # Parse accumulated JSON for tool calls
+            if index in collected_tool_calls:
+                import json
+                json_str = collected_tool_calls[index].get("input_json", "")
+                if json_str:
+                    try:
+                        collected_tool_calls[index]["input"] = json.loads(json_str)
+                    except json.JSONDecodeError:
+                        logger.warning(
+                            "[anthropic_client] Failed to parse tool input JSON",
+                            extra={"json": json_str[:200]},
+                        )
+                        collected_tool_calls[index]["input"] = {}
+
+        elif event_type == "message_delta":
+            # Message-level delta (usually contains usage info at the end)
+            usage = getattr(event, "usage", None)
+            if usage:
+                # Update with final usage - output_tokens comes here
+                usage_tokens["output_tokens"] = getattr(usage, "output_tokens", 0)
+
+        elif event_type == "message_stop":
+            # Message complete
+            pass
+
+        # Unknown event types are silently ignored
+
+    async def _non_stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a non-streaming request with overall timeout."""
+
+        async def _do_request() -> Any:
+            return await client.messages.create(**request_kwargs)
+
+        response = await call_with_timeout_and_retries(
+            _do_request,
+            request_timeout,
+            max_retries,
+        )
+
+        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
+        content_blocks = _content_blocks_from_response(response)
+
+        # Extract reasoning content for metadata
+        for block in content_blocks:
+            if block.get("type") == "thinking":
+                thinking_text = block.get("thinking") or ""
+                if thinking_text:
+                    response_metadata["reasoning_content"] = thinking_text
+                    break
+
+        return content_blocks, usage_tokens
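
Taken together, the new streaming path is a small state machine over the SDK's SSE events, with retries and exponential backoff wrapped around the connection. A condensed, self-contained sketch of the same event handling against the public anthropic SDK (the model id and prompt are placeholders; the diff's error handling, retries, and tool_use accumulation are omitted):

    import asyncio
    from anthropic import AsyncAnthropic

    async def main() -> None:
        client = AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
        text_parts: list[str] = []
        thinking_parts: list[str] = []
        async with client.messages.stream(
            model="claude-sonnet-4-20250514",  # placeholder model id
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
        ) as stream:
            async for event in stream:
                if event.type == "content_block_delta":
                    if event.delta.type == "text_delta":
                        text_parts.append(event.delta.text)
                    elif event.delta.type == "thinking_delta":
                        thinking_parts.append(event.delta.thinking)
                elif event.type == "message_stop":
                    break  # same early exit the diff uses for proxies that never close the stream
        print("".join(text_parts))

    asyncio.run(main())
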