ripperdoc-0.2.6-py3-none-any.whl → ripperdoc-0.2.8-py3-none-any.whl
This diff compares two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/cli.py +5 -0
- ripperdoc/cli/commands/__init__.py +71 -6
- ripperdoc/cli/commands/clear_cmd.py +1 -0
- ripperdoc/cli/commands/exit_cmd.py +1 -1
- ripperdoc/cli/commands/help_cmd.py +11 -1
- ripperdoc/cli/commands/hooks_cmd.py +636 -0
- ripperdoc/cli/commands/permissions_cmd.py +36 -34
- ripperdoc/cli/commands/resume_cmd.py +71 -37
- ripperdoc/cli/ui/file_mention_completer.py +276 -0
- ripperdoc/cli/ui/helpers.py +100 -3
- ripperdoc/cli/ui/interrupt_handler.py +175 -0
- ripperdoc/cli/ui/message_display.py +249 -0
- ripperdoc/cli/ui/panels.py +63 -0
- ripperdoc/cli/ui/rich_ui.py +233 -648
- ripperdoc/cli/ui/tool_renderers.py +2 -2
- ripperdoc/core/agents.py +4 -4
- ripperdoc/core/custom_commands.py +411 -0
- ripperdoc/core/hooks/__init__.py +99 -0
- ripperdoc/core/hooks/config.py +303 -0
- ripperdoc/core/hooks/events.py +540 -0
- ripperdoc/core/hooks/executor.py +498 -0
- ripperdoc/core/hooks/integration.py +353 -0
- ripperdoc/core/hooks/manager.py +720 -0
- ripperdoc/core/providers/anthropic.py +476 -69
- ripperdoc/core/query.py +61 -4
- ripperdoc/core/query_utils.py +1 -1
- ripperdoc/core/tool.py +1 -1
- ripperdoc/tools/bash_tool.py +5 -5
- ripperdoc/tools/file_edit_tool.py +2 -2
- ripperdoc/tools/file_read_tool.py +2 -2
- ripperdoc/tools/multi_edit_tool.py +1 -1
- ripperdoc/utils/conversation_compaction.py +476 -0
- ripperdoc/utils/message_compaction.py +109 -154
- ripperdoc/utils/message_formatting.py +216 -0
- ripperdoc/utils/messages.py +31 -9
- ripperdoc/utils/path_ignore.py +3 -4
- ripperdoc/utils/session_history.py +19 -7
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/METADATA +24 -3
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/RECORD +44 -30
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/top_level.txt +0 -0
ripperdoc/core/providers/anthropic.py

```diff
@@ -5,8 +5,10 @@ from __future__ import annotations
 import asyncio
 import time
 from typing import Any, Awaitable, Callable, Dict, List, Optional
+from uuid import uuid4
 
 import anthropic
+import httpx
 from anthropic import AsyncAnthropic
 
 from ripperdoc.core.config import ModelProfile
@@ -15,13 +17,11 @@ from ripperdoc.core.providers.base import (
     ProviderClient,
     ProviderResponse,
     call_with_timeout_and_retries,
-    iter_with_timeout,
     sanitize_tool_history,
 )
 from ripperdoc.core.query_utils import (
     anthropic_usage_tokens,
     build_anthropic_tool_schemas,
-    content_blocks_from_anthropic_response,
     estimate_cost_usd,
 )
 from ripperdoc.core.tool import Tool
```
```diff
@@ -63,8 +63,87 @@ def _classify_anthropic_error(exc: Exception) -> tuple[str, str]:
     return "unknown_error", f"Unexpected error ({exc_type}): {exc_msg}"
 
 
+def _content_blocks_from_stream_state(
+    collected_text: List[str],
+    collected_thinking: List[str],
+    collected_tool_calls: Dict[int, Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Build content blocks from accumulated stream state."""
+    blocks: List[Dict[str, Any]] = []
+
+    # Add thinking block if present
+    if collected_thinking:
+        blocks.append({
+            "type": "thinking",
+            "thinking": "".join(collected_thinking),
+        })
+
+    # Add text block if present
+    if collected_text:
+        blocks.append({
+            "type": "text",
+            "text": "".join(collected_text),
+        })
+
+    # Add tool_use blocks
+    for idx in sorted(collected_tool_calls.keys()):
+        call = collected_tool_calls[idx]
+        name = call.get("name")
+        if not name:
+            continue
+        tool_use_id = call.get("id") or str(uuid4())
+        blocks.append({
+            "type": "tool_use",
+            "tool_use_id": tool_use_id,
+            "name": name,
+            "input": call.get("input", {}),
+        })
+
+    return blocks
+
+
+def _content_blocks_from_response(response: Any) -> List[Dict[str, Any]]:
+    """Normalize Anthropic response content to our internal block format."""
+    blocks: List[Dict[str, Any]] = []
+    for block in getattr(response, "content", []) or []:
+        btype = getattr(block, "type", None)
+        if btype == "text":
+            blocks.append({"type": "text", "text": getattr(block, "text", "")})
+        elif btype == "thinking":
+            blocks.append({
+                "type": "thinking",
+                "thinking": getattr(block, "thinking", None) or "",
+                "signature": getattr(block, "signature", None),
+            })
+        elif btype == "redacted_thinking":
+            blocks.append({
+                "type": "redacted_thinking",
+                "data": getattr(block, "data", None),
+                "signature": getattr(block, "signature", None),
+            })
+        elif btype == "tool_use":
+            raw_input = getattr(block, "input", {}) or {}
+            blocks.append({
+                "type": "tool_use",
+                "tool_use_id": getattr(block, "id", None) or str(uuid4()),
+                "name": getattr(block, "name", None),
+                "input": raw_input if isinstance(raw_input, dict) else {},
+            })
+    return blocks
+
+
 class AnthropicClient(ProviderClient):
-    """Anthropic client with streaming and non-streaming support."""
+    """Anthropic client with streaming and non-streaming support.
+
+    Streaming mode (default):
+    - Uses event-based streaming to capture both thinking and text tokens
+    - Timeout applies per-token (chunk), not to the entire request
+    - Thinking tokens are streamed in real-time via progress_callback
+
+    Non-streaming mode:
+    - Makes a single blocking request
+    - Timeout applies to the entire request
+    """
 
     def __init__(self, client_factory: Optional[Callable[[], Awaitable[AsyncAnthropic]]] = None):
         self._client_factory = client_factory
```
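For orientation, here is a minimal usage sketch of the `_content_blocks_from_stream_state` helper added above. Only the function and its contract come from the diff; the input values are invented example data.

```python
# Hypothetical example data; only the helper itself comes from the package.
from ripperdoc.core.providers.anthropic import _content_blocks_from_stream_state

blocks = _content_blocks_from_stream_state(
    collected_text=["Hello, ", "world."],
    collected_thinking=["Considering the request..."],
    collected_tool_calls={0: {"id": "toolu_01", "name": "bash", "input": {"command": "ls"}}},
)
# Blocks come out in a fixed order: thinking, then text, then tool_use:
# [{"type": "thinking", "thinking": "Considering the request..."},
#  {"type": "text", "text": "Hello, world."},
#  {"type": "tool_use", "tool_use_id": "toolu_01", "name": "bash", "input": {"command": "ls"}}]
```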
```diff
@@ -141,96 +220,80 @@ class AnthropicClient(ProviderClient):
     ) -> ProviderResponse:
         """Internal implementation of call, may raise exceptions."""
         tool_schemas = await build_anthropic_tool_schemas(tools)
-        collected_text: List[str] = []
-        reasoning_parts: List[str] = []
         response_metadata: Dict[str, Any] = {}
 
-        anthropic_kwargs = {}
+        anthropic_kwargs: Dict[str, Any] = {}
+        if model_profile.api_base:
+            anthropic_kwargs["base_url"] = model_profile.api_base
         if model_profile.api_key:
             anthropic_kwargs["api_key"] = model_profile.api_key
         auth_token = getattr(model_profile, "auth_token", None)
         if auth_token:
             anthropic_kwargs["auth_token"] = auth_token
 
+        # Set timeout for the Anthropic SDK client
+        # For streaming, we want a long timeout since models may take time to start responding
+        # httpx.Timeout: (connect, read, write, pool)
+        if stream:
+            # For streaming: long read timeout, reasonable connect timeout
+            # The read timeout applies to waiting for each chunk from the server
+            timeout_config = httpx.Timeout(
+                connect=60.0,  # 60 seconds to establish connection
+                read=600.0,  # 10 minutes to wait for each chunk (model may be thinking)
+                write=60.0,  # 60 seconds to send request
+                pool=60.0,  # 60 seconds to get connection from pool
+            )
+            anthropic_kwargs["timeout"] = timeout_config
+        elif request_timeout and request_timeout > 0:
+            # For non-streaming: use the provided timeout
+            anthropic_kwargs["timeout"] = request_timeout
+
         normalized_messages = sanitize_tool_history(list(normalized_messages))
 
         thinking_payload: Optional[Dict[str, Any]] = None
         if max_thinking_tokens > 0:
             thinking_payload = {"type": "enabled", "budget_tokens": max_thinking_tokens}
 
-
+        # Build common request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": model_profile.model,
+            "max_tokens": model_profile.max_tokens,
+            "system": system_prompt,
+            "messages": normalized_messages,
+            "temperature": model_profile.temperature,
+        }
+        if tool_schemas:
+            request_kwargs["tools"] = tool_schemas
+        if thinking_payload:
+            request_kwargs["thinking"] = thinking_payload
 
-
-
-
-
-
-
-
-
-
-
-            stream_resp = (
-                await asyncio.wait_for(stream_cm.__aenter__(), timeout=request_timeout)
-                if request_timeout and request_timeout > 0
-                else await stream_cm.__aenter__()
+        async with await self._client(anthropic_kwargs) as client:
+            if stream:
+                # Streaming mode: use event-based streaming with per-token timeout
+                content_blocks, usage_tokens = await self._stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    progress_callback=progress_callback,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-
-
-
-
-
-
-
-
-                logger.warning(
-                    "[anthropic_client] Stream callback failed: %s: %s",
-                    type(cb_exc).__name__, cb_exc,
-                )
-            getter = getattr(stream_resp, "get_final_response", None) or getattr(
-                stream_resp, "get_final_message", None
-            )
-            if getter:
-                return await getter()
-            return None
-        finally:
-            await stream_cm.__aexit__(None, None, None)
-
-        async def _non_stream_request() -> Any:
-            return await client.messages.create(
-                model=model_profile.model,
-                max_tokens=model_profile.max_tokens,
-                system=system_prompt,
-                messages=normalized_messages,  # type: ignore[arg-type]
-                tools=tool_schemas if tool_schemas else None,  # type: ignore
-                temperature=model_profile.temperature,
-                thinking=thinking_payload,  # type: ignore[arg-type]
+            else:
+                # Non-streaming mode: single request with overall timeout
+                content_blocks, usage_tokens = await self._non_stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
 
-        timeout_for_call = None if stream else request_timeout
-        response = await call_with_timeout_and_retries(
-            _stream_request if stream else _non_stream_request,
-            timeout_for_call,
-            max_retries,
-        )
-
         duration_ms = (time.time() - start_time) * 1000
-        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
         cost_usd = estimate_cost_usd(model_profile, usage_tokens)
         record_usage(
             model_profile.model, duration_ms=duration_ms, cost_usd=cost_usd, **usage_tokens
         )
 
-        content_blocks = content_blocks_from_anthropic_response(response, tool_mode)
-        for blk in content_blocks:
-            if blk.get("type") == "thinking":
-                thinking_text = blk.get("thinking") or blk.get("text") or ""
-                if thinking_text:
-                    reasoning_parts.append(str(thinking_text))
-        if reasoning_parts:
-            response_metadata["reasoning_content"] = "\n".join(reasoning_parts)
-        # Streaming progress is handled via text_stream; final content retains thinking blocks.
-
         logger.info(
             "[anthropic_client] Response received",
             extra={
```
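The timeout block above is the behavioral core of this hunk: streaming requests get a per-chunk read timeout instead of a whole-request deadline. Below is a standalone sketch of the same policy, assuming only that the Anthropic SDK forwards its `timeout` argument to the underlying httpx client; the client construction is illustrative, not taken from the package.

```python
# Standalone sketch of the streaming timeout policy; the values mirror the diff.
import httpx
from anthropic import AsyncAnthropic

streaming_timeout = httpx.Timeout(
    connect=60.0,  # time to establish the connection
    read=600.0,    # time to wait between SSE chunks; the model may think for minutes
    write=60.0,    # time to send the request body
    pool=60.0,     # time to acquire a connection from the pool
)
client = AsyncAnthropic(timeout=streaming_timeout)  # API key read from the environment
```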
```diff
@@ -238,6 +301,8 @@ class AnthropicClient(ProviderClient):
                 "duration_ms": round(duration_ms, 2),
                 "tool_mode": tool_mode,
                 "tool_schemas": len(tool_schemas),
+                "stream": stream,
+                "content_blocks": len(content_blocks),
             },
         )
 
@@ -248,3 +313,345 @@ class AnthropicClient(ProviderClient):
             duration_ms=duration_ms,
             metadata=response_metadata,
         )
+
+    async def _stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        progress_callback: Optional[ProgressCallback],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a streaming request with per-token timeout.
+
+        Uses Anthropic's event-based streaming API to capture:
+        - thinking tokens (streamed in real-time)
+        - text tokens (streamed in real-time)
+        - tool_use blocks
+
+        In streaming mode:
+        - Connection timeout uses request_timeout
+        - Per-event timeout is disabled (None) because the model may take
+          a long time to generate the first token (especially during thinking)
+        - Once streaming starts, events should flow continuously
+        """
+        collected_text: List[str] = []
+        collected_thinking: List[str] = []
+        collected_tool_calls: Dict[int, Dict[str, Any]] = {}
+        usage_tokens: Dict[str, int] = {}
+
+        # Use mutable containers to track state across event handling
+        current_block_index_ref: List[int] = [-1]
+        current_block_type_ref: List[Optional[str]] = [None]
+
+        event_count = 0
+        message_stop_received = False
+
+        async def _do_stream() -> None:
+            nonlocal event_count, message_stop_received
+            event_count = 0
+            message_stop_received = False
+
+            # Create the stream - this initiates the connection
+            stream = client.messages.stream(**request_kwargs)
+
+            # Enter the stream context
+            stream_manager = await stream.__aenter__()
+
+            try:
+                # Iterate over events
+                # Some API proxies don't properly close the stream after message_stop,
+                # so we break out of the loop when we receive message_stop
+                async for event in stream_manager:
+                    event_count += 1
+                    event_type = getattr(event, "type", "unknown")
+
+                    await self._handle_stream_event(
+                        event=event,
+                        collected_text=collected_text,
+                        collected_thinking=collected_thinking,
+                        collected_tool_calls=collected_tool_calls,
+                        usage_tokens=usage_tokens,
+                        progress_callback=progress_callback,
+                        current_block_index_ref=current_block_index_ref,
+                        current_block_type_ref=current_block_type_ref,
+                    )
+
+                    # Check if we received message_stop - break out of loop
+                    # Some API proxies don't properly close the SSE stream
+                    if event_type == "message_stop":
+                        message_stop_received = True
+                        break
+
+            except Exception:
+                raise
+            finally:
+                try:
+                    # Use timeout for __aexit__ in case the stream doesn't close properly
+                    await asyncio.wait_for(stream.__aexit__(None, None, None), timeout=5.0)
+                except asyncio.TimeoutError:
+                    pass  # Stream didn't close properly, continue anyway
+                except Exception:
+                    pass  # Ignore __aexit__ errors
+
+        # For streaming, we don't use call_with_timeout_and_retries on the whole operation
+        # Instead, timeout is applied per-event inside _iter_events_with_timeout
+        # But we still want retries for connection failures
+        attempts = max(0, int(max_retries)) + 1
+        last_error: Optional[Exception] = None
+
+        for attempt in range(1, attempts + 1):
+            try:
+                # Reset state for retry
+                collected_text.clear()
+                collected_thinking.clear()
+                collected_tool_calls.clear()
+                usage_tokens.clear()
+                current_block_index_ref[0] = -1
+                current_block_type_ref[0] = None
+
+                await _do_stream()
+                break  # Success
+            except asyncio.TimeoutError as exc:
+                last_error = exc
+                if attempt == attempts:
+                    break
+                delay = 0.5 * (2 ** (attempt - 1))  # Exponential backoff
+                logger.warning(
+                    "[anthropic_client] Stream timed out; retrying",
+                    extra={
+                        "attempt": attempt,
+                        "max_retries": max_retries,
+                        "delay_seconds": delay,
+                    },
+                )
+                await asyncio.sleep(delay)
+            except asyncio.CancelledError:
+                raise
+            except (RuntimeError, ValueError, TypeError, OSError, ConnectionError) as exc:
+                # Non-timeout errors: retry for connection errors only
+                if isinstance(exc, (OSError, ConnectionError)):
+                    last_error = exc
+                    if attempt == attempts:
+                        raise
+                    delay = 0.5 * (2 ** (attempt - 1))
+                    logger.warning(
+                        "[anthropic_client] Connection error; retrying",
+                        extra={
+                            "attempt": attempt,
+                            "error": str(exc),
+                        },
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    raise
+
+        if last_error and not collected_text and not collected_thinking and not collected_tool_calls:
+            raise RuntimeError(f"Stream failed after {attempts} attempts") from last_error
+
+        # Store reasoning content in metadata
+        if collected_thinking:
+            response_metadata["reasoning_content"] = "".join(collected_thinking)
+
+        content_blocks = _content_blocks_from_stream_state(
+            collected_text, collected_thinking, collected_tool_calls
+        )
+
+        return content_blocks, usage_tokens
+
+    async def _handle_stream_event(
+        self,
+        *,
+        event: Any,
+        collected_text: List[str],
+        collected_thinking: List[str],
+        collected_tool_calls: Dict[int, Dict[str, Any]],
+        usage_tokens: Dict[str, int],
+        progress_callback: Optional[ProgressCallback],
+        current_block_index_ref: List[int],
+        current_block_type_ref: List[Optional[str]],
+    ) -> None:
+        """Handle a single stream event.
+
+        Supports both standard Anthropic API events and non-standard formats
+        from API proxies like aiping.cn.
+
+        Standard Anthropic events:
+        - message_start, content_block_start, content_block_delta, content_block_stop
+        - message_delta, message_stop
+
+        Non-standard events (aiping.cn style):
+        - thinking (direct thinking content)
+        - text (direct text content)
+        - signature (thinking signature)
+        """
+        event_type = getattr(event, "type", None)
+
+        if event_type == "message_start":
+            # Extract initial usage info if available
+            message = getattr(event, "message", None)
+            if message:
+                usage = getattr(message, "usage", None)
+                if usage:
+                    usage_tokens.update(anthropic_usage_tokens(usage))
+
+        elif event_type == "content_block_start":
+            # New content block starting
+            index = getattr(event, "index", 0)
+            content_block = getattr(event, "content_block", None)
+            if content_block:
+                block_type = getattr(content_block, "type", None)
+                current_block_index_ref[0] = index
+                current_block_type_ref[0] = block_type
+
+                if block_type == "tool_use":
+                    # Initialize tool call state
+                    collected_tool_calls[index] = {
+                        "id": getattr(content_block, "id", None),
+                        "name": getattr(content_block, "name", None),
+                        "input_json": "",
+                        "input": {},
+                    }
+                    # Announce tool start
+                    if progress_callback:
+                        tool_name = getattr(content_block, "name", "unknown")
+                        try:
+                            await progress_callback(f"[tool:{tool_name}]")
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        elif event_type == "content_block_delta":
+            # Content delta within a block
+            index = getattr(event, "index", current_block_index_ref[0])
+            delta = getattr(event, "delta", None)
+            if not delta:
+                return
+
+            delta_type = getattr(delta, "type", None)
+
+            if delta_type == "thinking_delta":
+                # Thinking content delta
+                thinking_text = getattr(delta, "thinking", "")
+                if thinking_text:
+                    collected_thinking.append(thinking_text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(thinking_text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__, cb_exc,
+                            )
+
+            elif delta_type == "text_delta":
+                # Text content delta
+                text = getattr(delta, "text", "")
+                if text:
+                    collected_text.append(text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__, cb_exc,
+                            )
+
+            elif delta_type == "input_json_delta":
+                # Tool input JSON delta
+                partial_json = getattr(delta, "partial_json", "")
+                if partial_json and index in collected_tool_calls:
+                    collected_tool_calls[index]["input_json"] += partial_json
+                    if progress_callback:
+                        try:
+                            await progress_callback(partial_json)
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        # ===== Non-standard events from aiping.cn and similar proxies =====
+        # NOTE: aiping.cn sends BOTH standard (content_block_delta) and non-standard
+        # (text, thinking) events. We only process the non-standard events if we
+        # haven't already collected content from standard events in this block.
+        # This is controlled by checking if the standard delta was processed.
+
+        elif event_type == "thinking":
+            # Direct thinking content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "text":
+            # Direct text content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "signature":
+            # Thinking signature (non-standard, aiping.cn style)
+            pass
+
+        # ===== Standard events continued =====
+
+        elif event_type == "content_block_stop":
+            # Content block finished
+            index = getattr(event, "index", current_block_index_ref[0])
+
+            # Parse accumulated JSON for tool calls
+            if index in collected_tool_calls:
+                import json
+                json_str = collected_tool_calls[index].get("input_json", "")
+                if json_str:
+                    try:
+                        collected_tool_calls[index]["input"] = json.loads(json_str)
+                    except json.JSONDecodeError:
+                        logger.warning(
+                            "[anthropic_client] Failed to parse tool input JSON",
+                            extra={"json": json_str[:200]},
+                        )
+                        collected_tool_calls[index]["input"] = {}
+
+        elif event_type == "message_delta":
+            # Message-level delta (usually contains usage info at the end)
+            usage = getattr(event, "usage", None)
+            if usage:
+                # Update with final usage - output_tokens comes here
+                usage_tokens["output_tokens"] = getattr(usage, "output_tokens", 0)
+
+        elif event_type == "message_stop":
+            # Message complete
+            pass
+
+        # Unknown event types are silently ignored
+
+    async def _non_stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a non-streaming request with overall timeout."""
+
+        async def _do_request() -> Any:
+            return await client.messages.create(**request_kwargs)
+
+        response = await call_with_timeout_and_retries(
+            _do_request,
+            request_timeout,
+            max_retries,
+        )
+
+        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
+        content_blocks = _content_blocks_from_response(response)
+
+        # Extract reasoning content for metadata
+        for block in content_blocks:
+            if block.get("type") == "thinking":
+                thinking_text = block.get("thinking") or ""
+                if thinking_text:
+                    response_metadata["reasoning_content"] = thinking_text
+                    break
+
+        return content_blocks, usage_tokens
```
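Two details in `_stream_request` and `_handle_stream_event` are easy to miss. First, the retry delay `0.5 * (2 ** (attempt - 1))` yields a backoff schedule of 0.5 s, 1 s, 2 s, and so on. Second, tool input arrives as partial JSON fragments that are only parsed once the content block closes. A self-contained sketch of that accumulation pattern, with invented fragment data:

```python
# Sketch of the input_json_delta accumulation pattern; the fragments are invented.
import json

fragments = ['{"com', 'mand": "ls', ' -la"}']  # stand-ins for input_json_delta payloads

input_json = ""
for partial in fragments:  # one fragment per content_block_delta event
    input_json += partial

try:  # parsed once, at content_block_stop
    tool_input = json.loads(input_json)
except json.JSONDecodeError:
    tool_input = {}  # same fallback the diff uses

print(tool_input)  # {'command': 'ls -la'}
```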