ripperdoc-0.2.7-py3-none-any.whl → ripperdoc-0.2.9-py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/cli.py +33 -115
- ripperdoc/cli/commands/__init__.py +70 -6
- ripperdoc/cli/commands/agents_cmd.py +6 -3
- ripperdoc/cli/commands/clear_cmd.py +1 -4
- ripperdoc/cli/commands/config_cmd.py +1 -1
- ripperdoc/cli/commands/context_cmd.py +3 -2
- ripperdoc/cli/commands/doctor_cmd.py +18 -4
- ripperdoc/cli/commands/help_cmd.py +11 -1
- ripperdoc/cli/commands/hooks_cmd.py +610 -0
- ripperdoc/cli/commands/models_cmd.py +26 -9
- ripperdoc/cli/commands/permissions_cmd.py +57 -37
- ripperdoc/cli/commands/resume_cmd.py +6 -4
- ripperdoc/cli/commands/status_cmd.py +4 -4
- ripperdoc/cli/commands/tasks_cmd.py +8 -4
- ripperdoc/cli/ui/file_mention_completer.py +64 -8
- ripperdoc/cli/ui/interrupt_handler.py +3 -4
- ripperdoc/cli/ui/message_display.py +5 -3
- ripperdoc/cli/ui/panels.py +13 -10
- ripperdoc/cli/ui/provider_options.py +247 -0
- ripperdoc/cli/ui/rich_ui.py +196 -77
- ripperdoc/cli/ui/spinner.py +25 -1
- ripperdoc/cli/ui/tool_renderers.py +8 -2
- ripperdoc/cli/ui/wizard.py +215 -0
- ripperdoc/core/agents.py +9 -3
- ripperdoc/core/config.py +49 -12
- ripperdoc/core/custom_commands.py +412 -0
- ripperdoc/core/default_tools.py +11 -2
- ripperdoc/core/hooks/__init__.py +99 -0
- ripperdoc/core/hooks/config.py +301 -0
- ripperdoc/core/hooks/events.py +535 -0
- ripperdoc/core/hooks/executor.py +496 -0
- ripperdoc/core/hooks/integration.py +344 -0
- ripperdoc/core/hooks/manager.py +745 -0
- ripperdoc/core/permissions.py +40 -8
- ripperdoc/core/providers/anthropic.py +548 -68
- ripperdoc/core/providers/gemini.py +70 -5
- ripperdoc/core/providers/openai.py +60 -5
- ripperdoc/core/query.py +140 -39
- ripperdoc/core/query_utils.py +2 -0
- ripperdoc/core/skills.py +9 -3
- ripperdoc/core/system_prompt.py +4 -2
- ripperdoc/core/tool.py +9 -5
- ripperdoc/sdk/client.py +2 -2
- ripperdoc/tools/ask_user_question_tool.py +5 -3
- ripperdoc/tools/background_shell.py +2 -1
- ripperdoc/tools/bash_output_tool.py +1 -1
- ripperdoc/tools/bash_tool.py +30 -20
- ripperdoc/tools/dynamic_mcp_tool.py +29 -8
- ripperdoc/tools/enter_plan_mode_tool.py +1 -1
- ripperdoc/tools/exit_plan_mode_tool.py +1 -1
- ripperdoc/tools/file_edit_tool.py +8 -4
- ripperdoc/tools/file_read_tool.py +9 -5
- ripperdoc/tools/file_write_tool.py +9 -5
- ripperdoc/tools/glob_tool.py +3 -2
- ripperdoc/tools/grep_tool.py +3 -2
- ripperdoc/tools/kill_bash_tool.py +1 -1
- ripperdoc/tools/ls_tool.py +1 -1
- ripperdoc/tools/mcp_tools.py +13 -10
- ripperdoc/tools/multi_edit_tool.py +8 -7
- ripperdoc/tools/notebook_edit_tool.py +7 -4
- ripperdoc/tools/skill_tool.py +1 -1
- ripperdoc/tools/task_tool.py +5 -4
- ripperdoc/tools/todo_tool.py +2 -2
- ripperdoc/tools/tool_search_tool.py +3 -2
- ripperdoc/utils/conversation_compaction.py +11 -7
- ripperdoc/utils/file_watch.py +8 -2
- ripperdoc/utils/json_utils.py +2 -1
- ripperdoc/utils/mcp.py +11 -3
- ripperdoc/utils/memory.py +4 -2
- ripperdoc/utils/message_compaction.py +21 -7
- ripperdoc/utils/message_formatting.py +11 -7
- ripperdoc/utils/messages.py +105 -66
- ripperdoc/utils/path_ignore.py +38 -12
- ripperdoc/utils/permissions/path_validation_utils.py +2 -1
- ripperdoc/utils/permissions/shell_command_validation.py +427 -91
- ripperdoc/utils/safe_get_cwd.py +2 -1
- ripperdoc/utils/session_history.py +13 -6
- ripperdoc/utils/todo.py +2 -1
- ripperdoc/utils/token_estimation.py +6 -1
- {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/METADATA +24 -3
- ripperdoc-0.2.9.dist-info/RECORD +123 -0
- ripperdoc-0.2.7.dist-info/RECORD +0 -113
- {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/top_level.txt +0 -0
ripperdoc/core/providers/anthropic.py

@@ -3,10 +3,13 @@
 from __future__ import annotations
 
 import asyncio
+import json
 import time
 from typing import Any, Awaitable, Callable, Dict, List, Optional
+from uuid import uuid4
 
 import anthropic
+import httpx
 from anthropic import AsyncAnthropic
 
 from ripperdoc.core.config import ModelProfile
@@ -15,13 +18,11 @@ from ripperdoc.core.providers.base import (
     ProviderClient,
     ProviderResponse,
     call_with_timeout_and_retries,
-    iter_with_timeout,
     sanitize_tool_history,
 )
 from ripperdoc.core.query_utils import (
     anthropic_usage_tokens,
     build_anthropic_tool_schemas,
-    content_blocks_from_anthropic_response,
     estimate_cost_usd,
 )
 from ripperdoc.core.tool import Tool
@@ -63,8 +64,99 @@ def _classify_anthropic_error(exc: Exception) -> tuple[str, str]:
     return "unknown_error", f"Unexpected error ({exc_type}): {exc_msg}"
 
 
+def _content_blocks_from_stream_state(
+    collected_text: List[str],
+    collected_thinking: List[str],
+    collected_tool_calls: Dict[int, Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Build content blocks from accumulated stream state."""
+    blocks: List[Dict[str, Any]] = []
+
+    # Add thinking block if present
+    if collected_thinking:
+        blocks.append(
+            {
+                "type": "thinking",
+                "thinking": "".join(collected_thinking),
+            }
+        )
+
+    # Add text block if present
+    if collected_text:
+        blocks.append(
+            {
+                "type": "text",
+                "text": "".join(collected_text),
+            }
+        )
+
+    # Add tool_use blocks
+    for idx in sorted(collected_tool_calls.keys()):
+        call = collected_tool_calls[idx]
+        name = call.get("name")
+        if not name:
+            continue
+        tool_use_id = call.get("id") or str(uuid4())
+        blocks.append(
+            {
+                "type": "tool_use",
+                "tool_use_id": tool_use_id,
+                "name": name,
+                "input": call.get("input", {}),
+            }
+        )
+
+    return blocks
+
+
+def _content_blocks_from_response(response: Any) -> List[Dict[str, Any]]:
+    """Normalize Anthropic response content to our internal block format."""
+    blocks: List[Dict[str, Any]] = []
+    for block in getattr(response, "content", []) or []:
+        btype = getattr(block, "type", None)
+        if btype == "text":
+            blocks.append({"type": "text", "text": getattr(block, "text", "")})
+        elif btype == "thinking":
+            blocks.append(
+                {
+                    "type": "thinking",
+                    "thinking": getattr(block, "thinking", None) or "",
+                    "signature": getattr(block, "signature", None),
+                }
+            )
+        elif btype == "redacted_thinking":
+            blocks.append(
+                {
+                    "type": "redacted_thinking",
+                    "data": getattr(block, "data", None),
+                    "signature": getattr(block, "signature", None),
+                }
+            )
+        elif btype == "tool_use":
+            raw_input = getattr(block, "input", {}) or {}
+            blocks.append(
+                {
+                    "type": "tool_use",
+                    "tool_use_id": getattr(block, "id", None) or str(uuid4()),
+                    "name": getattr(block, "name", None),
+                    "input": raw_input if isinstance(raw_input, dict) else {},
+                }
+            )
+    return blocks
+
+
 class AnthropicClient(ProviderClient):
-    """Anthropic client with streaming and non-streaming support."""
+    """Anthropic client with streaming and non-streaming support.
+
+    Streaming mode (default):
+    - Uses event-based streaming to capture both thinking and text tokens
+    - Timeout applies per-token (chunk), not to the entire request
+    - Thinking tokens are streamed in real-time via progress_callback
+
+    Non-streaming mode:
+    - Makes a single blocking request
+    - Timeout applies to the entire request
+    """
 
     def __init__(self, client_factory: Optional[Callable[[], Awaitable[AsyncAnthropic]]] = None):
         self._client_factory = client_factory
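
Note: the two helpers above define the internal content-block shape that both request paths now return. As a minimal illustration with invented sample state (the helper is private, so this is exploration only, not a supported API):

    # Hypothetical usage of the private helper added above (assumes ripperdoc 0.2.9 installed).
    from ripperdoc.core.providers.anthropic import _content_blocks_from_stream_state

    blocks = _content_blocks_from_stream_state(
        ["Hello ", "world"],                 # collected_text fragments
        ["I should greet the user."],        # collected_thinking fragments
        {0: {"id": "toolu_01", "name": "bash", "input": {"command": "ls"}}},
    )
    # Thinking first, then text, then tool_use blocks in stream order:
    # [{'type': 'thinking', 'thinking': 'I should greet the user.'},
    #  {'type': 'text', 'text': 'Hello world'},
    #  {'type': 'tool_use', 'tool_use_id': 'toolu_01', 'name': 'bash', 'input': {'command': 'ls'}}]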
@@ -109,6 +201,15 @@ class AnthropicClient(ProviderClient):
         except Exception as exc:
             duration_ms = (time.time() - start_time) * 1000
             error_code, error_message = _classify_anthropic_error(exc)
+            logger.debug(
+                "[anthropic_client] Exception details",
+                extra={
+                    "model": model_profile.model,
+                    "exception_type": type(exc).__name__,
+                    "exception_str": str(exc),
+                    "error_code": error_code,
+                },
+            )
             logger.error(
                 "[anthropic_client] API call failed",
                 extra={
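
Note: the debug entries added here rely on standard logging semantics: keys passed via extra= become attributes on the LogRecord and only appear in output if the active formatter (or a structured-logging handler) references them. A standalone sketch with the standard library, independent of ripperdoc's own logger configuration:

    import logging

    # The format string reads the custom "model" attribute supplied via extra=.
    logging.basicConfig(format="%(levelname)s %(message)s model=%(model)s", level=logging.DEBUG)
    log = logging.getLogger("anthropic_client")
    log.debug("[anthropic_client] Exception details", extra={"model": "example-model"})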
@@ -141,95 +242,115 @@ class AnthropicClient(ProviderClient):
     ) -> ProviderResponse:
         """Internal implementation of call, may raise exceptions."""
         tool_schemas = await build_anthropic_tool_schemas(tools)
-        collected_text: List[str] = []
-        reasoning_parts: List[str] = []
         response_metadata: Dict[str, Any] = {}
 
-
+        logger.debug(
+            "[anthropic_client] Preparing request",
+            extra={
+                "model": model_profile.model,
+                "tool_mode": tool_mode,
+                "stream": stream,
+                "max_thinking_tokens": max_thinking_tokens,
+                "num_tools": len(tool_schemas),
+            },
+        )
+
+        anthropic_kwargs: Dict[str, Any] = {}
+        if model_profile.api_base:
+            anthropic_kwargs["base_url"] = model_profile.api_base
         if model_profile.api_key:
             anthropic_kwargs["api_key"] = model_profile.api_key
         auth_token = getattr(model_profile, "auth_token", None)
         if auth_token:
             anthropic_kwargs["auth_token"] = auth_token
 
+        # Set timeout for the Anthropic SDK client
+        # For streaming, we want a long timeout since models may take time to start responding
+        # httpx.Timeout: (connect, read, write, pool)
+        if stream:
+            # For streaming: long read timeout, reasonable connect timeout
+            # The read timeout applies to waiting for each chunk from the server
+            timeout_config = httpx.Timeout(
+                connect=60.0,  # 60 seconds to establish connection
+                read=600.0,  # 10 minutes to wait for each chunk (model may be thinking)
+                write=60.0,  # 60 seconds to send request
+                pool=60.0,  # 60 seconds to get connection from pool
+            )
+            anthropic_kwargs["timeout"] = timeout_config
+        elif request_timeout and request_timeout > 0:
+            # For non-streaming: use the provided timeout
+            anthropic_kwargs["timeout"] = request_timeout
+
         normalized_messages = sanitize_tool_history(list(normalized_messages))
 
         thinking_payload: Optional[Dict[str, Any]] = None
         if max_thinking_tokens > 0:
             thinking_payload = {"type": "enabled", "budget_tokens": max_thinking_tokens}
 
-
+        # Build common request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": model_profile.model,
+            "max_tokens": model_profile.max_tokens,
+            "system": system_prompt,
+            "messages": normalized_messages,
+            "temperature": model_profile.temperature,
+        }
+        if tool_schemas:
+            request_kwargs["tools"] = tool_schemas
+        if thinking_payload:
+            request_kwargs["thinking"] = thinking_payload
 
-
-
-
-
-
-
-
-
-
+        logger.debug(
+            "[anthropic_client] Request parameters",
+            extra={
+                "model": model_profile.model,
+                "request_kwargs": json.dumps(
+                    {k: v for k, v in request_kwargs.items() if k != "messages"},
+                    ensure_ascii=False,
+                    default=str,
+                )[:1000],
+                "thinking_payload": json.dumps(thinking_payload, ensure_ascii=False)
+                if thinking_payload
+                else None,
+            },
+        )
+
+        async with await self._client(anthropic_kwargs) as client:
+            if stream:
+                # Streaming mode: use event-based streaming with per-token timeout
+                content_blocks, usage_tokens = await self._stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    progress_callback=progress_callback,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-
-
-
-
+            else:
+                # Non-streaming mode: single request with overall timeout
+                content_blocks, usage_tokens = await self._non_stream_request(
+                    client=client,
+                    request_kwargs=request_kwargs,
+                    request_timeout=request_timeout,
+                    max_retries=max_retries,
+                    response_metadata=response_metadata,
                 )
-            try:
-                async for text in iter_with_timeout(stream_resp.text_stream, request_timeout):
-                    if text:
-                        collected_text.append(text)
-                        if progress_callback:
-                            try:
-                                await progress_callback(text)
-                            except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
-                                logger.warning(
-                                    "[anthropic_client] Stream callback failed: %s: %s",
-                                    type(cb_exc).__name__, cb_exc,
-                                )
-                getter = getattr(stream_resp, "get_final_response", None) or getattr(
-                    stream_resp, "get_final_message", None
-                )
-                if getter:
-                    return await getter()
-                return None
-            finally:
-                await stream_cm.__aexit__(None, None, None)
-
-        async def _non_stream_request() -> Any:
-            return await client.messages.create(
-                model=model_profile.model,
-                max_tokens=model_profile.max_tokens,
-                system=system_prompt,
-                messages=normalized_messages,  # type: ignore[arg-type]
-                tools=tool_schemas if tool_schemas else None,  # type: ignore
-                temperature=model_profile.temperature,
-                thinking=thinking_payload,  # type: ignore[arg-type]
-            )
-
-        timeout_for_call = None if stream else request_timeout
-        response = await call_with_timeout_and_retries(
-            _stream_request if stream else _non_stream_request,
-            timeout_for_call,
-            max_retries,
-        )
 
         duration_ms = (time.time() - start_time) * 1000
-        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
         cost_usd = estimate_cost_usd(model_profile, usage_tokens)
         record_usage(
             model_profile.model, duration_ms=duration_ms, cost_usd=cost_usd, **usage_tokens
         )
 
-
-
-
-
-
-
-
-
-
+        logger.debug(
+            "[anthropic_client] Response content blocks",
+            extra={
+                "model": model_profile.model,
+                "content_blocks": json.dumps(content_blocks, ensure_ascii=False)[:1000],
+                "usage_tokens": json.dumps(usage_tokens, ensure_ascii=False),
+                "metadata": json.dumps(response_metadata, ensure_ascii=False)[:500],
+            },
+        )
 
         logger.info(
             "[anthropic_client] Response received",
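
Note: the httpx.Timeout added in this hunk is what makes streaming tolerant of long thinking pauses, since httpx applies the read limit per chunk rather than to the whole response. A standalone sketch of both configurations (values copied from the hunk; httpx requires either a single default or all four phase limits):

    import httpx

    streaming_timeout = httpx.Timeout(
        connect=60.0,  # establish the connection
        read=600.0,    # maximum wait between chunks while the model is thinking
        write=60.0,    # send the request body
        pool=60.0,     # acquire a connection from the pool
    )
    non_streaming_timeout = httpx.Timeout(120.0)  # one default applied to every phase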
@@ -238,6 +359,8 @@ class AnthropicClient(ProviderClient):
                 "duration_ms": round(duration_ms, 2),
                 "tool_mode": tool_mode,
                 "tool_schemas": len(tool_schemas),
+                "stream": stream,
+                "content_blocks": len(content_blocks),
             },
         )
 
@@ -248,3 +371,360 @@ class AnthropicClient(ProviderClient):
             duration_ms=duration_ms,
             metadata=response_metadata,
         )
+
+    async def _stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        progress_callback: Optional[ProgressCallback],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a streaming request with per-token timeout.
+
+        Uses Anthropic's event-based streaming API to capture:
+        - thinking tokens (streamed in real-time)
+        - text tokens (streamed in real-time)
+        - tool_use blocks
+
+        In streaming mode:
+        - Connection timeout uses request_timeout
+        - Per-event timeout is disabled (None) because the model may take
+          a long time to generate the first token (especially during thinking)
+        - Once streaming starts, events should flow continuously
+        """
+        collected_text: List[str] = []
+        collected_thinking: List[str] = []
+        collected_tool_calls: Dict[int, Dict[str, Any]] = {}
+        usage_tokens: Dict[str, int] = {}
+
+        # Use mutable containers to track state across event handling
+        current_block_index_ref: List[int] = [-1]
+        current_block_type_ref: List[Optional[str]] = [None]
+
+        event_count = 0
+        message_stop_received = False
+
+        async def _do_stream() -> None:
+            nonlocal event_count, message_stop_received
+            event_count = 0
+            message_stop_received = False
+
+            logger.debug(
+                "[anthropic_client] Initiating stream request",
+                extra={
+                    "model": request_kwargs.get("model"),
+                },
+            )
+
+            # Create the stream - this initiates the connection
+            stream = client.messages.stream(**request_kwargs)
+
+            # Enter the stream context
+            stream_manager = await stream.__aenter__()
+
+            try:
+                # Iterate over events
+                # Some API proxies don't properly close the stream after message_stop,
+                # so we break out of the loop when we receive message_stop
+                async for event in stream_manager:
+                    event_count += 1
+                    event_type = getattr(event, "type", "unknown")
+
+                    await self._handle_stream_event(
+                        event=event,
+                        collected_text=collected_text,
+                        collected_thinking=collected_thinking,
+                        collected_tool_calls=collected_tool_calls,
+                        usage_tokens=usage_tokens,
+                        progress_callback=progress_callback,
+                        current_block_index_ref=current_block_index_ref,
+                        current_block_type_ref=current_block_type_ref,
+                    )
+
+                    # Check if we received message_stop - break out of loop
+                    # Some API proxies don't properly close the SSE stream
+                    if event_type == "message_stop":
+                        message_stop_received = True
+                        break
+
+            except Exception:
+                raise
+            finally:
+                try:
+                    # Use timeout for __aexit__ in case the stream doesn't close properly
+                    await asyncio.wait_for(stream.__aexit__(None, None, None), timeout=5.0)
+                except asyncio.TimeoutError:
+                    pass  # Stream didn't close properly, continue anyway
+                except Exception:
+                    pass  # Ignore __aexit__ errors
+
+        # For streaming, we don't use call_with_timeout_and_retries on the whole operation
+        # Instead, timeout is applied per-event inside _iter_events_with_timeout
+        # But we still want retries for connection failures
+        attempts = max(0, int(max_retries)) + 1
+        last_error: Optional[Exception] = None
+
+        for attempt in range(1, attempts + 1):
+            try:
+                # Reset state for retry
+                collected_text.clear()
+                collected_thinking.clear()
+                collected_tool_calls.clear()
+                usage_tokens.clear()
+                current_block_index_ref[0] = -1
+                current_block_type_ref[0] = None
+
+                await _do_stream()
+                break  # Success
+            except asyncio.TimeoutError as exc:
+                last_error = exc
+                if attempt == attempts:
+                    break
+                delay = 0.5 * (2 ** (attempt - 1))  # Exponential backoff
+                logger.warning(
+                    "[anthropic_client] Stream timed out; retrying",
+                    extra={
+                        "attempt": attempt,
+                        "max_retries": max_retries,
+                        "delay_seconds": delay,
+                    },
+                )
+                await asyncio.sleep(delay)
+            except asyncio.CancelledError:
+                raise
+            except (RuntimeError, ValueError, TypeError, OSError, ConnectionError) as exc:
+                # Non-timeout errors: retry for connection errors only
+                if isinstance(exc, (OSError, ConnectionError)):
+                    last_error = exc
+                    if attempt == attempts:
+                        raise
+                    delay = 0.5 * (2 ** (attempt - 1))
+                    logger.warning(
+                        "[anthropic_client] Connection error; retrying",
+                        extra={
+                            "attempt": attempt,
+                            "error": str(exc),
+                        },
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    raise
+
+        if (
+            last_error
+            and not collected_text
+            and not collected_thinking
+            and not collected_tool_calls
+        ):
+            raise RuntimeError(f"Stream failed after {attempts} attempts") from last_error
+
+        # Store reasoning content in metadata
+        if collected_thinking:
+            response_metadata["reasoning_content"] = "".join(collected_thinking)
+
+        content_blocks = _content_blocks_from_stream_state(
+            collected_text, collected_thinking, collected_tool_calls
+        )
+
+        return content_blocks, usage_tokens
+
+    async def _handle_stream_event(
+        self,
+        *,
+        event: Any,
+        collected_text: List[str],
+        collected_thinking: List[str],
+        collected_tool_calls: Dict[int, Dict[str, Any]],
+        usage_tokens: Dict[str, int],
+        progress_callback: Optional[ProgressCallback],
+        current_block_index_ref: List[int],
+        current_block_type_ref: List[Optional[str]],
+    ) -> None:
+        """Handle a single stream event.
+
+        Supports both standard Anthropic API events and non-standard formats
+        from API proxies like aiping.cn.
+
+        Standard Anthropic events:
+        - message_start, content_block_start, content_block_delta, content_block_stop
+        - message_delta, message_stop
+
+        Non-standard events (aiping.cn style):
+        - thinking (direct thinking content)
+        - text (direct text content)
+        - signature (thinking signature)
+        """
+        event_type = getattr(event, "type", None)
+
+        if event_type == "message_start":
+            # Extract initial usage info if available
+            message = getattr(event, "message", None)
+            if message:
+                usage = getattr(message, "usage", None)
+                if usage:
+                    usage_tokens.update(anthropic_usage_tokens(usage))
+
+        elif event_type == "content_block_start":
+            # New content block starting
+            index = getattr(event, "index", 0)
+            content_block = getattr(event, "content_block", None)
+            if content_block:
+                block_type = getattr(content_block, "type", None)
+                current_block_index_ref[0] = index
+                current_block_type_ref[0] = block_type
+
+                if block_type == "tool_use":
+                    # Initialize tool call state
+                    collected_tool_calls[index] = {
+                        "id": getattr(content_block, "id", None),
+                        "name": getattr(content_block, "name", None),
+                        "input_json": "",
+                        "input": {},
+                    }
+                    # Announce tool start
+                    if progress_callback:
+                        tool_name = getattr(content_block, "name", "unknown")
+                        try:
+                            await progress_callback(f"[tool:{tool_name}]")
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        elif event_type == "content_block_delta":
+            # Content delta within a block
+            index = getattr(event, "index", current_block_index_ref[0])
+            delta = getattr(event, "delta", None)
+            if not delta:
+                return
+
+            delta_type = getattr(delta, "type", None)
+
+            if delta_type == "thinking_delta":
+                # Thinking content delta
+                thinking_text = getattr(delta, "thinking", "")
+                if thinking_text:
+                    collected_thinking.append(thinking_text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(thinking_text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__,
+                                cb_exc,
+                            )
+
+            elif delta_type == "text_delta":
+                # Text content delta
+                text = getattr(delta, "text", "")
+                if text:
+                    collected_text.append(text)
+                    if progress_callback:
+                        try:
+                            await progress_callback(text)
+                        except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                            logger.warning(
+                                "[anthropic_client] Progress callback failed: %s: %s",
+                                type(cb_exc).__name__,
+                                cb_exc,
+                            )
+
+            elif delta_type == "input_json_delta":
+                # Tool input JSON delta
+                partial_json = getattr(delta, "partial_json", "")
+                if partial_json and index in collected_tool_calls:
+                    collected_tool_calls[index]["input_json"] += partial_json
+                    if progress_callback:
+                        try:
+                            await progress_callback(partial_json)
+                        except (RuntimeError, ValueError, TypeError, OSError):
+                            pass
+
+        # ===== Non-standard events from aiping.cn and similar proxies =====
+        # NOTE: aiping.cn sends BOTH standard (content_block_delta) and non-standard
+        # (text, thinking) events. We only process the non-standard events if we
+        # haven't already collected content from standard events in this block.
+        # This is controlled by checking if the standard delta was processed.
+
+        elif event_type == "thinking":
+            # Direct thinking content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "text":
+            # Direct text content (non-standard, aiping.cn style)
+            # Skip - already handled via content_block_delta (aiping.cn sends both)
+            pass
+
+        elif event_type == "signature":
+            # Thinking signature (non-standard, aiping.cn style)
+            pass
+
+        # ===== Standard events continued =====
+
+        elif event_type == "content_block_stop":
+            # Content block finished
+            index = getattr(event, "index", current_block_index_ref[0])
+
+            # Parse accumulated JSON for tool calls
+            if index in collected_tool_calls:
+                import json
+
+                json_str = collected_tool_calls[index].get("input_json", "")
+                if json_str:
+                    try:
+                        collected_tool_calls[index]["input"] = json.loads(json_str)
+                    except json.JSONDecodeError:
+                        logger.warning(
+                            "[anthropic_client] Failed to parse tool input JSON",
+                            extra={"json": json_str[:200]},
+                        )
+                        collected_tool_calls[index]["input"] = {}
+
+        elif event_type == "message_delta":
+            # Message-level delta (usually contains usage info at the end)
+            usage = getattr(event, "usage", None)
+            if usage:
+                # Update with final usage - output_tokens comes here
+                usage_tokens["output_tokens"] = getattr(usage, "output_tokens", 0)
+
+        elif event_type == "message_stop":
+            # Message complete
+            pass
+
+        # Unknown event types are silently ignored
+
+    async def _non_stream_request(
+        self,
+        *,
+        client: AsyncAnthropic,
+        request_kwargs: Dict[str, Any],
+        request_timeout: Optional[float],
+        max_retries: int,
+        response_metadata: Dict[str, Any],
+    ) -> tuple[List[Dict[str, Any]], Dict[str, int]]:
+        """Execute a non-streaming request with overall timeout."""
+
+        async def _do_request() -> Any:
+            return await client.messages.create(**request_kwargs)
+
+        response = await call_with_timeout_and_retries(
+            _do_request,
+            request_timeout,
+            max_retries,
+        )
+
+        usage_tokens = anthropic_usage_tokens(getattr(response, "usage", None))
+        content_blocks = _content_blocks_from_response(response)
+
+        # Extract reasoning content for metadata
+        for block in content_blocks:
+            if block.get("type") == "thinking":
+                thinking_text = block.get("thinking") or ""
+                if thinking_text:
+                    response_metadata["reasoning_content"] = thinking_text
+                    break
+
+        return content_blocks, usage_tokens