ripperdoc 0.2.3-py3-none-any.whl → 0.2.5-py3-none-any.whl
This diff compares the contents of publicly released package versions as they appear in their public registries and is provided for informational purposes only.
- ripperdoc/__init__.py +1 -1
- ripperdoc/__main__.py +0 -5
- ripperdoc/cli/cli.py +37 -16
- ripperdoc/cli/commands/__init__.py +2 -0
- ripperdoc/cli/commands/agents_cmd.py +12 -9
- ripperdoc/cli/commands/compact_cmd.py +7 -3
- ripperdoc/cli/commands/context_cmd.py +35 -15
- ripperdoc/cli/commands/doctor_cmd.py +27 -14
- ripperdoc/cli/commands/exit_cmd.py +1 -1
- ripperdoc/cli/commands/mcp_cmd.py +13 -8
- ripperdoc/cli/commands/memory_cmd.py +5 -5
- ripperdoc/cli/commands/models_cmd.py +47 -16
- ripperdoc/cli/commands/permissions_cmd.py +302 -0
- ripperdoc/cli/commands/resume_cmd.py +1 -2
- ripperdoc/cli/commands/tasks_cmd.py +24 -13
- ripperdoc/cli/ui/rich_ui.py +523 -396
- ripperdoc/cli/ui/tool_renderers.py +298 -0
- ripperdoc/core/agents.py +172 -4
- ripperdoc/core/config.py +130 -6
- ripperdoc/core/default_tools.py +13 -2
- ripperdoc/core/permissions.py +20 -14
- ripperdoc/core/providers/__init__.py +31 -15
- ripperdoc/core/providers/anthropic.py +122 -8
- ripperdoc/core/providers/base.py +93 -15
- ripperdoc/core/providers/gemini.py +539 -96
- ripperdoc/core/providers/openai.py +371 -26
- ripperdoc/core/query.py +301 -62
- ripperdoc/core/query_utils.py +51 -7
- ripperdoc/core/skills.py +295 -0
- ripperdoc/core/system_prompt.py +79 -67
- ripperdoc/core/tool.py +15 -6
- ripperdoc/sdk/client.py +14 -1
- ripperdoc/tools/ask_user_question_tool.py +431 -0
- ripperdoc/tools/background_shell.py +82 -26
- ripperdoc/tools/bash_tool.py +356 -209
- ripperdoc/tools/dynamic_mcp_tool.py +428 -0
- ripperdoc/tools/enter_plan_mode_tool.py +226 -0
- ripperdoc/tools/exit_plan_mode_tool.py +153 -0
- ripperdoc/tools/file_edit_tool.py +53 -10
- ripperdoc/tools/file_read_tool.py +17 -7
- ripperdoc/tools/file_write_tool.py +49 -13
- ripperdoc/tools/glob_tool.py +10 -9
- ripperdoc/tools/grep_tool.py +182 -51
- ripperdoc/tools/ls_tool.py +6 -6
- ripperdoc/tools/mcp_tools.py +172 -413
- ripperdoc/tools/multi_edit_tool.py +49 -9
- ripperdoc/tools/notebook_edit_tool.py +57 -13
- ripperdoc/tools/skill_tool.py +205 -0
- ripperdoc/tools/task_tool.py +91 -9
- ripperdoc/tools/todo_tool.py +12 -12
- ripperdoc/tools/tool_search_tool.py +5 -6
- ripperdoc/utils/coerce.py +34 -0
- ripperdoc/utils/context_length_errors.py +252 -0
- ripperdoc/utils/file_watch.py +5 -4
- ripperdoc/utils/json_utils.py +4 -4
- ripperdoc/utils/log.py +3 -3
- ripperdoc/utils/mcp.py +82 -22
- ripperdoc/utils/memory.py +9 -6
- ripperdoc/utils/message_compaction.py +19 -16
- ripperdoc/utils/messages.py +73 -8
- ripperdoc/utils/path_ignore.py +677 -0
- ripperdoc/utils/permissions/__init__.py +7 -1
- ripperdoc/utils/permissions/path_validation_utils.py +5 -3
- ripperdoc/utils/permissions/shell_command_validation.py +496 -18
- ripperdoc/utils/prompt.py +1 -1
- ripperdoc/utils/safe_get_cwd.py +5 -2
- ripperdoc/utils/session_history.py +38 -19
- ripperdoc/utils/todo.py +6 -2
- ripperdoc/utils/token_estimation.py +34 -0
- {ripperdoc-0.2.3.dist-info → ripperdoc-0.2.5.dist-info}/METADATA +14 -1
- ripperdoc-0.2.5.dist-info/RECORD +107 -0
- ripperdoc-0.2.3.dist-info/RECORD +0 -95
- {ripperdoc-0.2.3.dist-info → ripperdoc-0.2.5.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.3.dist-info → ripperdoc-0.2.5.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.3.dist-info → ripperdoc-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.3.dist-info → ripperdoc-0.2.5.dist-info}/top_level.txt +0 -0
ripperdoc/core/providers/openai.py +371 -26

```diff
@@ -2,9 +2,12 @@
 
 from __future__ import annotations
 
+import asyncio
 import time
 from typing import Any, Dict, List, Optional, cast
+from uuid import uuid4
 
+import openai
 from openai import AsyncOpenAI
 
 from ripperdoc.core.config import ModelProfile
@@ -13,12 +16,14 @@ from ripperdoc.core.providers.base import (
     ProviderClient,
     ProviderResponse,
     call_with_timeout_and_retries,
+    iter_with_timeout,
     sanitize_tool_history,
 )
 from ripperdoc.core.query_utils import (
     build_openai_tool_schemas,
     content_blocks_from_openai_choice,
     estimate_cost_usd,
+    _normalize_tool_args,
     openai_usage_tokens,
 )
 from ripperdoc.core.tool import Tool
@@ -28,6 +33,113 @@ from ripperdoc.utils.session_usage import record_usage
 logger = get_logger()
 
 
+def _classify_openai_error(exc: Exception) -> tuple[str, str]:
+    """Classify an OpenAI exception into error code and user-friendly message."""
+    exc_type = type(exc).__name__
+    exc_msg = str(exc)
+
+    if isinstance(exc, openai.AuthenticationError):
+        return "authentication_error", f"Authentication failed: {exc_msg}"
+    if isinstance(exc, openai.PermissionDeniedError):
+        # Check for common permission denied reasons
+        if "balance" in exc_msg.lower() or "insufficient" in exc_msg.lower():
+            return "insufficient_balance", f"Insufficient balance: {exc_msg}"
+        return "permission_denied", f"Permission denied: {exc_msg}"
+    if isinstance(exc, openai.NotFoundError):
+        return "model_not_found", f"Model not found: {exc_msg}"
+    if isinstance(exc, openai.BadRequestError):
+        # Check for context length errors
+        if "context" in exc_msg.lower() or "token" in exc_msg.lower():
+            return "context_length_exceeded", f"Context length exceeded: {exc_msg}"
+        if "content" in exc_msg.lower() and "policy" in exc_msg.lower():
+            return "content_policy_violation", f"Content policy violation: {exc_msg}"
+        return "bad_request", f"Invalid request: {exc_msg}"
+    if isinstance(exc, openai.RateLimitError):
+        return "rate_limit", f"Rate limit exceeded: {exc_msg}"
+    if isinstance(exc, openai.APIConnectionError):
+        return "connection_error", f"Connection error: {exc_msg}"
+    if isinstance(exc, openai.APIStatusError):
+        return "api_error", f"API error ({exc.status_code}): {exc_msg}"
+    if isinstance(exc, asyncio.TimeoutError):
+        return "timeout", f"Request timed out: {exc_msg}"
+
+    # Generic fallback
+    return "unknown_error", f"Unexpected error ({exc_type}): {exc_msg}"
+
+
+def _effort_from_tokens(max_thinking_tokens: int) -> Optional[str]:
+    """Map a thinking token budget to a coarse effort label."""
+    if max_thinking_tokens <= 0:
+        return None
+    if max_thinking_tokens <= 1024:
+        return "low"
+    if max_thinking_tokens <= 8192:
+        return "medium"
+    return "high"
+
+
+def _detect_openai_vendor(model_profile: ModelProfile) -> str:
+    """Best-effort vendor hint for OpenAI-compatible endpoints."""
+    override = getattr(model_profile, "thinking_mode", None)
+    if isinstance(override, str) and override.strip():
+        return override.strip().lower()
+    base = (model_profile.api_base or "").lower()
+    name = (model_profile.model or "").lower()
+    if "openrouter.ai" in base:
+        return "openrouter"
+    if "deepseek" in base or name.startswith("deepseek"):
+        return "deepseek"
+    if "dashscope" in base or "qwen" in name:
+        return "qwen"
+    if "generativelanguage.googleapis.com" in base or name.startswith("gemini"):
+        return "gemini_openai"
+    if "gpt-5" in name:
+        return "openai_reasoning"
+    return "openai"
+
+
+def _build_thinking_kwargs(
+    model_profile: ModelProfile, max_thinking_tokens: int
+) -> tuple[Dict[str, Any], Dict[str, Any]]:
+    """Return (extra_body, top_level_kwargs) for thinking-enabled calls."""
+    extra_body: Dict[str, Any] = {}
+    top_level: Dict[str, Any] = {}
+    vendor = _detect_openai_vendor(model_profile)
+    effort = _effort_from_tokens(max_thinking_tokens)
+
+    if vendor == "deepseek":
+        if max_thinking_tokens != 0:
+            extra_body["thinking"] = {"type": "enabled"}
+    elif vendor == "qwen":
+        if max_thinking_tokens > 0:
+            extra_body["enable_thinking"] = True
+        elif max_thinking_tokens == 0:
+            extra_body["enable_thinking"] = False
+    elif vendor == "openrouter":
+        if max_thinking_tokens > 0:
+            extra_body["reasoning"] = {"max_tokens": max_thinking_tokens}
+        elif max_thinking_tokens == 0:
+            extra_body["reasoning"] = {"effort": "none"}
+    elif vendor == "gemini_openai":
+        google_cfg: Dict[str, Any] = {}
+        if max_thinking_tokens > 0:
+            google_cfg["thinking_budget"] = max_thinking_tokens
+            google_cfg["include_thoughts"] = True
+        if google_cfg:
+            extra_body["google"] = {"thinking_config": google_cfg}
+        if effort:
+            top_level["reasoning_effort"] = effort
+            extra_body.setdefault("reasoning", {"effort": effort})
+    elif vendor == "openai_reasoning":
+        if effort:
+            extra_body["reasoning"] = {"effort": effort}
+    else:
+        if effort:
+            extra_body["reasoning"] = {"effort": effort}
+
+    return extra_body, top_level
+
+
 class OpenAIClient(ProviderClient):
     """OpenAI-compatible client with streaming and non-streaming support."""
 
```
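The new module-level helpers isolate vendor quirks behind one call: `_detect_openai_vendor` sniffs the endpoint or model name (honoring an explicit `thinking_mode` override), `_effort_from_tokens` buckets a token budget into `low`/`medium`/`high`, and `_build_thinking_kwargs` combines the two into the `extra_body` and top-level kwargs each backend expects. A minimal sketch of how they compose, assuming the three helpers above are pasted into the same module; `StubProfile` is an illustrative stand-in for `ripperdoc.core.config.ModelProfile`, not the real class:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class StubProfile:
    """Illustrative stand-in: only the fields the helpers actually read."""
    model: str
    api_base: Optional[str] = None
    thinking_mode: Optional[str] = None  # explicit vendor override, if set


cases = [
    (StubProfile("deepseek-chat", "https://api.deepseek.com/v1"), 4096),
    (StubProfile("qwen-plus", "https://dashscope.aliyuncs.com/compatible-mode/v1"), 0),
    (StubProfile("gpt-5", "https://api.openai.com/v1"), 8192),
]
for profile, budget in cases:
    extra_body, top_level = _build_thinking_kwargs(profile, budget)
    print(profile.model, _detect_openai_vendor(profile), extra_body, top_level)
# deepseek-chat -> extra_body={"thinking": {"type": "enabled"}}
# qwen-plus (budget 0) -> extra_body={"enable_thinking": False}
# gpt-5 (budget 8192) -> extra_body={"reasoning": {"effort": "medium"}}
```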
```diff
@@ -43,33 +155,117 @@ class OpenAIClient(ProviderClient):
         progress_callback: Optional[ProgressCallback],
         request_timeout: Optional[float],
         max_retries: int,
+        max_thinking_tokens: int,
     ) -> ProviderResponse:
         start_time = time.time()
+
+        try:
+            return await self._call_impl(
+                model_profile=model_profile,
+                system_prompt=system_prompt,
+                normalized_messages=normalized_messages,
+                tools=tools,
+                tool_mode=tool_mode,
+                stream=stream,
+                progress_callback=progress_callback,
+                request_timeout=request_timeout,
+                max_retries=max_retries,
+                max_thinking_tokens=max_thinking_tokens,
+                start_time=start_time,
+            )
+        except asyncio.CancelledError:
+            raise  # Don't suppress task cancellation
+        except Exception as exc:
+            duration_ms = (time.time() - start_time) * 1000
+            error_code, error_message = _classify_openai_error(exc)
+            logger.error(
+                "[openai_client] API call failed",
+                extra={
+                    "model": model_profile.model,
+                    "error_code": error_code,
+                    "error_message": error_message,
+                    "duration_ms": round(duration_ms, 2),
+                },
+            )
+            return ProviderResponse.create_error(
+                error_code=error_code,
+                error_message=error_message,
+                duration_ms=duration_ms,
+            )
+
+    async def _call_impl(
+        self,
+        *,
+        model_profile: ModelProfile,
+        system_prompt: str,
+        normalized_messages: List[Dict[str, Any]],
+        tools: List[Tool[Any, Any]],
+        tool_mode: str,
+        stream: bool,
+        progress_callback: Optional[ProgressCallback],
+        request_timeout: Optional[float],
+        max_retries: int,
+        max_thinking_tokens: int,
+        start_time: float,
+    ) -> ProviderResponse:
+        """Internal implementation of call, may raise exceptions."""
         openai_tools = await build_openai_tool_schemas(tools)
         openai_messages: List[Dict[str, object]] = [
             {"role": "system", "content": system_prompt}
         ] + sanitize_tool_history(list(normalized_messages))
         collected_text: List[str] = []
+        streamed_tool_calls: Dict[int, Dict[str, Optional[str]]] = {}
+        streamed_tool_text: List[str] = []
+        streamed_usage: Dict[str, int] = {}
+        stream_reasoning_text: List[str] = []
+        stream_reasoning_details: List[Any] = []
+        response_metadata: Dict[str, Any] = {}
 
-
+        can_stream_text = stream and tool_mode == "text" and not openai_tools
+        can_stream_tools = stream and tool_mode != "text" and bool(openai_tools)
+        can_stream = can_stream_text or can_stream_tools
+        thinking_extra_body, thinking_top_level = _build_thinking_kwargs(
+            model_profile, max_thinking_tokens
+        )
 
         async with AsyncOpenAI(
             api_key=model_profile.api_key, base_url=model_profile.api_base
         ) as client:
 
             async def _stream_request() -> Dict[str, Dict[str, int]]:
-
-
-
-
-
-
-
+                announced_tool_indexes: set[int] = set()
+                stream_kwargs: Dict[str, Any] = {
+                    "model": model_profile.model,
+                    "messages": cast(Any, openai_messages),
+                    "tools": openai_tools if openai_tools else None,
+                    "temperature": model_profile.temperature,
+                    "max_tokens": model_profile.max_tokens,
+                    "stream": True,
+                    "stream_options": {"include_usage": True},
+                    **thinking_top_level,
+                }
+                if thinking_extra_body:
+                    stream_kwargs["extra_body"] = thinking_extra_body
+                stream_coro = client.chat.completions.create(  # type: ignore[call-overload]
+                    **stream_kwargs
                 )
-
-
+                stream_resp = (
+                    await asyncio.wait_for(stream_coro, timeout=request_timeout)
+                    if request_timeout and request_timeout > 0
+                    else await stream_coro
+                )
+                async for chunk in iter_with_timeout(stream_resp, request_timeout):
+                    if getattr(chunk, "usage", None):
+                        streamed_usage.update(openai_usage_tokens(chunk.usage))
+
+                    if not getattr(chunk, "choices", None):
+                        continue
                     delta = getattr(chunk.choices[0], "delta", None)
-
+                    if not delta:
+                        continue
+
+                    # Text deltas (rare in native tool mode but supported)
+                    delta_content = getattr(delta, "content", None)
                     text_delta = ""
                     if delta_content:
                         if isinstance(delta_content, list):
```
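`call` is now a thin guard around `_call_impl`: task cancellation propagates untouched, while every other exception is classified once and converted into a structured error result instead of crashing the caller. A minimal runnable sketch of the same pattern, with `classify` and `ErrorResult` as illustrative stand-ins for `_classify_openai_error` and `ProviderResponse.create_error`:

```python
import asyncio
import time
from dataclasses import dataclass
from typing import Any, Awaitable, Callable


@dataclass
class ErrorResult:  # stand-in for ProviderResponse.create_error(...)
    error_code: str
    error_message: str
    duration_ms: float


def classify(exc: Exception) -> tuple[str, str]:
    # Stand-in for _classify_openai_error: map exception type to (code, message).
    if isinstance(exc, asyncio.TimeoutError):
        return "timeout", f"Request timed out: {exc}"
    return "unknown_error", f"Unexpected error ({type(exc).__name__}): {exc}"


async def call(impl: Callable[[], Awaitable[Any]]) -> Any:
    start = time.time()
    try:
        return await impl()
    except asyncio.CancelledError:
        raise  # never swallow task cancellation
    except Exception as exc:
        code, msg = classify(exc)
        return ErrorResult(code, msg, (time.time() - start) * 1000)


async def main() -> None:
    async def boom() -> None:
        raise asyncio.TimeoutError("no bytes received in 30s")

    print(await call(boom))  # ErrorResult(error_code='timeout', ...)


asyncio.run(main())
```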
```diff
@@ -81,47 +277,195 @@ class OpenAIClient(ProviderClient):
                                 text_delta += text_val
                         elif isinstance(delta_content, str):
                             text_delta += delta_content
+                    delta_reasoning = getattr(delta, "reasoning_content", None) or getattr(
+                        delta, "reasoning", None
+                    )
+                    if isinstance(delta_reasoning, str):
+                        stream_reasoning_text.append(delta_reasoning)
+                    elif isinstance(delta_reasoning, list):
+                        for item in delta_reasoning:
+                            if isinstance(item, str):
+                                stream_reasoning_text.append(item)
+                    delta_reasoning_details = getattr(delta, "reasoning_details", None)
+                    if delta_reasoning_details:
+                        if isinstance(delta_reasoning_details, list):
+                            stream_reasoning_details.extend(delta_reasoning_details)
+                        else:
+                            stream_reasoning_details.append(delta_reasoning_details)
                     if text_delta:
-
+                        target_collector = (
+                            streamed_tool_text if can_stream_tools else collected_text
+                        )
+                        target_collector.append(text_delta)
                         if progress_callback:
                             try:
                                 await progress_callback(text_delta)
-                            except
-                                logger.
-
-
-
+                            except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                                logger.warning(
+                                    "[openai_client] Stream callback failed: %s: %s",
+                                    type(cb_exc).__name__, cb_exc,
+                                )
+
+                    # Tool call deltas for native tool mode
+                    if not can_stream_tools:
+                        continue
+
+                    for tool_delta in getattr(delta, "tool_calls", []) or []:
+                        idx = getattr(tool_delta, "index", 0) or 0
+                        state = streamed_tool_calls.get(
+                            idx, {"id": None, "name": None, "arguments": ""}
+                        )
+
+                        if getattr(tool_delta, "id", None):
+                            state["id"] = tool_delta.id
+
+                        function_delta = getattr(tool_delta, "function", None)
+                        if function_delta:
+                            fn_name = getattr(function_delta, "name", None)
+                            if fn_name:
+                                state["name"] = fn_name
+                            args_delta = getattr(function_delta, "arguments", None)
+                            if args_delta:
+                                state["arguments"] = (state.get("arguments") or "") + args_delta
+                                if progress_callback:
+                                    try:
+                                        await progress_callback(args_delta)
+                                    except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                                        logger.warning(
+                                            "[openai_client] Stream callback failed: %s: %s",
+                                            type(cb_exc).__name__, cb_exc,
+                                        )
+
+                        if idx not in announced_tool_indexes and state.get("name"):
+                            announced_tool_indexes.add(idx)
+                            if progress_callback:
+                                try:
+                                    await progress_callback(f"[tool:{state['name']}]")
+                                except (RuntimeError, ValueError, TypeError, OSError) as cb_exc:
+                                    logger.warning(
+                                        "[openai_client] Stream callback failed: %s: %s",
+                                        type(cb_exc).__name__, cb_exc,
+                                    )
+
+                        streamed_tool_calls[idx] = state
+
+                return {"usage": streamed_usage}
 
             async def _non_stream_request() -> Any:
+                kwargs: Dict[str, Any] = {
+                    "model": model_profile.model,
+                    "messages": cast(Any, openai_messages),
+                    "tools": openai_tools if openai_tools else None,  # type: ignore[arg-type]
+                    "temperature": model_profile.temperature,
+                    "max_tokens": model_profile.max_tokens,
+                    **thinking_top_level,
+                }
+                if thinking_extra_body:
+                    kwargs["extra_body"] = thinking_extra_body
                 return await client.chat.completions.create(  # type: ignore[call-overload]
-
-                    messages=cast(Any, openai_messages),
-                    tools=openai_tools if openai_tools else None,  # type: ignore[arg-type]
-                    temperature=model_profile.temperature,
-                    max_tokens=model_profile.max_tokens,
+                    **kwargs
                 )
 
+            timeout_for_call = None if can_stream else request_timeout
             openai_response: Any = await call_with_timeout_and_retries(
                 _stream_request if can_stream else _non_stream_request,
-
+                timeout_for_call,
                 max_retries,
             )
 
+            if (
+                can_stream_text
+                and not collected_text
+                and not streamed_tool_calls
+                and not streamed_tool_text
+            ):
+                logger.debug(
+                    "[openai_client] Streaming returned no content; retrying without stream",
+                    extra={"model": model_profile.model},
+                )
+                can_stream = False
+                can_stream_text = False
+                can_stream_tools = False
+                openai_response = await call_with_timeout_and_retries(
+                    _non_stream_request, request_timeout, max_retries
+                )
+
             duration_ms = (time.time() - start_time) * 1000
-            usage_tokens =
+            usage_tokens = (
+                streamed_usage
+                if can_stream
+                else openai_usage_tokens(getattr(openai_response, "usage", None))
+            )
             cost_usd = estimate_cost_usd(model_profile, usage_tokens)
             record_usage(
                 model_profile.model, duration_ms=duration_ms, cost_usd=cost_usd, **usage_tokens
             )
 
-
-
+            if not can_stream and (
+                not openai_response or not getattr(openai_response, "choices", None)
+            ):
+                logger.warning(
+                    "[openai_client] No choices returned from OpenAI response",
+                    extra={"model": model_profile.model},
+                )
+                empty_text = "Model returned no content."
+                return ProviderResponse(
+                    content_blocks=[{"type": "text", "text": empty_text}],
+                    usage_tokens=usage_tokens,
+                    cost_usd=cost_usd,
+                    duration_ms=duration_ms,
+                    metadata=response_metadata,
+                )
+
+            content_blocks: List[Dict[str, Any]] = []
+            finish_reason: Optional[str] = None
+            if can_stream_text:
                 content_blocks = [{"type": "text", "text": "".join(collected_text)}]
                 finish_reason = "stream"
+            elif can_stream_tools:
+                if streamed_tool_text:
+                    content_blocks.append({"type": "text", "text": "".join(streamed_tool_text)})
+                for idx in sorted(streamed_tool_calls.keys()):
+                    call = streamed_tool_calls[idx]
+                    name = call.get("name")
+                    if not name:
+                        continue
+                    tool_use_id = call.get("id") or str(uuid4())
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "tool_use_id": tool_use_id,
+                            "name": name,
+                            "input": _normalize_tool_args(call.get("arguments")),
+                        }
+                    )
+                finish_reason = "stream"
             else:
                 choice = openai_response.choices[0]
                 content_blocks = content_blocks_from_openai_choice(choice, tool_mode)
                 finish_reason = cast(Optional[str], getattr(choice, "finish_reason", None))
+                message_obj = getattr(choice, "message", None) or choice
+                reasoning_content = getattr(message_obj, "reasoning_content", None)
+                if reasoning_content:
+                    response_metadata["reasoning_content"] = reasoning_content
+                reasoning_field = getattr(message_obj, "reasoning", None)
+                if reasoning_field:
+                    response_metadata["reasoning"] = reasoning_field
+                    if "reasoning_content" not in response_metadata and isinstance(
+                        reasoning_field, str
+                    ):
+                        response_metadata["reasoning_content"] = reasoning_field
+                reasoning_details = getattr(message_obj, "reasoning_details", None)
+                if reasoning_details:
+                    response_metadata["reasoning_details"] = reasoning_details
+
+            if can_stream:
+                if stream_reasoning_text:
+                    joined = "".join(stream_reasoning_text)
+                    response_metadata["reasoning_content"] = joined
+                    response_metadata.setdefault("reasoning", joined)
+                if stream_reasoning_details:
+                    response_metadata["reasoning_details"] = stream_reasoning_details
 
             logger.info(
                 "[openai_client] Response received",
```
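In native tool mode the stream delivers each tool call as fragments keyed by `index`: the `id` may arrive on one chunk, the function name on another, and the JSON arguments in arbitrary slices. The hunk above merges fragments per index and only materializes a `tool_use` block once a name is known. A condensed, runnable sketch of that accumulation, with plain dicts standing in for the SDK's delta objects and `json.loads` standing in for ripperdoc's `_normalize_tool_args`:

```python
import json
from typing import Any, Dict, List, Optional
from uuid import uuid4


def accumulate(deltas: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    calls: Dict[int, Dict[str, Optional[str]]] = {}
    for d in deltas:
        idx = d.get("index", 0) or 0
        state = calls.get(idx, {"id": None, "name": None, "arguments": ""})
        if d.get("id"):
            state["id"] = d["id"]
        fn = d.get("function") or {}
        if fn.get("name"):
            state["name"] = fn["name"]
        if fn.get("arguments"):
            state["arguments"] = (state.get("arguments") or "") + fn["arguments"]
        calls[idx] = state

    blocks: List[Dict[str, Any]] = []
    for idx in sorted(calls):
        call = calls[idx]
        if not call.get("name"):
            continue  # never emit a tool_use without a tool name
        blocks.append({
            "type": "tool_use",
            "tool_use_id": call.get("id") or str(uuid4()),
            "name": call["name"],
            "input": json.loads(call.get("arguments") or "{}"),
        })
    return blocks


print(accumulate([
    {"index": 0, "id": "call_1", "function": {"name": "grep"}},
    {"index": 0, "function": {"arguments": '{"pat'}},
    {"index": 0, "function": {"arguments": 'tern": "todo"}'}},
]))
# [{'type': 'tool_use', 'tool_use_id': 'call_1', 'name': 'grep',
#   'input': {'pattern': 'todo'}}]
```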
```diff
@@ -139,4 +483,5 @@ class OpenAIClient(ProviderClient):
             usage_tokens=usage_tokens,
             cost_usd=cost_usd,
             duration_ms=duration_ms,
+            metadata=response_metadata,
         )
```
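The final hunk threads `response_metadata` onto the returned `ProviderResponse`, so reasoning captured during streaming (or found on the non-streaming message object) survives past the provider layer. A small sketch of that finalization step using the same field names; `StubResponse` is an illustrative stand-in for ripperdoc's `ProviderResponse`:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class StubResponse:  # stand-in, not ripperdoc's ProviderResponse
    content_blocks: List[Dict[str, Any]]
    metadata: Dict[str, Any] = field(default_factory=dict)


def finalize(text: str, reasoning_chunks: List[str]) -> StubResponse:
    metadata: Dict[str, Any] = {}
    if reasoning_chunks:
        # Mirror the streaming path: join deltas, expose both field spellings.
        joined = "".join(reasoning_chunks)
        metadata["reasoning_content"] = joined
        metadata.setdefault("reasoning", joined)
    return StubResponse(
        content_blocks=[{"type": "text", "text": text}],
        metadata=metadata,
    )


resp = finalize("The bug is on line 12.", ["Re-reading", " the traceback..."])
print(resp.metadata["reasoning_content"])  # "Re-reading the traceback..."
```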