devcopilot 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +17 -0
- api/admin_config.py +1303 -0
- api/admin_routes.py +287 -0
- api/admin_static/admin.css +459 -0
- api/admin_static/admin.js +497 -0
- api/admin_static/index.html +77 -0
- api/admin_urls.py +34 -0
- api/app.py +194 -0
- api/command_utils.py +164 -0
- api/dependencies.py +144 -0
- api/detection.py +152 -0
- api/gateway_model_ids.py +54 -0
- api/model_catalog.py +133 -0
- api/model_router.py +125 -0
- api/models/__init__.py +45 -0
- api/models/anthropic.py +234 -0
- api/models/openai_responses.py +28 -0
- api/models/responses.py +60 -0
- api/optimization_handlers.py +154 -0
- api/request_pipeline.py +424 -0
- api/routes.py +156 -0
- api/runtime.py +334 -0
- api/validation_log.py +48 -0
- api/web_server_tools.py +22 -0
- api/web_tools/__init__.py +17 -0
- api/web_tools/constants.py +15 -0
- api/web_tools/egress.py +99 -0
- api/web_tools/outbound.py +278 -0
- api/web_tools/parsers.py +104 -0
- api/web_tools/request.py +87 -0
- api/web_tools/streaming.py +206 -0
- cli/__init__.py +5 -0
- cli/claude_env.py +12 -0
- cli/entrypoints.py +166 -0
- cli/env.example +209 -0
- cli/launchers/__init__.py +1 -0
- cli/launchers/claude.py +84 -0
- cli/launchers/codex.py +204 -0
- cli/launchers/codex_model_catalog.py +186 -0
- cli/launchers/common.py +93 -0
- cli/managed/__init__.py +6 -0
- cli/managed/claude.py +215 -0
- cli/managed/manager.py +157 -0
- cli/managed/session.py +260 -0
- cli/process_registry.py +78 -0
- config/__init__.py +5 -0
- config/constants.py +13 -0
- config/logging_config.py +159 -0
- config/nim.py +118 -0
- config/paths.py +91 -0
- config/provider_catalog.py +259 -0
- config/provider_ids.py +7 -0
- config/settings.py +538 -0
- core/__init__.py +1 -0
- core/anthropic/__init__.py +46 -0
- core/anthropic/content.py +31 -0
- core/anthropic/conversion.py +587 -0
- core/anthropic/emitted_sse_tracker.py +346 -0
- core/anthropic/errors.py +70 -0
- core/anthropic/native_messages_request.py +280 -0
- core/anthropic/native_sse_block_policy.py +313 -0
- core/anthropic/provider_stream_error.py +34 -0
- core/anthropic/server_tool_sse.py +14 -0
- core/anthropic/sse.py +440 -0
- core/anthropic/stream_contracts.py +205 -0
- core/anthropic/stream_recovery.py +346 -0
- core/anthropic/stream_recovery_session.py +133 -0
- core/anthropic/thinking.py +140 -0
- core/anthropic/tokens.py +117 -0
- core/anthropic/tools.py +212 -0
- core/anthropic/utils.py +9 -0
- core/openai_responses/__init__.py +5 -0
- core/openai_responses/adapter.py +31 -0
- core/openai_responses/anthropic_sse.py +59 -0
- core/openai_responses/errors.py +22 -0
- core/openai_responses/events.py +19 -0
- core/openai_responses/ids.py +21 -0
- core/openai_responses/input.py +258 -0
- core/openai_responses/items.py +37 -0
- core/openai_responses/reasoning.py +52 -0
- core/openai_responses/stream.py +25 -0
- core/openai_responses/stream_state.py +654 -0
- core/openai_responses/tools.py +374 -0
- core/openai_responses/usage.py +37 -0
- core/rate_limit.py +60 -0
- core/trace.py +216 -0
- devcopilot-0.2.0.dist-info/METADATA +687 -0
- devcopilot-0.2.0.dist-info/RECORD +189 -0
- devcopilot-0.2.0.dist-info/WHEEL +4 -0
- devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
- devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
- messaging/__init__.py +26 -0
- messaging/cli_event_constants.py +67 -0
- messaging/command_context.py +66 -0
- messaging/command_dispatcher.py +37 -0
- messaging/commands.py +275 -0
- messaging/event_parser.py +181 -0
- messaging/limiter.py +300 -0
- messaging/models.py +36 -0
- messaging/node_event_pipeline.py +127 -0
- messaging/node_runner.py +342 -0
- messaging/platforms/__init__.py +15 -0
- messaging/platforms/base.py +228 -0
- messaging/platforms/discord.py +567 -0
- messaging/platforms/factory.py +103 -0
- messaging/platforms/outbox.py +144 -0
- messaging/platforms/telegram.py +688 -0
- messaging/platforms/voice_flow.py +295 -0
- messaging/rendering/__init__.py +3 -0
- messaging/rendering/discord_markdown.py +318 -0
- messaging/rendering/markdown_tables.py +49 -0
- messaging/rendering/profiles.py +55 -0
- messaging/rendering/telegram_markdown.py +327 -0
- messaging/safe_diagnostics.py +17 -0
- messaging/session.py +334 -0
- messaging/transcript.py +581 -0
- messaging/transcription.py +164 -0
- messaging/trees/__init__.py +15 -0
- messaging/trees/data.py +482 -0
- messaging/trees/manager.py +433 -0
- messaging/trees/processor.py +179 -0
- messaging/trees/repository.py +177 -0
- messaging/turn_intake.py +235 -0
- messaging/ui_updates.py +101 -0
- messaging/voice.py +76 -0
- messaging/workflow.py +200 -0
- providers/__init__.py +31 -0
- providers/base.py +152 -0
- providers/cerebras/__init__.py +7 -0
- providers/cerebras/client.py +31 -0
- providers/cerebras/request.py +55 -0
- providers/codestral/__init__.py +7 -0
- providers/codestral/client.py +34 -0
- providers/deepseek/__init__.py +11 -0
- providers/deepseek/client.py +51 -0
- providers/deepseek/request.py +475 -0
- providers/defaults.py +41 -0
- providers/error_mapping.py +309 -0
- providers/exceptions.py +113 -0
- providers/fireworks/__init__.py +5 -0
- providers/fireworks/client.py +45 -0
- providers/fireworks/request.py +48 -0
- providers/gemini/__init__.py +7 -0
- providers/gemini/client.py +49 -0
- providers/gemini/request.py +199 -0
- providers/groq/__init__.py +7 -0
- providers/groq/client.py +31 -0
- providers/groq/request.py +83 -0
- providers/kimi/__init__.py +10 -0
- providers/kimi/client.py +53 -0
- providers/kimi/request.py +42 -0
- providers/llamacpp/__init__.py +3 -0
- providers/llamacpp/client.py +16 -0
- providers/lmstudio/__init__.py +5 -0
- providers/lmstudio/client.py +16 -0
- providers/mistral/__init__.py +7 -0
- providers/mistral/client.py +31 -0
- providers/mistral/request.py +37 -0
- providers/model_listing.py +133 -0
- providers/nvidia_nim/__init__.py +7 -0
- providers/nvidia_nim/client.py +91 -0
- providers/nvidia_nim/request.py +430 -0
- providers/nvidia_nim/voice.py +95 -0
- providers/ollama/__init__.py +7 -0
- providers/ollama/client.py +39 -0
- providers/open_router/__init__.py +7 -0
- providers/open_router/client.py +124 -0
- providers/open_router/request.py +42 -0
- providers/opencode/__init__.py +11 -0
- providers/opencode/client.py +31 -0
- providers/opencode/request.py +35 -0
- providers/rate_limit.py +300 -0
- providers/registry.py +527 -0
- providers/transports/__init__.py +1 -0
- providers/transports/anthropic_messages/__init__.py +5 -0
- providers/transports/anthropic_messages/http.py +118 -0
- providers/transports/anthropic_messages/recovery.py +206 -0
- providers/transports/anthropic_messages/stream.py +295 -0
- providers/transports/anthropic_messages/transport.py +236 -0
- providers/transports/openai_chat/__init__.py +5 -0
- providers/transports/openai_chat/recovery.py +217 -0
- providers/transports/openai_chat/stream.py +384 -0
- providers/transports/openai_chat/tool_calls.py +293 -0
- providers/transports/openai_chat/transport.py +156 -0
- providers/wafer/__init__.py +10 -0
- providers/wafer/client.py +50 -0
- providers/zai/__init__.py +10 -0
- providers/zai/client.py +46 -0
- providers/zai/request.py +42 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""Always-on recovery helpers for truncated provider streams."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from copy import deepcopy
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
import jsonschema
|
|
14
|
+
import openai
|
|
15
|
+
from loguru import logger
|
|
16
|
+
|
|
17
|
+
# Total number of tries for the early (pre-commit) transparent retry path,
# counting the first attempt. Reported as ``max_attempts`` in the early-retry
# trace event emitted by StreamRecoverySession.
EARLY_TRANSPARENT_TOTAL_ATTEMPTS = 5
|
|
18
|
+
# Number of retries allowed after the first attempt (total attempts minus one).
# StreamRecoverySession compares its early-retry counter against this value.
EARLY_TRANSPARENT_MAX_RETRIES = EARLY_TRANSPARENT_TOTAL_ATTEMPTS - 1
|
|
19
|
+
# NOTE(review): presumably the attempt budget for midstream recovery; it is not
# referenced in the code visible here -- confirm against the transports.
MIDSTREAM_RECOVERY_ATTEMPTS = 5
|
|
20
|
+
# Default ``holdback_seconds`` for RecoveryHoldbackBuffer: how long events are
# held, measured from the first buffered event, before being committed.
EARLY_HOLDBACK_SECONDS = 0.75
|
|
21
|
+
# Default ``max_bytes`` for RecoveryHoldbackBuffer: buffered UTF-8 size at
# which held events are committed regardless of elapsed time.
RECOVERY_BUFFER_MAX_BYTES = 65_536
|
|
22
|
+
|
|
23
|
+
# Content of the user message appended to text continuation requests
# (see make_openai_text_recovery_body / make_native_text_recovery_body).
_RECOVERY_USER_PREFIX = (
|
|
24
|
+
"The previous provider stream was interrupted. Continue the assistant response "
|
|
25
|
+
"exactly where it stopped. Do not repeat text already written."
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TruncatedProviderStreamError(RuntimeError):
    """Internal signal that an upstream stream stopped without a terminal marker.

    ``is_retryable_stream_error`` always treats this exception as retryable.
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True, slots=True)
|
|
34
|
+
class ToolSchema:
|
|
35
|
+
"""Tool schema resolved from the original Anthropic request."""
|
|
36
|
+
|
|
37
|
+
name: str
|
|
38
|
+
input_schema: dict[str, Any]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True, slots=True)
|
|
42
|
+
class ToolRepair:
|
|
43
|
+
"""Accepted append-only tool JSON repair."""
|
|
44
|
+
|
|
45
|
+
suffix: str
|
|
46
|
+
parsed_input: dict[str, Any]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class RecoveryHoldbackBuffer:
    """Short-lived buffer that withholds downstream SSE events.

    Events are held until either the time since the first buffered event
    reaches ``holdback_seconds`` or the buffered UTF-8 size reaches
    ``max_bytes``. Holding them back lets an early stream cutoff be retried
    without the client having seen anything. Once committed, the buffer is a
    pass-through.
    """

    def __init__(
        self,
        *,
        holdback_seconds: float = EARLY_HOLDBACK_SECONDS,
        max_bytes: int = RECOVERY_BUFFER_MAX_BYTES,
        now: Callable[[], float] | None = None,
    ) -> None:
        """Create an empty, uncommitted buffer.

        Args:
            holdback_seconds: Hold duration, measured from the first push.
            max_bytes: Buffered byte count that forces a commit.
            now: Clock returning seconds as a float; ``time.monotonic`` when
                omitted.
        """
        self._holdback_seconds = holdback_seconds
        self._max_bytes = max_bytes
        self._now = now or time.monotonic
        self.committed = False
        self._events: list[str] = []
        self._bytes = 0
        self._started_at: float | None = None

    def push(self, event: str) -> list[str]:
        """Accept one event and return whatever should go downstream now.

        Returns ``[event]`` once committed, every held event when this push
        trips the size or time limit, and an empty list otherwise.
        """
        if self.committed:
            return [event]

        # The holdback clock starts with the first event that gets buffered.
        if self._started_at is None:
            self._started_at = self._now()

        self._events.append(event)
        self._bytes += len(event.encode("utf-8", errors="replace"))

        # The size cap is checked first; the clock is only read when needed.
        if self._bytes >= self._max_bytes:
            return self.flush()
        if self._now() - self._started_at >= self._holdback_seconds:
            return self.flush()
        return []

    def flush(self) -> list[str]:
        """Mark the buffer committed and hand back the held events.

        Returns an empty list when the buffer was already committed.
        """
        if self.committed:
            return []
        self.committed = True
        held = self._events
        self._clear()
        return held

    def discard(self) -> None:
        """Forget held events; the ``committed`` flag is left untouched."""
        self._clear()

    def _clear(self) -> None:
        """Reset the event list, byte counter and holdback start time."""
        self._events = []
        self._bytes = 0
        self._started_at = None

    @property
    def has_buffered(self) -> bool:
        """Whether at least one event is currently being held."""
        return len(self._events) > 0
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def is_retryable_stream_error(exc: BaseException) -> bool:
    """Classify a provider stream failure as retryable or not.

    Retryable: truncated streams, HTTP 429 and 5xx statuses, OpenAI rate-limit
    errors, and the timeout/connection/protocol errors listed below.
    Not retryable: OpenAI authentication and bad-request errors, other HTTP
    statuses, and every exception type not listed here.
    """

    def transient_status(code: int) -> bool:
        # 429 (rate limited) or any 5xx status.
        return code == 429 or 500 <= code <= 599

    if isinstance(exc, TruncatedProviderStreamError):
        return True

    if isinstance(exc, (openai.AuthenticationError, openai.BadRequestError)):
        return False

    if isinstance(exc, httpx.HTTPStatusError):
        return transient_status(exc.response.status_code)

    if isinstance(exc, openai.RateLimitError):
        return True

    if isinstance(exc, openai.APIStatusError):
        code = getattr(exc, "status_code", None)
        return isinstance(code, int) and transient_status(code)

    transport_failures = (
        TimeoutError,
        httpx.ReadTimeout,
        httpx.ReadError,
        httpx.RemoteProtocolError,
        httpx.ConnectError,
        httpx.NetworkError,
        openai.APITimeoutError,
        openai.APIConnectionError,
    )
    return isinstance(exc, transport_failures)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def tool_schemas_by_name(request: Any) -> dict[str, ToolSchema]:
    """Index the request's tool input schemas by tool name.

    Reads ``request.tools`` (missing or empty gives an empty mapping). Tools
    whose name is not a non-empty string are skipped. A tool whose
    ``input_schema`` is not a dict gets ``{"type": "object"}``. Stored schemas
    are deep copies. If a name repeats, the later tool's schema replaces the
    earlier one.
    """
    resolved: dict[str, ToolSchema] = {}

    for entry in getattr(request, "tools", None) or ():
        tool_name = _tool_attr(entry, "name")
        if isinstance(tool_name, str) and tool_name:
            declared = _tool_attr(entry, "input_schema")
            effective = declared if isinstance(declared, dict) else {"type": "object"}
            resolved[tool_name] = ToolSchema(
                name=tool_name, input_schema=deepcopy(effective)
            )

    return resolved
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def validate_tool_input(
    tool_name: str, parsed_input: dict[str, Any], schemas: dict[str, ToolSchema]
) -> bool:
    """Check ``parsed_input`` against the JSON schema registered for the tool.

    Returns:
        ``True`` when the input validates, when ``tool_name`` has no entry in
        ``schemas``, or when the registered schema is itself invalid (a
        warning is logged and validation is skipped). ``False`` only when the
        input fails validation.
    """
    entry = schemas.get(tool_name)
    if entry is None:
        return True

    schema = entry.input_schema
    try:
        # Choose the validator class for this schema, then check the schema
        # itself before checking the instance.
        validator_type = jsonschema.validators.validator_for(schema)
        validator_type.check_schema(schema)
        validator_type(schema).validate(parsed_input)
    except jsonschema.exceptions.SchemaError as exc:
        logger.warning("Skipping invalid tool schema for {}: {}", tool_name, exc)
        return True
    except jsonschema.exceptions.ValidationError:
        return False
    else:
        return True
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def parse_complete_tool_input(
    raw_json: str, tool_name: str, schemas: dict[str, ToolSchema]
) -> dict[str, Any] | None:
    """Decode ``raw_json`` and return it only if it is a usable tool input.

    Returns the decoded object when the text is valid JSON, decodes to a dict,
    and passes ``validate_tool_input``. Returns ``None`` otherwise.
    """
    try:
        decoded = json.loads(raw_json)
    except json.JSONDecodeError:
        return None

    usable = isinstance(decoded, dict) and validate_tool_input(
        tool_name, decoded, schemas
    )
    return decoded if usable else None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def accept_tool_json_repair(
    prefix: str,
    candidate: str,
    *,
    tool_name: str,
    schemas: dict[str, ToolSchema],
) -> ToolRepair | None:
    """Find a suffix from ``candidate`` that completes ``prefix``.

    Each suffix derived from the candidate text is appended to ``prefix``.
    The first combination that parses as a schema-valid tool input is
    returned as a ``ToolRepair``. Returns ``None`` if none qualifies. The
    prefix itself is never modified.
    """
    for tail in _repair_suffix_candidates(prefix, candidate):
        completed = parse_complete_tool_input(prefix + tail, tool_name, schemas)
        if completed is None:
            continue
        return ToolRepair(suffix=tail, parsed_input=completed)
    return None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def continuation_suffix(existing: str, candidate: str) -> str | None:
    """Extract the genuinely new text from a continuation candidate.

    Resolution order:
      1. Empty candidate gives ``""``; empty ``existing`` gives the candidate.
      2. A candidate that restates all of ``existing`` gives the remainder.
      3. Otherwise the longest candidate prefix that matches the end of
         ``existing`` is dropped.
      4. With no overlap, the candidate is accepted whole only if it is
         shorter than ``max(200, len(existing) // 2)``; otherwise ``None`` is
         returned to reject what looks like an unrelated rewrite.
    """
    written = existing or ""
    proposed = candidate or ""

    if not proposed:
        return ""
    if not written:
        return proposed
    if proposed.startswith(written):
        return proposed[len(written) :]

    # Search from the longest possible overlap down to a single character.
    longest = min(len(written), len(proposed))
    overlap = next(
        (n for n in range(longest, 0, -1) if written.endswith(proposed[:n])),
        0,
    )
    if overlap:
        return proposed[overlap:]

    # Short standalone continuations are accepted; long unrelated text is not.
    rewrite_threshold = max(200, len(written) // 2)
    return proposed if len(proposed) < rewrite_threshold else None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def make_openai_text_recovery_body(
    body: dict[str, Any], partial: str
) -> dict[str, Any]:
    """Derive an OpenAI-chat request that continues an interrupted reply.

    The result is a deep copy of ``body`` with ``tools`` and ``tool_choice``
    removed and ``stream`` set to ``True``. Its messages end with the partial
    assistant text (when non-empty) followed by the continuation instruction.
    ``body`` is not modified.
    """
    recovery = deepcopy(body)
    for dropped in ("tools", "tool_choice"):
        recovery.pop(dropped, None)
    recovery["stream"] = True

    history = _copied_messages(recovery)
    trailer: list[dict[str, Any]] = [
        {"role": "user", "content": _RECOVERY_USER_PREFIX}
    ]
    if partial:
        # Replay what was already produced so the model can pick up from it.
        trailer.insert(0, {"role": "assistant", "content": partial})
    history.extend(trailer)

    recovery["messages"] = history
    return recovery
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def make_openai_tool_repair_body(
    body: dict[str, Any],
    *,
    tool_name: str,
    prefix: str,
    input_schema: dict[str, Any] | None,
) -> dict[str, Any]:
    """Derive an OpenAI-chat request asking for the missing JSON suffix.

    The result is a deep copy of ``body`` with ``tools`` and ``tool_choice``
    removed and ``stream`` set to ``True``. A final user message carries the
    repair prompt built from the tool name, the emitted JSON prefix and the
    schema. ``body`` is not modified.
    """
    recovery = deepcopy(body)
    for dropped in ("tools", "tool_choice"):
        recovery.pop(dropped, None)
    recovery["stream"] = True

    history = _copied_messages(recovery)
    prompt = _tool_repair_prompt(
        tool_name=tool_name, prefix=prefix, input_schema=input_schema
    )
    history.append({"role": "user", "content": prompt})

    recovery["messages"] = history
    return recovery
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def make_native_text_recovery_body(
    body: dict[str, Any], partial: str
) -> dict[str, Any]:
    """Derive a native Anthropic request that continues an interrupted reply.

    Works on a deep copy of ``body``: tool definitions and tool choice are
    stripped, streaming is forced on, and the message list gains the partial
    assistant text (if any) plus the continuation instruction as a user turn.
    """
    recovery = deepcopy(body)
    recovery.pop("tools", None)
    recovery.pop("tool_choice", None)
    recovery["stream"] = True

    conversation = _copied_messages(recovery)
    # Only replay the assistant turn when something was actually produced.
    appended = (
        [{"role": "assistant", "content": partial}] if partial else []
    )
    appended.append({"role": "user", "content": _RECOVERY_USER_PREFIX})
    conversation += appended

    recovery["messages"] = conversation
    return recovery
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def make_native_tool_repair_body(
    body: dict[str, Any],
    *,
    tool_name: str,
    prefix: str,
    input_schema: dict[str, Any] | None,
) -> dict[str, Any]:
    """Derive a native Anthropic request asking for the missing JSON suffix.

    Works on a deep copy of ``body``: tool definitions and tool choice are
    stripped, streaming is forced on, and one user message containing the
    tool-repair prompt is appended to the copied messages.
    """
    recovery = deepcopy(body)
    recovery.pop("tools", None)
    recovery.pop("tool_choice", None)
    recovery["stream"] = True

    conversation = _copied_messages(recovery)
    repair_turn = {
        "role": "user",
        "content": _tool_repair_prompt(
            tool_name=tool_name, prefix=prefix, input_schema=input_schema
        ),
    }
    conversation.append(repair_turn)

    recovery["messages"] = conversation
    return recovery
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _tool_attr(tool: Any, attr: str) -> Any:
|
|
307
|
+
if isinstance(tool, dict):
|
|
308
|
+
return tool.get(attr)
|
|
309
|
+
return getattr(tool, attr, None)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _copied_messages(body: dict[str, Any]) -> list[Any]:
|
|
313
|
+
messages = body.get("messages")
|
|
314
|
+
return deepcopy(messages) if isinstance(messages, list) else []
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _repair_suffix_candidates(prefix: str, candidate: str) -> list[str]:
|
|
318
|
+
raw = candidate.strip()
|
|
319
|
+
if not raw:
|
|
320
|
+
return []
|
|
321
|
+
candidates: list[str] = []
|
|
322
|
+
if raw.startswith("```"):
|
|
323
|
+
lines = raw.splitlines()
|
|
324
|
+
if lines and lines[0].startswith("```"):
|
|
325
|
+
lines = lines[1:]
|
|
326
|
+
if lines and lines[-1].strip() == "```":
|
|
327
|
+
lines = lines[:-1]
|
|
328
|
+
raw = "\n".join(lines).strip()
|
|
329
|
+
candidates.append(raw)
|
|
330
|
+
if raw.startswith(prefix):
|
|
331
|
+
candidates.append(raw[len(prefix) :])
|
|
332
|
+
return list(dict.fromkeys(candidates))
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _tool_repair_prompt(
|
|
336
|
+
*, tool_name: str, prefix: str, input_schema: dict[str, Any] | None
|
|
337
|
+
) -> str:
|
|
338
|
+
schema_text = json.dumps(input_schema or {"type": "object"}, separators=(",", ":"))
|
|
339
|
+
return (
|
|
340
|
+
"A streamed tool call was interrupted while writing JSON arguments.\n"
|
|
341
|
+
f"Tool name: {tool_name}\n"
|
|
342
|
+
f"JSON schema: {schema_text}\n"
|
|
343
|
+
f"Already emitted JSON prefix: {prefix}\n\n"
|
|
344
|
+
"Return only the exact missing JSON suffix needed to complete the same object. "
|
|
345
|
+
"Do not repeat the prefix. Do not include markdown or explanation."
|
|
346
|
+
)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Shared stream recovery policy for provider transports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import StrEnum
|
|
7
|
+
|
|
8
|
+
from core.anthropic.stream_recovery import (
|
|
9
|
+
EARLY_TRANSPARENT_MAX_RETRIES,
|
|
10
|
+
EARLY_TRANSPARENT_TOTAL_ATTEMPTS,
|
|
11
|
+
RecoveryHoldbackBuffer,
|
|
12
|
+
is_retryable_stream_error,
|
|
13
|
+
)
|
|
14
|
+
from core.trace import trace_event
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StreamFailureAction(StrEnum):
|
|
18
|
+
"""Transport action selected after a provider stream failure."""
|
|
19
|
+
|
|
20
|
+
EARLY_RETRY = "early_retry"
|
|
21
|
+
MIDSTREAM_RECOVERY = "midstream_recovery"
|
|
22
|
+
FINAL_ERROR = "final_error"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True, slots=True)
|
|
26
|
+
class StreamFailureDecision:
|
|
27
|
+
"""Failure-state snapshot for the current provider stream transition."""
|
|
28
|
+
|
|
29
|
+
action: StreamFailureAction
|
|
30
|
+
retryable: bool
|
|
31
|
+
committed: bool
|
|
32
|
+
has_buffered: bool
|
|
33
|
+
early_retry_attempt: int | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class StreamRecoverySession:
    """Per-request holder of the holdback buffer and early-retry counter.

    Provider stream transports share this object so they apply the same
    policy when a stream fails.
    """

    def __init__(self, *, provider_name: str, request_id: str | None) -> None:
        """Start a session with a fresh holdback buffer and zero retries.

        ``provider_name`` and ``request_id`` are only used to label trace
        events.
        """
        self._provider_name = provider_name
        self._request_id = request_id
        self._early_retries = 0
        self._holdback = RecoveryHoldbackBuffer()

    @property
    def committed(self) -> bool:
        """Whether the current holdback buffer has been committed."""
        return self._holdback.committed

    @property
    def has_buffered(self) -> bool:
        """Whether the current holdback buffer is holding any events."""
        return self._holdback.has_buffered

    @property
    def early_retries(self) -> int:
        """Number of early retries consumed so far."""
        return self._early_retries

    def push(self, event: str) -> list[str]:
        """Route one downstream event through the holdback buffer."""
        return self._holdback.push(event)

    def flush(self) -> list[str]:
        """Commit the holdback buffer and return the events it held."""
        return self._holdback.flush()

    def flush_uncommitted(self, decision: StreamFailureDecision) -> list[str]:
        """Flush only if ``decision`` recorded an uncommitted buffer.

        Returns an empty list when the decision snapshot was already
        committed.
        """
        return [] if decision.committed else self.flush()

    def discard(self) -> None:
        """Throw away held events without committing them."""
        self._holdback.discard()

    def advance_failure(
        self,
        error: BaseException,
        *,
        stream_opened: bool,
        generated_output: bool,
        complete_tool_salvageable: bool,
    ) -> StreamFailureDecision:
        """Pick the next action for a failed stream and update session state.

        An early retry is chosen when nothing has been committed, the stream
        had opened, the error is retryable, no complete tool call can be
        salvaged, and the retry budget is not used up. In that case the retry
        counter is incremented, the holdback buffer is replaced, and a trace
        event is emitted. Otherwise midstream recovery is chosen when output
        was generated and the error is retryable. Failing both, the decision
        is a final error.
        """
        # Snapshot state first; the early-retry branch replaces the buffer.
        committed = self.committed
        has_buffered = self.has_buffered
        retryable = is_retryable_stream_error(error)

        def decide(
            action: StreamFailureAction, attempt: int | None = None
        ) -> StreamFailureDecision:
            return StreamFailureDecision(
                action=action,
                retryable=retryable,
                committed=committed,
                has_buffered=has_buffered,
                early_retry_attempt=attempt,
            )

        may_retry_early = (
            not committed
            and stream_opened
            and retryable
            and not complete_tool_salvageable
            and self._early_retries < EARLY_TRANSPARENT_MAX_RETRIES
        )
        if may_retry_early:
            self._early_retries += 1
            attempt = self._early_retries
            self._reset_holdback()
            trace_event(
                stage="provider",
                event="provider.recovery.early_retry",
                source="provider",
                provider=self._provider_name,
                request_id=self._request_id,
                attempt=attempt,
                max_attempts=EARLY_TRANSPARENT_TOTAL_ATTEMPTS,
                exc_type=type(error).__name__,
            )
            return decide(StreamFailureAction.EARLY_RETRY, attempt)

        if generated_output and retryable:
            return decide(StreamFailureAction.MIDSTREAM_RECOVERY)

        return decide(StreamFailureAction.FINAL_ERROR)

    def _reset_holdback(self) -> None:
        """Drop held events and install a brand-new, uncommitted buffer."""
        self._holdback.discard()
        self._holdback = RecoveryHoldbackBuffer()
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Streaming parser for provider-emitted thinking tags."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ContentType(Enum):
    """Kind of content carried by a parsed chunk."""

    # Content found outside ``<think>`` tags.
    TEXT = "text"
    # Content found between ``<think>`` and ``</think>``.
    THINKING = "thinking"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ContentChunk:
    """One piece of parser output: a content kind plus its text."""

    # Whether the text is regular output or thinking content.
    type: ContentType
    # The text of this chunk.
    content: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ThinkTagParser:
    """
    Incremental splitter for text containing ``<think>...</think>`` sections.

    Text is supplied piece by piece through ``feed``. A tag cut in half at
    the end of a piece is kept in an internal buffer until the next piece
    (or ``flush``) resolves it.
    """

    OPEN_TAG = "<think>"
    CLOSE_TAG = "</think>"

    def __init__(self):
        self._buffer: str = ""
        self._in_think_tag: bool = False

    @property
    def in_think_mode(self) -> bool:
        """True while the parser is between an opening and a closing tag."""
        return self._in_think_tag

    def feed(self, content: str) -> Iterator[ContentChunk]:
        """Append ``content`` to the buffer and yield every chunk now decidable."""
        self._buffer += content

        while self._buffer:
            size_before = len(self._buffer)
            step = (
                self._parse_inside_think
                if self._in_think_tag
                else self._parse_outside_think
            )
            piece = step()

            if piece:
                yield piece
                continue
            # Nothing emitted and nothing consumed: the rest is a partial tag
            # that needs more input.
            if len(self._buffer) == size_before:
                break

    @staticmethod
    def _chunk_or_none(kind: ContentType, text: str) -> ContentChunk | None:
        """Wrap non-empty ``text`` in a chunk; empty text produces ``None``."""
        return ContentChunk(kind, text) if text else None

    def _parse_outside_think(self) -> ContentChunk | None:
        """Consume buffered data while outside a think section."""
        buf = self._buffer
        open_at = buf.find(self.OPEN_TAG)
        close_at = buf.find(self.CLOSE_TAG)

        # A closing tag with no opening tag before it is dropped; the text
        # preceding it is ordinary text.
        if close_at != -1 and (open_at == -1 or close_at < open_at):
            self._buffer = buf[close_at + len(self.CLOSE_TAG) :]
            return self._chunk_or_none(ContentType.TEXT, buf[:close_at])

        if open_at != -1:
            # Complete opening tag: emit what precedes it and switch modes.
            self._buffer = buf[open_at + len(self.OPEN_TAG) :]
            self._in_think_tag = True
            return self._chunk_or_none(ContentType.TEXT, buf[:open_at])

        # No complete tag. Hold back a trailing fragment that could still
        # grow into either tag.
        tail_at = buf.rfind("<")
        if tail_at != -1:
            tail = buf[tail_at:]
            could_open = len(tail) < len(self.OPEN_TAG) and self.OPEN_TAG.startswith(
                tail
            )
            could_close = len(tail) < len(
                self.CLOSE_TAG
            ) and self.CLOSE_TAG.startswith(tail)
            if could_open or could_close:
                self._buffer = tail
                return self._chunk_or_none(ContentType.TEXT, buf[:tail_at])

        self._buffer = ""
        return self._chunk_or_none(ContentType.TEXT, buf)

    def _parse_inside_think(self) -> ContentChunk | None:
        """Consume buffered data while inside a think section."""
        buf = self._buffer
        close_at = buf.find(self.CLOSE_TAG)

        if close_at != -1:
            # Complete closing tag: emit the thinking text and leave the section.
            self._buffer = buf[close_at + len(self.CLOSE_TAG) :]
            self._in_think_tag = False
            return self._chunk_or_none(ContentType.THINKING, buf[:close_at])

        # Hold back a trailing fragment that could still become the closing tag.
        tail_at = buf.rfind("<")
        if tail_at != -1 and len(buf) - tail_at < len(self.CLOSE_TAG):
            tail = buf[tail_at:]
            if self.CLOSE_TAG.startswith(tail):
                self._buffer = tail
                return self._chunk_or_none(ContentType.THINKING, buf[:tail_at])

        self._buffer = ""
        return self._chunk_or_none(ContentType.THINKING, buf)

    def flush(self) -> ContentChunk | None:
        """Emit whatever is still buffered, typed by the current mode.

        Returns ``None`` when the buffer is empty.
        """
        pending = self._buffer
        if not pending:
            return None
        self._buffer = ""
        kind = ContentType.THINKING if self._in_think_tag else ContentType.TEXT
        return ContentChunk(kind, pending)
|