luckyd-code 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- luckyd_code/__init__.py +54 -0
- luckyd_code/__main__.py +5 -0
- luckyd_code/_agent_loop.py +551 -0
- luckyd_code/_data_dir.py +73 -0
- luckyd_code/agent.py +38 -0
- luckyd_code/analytics/__init__.py +18 -0
- luckyd_code/analytics/reporter.py +195 -0
- luckyd_code/analytics/scanner.py +443 -0
- luckyd_code/analytics/smells.py +316 -0
- luckyd_code/analytics/trends.py +303 -0
- luckyd_code/api.py +473 -0
- luckyd_code/audit_daemon.py +845 -0
- luckyd_code/autonomous_fixer.py +473 -0
- luckyd_code/background.py +159 -0
- luckyd_code/backup.py +237 -0
- luckyd_code/brain/__init__.py +84 -0
- luckyd_code/brain/assembler.py +100 -0
- luckyd_code/brain/chunker.py +345 -0
- luckyd_code/brain/constants.py +73 -0
- luckyd_code/brain/embedder.py +163 -0
- luckyd_code/brain/graph.py +311 -0
- luckyd_code/brain/indexer.py +316 -0
- luckyd_code/brain/parser.py +140 -0
- luckyd_code/brain/retriever.py +234 -0
- luckyd_code/cli.py +894 -0
- luckyd_code/cli_commands/__init__.py +1 -0
- luckyd_code/cli_commands/audit.py +120 -0
- luckyd_code/cli_commands/background.py +83 -0
- luckyd_code/cli_commands/brain.py +87 -0
- luckyd_code/cli_commands/config.py +75 -0
- luckyd_code/cli_commands/dispatcher.py +695 -0
- luckyd_code/cli_commands/sessions.py +41 -0
- luckyd_code/cli_entry.py +147 -0
- luckyd_code/cli_utils.py +112 -0
- luckyd_code/config.py +205 -0
- luckyd_code/context.py +214 -0
- luckyd_code/cost_tracker.py +209 -0
- luckyd_code/error_reporter.py +508 -0
- luckyd_code/exceptions.py +39 -0
- luckyd_code/export.py +126 -0
- luckyd_code/feedback_analyzer.py +290 -0
- luckyd_code/file_watcher.py +258 -0
- luckyd_code/git/__init__.py +11 -0
- luckyd_code/git/auto_commit.py +157 -0
- luckyd_code/git/tools.py +85 -0
- luckyd_code/hooks.py +236 -0
- luckyd_code/indexer.py +280 -0
- luckyd_code/init.py +39 -0
- luckyd_code/keybindings.py +77 -0
- luckyd_code/log.py +55 -0
- luckyd_code/mcp/__init__.py +6 -0
- luckyd_code/mcp/client.py +184 -0
- luckyd_code/memory/__init__.py +19 -0
- luckyd_code/memory/manager.py +339 -0
- luckyd_code/metrics/__init__.py +5 -0
- luckyd_code/model_registry.py +131 -0
- luckyd_code/orchestrator.py +204 -0
- luckyd_code/permissions/__init__.py +1 -0
- luckyd_code/permissions/manager.py +103 -0
- luckyd_code/planner.py +361 -0
- luckyd_code/plugins.py +91 -0
- luckyd_code/py.typed +0 -0
- luckyd_code/retry.py +57 -0
- luckyd_code/router.py +417 -0
- luckyd_code/sandbox.py +156 -0
- luckyd_code/self_critique.py +2 -0
- luckyd_code/self_improve.py +274 -0
- luckyd_code/sessions.py +114 -0
- luckyd_code/settings.py +72 -0
- luckyd_code/skills/__init__.py +8 -0
- luckyd_code/skills/review.py +22 -0
- luckyd_code/skills/security.py +17 -0
- luckyd_code/tasks/__init__.py +1 -0
- luckyd_code/tasks/manager.py +102 -0
- luckyd_code/templates/icon-192.png +0 -0
- luckyd_code/templates/icon-512.png +0 -0
- luckyd_code/templates/index.html +1965 -0
- luckyd_code/templates/manifest.json +14 -0
- luckyd_code/templates/src/app.js +694 -0
- luckyd_code/templates/src/body.html +767 -0
- luckyd_code/templates/src/cdn.txt +2 -0
- luckyd_code/templates/src/style.css +474 -0
- luckyd_code/templates/sw.js +31 -0
- luckyd_code/templates/test.html +6 -0
- luckyd_code/themes.py +48 -0
- luckyd_code/tools/__init__.py +97 -0
- luckyd_code/tools/agent_tools.py +65 -0
- luckyd_code/tools/bash.py +360 -0
- luckyd_code/tools/brain_tools.py +137 -0
- luckyd_code/tools/browser.py +369 -0
- luckyd_code/tools/datetime_tool.py +34 -0
- luckyd_code/tools/dockerfile_gen.py +212 -0
- luckyd_code/tools/file_ops.py +381 -0
- luckyd_code/tools/game_gen.py +360 -0
- luckyd_code/tools/git_tools.py +130 -0
- luckyd_code/tools/git_worktree.py +63 -0
- luckyd_code/tools/path_validate.py +64 -0
- luckyd_code/tools/project_gen.py +187 -0
- luckyd_code/tools/readme_gen.py +227 -0
- luckyd_code/tools/registry.py +157 -0
- luckyd_code/tools/shell_detect.py +109 -0
- luckyd_code/tools/web.py +89 -0
- luckyd_code/tools/youtube.py +187 -0
- luckyd_code/tools_bridge.py +144 -0
- luckyd_code/undo.py +126 -0
- luckyd_code/update.py +60 -0
- luckyd_code/verify.py +360 -0
- luckyd_code/web_app.py +176 -0
- luckyd_code/web_routes/__init__.py +23 -0
- luckyd_code/web_routes/background.py +73 -0
- luckyd_code/web_routes/brain.py +109 -0
- luckyd_code/web_routes/cost.py +12 -0
- luckyd_code/web_routes/files.py +133 -0
- luckyd_code/web_routes/memories.py +94 -0
- luckyd_code/web_routes/misc.py +67 -0
- luckyd_code/web_routes/project.py +48 -0
- luckyd_code/web_routes/review.py +20 -0
- luckyd_code/web_routes/sessions.py +44 -0
- luckyd_code/web_routes/settings.py +43 -0
- luckyd_code/web_routes/static.py +70 -0
- luckyd_code/web_routes/update.py +19 -0
- luckyd_code/web_routes/ws.py +237 -0
- luckyd_code-1.2.2.dist-info/METADATA +297 -0
- luckyd_code-1.2.2.dist-info/RECORD +127 -0
- luckyd_code-1.2.2.dist-info/WHEEL +4 -0
- luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
- luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/api.py
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
"""API client for DeepSeek Chat with streaming and retry logic."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
import random
|
|
6
|
+
from typing import Any, Dict, Generator, List, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from openai import OpenAI
|
|
10
|
+
|
|
11
|
+
from .retry import RetryableError, NonRetryableError, ModelNotFoundError
|
|
12
|
+
from .log import get_logger
|
|
13
|
+
|
|
14
|
+
# Retry policy used by ``_call_with_retry`` when opening the stream.
_RETRY_MAX = 3  # number of retries allowed after the first attempt
_RETRY_BASE_DELAY = 1.0  # seconds; initial backoff, doubled after each retryable failure
_RETRY_MAX_DELAY = 30.0  # seconds; upper bound for the (pre-jitter) backoff delay

Event = Tuple[str, Any]
# Event types yielded by ``stream_chat``:
#   ("text", str)               - streamed text chunk
#   ("reasoning", str)          - streamed reasoning_content chunk
#   ("tool_calls", (list, str)) - (tool_calls, reasoning_content)
#   ("done", (str, str))        - (content, reasoning_content), no tool calls
#   ("model_not_found", str)    - the request failed with ModelNotFoundError
#   ("error", str)              - error message

# Overall httpx timeout; the connect phase uses a separate 10 s limit.
API_TIMEOUT = 60.0  # seconds
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _make_client(api_key: str, base_url: str) -> OpenAI:
    """Build an OpenAI SDK client backed by an httpx client with timeouts.

    The overall timeout is ``API_TIMEOUT`` seconds; establishing the
    connection is limited to 10 seconds.
    """
    timeout = httpx.Timeout(API_TIMEOUT, connect=10.0)
    transport = httpx.Client(timeout=timeout)
    return OpenAI(api_key=api_key, base_url=base_url, http_client=transport)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_connection(
    api_key: str,
    base_url: str = "https://api.deepseek.com/v1",
    model: str = "deepseek-v4-flash",
) -> tuple[bool, str]:
    """Test the API connection.

    First tries ``models.list()``.  If that fails for a reason that does not
    look like an authentication or network problem, falls back to a minimal
    non-streaming chat completion, because not every provider implements the
    models endpoint.

    Args:
        api_key: API key sent to the provider.
        base_url: Base URL of the OpenAI-compatible endpoint.
        model: Model name used for the fallback chat completion probe.

    Returns:
        ``(success, message)`` where ``message`` is a short human-readable
        status or error description (error text is truncated to 200 chars).
    """
    client = _make_client(api_key, base_url)
    try:
        try:
            client.models.list()
            return True, "API connection OK"
        except Exception as e:
            err = str(e)
            lowered = err.lower()
            if "401" in err or "authentication" in lowered or "invalid" in lowered:
                return False, f"API key rejected: {err[:200]}"
            if "connect" in lowered or "timeout" in lowered or "dns" in lowered:
                return False, f"Network error (cannot reach {base_url}): {err[:200]}"

        # models.list() might not work with all providers — fall back to a
        # minimal chat completion using the same client.
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                max_tokens=5,
                stream=False,
            )
            if response.choices:
                return True, "API connection OK"
            return False, "API returned empty response"
        except Exception as e2:
            return False, f"API error: {str(e2)[:200]}"
    finally:
        # Release the underlying httpx connection pool; previously the client
        # was left open on every call.
        client.close()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _open_stream(
    messages: List[Dict[str, Any]],
    tools: List[Dict[str, Any]],
    model: str,
    api_key: str,
    base_url: str,
    max_tokens: int,
    temperature: float,
):
    """Start the streaming POST request and check its HTTP status.

    This is deliberately a plain function rather than a generator: the
    request is issued during the call itself, so any failure surfaces
    immediately and ``_call_with_retry`` can catch and retry it.

    Returns ``(client, response_cm, response)``.  On success the caller owns
    all three and must release them after consuming the stream::

        response_cm.__exit__(None, None, None)
        client.close()

    Raises ``RetryableError``, ``NonRetryableError`` or
    ``ModelNotFoundError`` for non-200 responses (as classified by
    ``_classify_http_error``); any other exception is re-raised after the
    client has been closed.
    """
    endpoint = f"{base_url.rstrip('/')}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
    }
    payload: Dict[str, Any] = {
        "model": model,
        "messages": _filter_messages(messages),
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": True,
        "stream_options": {"include_usage": True},
    }
    if tools:
        payload["tools"] = tools

    client = httpx.Client(timeout=httpx.Timeout(API_TIMEOUT, connect=10.0))
    try:
        response_cm = client.stream(
            "POST", endpoint, json=payload, headers=request_headers
        )
        response = response_cm.__enter__()
        if response.status_code == 200:
            return client, response_cm, response

        # Non-200: read the error body, release everything, then raise the
        # classified exception.
        err_detail = _parse_stream_error(response)
        response_cm.__exit__(None, None, None)
        client.close()
        raise _classify_http_error(response.status_code, err_detail)
    except (RetryableError, NonRetryableError, ModelNotFoundError):
        # Already classified (and cleaned up) — let _call_with_retry decide.
        raise
    except Exception:
        client.close()
        raise
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _parse_stream_error(response: httpx.Response) -> str:
    """Return a short error description for a non-200 streaming response.

    Prefers the ``error.message`` field of a JSON body; otherwise falls back
    to the first 500 characters of the body text, and finally to
    ``"HTTP <status>"`` if the body cannot be read at all.
    """
    try:
        # The body of a streamed response must be read before it is parsed.
        response.read()
        payload: dict[str, Any] = response.json()
        error_obj = payload.get("error", {})
        message = error_obj.get("message", str(response.text[:500]))
        return str(message)
    except Exception:
        pass

    try:
        return str(response.text[:500])
    except Exception:
        return f"HTTP {response.status_code}"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _classify_http_error(status_code: int, detail: str) -> Exception:
    """Map an HTTP status code and error detail to an exception instance.

    The exception is returned, not raised.  429 and 5xx become
    ``RetryableError``; a 400 whose detail mentions a missing model becomes
    ``ModelNotFoundError``; everything else becomes ``NonRetryableError``.
    """
    lowered = detail.lower()
    snippet = detail[:200]

    if status_code == 400:
        model_missing = any(
            marker in lowered for marker in ("model not exist", "model_not_exist")
        )
        return ModelNotFoundError(detail) if model_missing else NonRetryableError(detail)

    # Client errors that will not succeed on retry, keyed by status code.
    fatal_prefixes = {
        401: "Authentication failed (401). Check your API key",
        403: "Access denied (403)",
        404: "Resource not found (404)",
        422: "Invalid request (422)",
    }
    if status_code in fatal_prefixes:
        return NonRetryableError(f"{fatal_prefixes[status_code]}: {snippet}")

    if status_code == 429:
        return RetryableError(f"Rate limited (429): {snippet}")
    if status_code >= 500:
        return RetryableError(f"Server error ({status_code}): {snippet}")
    return NonRetryableError(detail)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _parse_sse_line(line: str) -> Optional[Dict[str, Any]]:
|
|
163
|
+
"""Parse a single SSE line from a streaming response."""
|
|
164
|
+
line = line.strip()
|
|
165
|
+
if not line:
|
|
166
|
+
return None
|
|
167
|
+
if line == "data: [DONE]":
|
|
168
|
+
return {}
|
|
169
|
+
if line.startswith("data: "):
|
|
170
|
+
try:
|
|
171
|
+
result: Dict[str, Any] = json.loads(line[6:])
|
|
172
|
+
return result
|
|
173
|
+
except json.JSONDecodeError:
|
|
174
|
+
return None
|
|
175
|
+
return None
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _filter_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
179
|
+
"""Ensure DeepSeek reasoning_content is properly handled for thinking mode.
|
|
180
|
+
|
|
181
|
+
DeepSeek requires that when an assistant response had reasoning_content,
|
|
182
|
+
subsequent requests must include BOTH content AND reasoning_content.
|
|
183
|
+
Sending reasoning_content without a content field (or with content=None)
|
|
184
|
+
causes the API to return "content or tool_calls must be set".
|
|
185
|
+
|
|
186
|
+
Fix: whenever reasoning_content is present, ensure content is at least an
|
|
187
|
+
empty string so the API always sees both fields together.
|
|
188
|
+
"""
|
|
189
|
+
filtered = []
|
|
190
|
+
for msg in messages:
|
|
191
|
+
m = dict(msg)
|
|
192
|
+
if m.get("role") == "assistant" and "reasoning_content" in m:
|
|
193
|
+
# Guarantee content is present and is a string (never None/missing)
|
|
194
|
+
if not m.get("content"):
|
|
195
|
+
m["content"] = ""
|
|
196
|
+
filtered.append(m)
|
|
197
|
+
return filtered
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _count_unquoted(text: str, open_ch: str, close_ch: str) -> tuple[int, int]:
|
|
201
|
+
"""Count open_ch and close_ch occurrences that are outside string literals.
|
|
202
|
+
|
|
203
|
+
Handles ``\\"`` escape sequences so braces/brackets embedded inside JSON
|
|
204
|
+
string values are never counted. Returns ``(open_count, close_count)``.
|
|
205
|
+
"""
|
|
206
|
+
opens = closes = 0
|
|
207
|
+
in_string = False
|
|
208
|
+
escape = False
|
|
209
|
+
for ch in text:
|
|
210
|
+
if escape:
|
|
211
|
+
escape = False
|
|
212
|
+
continue
|
|
213
|
+
if ch == "\\" and in_string:
|
|
214
|
+
escape = True
|
|
215
|
+
continue
|
|
216
|
+
if ch == '"':
|
|
217
|
+
in_string = not in_string
|
|
218
|
+
continue
|
|
219
|
+
if not in_string:
|
|
220
|
+
if ch == open_ch:
|
|
221
|
+
opens += 1
|
|
222
|
+
elif ch == close_ch:
|
|
223
|
+
closes += 1
|
|
224
|
+
return opens, closes
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _repair_json(raw: str) -> str:
|
|
228
|
+
"""Attempt to repair common JSON issues in model-generated tool arguments.
|
|
229
|
+
|
|
230
|
+
Reasoning models sometimes produce multiline string values or trailing
|
|
231
|
+
commas that break JSON parsing. This tries to recover the intended JSON.
|
|
232
|
+
|
|
233
|
+
All replacements are done *outside* string literals to avoid corrupting
|
|
234
|
+
valid JSON that legitimately contains e.g. "}" inside a string value.
|
|
235
|
+
"""
|
|
236
|
+
raw = raw.strip()
|
|
237
|
+
if not raw:
|
|
238
|
+
return raw
|
|
239
|
+
|
|
240
|
+
# Remove trailing comma before closing brace/bracket (outside strings only)
|
|
241
|
+
raw = _remove_trailing_commas(raw)
|
|
242
|
+
|
|
243
|
+
# Close unmatched braces/brackets — count only characters outside strings
|
|
244
|
+
# so that values like {"key": "template {var}"} are never corrupted.
|
|
245
|
+
open_braces, close_braces = _count_unquoted(raw, "{", "}")
|
|
246
|
+
if open_braces > close_braces:
|
|
247
|
+
raw += "}" * (open_braces - close_braces)
|
|
248
|
+
|
|
249
|
+
open_brackets, close_brackets = _count_unquoted(raw, "[", "]")
|
|
250
|
+
if open_brackets > close_brackets:
|
|
251
|
+
raw += "]" * (open_brackets - close_brackets)
|
|
252
|
+
|
|
253
|
+
return raw
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _remove_trailing_commas(text: str) -> str:
|
|
257
|
+
"""Remove trailing commas before ``}`` or ``]``, but only outside strings.
|
|
258
|
+
|
|
259
|
+
Walks the text character-by-character tracking whether we're inside a
|
|
260
|
+
double-quoted string (handling ``\"`` escapes), and only strips a
|
|
261
|
+
comma when the immediately-following non-whitespace char is ``}`` or
|
|
262
|
+
``]`` and we are NOT inside a string.
|
|
263
|
+
"""
|
|
264
|
+
result: list[str] = []
|
|
265
|
+
in_string = False
|
|
266
|
+
escape = False
|
|
267
|
+
i = 0
|
|
268
|
+
n = len(text)
|
|
269
|
+
|
|
270
|
+
while i < n:
|
|
271
|
+
ch = text[i]
|
|
272
|
+
|
|
273
|
+
if escape:
|
|
274
|
+
escape = False
|
|
275
|
+
result.append(ch)
|
|
276
|
+
i += 1
|
|
277
|
+
continue
|
|
278
|
+
|
|
279
|
+
if ch == "\\" and in_string:
|
|
280
|
+
escape = True
|
|
281
|
+
result.append(ch)
|
|
282
|
+
i += 1
|
|
283
|
+
continue
|
|
284
|
+
|
|
285
|
+
if ch == '"':
|
|
286
|
+
in_string = not in_string
|
|
287
|
+
result.append(ch)
|
|
288
|
+
i += 1
|
|
289
|
+
continue
|
|
290
|
+
|
|
291
|
+
if ch == "," and not in_string:
|
|
292
|
+
# Look ahead past whitespace to see if next char is } or ]
|
|
293
|
+
j = i + 1
|
|
294
|
+
while j < n and text[j] in (" ", "\t", "\n", "\r"):
|
|
295
|
+
j += 1
|
|
296
|
+
if j < n and text[j] in ("}", "]"):
|
|
297
|
+
# Trailing comma — skip it
|
|
298
|
+
i += 1
|
|
299
|
+
continue
|
|
300
|
+
|
|
301
|
+
result.append(ch)
|
|
302
|
+
i += 1
|
|
303
|
+
|
|
304
|
+
return "".join(result)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _call_with_retry(
    messages: List[Dict[str, Any]],
    tools: List[Dict[str, Any]],
    model: str,
    api_key: str,
    base_url: str,
    max_tokens: int,
    temperature: float,
):
    """Call ``_open_stream`` with jittered exponential backoff.

    * ``ModelNotFoundError`` / ``NonRetryableError`` propagate immediately.
    * ``RetryableError`` is retried up to ``_RETRY_MAX`` times; the base
      delay doubles after each failure, capped at ``_RETRY_MAX_DELAY``, and
      each sleep is a random 50–100 % of the current delay.
    * Any other exception gets a single grace retry, and only when it
      happens on the very first attempt; otherwise it is re-raised.

    Returns whatever ``_open_stream`` returns.  If every attempt fails with
    a retryable error, the last such error is raised.
    """
    log = get_logger()
    backoff = _RETRY_BASE_DELAY
    failure: Exception = RuntimeError("Unknown error")

    for attempt in range(_RETRY_MAX + 1):
        try:
            return _open_stream(
                messages, tools, model, api_key, base_url, max_tokens, temperature
            )
        except (ModelNotFoundError, NonRetryableError):
            # Missing model, auth failure, bad request, ... — never retried.
            raise
        except RetryableError as exc:
            failure = exc
            if attempt >= _RETRY_MAX:
                continue  # retries exhausted; loop ends and failure is raised
            pause = backoff * (0.5 + random.random() * 0.5)
            log.warning(
                "Retryable API error (attempt %d/%d), retrying in %.1fs: %s",
                attempt + 1, _RETRY_MAX, pause, exc,
            )
            time.sleep(pause)
            backoff = min(backoff * 2, _RETRY_MAX_DELAY)
        except Exception as exc:
            failure = exc
            if attempt != 0:
                raise
            # One grace retry for unclassified errors on the first attempt.
            pause = backoff * (0.5 + random.random() * 0.5)
            log.warning("Transient error, retrying once in %.1fs: %s", pause, exc)
            time.sleep(pause)

    raise failure
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def stream_chat(
    messages: List[Dict[str, Any]],
    tools: List[Dict[str, Any]],
    model: str,
    api_key: str,
    base_url: str = "https://api.deepseek.com/v1",
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> Generator[Event, None, None]:
    """Stream a chat completion as a sequence of ``(kind, payload)`` events.

    The request goes through raw httpx rather than the OpenAI SDK so that
    vendor-specific delta fields such as DeepSeek's ``reasoning_content``
    remain available.  Opening the connection is retried by
    ``_call_with_retry``.

    Yields:
        ``("reasoning", str)`` and ``("text", str)`` for each streamed chunk,
        then exactly one terminal event: ``("tool_calls", (calls, reasoning))``
        if any tool-call deltas were received, otherwise
        ``("done", (content, reasoning))``.  Failures are reported as
        ``("model_not_found", str)`` or ``("error", str)`` instead of being
        raised.
    """
    try:
        client, response_cm, response = _call_with_retry(
            messages, tools, model, api_key, base_url,
            max_tokens, temperature,
        )
    except ModelNotFoundError as exc:
        yield ("model_not_found", str(exc))
        return
    except NonRetryableError as exc:
        yield ("error", str(exc))
        return
    except RetryableError as exc:
        yield ("error", f"API request failed after {_RETRY_MAX} retries: {exc}")
        return
    except Exception as exc:
        yield ("error", f"API request failed: {exc}")
        return

    text_chunks: List[str] = []
    thought_chunks: List[str] = []
    # Tool calls arrive as fragments keyed by their ``index`` field.
    pending_calls: Dict[int, Dict[str, str]] = {}

    try:
        for raw_line in response.iter_lines():
            decoded = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line
            event = _parse_sse_line(decoded)
            if event is None:
                continue
            if not event:
                # [DONE] signal
                break

            choices = event.get("choices")
            if not choices:
                continue

            delta = choices[0].get("delta", {})
            if delta is None:
                continue

            # reasoning_content (DeepSeek thinking mode) is collected so the
            # caller can send it back in subsequent requests.
            thought = delta.get("reasoning_content")
            if thought:
                thought_chunks.append(thought)
                yield ("reasoning", thought)

            piece = delta.get("content")
            if piece:
                text_chunks.append(piece)
                yield ("text", piece)

            for call in delta.get("tool_calls") or ():
                slot = pending_calls.setdefault(
                    call.get("index", 0),
                    {"id": "", "name": "", "arguments": ""},
                )
                call_id = call.get("id")
                if call_id:
                    slot["id"] = call_id
                function = call.get("function", {})
                name_part = function.get("name", "")
                if name_part:
                    slot["name"] += name_part
                args_part = function.get("arguments", "")
                if args_part:
                    slot["arguments"] += args_part

        if pending_calls:
            assembled = [
                {
                    "id": pending_calls[key]["id"],
                    "type": "function",
                    "function": {
                        "name": pending_calls[key]["name"],
                        "arguments": pending_calls[key]["arguments"],
                    },
                }
                for key in sorted(pending_calls)
            ]
            yield ("tool_calls", (assembled, "".join(thought_chunks)))
        else:
            yield ("done", ("".join(text_chunks), "".join(thought_chunks)))

    except Exception as exc:
        yield ("error", f"Stream error: {exc}")
    finally:
        # Always release the HTTP connection, even if streaming was
        # interrupted; cleanup failures are deliberately ignored.
        try:
            response_cm.__exit__(None, None, None)
        except Exception:
            pass
        try:
            client.close()
        except Exception:
            pass
|