deepparallel 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepparallel/__init__.py +3 -0
- deepparallel/agent.py +286 -0
- deepparallel/backend.py +302 -0
- deepparallel/branding.py +211 -0
- deepparallel/cli.py +569 -0
- deepparallel/config.py +158 -0
- deepparallel/fusion.py +225 -0
- deepparallel/licensing.py +108 -0
- deepparallel/registry.json +13 -0
- deepparallel/renderer.py +222 -0
- deepparallel/system_prompt.txt +4 -0
- deepparallel/tools/__init__.py +27 -0
- deepparallel/tools/codeast.py +171 -0
- deepparallel/tools/edit.py +29 -0
- deepparallel/tools/files.py +74 -0
- deepparallel/tools/registry.py +149 -0
- deepparallel/tools/sandbox.py +110 -0
- deepparallel/tools/search.py +38 -0
- deepparallel/tools/shell.py +38 -0
- deepparallel/tools/vision.py +54 -0
- deepparallel/tools/web.py +76 -0
- deepparallel-0.2.0.dist-info/METADATA +128 -0
- deepparallel-0.2.0.dist-info/RECORD +26 -0
- deepparallel-0.2.0.dist-info/WHEEL +5 -0
- deepparallel-0.2.0.dist-info/entry_points.txt +3 -0
- deepparallel-0.2.0.dist-info/top_level.txt +1 -0
deepparallel/__init__.py
ADDED
deepparallel/agent.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""The agentic tool-calling loop.
|
|
2
|
+
|
|
3
|
+
UI-agnostic: it drives a backend (`chat`) and a tool registry, and surfaces
|
|
4
|
+
activity through an injected renderer. Dangerous tools are gated; the loop
|
|
5
|
+
terminates on a content answer or a step cap.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import difflib
|
|
11
|
+
import json
|
|
12
|
+
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from deepparallel.tools.registry import coerce_args
|
|
16
|
+
|
|
17
|
+
# Upper bound, in characters, on a stringified tool result before it is shown
# and appended to the conversation (applied as a slice in run_agent).
_MAX_TOOL_RESULT = 50_000
# Tools that take a `file_path`: a target outside the working directory forces
# a confirmation even under auto-approve, and their prompt shows a diff preview.
_GATED_PATH_TOOLS = ("write_file", "edit_file")
# Tools whose proposed change is sent to the optional guardian reviewer.
_EDIT_TOOLS = ("write_file", "edit_file", "ast_replace_symbol")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _parse_args(raw: str) -> dict:
    """Decode a tool call's raw JSON argument string into a dict.

    Returns ``{}`` for empty input and for valid JSON that is not an object.
    When the text is not valid JSON, returns a dict whose single
    ``"__parse_error__"`` key holds the first 2000 characters of the input so
    the caller can detect and report the failure.
    """
    if not raw:
        return {}
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        return {"__parse_error__": raw[:2000]}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _plural(n: int, sing: str, plur: str) -> str:
    """Format a count with its noun: *sing* when ``n == 1``, else *plur*."""
    noun = plur if n != 1 else sing
    return f"{n} {noun}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _summarize_result(name: str, result: str) -> str:
    """Condense a tool result into a short label for the tool card.

    Only the UI sees this text; the untouched ``result`` is what gets sent
    back to the model. JSON-object results are summarized per tool (counts,
    return codes, sizes); anything else is treated as plain text.
    """
    try:
        payload = json.loads(result)
    except (json.JSONDecodeError, ValueError):
        payload = None

    if not isinstance(payload, dict):
        # Plain text (or non-object JSON): line count for read_file,
        # otherwise a whitespace-collapsed 80-character excerpt.
        if name != "read_file":
            return " ".join(result.split())[:80]
        if not result:
            return "0 lines"
        return _plural(result.count("\n") + 1, "line", "lines")

    if "error" in payload:
        return f"error: {str(payload['error'])[:100]}"

    # Tools summarized as "<n> <noun>": tool name -> (result key, singular, plural).
    counted = {
        "list_dir": ("entries", "entry", "entries"),
        "glob": ("matches", "match", "matches"),
        "grep": ("matches", "match", "matches"),
        "web_search": ("results", "result", "results"),
        "ast_symbols": ("symbols", "symbol", "symbols"),
    }
    if name in counted:
        key, singular, plural = counted[name]
        return _plural(len(payload.get(key, [])), singular, plural)

    if name == "write_file":
        return f"wrote {payload.get('bytes', 0)} bytes"
    if name == "edit_file":
        return "edited"
    if name == "ast_replace_symbol":
        return "replaced"
    if name == "run_shell":
        newline_count = (payload.get("stdout") or "").count("\n")
        return f"rc {payload.get('return_code')} · {_plural(newline_count, 'line', 'lines')}"
    if name == "run_code":
        return f"rc {payload.get('exit_code')} · {payload.get('sandbox', '')}"
    if name == "web_fetch":
        title = (payload.get("title") or "").strip()
        size = len(payload.get("text") or "")
        if title:
            return f'"{title[:40]}" · {size} chars'
        return f"{size} chars"
    if name == "ast_show_symbol":
        return _plural((payload.get("source") or "").count("\n") + 1, "line", "lines")
    if name == "analyze_image":
        return f"{len(payload.get('description') or '')} chars"

    # Unrecognized tool: show the first key of the object, or "ok" if empty.
    if payload:
        return next(iter(payload))
    return "ok"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _preview(args: dict) -> str:
    """Return an 80-character preview of the tool arguments.

    Arguments are rendered as JSON; if they cannot be serialized, their
    ``str()`` form is used instead.
    """
    try:
        return json.dumps(args)[:80]
    except (TypeError, ValueError):
        return str(args)[:80]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _outside_cwd(args: dict) -> bool:
    """Report whether the call's ``file_path`` lies outside the working directory.

    Args:
        args: Parsed tool-call arguments; only ``file_path`` is inspected.

    Returns:
        ``False`` when no ``file_path`` is given or the resolved path is under
        the current working directory; ``True`` when it resolves elsewhere or
        cannot be interpreted or resolved at all.
    """
    raw_path = args.get("file_path")
    if not raw_path:
        return False
    try:
        Path(raw_path).expanduser().resolve().relative_to(Path.cwd().resolve())
    except (ValueError, TypeError, OSError, RuntimeError):
        # ValueError: the resolved path is not under cwd.
        # TypeError / OSError / RuntimeError: the value is not path-like or
        # cannot be expanded/resolved. This is a safety gate, so fail closed
        # and treat the path as outside rather than crash the agent loop.
        return True
    return False
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _diff_preview(name: str, args: dict) -> str:
    """Build a short unified diff previewing a pending file write or edit.

    Args:
        name: Tool name. ``"write_file"`` replaces the whole file with
            ``content`` (or the decoded ``content_b64``); any other name is
            treated as one ``old_string`` -> ``new_string`` replacement.
        args: Parsed tool-call arguments.

    Returns:
        At most 60 unified-diff lines (2 lines of context), or
        ``"write <path>"`` when there is no textual difference to show.

    The preview is best-effort: an unreadable, non-UTF-8 or malformed target
    is treated as empty or lossily decoded instead of raising.
    """
    path = args.get("file_path", "")
    try:
        target = Path(path).expanduser().resolve()
        # errors="replace": a binary / non-UTF-8 file must not abort the
        # confirmation prompt with UnicodeDecodeError.
        old = (
            target.read_text(encoding="utf-8", errors="replace")
            if target.is_file()
            else ""
        )
    except (OSError, TypeError, ValueError, RuntimeError):
        # Unreadable file, non-path-like value, embedded NUL, or a home
        # directory that cannot be expanded: preview against empty content.
        old = ""

    if name == "write_file":
        # `or ""` guards against an explicit null for `content`.
        new = args.get("content") or ""
        encoded = args.get("content_b64")
        if encoded:
            import base64

            try:
                new = base64.b64decode(encoded).decode("utf-8", errors="replace")
            except (ValueError, TypeError):
                # binascii.Error is a ValueError; TypeError covers non-text input.
                new = "<base64 payload>"
    else:
        new = old.replace(
            args.get("old_string") or "", args.get("new_string") or "", 1
        )

    diff = difflib.unified_diff(old.splitlines(), new.splitlines(), lineterm="", n=2)
    text = "\n".join(list(diff)[:60])
    return text or f"write {path}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _describe(name: str, args: dict) -> tuple[str, str]:
    """Produce the ``(title, detail)`` pair shown in a confirmation prompt.

    Shell commands show the command and its working directory, gated path
    tools show a diff preview, and every other tool shows its JSON arguments
    truncated to 500 characters.
    """
    if name == "run_shell":
        command = args.get("command", "")
        workdir = args.get("working_directory") or "."
        title = f"run shell: {command[:60]}"
        detail = f"command: {command}\ncwd: {workdir}"
        return title, detail
    if name not in _GATED_PATH_TOOLS:
        return name, json.dumps(args)[:500]
    return f"{name}: {args.get('file_path', '')}", _diff_preview(name, args)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _guardian_review_content(name: str, args: dict) -> str:
    """Render the proposed change as text for the guardian reviewer.

    Gated path tools are rendered as a diff preview. Any other tool is
    described as a symbol replacement, with the new source capped at 4000
    characters.
    """
    if name not in _GATED_PATH_TOOLS:
        symbol = args.get("name", "")
        location = args.get("file_path", "")
        replacement = (args.get("new_source") or "")[:4000]
        return f"Replace symbol '{symbol}' in {location} with:\n{replacement}"
    return _diff_preview(name, args)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def guardian_review(guardian, content: str) -> str | None:
|
|
146
|
+
"""Ask an independent model to review a code change/snippet. Returns a
|
|
147
|
+
one-line verdict ('safe', 'risky: ...', 'bug: ...') or None on failure."""
|
|
148
|
+
prompt = (
|
|
149
|
+
"You are an independent code reviewer giving a second opinion on a proposed "
|
|
150
|
+
"change. Reply with exactly one line: 'VERDICT: safe', 'VERDICT: risky: <reason>', "
|
|
151
|
+
f"or 'VERDICT: bug: <reason>'. Be terse.\n\nProposed change:\n{content}\n\nVerdict:"
|
|
152
|
+
)
|
|
153
|
+
messages = [{"role": "user", "content": prompt}]
|
|
154
|
+
msg = None
|
|
155
|
+
for _ in range(2): # one retry: review runs against a sometimes-flaky API
|
|
156
|
+
try:
|
|
157
|
+
msg = guardian.chat(messages, [], 0.0, 512)
|
|
158
|
+
break
|
|
159
|
+
except Exception: # noqa: BLE001 - review is best-effort
|
|
160
|
+
msg = None
|
|
161
|
+
if msg is None:
|
|
162
|
+
return None
|
|
163
|
+
text = (msg.get("content") or msg.get("reasoning_content") or "").strip()
|
|
164
|
+
for line in text.splitlines():
|
|
165
|
+
s = line.strip()
|
|
166
|
+
if s.upper().startswith("VERDICT:"):
|
|
167
|
+
return s.split(":", 1)[1].strip()
|
|
168
|
+
return text.splitlines()[0][:120] if text else None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def verdict_severity(verdict: str | None) -> str:
    """Map a verdict string onto ``safe``, ``risky``, ``bug`` or ``unknown``.

    Matching is case-insensitive on the start of the stripped verdict; a
    missing or empty verdict is ``unknown``.
    """
    if not verdict:
        return "unknown"
    head = verdict.strip().lower()
    for label in ("safe", "risky", "bug"):
        if head.startswith(label):
            return label
    return "unknown"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def verdict_exit_code(verdict: str | None) -> int:
    """Exit code for `review` as a PR gate: 0 safe, 1 risky, 2 bug, 0 unknown."""
    severity = verdict_severity(verdict)
    codes = {"safe": 0, "risky": 1, "bug": 2, "unknown": 0}
    return codes[severity]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _guardian_verdict(guardian, name: str, args: dict) -> str | None:
    """Render the pending tool call for review and return the guardian's verdict."""
    review_text = _guardian_review_content(name, args)
    return guardian_review(guardian, review_text)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _approved(name, args, interactive, auto_approve, renderer, guardian=None) -> bool:
    """Decide whether a dangerous tool call may run.

    Auto-approve lets the call through unless it is a gated path tool whose
    target lies outside the working directory. Otherwise a non-interactive
    session denies the call, and an interactive one asks the user through
    ``renderer.confirm``. For edit tools the guardian's verdict, if one is
    obtained, is appended to the prompt detail.
    """
    must_ask = name in _GATED_PATH_TOOLS and _outside_cwd(args)
    if auto_approve and not must_ask:
        return True
    if not interactive:
        return False

    title, detail = _describe(name, args)
    wants_review = guardian is not None and name in _EDIT_TOOLS
    verdict = _guardian_verdict(guardian, name, args) if wants_review else None
    if verdict:
        detail = f"{detail}\n\nGuardian: {verdict}"
    return renderer.confirm(title, detail)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _stream_turn(backend, messages, schemas, settings, renderer):
    """Run one streamed model turn and return ``(message, True)``.

    Content tokens are handed to ``renderer.answer_stream`` as they arrive;
    tokens on any other channel are dropped. The assembled assistant message
    is taken from the backend generator's return value. If the renderer never
    exhausts the token stream, an empty assistant message is returned instead.
    """
    assembled = {"role": "assistant", "content": "", "tool_calls": None}

    def content_tokens():
        nonlocal assembled
        stream = backend.stream_chat_tools(
            messages, schemas, settings.temperature, settings.max_tokens
        )
        while True:
            try:
                channel, text = next(stream)
            except StopIteration as done:
                # The generator's return value is the full assistant message.
                assembled = done.value
                return
            if channel == "content":
                yield text

    renderer.answer_stream(content_tokens())
    return assembled, True
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def run_agent(
    backend,
    registry,
    messages,
    settings,
    renderer,
    *,
    interactive: bool,
    auto_approve: bool,
    max_steps: int | None = None,
    stream: bool = False,
    guardian=None,
) -> str:
    """Drive the tool-calling loop until the model answers or steps run out.

    Each step asks the backend for a reply (streamed when ``stream`` is set
    and the backend offers ``stream_chat_tools``). A reply without tool calls
    is the final answer: it is appended to ``messages``, rendered unless it
    was already streamed, and returned. Otherwise every requested tool call
    is executed, or rejected with a JSON ``{"error": ...}`` result, and its
    result is appended to ``messages`` as a ``tool`` message.

    ``messages`` is mutated in place. ``max_steps`` overrides
    ``settings.max_steps`` when given. If the step cap is reached, a fixed
    notice string is returned without being rendered or recorded.
    """
    steps = settings.max_steps if max_steps is None else max_steps
    schemas = registry.schemas()
    can_stream = stream and hasattr(backend, "stream_chat_tools")

    for _step in range(steps):
        if can_stream:
            reply, streamed = _stream_turn(backend, messages, schemas, settings, renderer)
        else:
            reply = backend.chat(messages, schemas, settings.temperature, settings.max_tokens)
            streamed = False

        calls = reply.get("tool_calls")
        if not calls:
            answer = reply.get("content") or ""
            messages.append({"role": "assistant", "content": answer})
            if not streamed:
                renderer.answer(answer)
            return answer

        messages.append(
            {"role": "assistant", "content": reply.get("content"), "tool_calls": calls}
        )
        for call in calls:
            tool_name = call["function"]["name"]
            tool_args = _parse_args(call["function"].get("arguments", ""))
            meta = registry.get(tool_name)
            renderer.tool_start(tool_name, _preview(tool_args))
            started = time.monotonic()

            # Work out whether the call is rejected up front; only an
            # accepted call reaches the registry.
            failure = None
            if meta is None:
                failure = f"unknown tool: {tool_name}"
            elif "__parse_error__" in tool_args:
                failure = "invalid JSON arguments"
            elif meta.dangerous and not _approved(
                tool_name, tool_args, interactive, auto_approve, renderer, guardian
            ):
                failure = "denied by user"

            if failure is None:
                try:
                    outcome = registry.call(
                        tool_name, **coerce_args(meta.parameters, tool_args)
                    )
                except Exception as exc:  # noqa: BLE001 - surface tool failure to model
                    failure = f"{type(exc).__name__}: {exc}"
            if failure is not None:
                outcome = json.dumps({"error": failure})

            outcome = str(outcome)[:_MAX_TOOL_RESULT]
            # Heuristic: an error payload starts with {"error": ...}.
            ok = '"error"' not in outcome[:30]
            renderer.tool_result(
                ok, _summarize_result(tool_name, outcome), time.monotonic() - started
            )
            messages.append(
                {"role": "tool", "tool_call_id": call["id"], "content": outcome}
            )

    return f"Reached the {steps}-step limit without a final answer."
|
deepparallel/backend.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""HTTP transports for DeepParallel.
|
|
2
|
+
|
|
3
|
+
Two interchangeable backends behind one streaming seam:
|
|
4
|
+
- AzureBackend: direct Azure OpenAI chat-completions deployment.
|
|
5
|
+
- FoundryBackend: Crowe Logic Foundry control plane (/v1/chat/completions).
|
|
6
|
+
|
|
7
|
+
stream_chat yields (channel, text) tuples where channel is "content" or
|
|
8
|
+
"thinking", so reasoning visibility stays a rendering decision in cli.py.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
from typing import Iterator, Protocol
|
|
15
|
+
from urllib.parse import urlparse
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
# One streamed delta: (channel, text), where channel is "content" or "thinking".
Chunk = tuple[str, str]  # (channel, text)

# Timeout for chat requests, streaming and non-streaming alike: 120 s overall,
# with a shorter 10 s limit on establishing the connection.
_STREAM_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
# Timeout, in seconds, for the reachability probe made by `check()`.
_CHECK_TIMEOUT = 4.0
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_sse_lines(lines: Iterator[str]) -> Iterator[Chunk]:
    """Turn OpenAI-style SSE lines into ``(channel, text)`` chunks.

    Lines that are not ``data:`` events, that fail to parse as JSON, or that
    carry no choices are skipped. Parsing stops at the ``[DONE]`` sentinel.
    For each event, reasoning text is yielded on the ``"thinking"`` channel
    before answer text on the ``"content"`` channel.
    """
    prefix = "data:"
    channels = (("reasoning_content", "thinking"), ("content", "content"))
    for raw in lines:
        line = raw.strip()
        if not line.startswith(prefix):  # also rejects blank lines
            continue
        data = line[len(prefix):].strip()
        if data == "[DONE]":
            return
        try:
            event = json.loads(data)
        except json.JSONDecodeError:
            continue
        choices = event.get("choices") or []
        if not choices:
            continue
        delta = choices[0].get("delta") or {}
        for field, channel in channels:
            piece = delta.get(field)
            if piece:
                yield (channel, piece)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse_sse_stream(lines: Iterator[str]):
    """Parse a streamed chat completion, including incremental tool calls.

    Yields ``("thinking", text)`` and ``("content", text)`` deltas as they
    arrive. When the stream ends, or at the ``[DONE]`` sentinel, the generator
    returns (through ``StopIteration.value``) the assembled assistant message.
    Its ``content`` is the joined text, or ``None`` if there was none. Its
    ``tool_calls`` are the fragments merged per ``index`` and ordered by it,
    or ``None`` if there were none.
    """
    prefix = "data:"
    text_parts: list[str] = []
    pending: dict[int, dict] = {}

    for raw in lines:
        line = raw.strip()
        if not line.startswith(prefix):  # also rejects blank lines
            continue
        data = line[len(prefix):].strip()
        if data == "[DONE]":
            break
        try:
            event = json.loads(data)
        except json.JSONDecodeError:
            continue
        choices = event.get("choices") or []
        if not choices:
            continue
        delta = choices[0].get("delta") or {}

        thought = delta.get("reasoning_content")
        if thought:
            yield ("thinking", thought)

        piece = delta.get("content")
        if piece:
            text_parts.append(piece)
            yield ("content", piece)

        # Tool calls arrive in fragments keyed by index: id and name replace
        # the stored value, argument text is concatenated.
        for fragment in delta.get("tool_calls") or []:
            slot = pending.setdefault(
                fragment.get("index", 0),
                {"id": "", "type": "function", "function": {"name": "", "arguments": ""}},
            )
            call_id = fragment.get("id")
            if call_id:
                slot["id"] = call_id
            fn = fragment.get("function") or {}
            fn_name = fn.get("name")
            if fn_name:
                slot["function"]["name"] = fn_name
            fn_args = fn.get("arguments")
            if fn_args:
                slot["function"]["arguments"] += fn_args

    ordered = [pending[index] for index in sorted(pending)]
    return {
        "role": "assistant",
        "content": "".join(text_parts) or None,
        "tool_calls": ordered or None,
    }
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _host(url: str) -> str:
    """Reduce *url* to ``scheme://netloc``; return it unchanged if it has no netloc."""
    parts = urlparse(url)
    if not parts.netloc:
        return url
    return f"{parts.scheme}://{parts.netloc}"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class Backend(Protocol):
    """Structural interface that every chat transport in this module provides."""

    # Human-readable name of the transport.
    label: str

    def check(self) -> tuple[bool, str]:
        """Probe configuration and reachability; return ``(ok, status message)``."""
        ...

    def stream_chat(
        self, messages: list[dict], temperature: float, max_tokens: int
    ) -> Iterator[Chunk]:
        """Stream a completion without tools as ``(channel, text)`` chunks."""
        ...

    def chat(
        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
    ) -> dict:
        """Run one non-streaming completion and return the assistant message dict."""
        ...

    def stream_chat_tools(
        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
    ) -> Iterator[Chunk]:
        """Stream a completion that may include tool calls.

        Yields ``(channel, text)`` chunks. The implementations in this module
        are generators that also return the assembled assistant message via
        ``StopIteration.value``, which the ``Iterator[Chunk]`` annotation does
        not express.
        """
        ...
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class AzureBackend:
    """Chat transport for a direct Azure OpenAI chat-completions deployment.

    Requests are authenticated with an ``api-key`` header and sent to the
    deployment-specific chat-completions URL.
    """

    label = "Azure OpenAI"

    def __init__(self, endpoint: str, api_key: str, deployment: str, api_version: str):
        # Missing values become empty strings so check() can report them.
        self._endpoint = (endpoint or "").rstrip("/")
        self._api_key = api_key or ""
        self._deployment = deployment
        self._api_version = api_version

    @property
    def _url(self) -> str:
        """Chat-completions URL for the configured deployment and API version."""
        base = f"{self._endpoint}/openai/deployments/{self._deployment}"
        return f"{base}/chat/completions?api-version={self._api_version}"

    @property
    def _headers(self) -> dict:
        """Request headers carrying the API key."""
        return {"api-key": self._api_key, "content-type": "application/json"}

    def _payload(self, messages, temperature, max_tokens, *, stream, tools=None) -> dict:
        """Build the request body; ``tools`` is included only when non-empty."""
        body = {
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            body["tools"] = tools
        return body

    def check(self) -> tuple[bool, str]:
        """Return ``(ok, message)`` after a config check and a reachability probe."""
        if not (self._endpoint and self._api_key):
            return False, "Azure endpoint or API key not configured."
        try:
            httpx.get(_host(self._endpoint), timeout=_CHECK_TIMEOUT)
        except Exception as exc:  # noqa: BLE001 - reachability probe
            return False, f"Azure endpoint unreachable ({exc.__class__.__name__})"
        return True, f"Azure @ {_host(self._endpoint)}"

    def stream_chat(self, messages, temperature, max_tokens):
        """Stream a completion without tools as ``(channel, text)`` chunks."""
        body = self._payload(messages, temperature, max_tokens, stream=True)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        ) as response:
            response.raise_for_status()
            yield from parse_sse_lines(response.iter_lines())

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Run one non-streaming completion; return the first choice's message."""
        body = self._payload(messages, temperature, max_tokens, stream=False, tools=tools)
        response = httpx.post(
            self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Stream a tool-capable completion.

        Yields ``(channel, text)`` chunks and returns the assembled assistant
        message as the generator's return value.
        """
        body = self._payload(messages, temperature, max_tokens, stream=True, tools=tools)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        ) as response:
            response.raise_for_status()
            return (yield from parse_sse_stream(response.iter_lines()))
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class FoundryBackend:
    """Chat transport for the Foundry control plane (``/v1/chat/completions``).

    Requests are authenticated with a bearer token and name the model in the
    request body.
    """

    label = "Foundry control plane"

    def __init__(self, base_url: str, api_key: str, model: str):
        # Missing values become empty strings so check() can report them.
        self._base_url = (base_url or "").rstrip("/")
        self._api_key = api_key or ""
        self._model = model

    @property
    def _url(self) -> str:
        """Chat-completions URL under the configured base URL."""
        return f"{self._base_url}/v1/chat/completions"

    @property
    def _headers(self) -> dict:
        """Request headers carrying the bearer token."""
        return {
            "authorization": f"Bearer {self._api_key}",
            "content-type": "application/json",
        }

    def _payload(self, messages, temperature, max_tokens, *, stream, tools=None) -> dict:
        """Build the request body; ``tools`` is included only when non-empty."""
        body = {
            "model": self._model,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            body["tools"] = tools
        return body

    def check(self) -> tuple[bool, str]:
        """Return ``(ok, message)`` after a config check and a reachability probe."""
        if not (self._base_url and self._api_key):
            return False, "Foundry base URL or API key not configured."
        try:
            httpx.get(_host(self._base_url), timeout=_CHECK_TIMEOUT)
        except Exception as exc:  # noqa: BLE001 - reachability probe
            return False, f"Foundry endpoint unreachable ({exc.__class__.__name__})"
        return True, f"Foundry @ {_host(self._base_url)}"

    def stream_chat(self, messages, temperature, max_tokens):
        """Stream a completion without tools as ``(channel, text)`` chunks."""
        body = self._payload(messages, temperature, max_tokens, stream=True)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        ) as response:
            response.raise_for_status()
            yield from parse_sse_lines(response.iter_lines())

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Run one non-streaming completion; return the first choice's message."""
        body = self._payload(messages, temperature, max_tokens, stream=False, tools=tools)
        response = httpx.post(
            self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Stream a tool-capable completion.

        Yields ``(channel, text)`` chunks and returns the assembled assistant
        message as the generator's return value.
        """
        body = self._payload(messages, temperature, max_tokens, stream=True, tools=tools)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
        ) as response:
            response.raise_for_status()
            return (yield from parse_sse_stream(response.iter_lines()))
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def resolve_backend(settings) -> Backend:
    """Build the backend selected by ``settings.backend``.

    ``"foundry"`` selects the Foundry control plane; any other value selects
    Azure OpenAI. Unset URLs and keys are passed through as empty strings.
    """
    if settings.backend != "foundry":
        return AzureBackend(
            settings.azure_endpoint or "",
            settings.azure_api_key or "",
            settings.deployment,
            settings.api_version,
        )
    return FoundryBackend(
        settings.foundry_base_url or "",
        settings.foundry_api_key or "",
        settings.foundry_model,
    )
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def backend_for_deployment(settings, deployment: str) -> Backend:
    """Build a backend aimed at one specific deployment/model (used for fusion).

    The transport follows ``settings.backend`` exactly as the active backend
    does; only the model or deployment id is replaced by *deployment*.
    """
    if settings.backend != "foundry":
        return AzureBackend(
            settings.azure_endpoint or "",
            settings.azure_api_key or "",
            deployment,
            settings.api_version,
        )
    base_url = settings.foundry_base_url or ""
    api_key = settings.foundry_api_key or ""
    return FoundryBackend(base_url, api_key, deployment)
|