deepparallel 0.4.3__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. {deepparallel-0.4.3 → deepparallel-0.5.1}/PKG-INFO +1 -1
  2. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/__init__.py +1 -1
  3. deepparallel-0.5.1/deepparallel/backend.py +645 -0
  4. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/branding.py +107 -20
  5. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/cli.py +7 -5
  6. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/config.py +62 -4
  7. deepparallel-0.5.1/deepparallel/crowe_id.py +75 -0
  8. deepparallel-0.5.1/deepparallel/dsml.py +129 -0
  9. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/renderer.py +4 -10
  10. deepparallel-0.5.1/deepparallel/research/provider.py +125 -0
  11. deepparallel-0.5.1/deepparallel/routing.example.json +32 -0
  12. deepparallel-0.5.1/deepparallel/routing.py +135 -0
  13. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/serve.py +127 -12
  14. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/__init__.py +1 -0
  15. deepparallel-0.5.1/deepparallel/tools/mcp.py +280 -0
  16. deepparallel-0.5.1/deepparallel/userinput.py +135 -0
  17. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/PKG-INFO +1 -1
  18. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/SOURCES.txt +17 -1
  19. {deepparallel-0.4.3 → deepparallel-0.5.1}/pyproject.toml +2 -2
  20. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_backend.py +60 -0
  21. deepparallel-0.5.1/tests/test_crowe_backend.py +79 -0
  22. deepparallel-0.5.1/tests/test_crowe_gateway_backend.py +99 -0
  23. deepparallel-0.5.1/tests/test_crowe_id_auth.py +112 -0
  24. deepparallel-0.5.1/tests/test_crowe_payment_required.py +70 -0
  25. deepparallel-0.5.1/tests/test_dsml.py +104 -0
  26. deepparallel-0.5.1/tests/test_research_provider.py +96 -0
  27. deepparallel-0.5.1/tests/test_routing.py +169 -0
  28. deepparallel-0.5.1/tests/test_serve.py +259 -0
  29. deepparallel-0.5.1/tests/test_tools_mcp.py +47 -0
  30. deepparallel-0.5.1/tests/test_userinput_paste.py +71 -0
  31. deepparallel-0.4.3/deepparallel/backend.py +0 -318
  32. deepparallel-0.4.3/deepparallel/userinput.py +0 -61
  33. {deepparallel-0.4.3 → deepparallel-0.5.1}/README.md +0 -0
  34. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/agent.py +0 -0
  35. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/fusion.py +0 -0
  36. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/licensing.py +0 -0
  37. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/registry.json +0 -0
  38. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/research/__init__.py +0 -0
  39. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/research/conduit.py +0 -0
  40. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/supply_chain.py +0 -0
  41. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/system_prompt.txt +0 -0
  42. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/codeast.py +0 -0
  43. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/edit.py +0 -0
  44. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/files.py +0 -0
  45. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/registry.py +0 -0
  46. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/sandbox.py +0 -0
  47. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/search.py +0 -0
  48. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/shell.py +0 -0
  49. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/vision.py +0 -0
  50. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/tools/web.py +0 -0
  51. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/dependency_links.txt +0 -0
  52. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/entry_points.txt +0 -0
  53. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/requires.txt +0 -0
  54. {deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel.egg-info/top_level.txt +0 -0
  55. {deepparallel-0.4.3 → deepparallel-0.5.1}/setup.cfg +0 -0
  56. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_agent.py +0 -0
  57. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_backend_chat.py +0 -0
  58. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_backend_stream.py +0 -0
  59. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_branding.py +0 -0
  60. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_cli.py +0 -0
  61. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_config.py +0 -0
  62. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_fusion.py +0 -0
  63. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_issuer_signer.py +0 -0
  64. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_licensing.py +0 -0
  65. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_renderer.py +0 -0
  66. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_research.py +0 -0
  67. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_spinner_color.py +0 -0
  68. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_supply_chain.py +0 -0
  69. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tool_registry.py +0 -0
  70. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_codeast.py +0 -0
  71. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_edit.py +0 -0
  72. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_files.py +0 -0
  73. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_sandbox.py +0 -0
  74. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_search.py +0 -0
  75. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_shell.py +0 -0
  76. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_vision.py +0 -0
  77. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_tools_web.py +0 -0
  78. {deepparallel-0.4.3 → deepparallel-0.5.1}/tests/test_userinput.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepparallel
3
- Version: 0.4.3
3
+ Version: 0.5.1
4
4
  Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
5
5
  Author-email: Michael Crowe <michael@crowelogic.com>
6
6
  License: Apache-2.0
@@ -1,3 +1,3 @@
1
1
  """DeepParallel CLI package."""
2
2
 
3
- __version__ = "0.4.3"
3
+ __version__ = "0.5.1"
@@ -0,0 +1,645 @@
1
+ """HTTP transports for DeepParallel.
2
+
3
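+ Interchangeable backends behind one streaming seam:
4
+ - AzureBackend: direct Azure OpenAI chat-completions deployment.
5
+ - FoundryBackend: Crowe Logic Foundry control plane (/v1/chat/completions).
+ - CroweGatewayBackend: paid x402 agent rail on the Foundry gateway (Crowe ID auth).
+ - ModalBackend: Modal-hosted OpenAI-compatible endpoint (proxy-auth headers).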
6
+
7
+ stream_chat yields (channel, text) tuples where channel is "content" or
8
+ "thinking", so reasoning visibility stays a rendering decision in cli.py.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ import sys
16
+ from typing import Iterator, Protocol
17
+ from urllib.parse import urlparse
18
+
19
+ import httpx
20
+
21
+ from . import crowe_id
22
+
23
+ Chunk = tuple[str, str] # (channel, text)
24
+
25
+ _STREAM_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
26
+ # Modal scale-to-zero cold start can take 2-3 min before the first byte, so the
27
+ # read timeout must be generous; connect stays short.
28
+ _MODAL_TIMEOUT = httpx.Timeout(600.0, connect=15.0)
29
+ _CHECK_TIMEOUT = 4.0
30
+
31
+
32
def parse_sse_lines(lines: Iterator[str]) -> Iterator[Chunk]:
    """Parse OpenAI-style SSE lines into (channel, text) chunks.

    Only ``data:`` lines are considered; blank lines, other SSE fields and
    payloads that are not valid JSON are skipped. Parsing stops at the
    ``[DONE]`` sentinel. For each event the first choice's ``delta`` is
    inspected: ``reasoning_content`` is yielded on the "thinking" channel,
    then ``content`` on the "content" channel.

    Payloads that decode to something other than a JSON object (or whose
    ``choices`` / first choice / ``delta`` has an unexpected shape) are
    skipped rather than raising, so one malformed event cannot abort an
    otherwise healthy stream.
    """
    prefix = "data:"
    for raw in lines:
        line = raw.strip()
        if not line.startswith(prefix):
            continue
        data = line[len(prefix):].strip()
        if data == "[DONE]":
            return
        try:
            event = json.loads(data)
        except json.JSONDecodeError:
            continue
        # json.loads happily returns ints, strings or lists; only objects
        # can carry a "choices" array.
        if not isinstance(event, dict):
            continue
        choices = event.get("choices")
        if not isinstance(choices, list) or not choices:
            continue
        first = choices[0]
        if not isinstance(first, dict):
            continue
        delta = first.get("delta") or {}
        if not isinstance(delta, dict):
            continue
        reasoning = delta.get("reasoning_content")
        if reasoning:
            yield ("thinking", reasoning)
        content = delta.get("content")
        if content:
            yield ("content", content)
55
+
56
+
57
def parse_sse_stream(lines: Iterator[str]):
    """Parse a streaming chat completion that may include tool calls.

    Yields ("content"|"thinking", text) deltas for live rendering, and returns
    (via StopIteration.value) the fully assembled assistant message:

    - ``role``: always "assistant".
    - ``content``: the concatenated content deltas, or None when there were none.
    - ``tool_calls``: tool calls accumulated across chunks, ordered by their
      stream index, or None when there were none.
    - ``_truncated``: True when the last reported finish_reason was "length".

    Events that are not JSON objects, or whose ``choices`` / ``delta`` /
    ``tool_calls`` entries have an unexpected shape, are skipped rather than
    raising, so a single malformed event cannot abort the stream.
    """
    prefix = "data:"
    content_parts: list[str] = []
    slots: dict[int, dict] = {}
    finish_reason: str | None = None
    for raw in lines:
        line = raw.strip()
        if not line.startswith(prefix):
            continue
        data = line[len(prefix):].strip()
        if data == "[DONE]":
            break
        try:
            event = json.loads(data)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue
        choices = event.get("choices")
        if not isinstance(choices, list) or not choices:
            continue
        first = choices[0]
        if not isinstance(first, dict):
            continue
        if first.get("finish_reason"):
            finish_reason = first["finish_reason"]
        delta = first.get("delta") or {}
        if not isinstance(delta, dict):
            continue
        reasoning = delta.get("reasoning_content")
        if reasoning:
            yield ("thinking", reasoning)
        content = delta.get("content")
        if content:
            content_parts.append(content)
            yield ("content", content)
        for call in delta.get("tool_calls") or []:
            if not isinstance(call, dict):
                continue
            index = call.get("index", 0)
            # A null/non-integer index would make the final sort fail; fold
            # it into slot 0, the same default used when the key is absent.
            if not isinstance(index, int):
                index = 0
            slot = slots.setdefault(
                index,
                {"id": "", "type": "function", "function": {"name": "", "arguments": ""}},
            )
            if call.get("id"):
                slot["id"] = call["id"]
            fn = call.get("function") or {}
            if fn.get("name"):
                slot["function"]["name"] = fn["name"]
            # Arguments arrive as string fragments that must be concatenated.
            if fn.get("arguments"):
                slot["function"]["arguments"] += fn["arguments"]
    tool_calls = [slots[i] for i in sorted(slots)] or None
    return {
        "role": "assistant",
        "content": "".join(content_parts) or None,
        "tool_calls": tool_calls,
        "_truncated": finish_reason == "length",
    }
110
+
111
+
112
def _host(url: str) -> str:
    """Reduce *url* to ``scheme://netloc``; return it unchanged if it has no netloc."""
    parts = urlparse(url)
    if not parts.netloc:
        return url
    return f"{parts.scheme}://{parts.netloc}"
115
+
116
+
117
def _message_from_choice(choice: dict) -> dict:
    """Return a copy of the choice's assistant message, tagged for truncation.

    The copy gains a ``_truncated`` key that is True exactly when
    ``finish_reason == "length"``, i.e. the model was cut off mid-output. For a
    tool call that carries file content, that means the arguments (and any file
    body inside them) are incomplete and must not be applied blindly.
    """
    message = dict(choice.get("message") or {})
    cut_off = choice.get("finish_reason") == "length"
    message["_truncated"] = cut_off
    return message
127
+
128
+
129
class Backend(Protocol):
    """Structural interface shared by every chat transport in this module."""

    # Human-readable backend name.
    label: str

    def check(self) -> tuple[bool, str]:
        """Return ``(ok, detail)`` describing whether the backend looks usable."""

    def stream_chat(
        self, messages: list[dict], temperature: float, max_tokens: int
    ) -> Iterator[Chunk]:
        """Stream a completion as ``(channel, text)`` chunks."""

    def chat(
        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
    ) -> dict:
        """Run a non-streaming completion and return the assistant message dict."""

    def stream_chat_tools(
        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
    ) -> Iterator[Chunk]:
        """Stream ``(channel, text)`` chunks for a tool-enabled completion.

        The implementations in this module are generators whose return value
        (``StopIteration.value``) is the assembled assistant message dict.
        """
145
+
146
+
147
def _should_failover(exc: Exception) -> bool:
    """Decide whether a failed request is worth retrying against direct Azure.

    Transport errors and upstream 5xx responses qualify; a 4xx means the
    request itself is bad, so retrying elsewhere is pointless. Any other
    exception type never triggers failover.
    """
    if isinstance(exc, httpx.HTTPStatusError):
        return exc.response.status_code >= 500
    return isinstance(exc, httpx.TransportError)
155
+
156
+
157
def _log_failover(label: str, exc: Exception) -> None:
    """Write a one-line failover notice (backend label + exception class) to stderr."""
    reason = exc.__class__.__name__
    notice = (
        f"[deepparallel] {label}: primary endpoint failed "
        f"({reason}); failing over to direct Azure"
    )
    print(notice, file=sys.stderr, flush=True)
163
+
164
+
165
class AzureBackend:
    """Azure OpenAI chat-completions transport with gateway-to-direct failover.

    The configured endpoint is tried first. When it is a Cloudflare AI Gateway
    ``/azure-openai/`` route, the direct Azure endpoint is derived from the
    route's resource segment and used as a fallback if the primary fails with
    a transport error or a 5xx before any chunk has been delivered.
    """

    label = "Azure OpenAI"

    def __init__(self, endpoint: str, api_key: str, deployment: str, api_version: str):
        self._endpoint = (endpoint or "").rstrip("/")
        self._api_key = api_key or ""
        self._deployment = deployment
        self._api_version = api_version

    def _build_url(self, endpoint: str) -> str:
        """Return the chat-completions URL for this deployment under *endpoint*."""
        # Cloudflare AI Gateway azure-openai routes carry the resource in the
        # path and drop the native "/openai/deployments" segment; native Azure
        # endpoints keep it.
        base = endpoint.rstrip("/")
        if "/azure-openai/" not in base:
            base = f"{base}/openai/deployments"
        return f"{base}/{self._deployment}/chat/completions?api-version={self._api_version}"

    def _endpoints(self) -> list[str]:
        """Return the endpoints to try, primary first, then any derived fallback."""
        # Primary is whatever is configured (typically the Cloudflare AI Gateway
        # route). When that primary is a gateway route, derive the direct Azure
        # endpoint from its resource segment and append it as automatic failover:
        # fail-open, so a gateway outage degrades to direct Azure instead of
        # taking down every CroweLM request. The failover request is not logged
        # by the gateway -- the acceptable cost of staying available.
        candidates = [self._endpoint]
        _, marker, tail = self._endpoint.partition("/azure-openai/")
        if marker:
            resource = tail.split("/", 1)[0]
            if resource:
                candidates.append(f"https://{resource}.cognitiveservices.azure.com")
        return candidates

    @property
    def _url(self) -> str:
        """Chat-completions URL on the primary (configured) endpoint."""
        return self._build_url(self._endpoint)

    @property
    def _headers(self) -> dict:
        """Request headers: API key, content type and AI Gateway hints."""
        # cf-aig-* headers configure AI Gateway per-request (no management API
        # needed). Cache TTL is operationally tunable via DEEPPARALLEL_CACHE_TTL
        # (seconds; "0" or empty disables caching for this agentic workload).
        # Ignored by direct Azure on the failover path.
        result = {
            "api-key": self._api_key,
            "content-type": "application/json",
        }
        cache_ttl = os.getenv("DEEPPARALLEL_CACHE_TTL", "300").strip()
        if cache_ttl not in ("", "0"):
            result["cf-aig-cache-ttl"] = cache_ttl
        # NOTE(review): the source's indentation was flattened, so it is not
        # certain whether this header belongs inside the TTL check above;
        # it is reconstructed here as unconditional -- confirm.
        result["cf-aig-metadata"] = '{"via":"deepparallel-gateway"}'
        return result

    def check(self) -> tuple[bool, str]:
        """Report whether the backend is configured and its host answers a GET."""
        if not (self._endpoint and self._api_key):
            return False, "Azure endpoint or API key not configured."
        host = _host(self._endpoint)
        try:
            httpx.get(host, timeout=_CHECK_TIMEOUT)
        except Exception as e:  # noqa: BLE001 - reachability probe
            return False, f"Azure endpoint unreachable ({e.__class__.__name__})"
        return True, f"Azure @ {host}"

    def _payload(self, messages, stream, temperature, max_tokens) -> dict:
        """Build the request body in the parameter shape this deployment accepts."""
        # GPT-5 family + gpt-chat-latest deployments require
        # `max_completion_tokens` and reject a custom `temperature` (only the
        # default is accepted) -> they 400 on the legacy
        # `max_tokens`/`temperature` shape. Everything else uses the classic
        # params.
        body = {"messages": messages, "stream": stream}
        if self._deployment.lower().startswith(("gpt-5", "gpt-chat")):
            body["max_completion_tokens"] = max_tokens
        else:
            body["temperature"] = temperature
            body["max_tokens"] = max_tokens
        return body

    def stream_chat(self, messages, temperature, max_tokens):
        """Stream ``(channel, text)`` chunks, failing over before the first chunk.

        Once a chunk has been yielded the error is re-raised instead, since
        restarting on another endpoint would replay output.
        """
        body = self._payload(messages, True, temperature, max_tokens)
        targets = [self._build_url(e) for e in self._endpoints()]
        for attempt, target in enumerate(targets, start=1):
            emitted = False
            try:
                with httpx.stream(
                    "POST", target, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
                ) as r:
                    r.raise_for_status()
                    for chunk in parse_sse_lines(r.iter_lines()):
                        emitted = True
                        yield chunk
                return
            except (httpx.TransportError, httpx.HTTPStatusError) as e:
                exhausted = attempt == len(targets)
                if exhausted or emitted or not _should_failover(e):
                    raise
                _log_failover(self.label, e)

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Run a non-streaming completion and return the first choice's message.

        The message carries a ``_truncated`` flag; *tools* are sent only when
        non-empty. Fails over to the next endpoint on transport errors or 5xx.
        """
        body = self._payload(messages, False, temperature, max_tokens)
        if tools:
            body["tools"] = tools
        targets = [self._build_url(e) for e in self._endpoints()]
        for attempt, target in enumerate(targets, start=1):
            try:
                r = httpx.post(target, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT)
                r.raise_for_status()
                return _message_from_choice(r.json()["choices"][0])
            except (httpx.TransportError, httpx.HTTPStatusError) as e:
                exhausted = attempt == len(targets)
                if exhausted or not _should_failover(e):
                    raise
                _log_failover(self.label, e)

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Stream a tool-enabled completion.

        Yields ``(channel, text)`` chunks and returns, as the generator's
        return value, the assembled assistant message from parse_sse_stream.
        Failover applies only before the first chunk has been yielded.
        """
        body = self._payload(messages, True, temperature, max_tokens)
        if tools:
            body["tools"] = tools
        targets = [self._build_url(e) for e in self._endpoints()]
        for attempt, target in enumerate(targets, start=1):
            emitted = False
            try:
                with httpx.stream(
                    "POST", target, json=body, headers=self._headers, timeout=_STREAM_TIMEOUT
                ) as r:
                    r.raise_for_status()
                    events = parse_sse_stream(r.iter_lines())
                    # Driven by hand (rather than `yield from`) so `emitted`
                    # can be tracked for the failover decision.
                    while True:
                        try:
                            chunk = next(events)
                        except StopIteration as done:
                            return done.value
                        emitted = True
                        yield chunk
            except (httpx.TransportError, httpx.HTTPStatusError) as e:
                exhausted = attempt == len(targets)
                if exhausted or emitted or not _should_failover(e):
                    raise
                _log_failover(self.label, e)
313
+
314
+
315
class FoundryBackend:
    """OpenAI-compatible transport for the Foundry control plane.

    Requests go to ``{base_url}/v1/chat/completions`` with a bearer token.
    """

    label = "Foundry control plane"

    def __init__(self, base_url: str, api_key: str, model: str, token_provider=None):
        self._base_url = (base_url or "").rstrip("/")
        self._api_key = api_key or ""
        self._model = model
        # Optional callable returning a fresh bearer (e.g. a Crowe ID
        # client_credentials token). When set it takes precedence over the static
        # api_key, so the gateway sees a sovereign agent identity per request.
        self._token_provider = token_provider

    @property
    def _url(self) -> str:
        """Chat-completions URL under the configured base URL."""
        return f"{self._base_url}/v1/chat/completions"

    def _bearer(self) -> str:
        """Return the token provider's bearer if one is set, else the static key."""
        if self._token_provider:
            return self._token_provider()
        return self._api_key

    def _headers(self) -> dict:
        """Build per-request headers; resolves the bearer once per call."""
        return {
            "authorization": f"Bearer {self._bearer()}",
            "content-type": "application/json",
        }

    def _payload(self, messages, stream, temperature, max_tokens, tools=None) -> dict:
        """Build the request body; ``tools`` is included only when non-empty."""
        body = {
            "model": self._model,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            body["tools"] = tools
        return body

    def check(self) -> tuple[bool, str]:
        """Report whether the backend is configured and its host answers a GET."""
        has_credentials = bool(self._api_key or self._token_provider)
        if not self._base_url or not has_credentials:
            return False, "Foundry base URL or API key not configured."
        host = _host(self._base_url)
        try:
            httpx.get(host, timeout=_CHECK_TIMEOUT)
        except Exception as e:  # noqa: BLE001 - reachability probe
            return False, f"Foundry endpoint unreachable ({e.__class__.__name__})"
        return True, f"Foundry @ {host}"

    def stream_chat(self, messages, temperature, max_tokens):
        """Stream ``(channel, text)`` chunks for a plain completion."""
        body = self._payload(messages, True, temperature, max_tokens)
        headers = self._headers()
        with httpx.stream(
            "POST", self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT
        ) as r:
            r.raise_for_status()
            yield from parse_sse_lines(r.iter_lines())

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Run a non-streaming completion and return the first choice's message."""
        body = self._payload(messages, False, temperature, max_tokens, tools)
        headers = self._headers()
        r = httpx.post(self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT)
        r.raise_for_status()
        return _message_from_choice(r.json()["choices"][0])

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Stream a tool-enabled completion.

        Yields ``(channel, text)`` chunks and returns, as the generator's
        return value, the assembled assistant message from parse_sse_stream.
        """
        body = self._payload(messages, True, temperature, max_tokens, tools)
        headers = self._headers()
        with httpx.stream(
            "POST", self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT
        ) as r:
            r.raise_for_status()
            return (yield from parse_sse_stream(r.iter_lines()))
398
+
399
+
400
class PaymentRequired(Exception):
    """The agent's wallet can't cover the call — the x402 rail returned HTTP 402.

    Carries the parsed x402 envelope so callers can see the price + accepted schemes
    and decide how to fund (top-up) or pay (X-PAYMENT).

    Attributes:
        envelope: the 402 response body as a dict; ``{}`` when the body was
            missing or was not a JSON object.
    """

    def __init__(self, envelope: dict):
        # A 402 body is not guaranteed to be a JSON object, and "accepts" may
        # be null or hold junk. Building this exception must never raise
        # itself, or the payment error would be masked by an AttributeError.
        self.envelope = envelope if isinstance(envelope, dict) else {}
        offered = self.envelope.get("accepts")
        accepts = [a for a in offered if isinstance(a, dict)] if isinstance(offered, list) else []
        # The price shown is the first accepted scheme's maxAmountRequired.
        price = accepts[0].get("maxAmountRequired", "?") if accepts else "?"
        schemes = ", ".join(str(a.get("scheme", "")) for a in accepts) or "?"
        super().__init__(
            f"payment required: {price} micro-USD via [{schemes}] — fund the agent wallet"
        )
414
+
415
+
416
class CroweGatewayBackend:
    """Foundry gateway PAID agent rail (/api/agent/v1/chat by default), Crowe ID auth.

    Targets the x402 agent endpoint that debits the agent's wallet per call (override
    with CROWE_AGENT_RESOURCE). Native GatewayResponse shape, not the OpenAI-compat
    /v1 path (which 404s there); non-streaming with no server-side tool-calls, so we
    adapt it to DeepParallel's streaming seam by yielding the full completion as a
    single content chunk. The bearer is a Crowe ID client_credentials token from
    ``token_provider`` (the agent's sovereign identity).
    """

    label = "Crowe ID agent (Foundry gateway)"

    def __init__(self, base_url: str, model: str, token_provider=None):
        self._base_url = (base_url or "").rstrip("/")
        self._model = model
        self._token_provider = token_provider

    @property
    def _url(self) -> str:
        """Full URL of the agent resource under the configured base URL."""
        # The PAID x402 agent rail (debits the agent's wallet). Overridable via
        # CROWE_AGENT_RESOURCE for the legacy non-paid /api/gateway/chat path.
        resource = os.environ.get("CROWE_AGENT_RESOURCE", "/api/agent/v1/chat")
        # The base URL has its trailing slash stripped, so a non-empty override
        # written without a leading slash would otherwise fuse with the host.
        if resource and not resource.startswith("/"):
            resource = f"/{resource}"
        return f"{self._base_url}{resource}"

    def _bearer(self) -> str:
        """Return a token from the provider, or "" when none is configured."""
        return self._token_provider() if self._token_provider else ""

    def _headers(self) -> dict:
        """Build per-request headers; resolves the bearer once per call."""
        return {
            "authorization": f"Bearer {self._bearer()}",
            "content-type": "application/json",
        }

    def check(self) -> tuple[bool, str]:
        """Report whether the backend is configured and its host answers a GET."""
        if not self._base_url or not self._token_provider:
            return False, "Crowe gateway URL or Crowe ID credentials not configured."
        host = _host(self._base_url)
        try:
            httpx.get(host, timeout=_CHECK_TIMEOUT)
        except Exception as e:  # noqa: BLE001 - reachability probe
            return False, f"Crowe gateway unreachable ({e.__class__.__name__})"
        return True, f"Crowe ID @ {host}"

    def _complete(self, messages, temperature, max_tokens) -> str:
        """POST one completion request and return its text.

        Returns:
            The response's ``content`` field, or "" when it is missing or null.

        Raises:
            PaymentRequired: the gateway answered HTTP 402; the parsed body
                (or ``{}`` if it was not JSON) is attached as the envelope.
            httpx.HTTPStatusError: any other 4xx/5xx response.
        """
        payload = {
            "model": self._model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        r = httpx.post(self._url, json=payload, headers=self._headers(), timeout=_STREAM_TIMEOUT)
        if r.status_code == 402:
            # x402 payment-required: surface the machine-readable envelope as an
            # actionable error (price + schemes) rather than a raw HTTP error.
            try:
                envelope = r.json()
            except Exception:  # noqa: BLE001 - tolerate a non-JSON 402 body
                envelope = {}
            raise PaymentRequired(envelope)
        r.raise_for_status()
        # `or ""` also covers an explicit null, which a plain .get default
        # would pass through to callers as None.
        return r.json().get("content") or ""

    def stream_chat(self, messages, temperature, max_tokens):
        """Yield the whole completion as a single ("content", text) chunk."""
        yield ("content", self._complete(messages, temperature, max_tokens))

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Return the completion as an assistant message dict."""
        # Native gateway endpoint has no tool-calling; tools are ignored.
        return {"role": "assistant", "content": self._complete(messages, temperature, max_tokens)}

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Yield the completion as one content chunk and return the final message."""
        # No server-side tool-calls; yields content and returns the final message
        # (matches the FoundryBackend generator-return contract used by the agent loop).
        content = self._complete(messages, temperature, max_tokens)
        yield ("content", content)
        return {"role": "assistant", "content": content}
493
+
494
+
495
class ModalBackend:
    """Gemma 4 Mycelium served on a Modal scale-to-zero GPU (the free base tier).

    OpenAI-compatible /v1/chat/completions, but the Modal web endpoint requires
    proxy-auth headers (Modal-Key / Modal-Secret) on every request — which is why
    the gateway, not OWUI, must own this connection."""

    label = "Modal (Mycelium)"

    def __init__(self, endpoint: str, key: str, secret: str, model: str):
        self._base_url = (endpoint or "").rstrip("/")
        self._key = key or ""
        self._secret = secret or ""
        self._model = model

    @property
    def _url(self) -> str:
        """Chat-completions URL under the configured endpoint."""
        return f"{self._base_url}/v1/chat/completions"

    def _headers(self) -> dict:
        """Proxy-auth and content-type headers sent with every request."""
        return {
            "Modal-Key": self._key,
            "Modal-Secret": self._secret,
            "content-type": "application/json",
        }

    def _payload(self, messages, stream, temperature, max_tokens, tools=None) -> dict:
        """Build the request body; ``tools`` is included only when non-empty."""
        body = {
            "model": self._model,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            body["tools"] = tools
        return body

    def check(self) -> tuple[bool, str]:
        """Report whether the backend is configured and its host answers a GET."""
        if not (self._base_url and self._key and self._secret):
            return False, "Modal Mycelium endpoint or proxy-auth token not configured."
        host = _host(self._base_url)
        try:
            httpx.get(host, timeout=_CHECK_TIMEOUT)
        except Exception as e:  # noqa: BLE001 - reachability probe
            return False, f"Modal endpoint unreachable ({e.__class__.__name__})"
        return True, f"Modal @ {host}"

    def stream_chat(self, messages, temperature, max_tokens):
        """Stream ``(channel, text)`` chunks for a plain completion."""
        body = self._payload(messages, True, temperature, max_tokens)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT
        ) as r:
            r.raise_for_status()
            yield from parse_sse_lines(r.iter_lines())

    def chat(self, messages, tools, temperature, max_tokens) -> dict:
        """Run a non-streaming completion and return the first choice's message."""
        body = self._payload(messages, False, temperature, max_tokens, tools)
        r = httpx.post(self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT)
        r.raise_for_status()
        return _message_from_choice(r.json()["choices"][0])

    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
        """Stream a tool-enabled completion.

        Yields ``(channel, text)`` chunks and returns, as the generator's
        return value, the assembled assistant message from parse_sse_stream.
        """
        body = self._payload(messages, True, temperature, max_tokens, tools)
        with httpx.stream(
            "POST", self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT
        ) as r:
            r.raise_for_status()
            return (yield from parse_sse_stream(r.iter_lines()))
573
+
574
+
575
_crowe_providers: dict[tuple, crowe_id.CroweIDTokenProvider] = {}


def _crowe_token_provider(settings) -> crowe_id.CroweIDTokenProvider:
    """Return the memoized Crowe ID token provider for *settings*.

    One provider is kept per (issuer, client_id, audience) so fusion's many
    per-deployment backends share a single provider instance. The audience is
    part of the key because the provider is constructed with it: keying on
    (issuer, client_id) alone would hand a provider built for one audience to
    a caller configured for another.
    """
    # The client secret is deliberately left out of the key so it is not
    # retained in a second long-lived structure.
    key = (
        settings.crowe_id_issuer,
        settings.crowe_id_client_id,
        settings.crowe_id_audience,
    )
    provider = _crowe_providers.get(key)
    if provider is None:
        provider = crowe_id.CroweIDTokenProvider(
            settings.crowe_id_issuer,
            settings.crowe_id_client_id or "",
            settings.crowe_id_client_secret or "",
            audience=settings.crowe_id_audience,
        )
        _crowe_providers[key] = provider
    return provider
592
+
593
+
594
def resolve_backend(settings) -> Backend:
    """Build the backend selected by ``settings.backend``.

    "crowe" gives a CroweGatewayBackend and "foundry" a FoundryBackend, both
    on ``settings.foundry_model``; any other value falls through to an
    AzureBackend on ``settings.deployment``.
    """
    kind = settings.backend
    if kind == "crowe":
        return CroweGatewayBackend(
            settings.gateway_url or "",
            settings.foundry_model,
            token_provider=_crowe_token_provider(settings),
        )
    if kind == "foundry":
        base_url = settings.foundry_base_url or ""
        api_key = settings.foundry_api_key or ""
        return FoundryBackend(base_url, api_key, settings.foundry_model)
    # Azure is the default transport for any unrecognized backend name.
    endpoint = settings.azure_endpoint or ""
    api_key = settings.azure_api_key or ""
    return AzureBackend(endpoint, api_key, settings.deployment, settings.api_version)
614
+
615
+
616
def backend_for_deployment(settings, deployment: str) -> Backend:
    """Build a backend targeting a specific deployment/model (for fusion).

    Uses the same transport as the active backend, just a different model id.
    The Modal-served Mycelium model is the exception: it routes to its own
    endpoint with proxy-auth headers, regardless of the active backend.
    """
    is_mycelium = bool(settings.mycelium_endpoint) and deployment == settings.mycelium_model
    if is_mycelium:
        return ModalBackend(
            settings.mycelium_endpoint,
            settings.mycelium_key or "",
            settings.mycelium_secret or "",
            deployment,
        )
    kind = settings.backend
    if kind == "crowe":
        return CroweGatewayBackend(
            settings.gateway_url or "",
            deployment,
            token_provider=_crowe_token_provider(settings),
        )
    if kind == "foundry":
        base_url = settings.foundry_base_url or ""
        api_key = settings.foundry_api_key or ""
        return FoundryBackend(base_url, api_key, deployment)
    # Azure is the default transport for any unrecognized backend name.
    endpoint = settings.azure_endpoint or ""
    api_key = settings.azure_api_key or ""
    return AzureBackend(endpoint, api_key, deployment, settings.api_version)