PyPI - deepparallel - Versions diffs - 0.4.3__tar.gz → 0.5.1__tar.gz - Mend

deepparallel 0.4.3.tar.gz → 0.5.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

{deepparallel-0.4.3 → deepparallel-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepparallel
-Version: 0.4.3
+Version: 0.5.1
 Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
 Author-email: Michael Crowe <michael@crowelogic.com>
 License: Apache-2.0

{deepparallel-0.4.3 → deepparallel-0.5.1}/deepparallel/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """DeepParallel CLI package."""
-__version__ = "0.4.3"
+__version__ = "0.5.1"

deepparallel-0.5.1/deepparallel/backend.py ADDED Viewed

@@ -0,0 +1,645 @@
+"""HTTP transports for DeepParallel.
+Two interchangeable backends behind one streaming seam:
+- AzureBackend: direct Azure OpenAI chat-completions deployment.
+- FoundryBackend: Crowe Logic Foundry control plane (/v1/chat/completions).
+stream_chat yields (channel, text) tuples where channel is "content" or
+"thinking", so reasoning visibility stays a rendering decision in cli.py.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+from typing import Iterator, Protocol
+from urllib.parse import urlparse
+import httpx
+from . import crowe_id
+# One streamed delta as a (channel, text) pair; per the module docstring the
+# channel is either "content" or "thinking".
+Chunk = tuple[str, str]  # (channel, text)
+# Timeout used by the Azure, Foundry and Crowe gateway requests below:
+# 120 s default, with a shorter 10 s connect timeout.
+_STREAM_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
+# Modal scale-to-zero cold start can take 2-3 min before the first byte, so the
+# read timeout must be generous; connect stays short.
+_MODAL_TIMEOUT = httpx.Timeout(600.0, connect=15.0)
+# Timeout (seconds) for the reachability probe issued by each backend's check().
+_CHECK_TIMEOUT = 4.0
+def parse_sse_lines(lines: Iterator[str]) -> Iterator[Chunk]:
+    """Parse OpenAI-style SSE lines into (channel, text) chunks.
+    Only ``data:`` lines are considered; blank lines, comments and other SSE
+    fields are skipped. Parsing stops at the ``[DONE]`` sentinel. Frames that
+    are not valid JSON, are not JSON objects, or carry no usable first choice
+    are skipped rather than aborting the stream.
+    Yields ("thinking", text) for ``delta.reasoning_content`` and
+    ("content", text) for ``delta.content``, in that order within a frame.
+    """
+    for raw in lines:
+        line = raw.strip()
+        if not line.startswith("data:"):
+            continue
+        data = line[len("data:") :].strip()
+        if data == "[DONE]":
+            return
+        try:
+            obj = json.loads(data)
+        except json.JSONDecodeError:
+            continue
+        # Valid JSON that is not an object (null, list, number...) has no
+        # "choices"; treat it like any other malformed frame.
+        if not isinstance(obj, dict):
+            continue
+        choices = obj.get("choices")
+        if not isinstance(choices, list) or not choices:
+            continue
+        choice = choices[0]
+        if not isinstance(choice, dict):
+            continue
+        delta = choice.get("delta") or {}
+        reasoning = delta.get("reasoning_content")
+        if reasoning:
+            yield ("thinking", reasoning)
+        content = delta.get("content")
+        if content:
+            yield ("content", content)
+def parse_sse_stream(lines: Iterator[str]):
+    """Parse a streaming chat completion that may include tool calls.
+    Yields ("content"|"thinking", text) deltas for live rendering, and returns
+    (via StopIteration.value) the fully assembled assistant message:
+    ``role``, ``content`` (joined text or None), ``tool_calls`` (list ordered
+    by tool-call index, or None) and ``_truncated`` (True when the last seen
+    ``finish_reason`` was "length").
+    Frames that are not valid JSON objects, or whose first choice / tool-call
+    entries are not objects, are skipped instead of aborting the stream.
+    """
+    content_parts: list[str] = []
+    # Tool calls arrive fragmented across chunks; accumulate them per index.
+    acc: dict[int, dict] = {}
+    finish_reason: str | None = None
+    for raw in lines:
+        line = raw.strip()
+        if not line.startswith("data:"):
+            continue
+        data = line[len("data:") :].strip()
+        if data == "[DONE]":
+            break
+        try:
+            obj = json.loads(data)
+        except json.JSONDecodeError:
+            continue
+        # Valid JSON that is not an object has no "choices"; skip it.
+        if not isinstance(obj, dict):
+            continue
+        choices = obj.get("choices")
+        if not isinstance(choices, list) or not choices:
+            continue
+        choice = choices[0]
+        if not isinstance(choice, dict):
+            continue
+        if choice.get("finish_reason"):
+            finish_reason = choice["finish_reason"]
+        delta = choice.get("delta") or {}
+        reasoning = delta.get("reasoning_content")
+        if reasoning:
+            yield ("thinking", reasoning)
+        content = delta.get("content")
+        if content:
+            content_parts.append(content)
+            yield ("content", content)
+        for tc in delta.get("tool_calls") or []:
+            if not isinstance(tc, dict):
+                continue
+            idx = tc.get("index", 0)
+            slot = acc.setdefault(
+                idx, {"id": "", "type": "function", "function": {"name": "", "arguments": ""}}
+            )
+            if tc.get("id"):
+                slot["id"] = tc["id"]
+            fn = tc.get("function") or {}
+            if fn.get("name"):
+                slot["function"]["name"] = fn["name"]
+            # Argument JSON is streamed in pieces; concatenate in arrival order.
+            if fn.get("arguments"):
+                slot["function"]["arguments"] += fn["arguments"]
+    tool_calls = [acc[i] for i in sorted(acc)] or None
+    return {
+        "role": "assistant",
+        "content": "".join(content_parts) or None,
+        "tool_calls": tool_calls,
+        "_truncated": finish_reason == "length",
+    }
+def _host(url: str) -> str:
+    """Reduce *url* to ``scheme://netloc``; return it unchanged if it has no netloc."""
+    parts = urlparse(url)
+    if not parts.netloc:
+        return url
+    return f"{parts.scheme}://{parts.netloc}"
+def _message_from_choice(choice: dict) -> dict:
+    """Return a copy of the choice's assistant message, tagged with ``_truncated``.
+    ``_truncated`` is True when ``finish_reason == "length"``, i.e. the model
+    ran out of output tokens. A tool call carrying file content is then
+    incomplete (arguments and any embedded file body) and must not be applied
+    blindly.
+    """
+    was_cut_off = choice.get("finish_reason") == "length"
+    # Copy so the caller's response payload is not mutated.
+    message = dict(choice.get("message") or {})
+    message["_truncated"] = was_cut_off
+    return message
+class Backend(Protocol):
+    """Structural interface implemented by every transport in this module."""
+    # Human-readable backend name (AzureBackend passes it to the failover log line).
+    label: str
+    # Configuration/reachability probe: returns (ok, human-readable detail).
+    def check(self) -> tuple[bool, str]: ...
+    # Stream a plain chat completion as (channel, text) chunks.
+    def stream_chat(
+        self, messages: list[dict], temperature: float, max_tokens: int
+    ) -> Iterator[Chunk]: ...
+    # Non-streaming completion; returns the assistant message dict.
+    def chat(
+        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
+    ) -> dict: ...
+    # Streaming completion with tools. The implementations in this module are
+    # generators that yield chunks and additionally *return* the assembled
+    # assistant message (available as StopIteration.value).
+    def stream_chat_tools(
+        self, messages: list[dict], tools: list[dict], temperature: float, max_tokens: int
+    ) -> Iterator[Chunk]: ...
+def _should_failover(exc: Exception) -> bool:
+    """Decide whether a failed request should be retried against direct Azure.
+    True for transport-level errors and for upstream 5xx responses. A 4xx means
+    the request itself is bad, so retrying elsewhere is pointless; any other
+    exception type is never retried.
+    """
+    if isinstance(exc, httpx.HTTPStatusError):
+        return exc.response.status_code >= 500
+    return isinstance(exc, httpx.TransportError)
+def _log_failover(label: str, exc: Exception) -> None:
+    """Write a one-line failover notice for backend *label* to stderr and flush it."""
+    reason = exc.__class__.__name__
+    print(
+        f"[deepparallel] {label}: primary endpoint failed "
+        f"({reason}); failing over to direct Azure",
+        file=sys.stderr,
+        flush=True,
+    )
+class AzureBackend:
+    """Azure OpenAI chat-completions transport with gateway-to-direct failover.
+    When the configured endpoint is a Cloudflare AI Gateway ``/azure-openai/``
+    route, requests are tried there first and then against the direct Azure
+    endpoint derived from the route's resource segment.
+    """
+    label = "Azure OpenAI"
+    def __init__(self, endpoint: str, api_key: str, deployment: str, api_version: str):
+        """Store connection settings; a falsy endpoint/key becomes "" and the
+        endpoint loses any trailing slash."""
+        self._endpoint = (endpoint or "").rstrip("/")
+        self._api_key = api_key or ""
+        self._deployment = deployment
+        self._api_version = api_version
+    def _build_url(self, endpoint: str) -> str:
+        """Return the chat-completions URL for this deployment on *endpoint*."""
+        # Cloudflare AI Gateway azure-openai routes carry the resource in the
+        # path and drop the native "/openai/deployments" segment; native Azure
+        # endpoints keep it.
+        endpoint = endpoint.rstrip("/")
+        if "/azure-openai/" in endpoint:
+            return (
+                f"{endpoint}/{self._deployment}"
+                f"/chat/completions?api-version={self._api_version}"
+            )
+        return (
+            f"{endpoint}/openai/deployments/{self._deployment}"
+            f"/chat/completions?api-version={self._api_version}"
+        )
+    def _endpoints(self) -> list[str]:
+        """Return the endpoints to try in order: the configured one, then (for
+        gateway routes only) the derived direct Azure endpoint."""
+        # Primary is whatever is configured (typically the Cloudflare AI Gateway
+        # route). When that primary is a gateway route, derive the direct Azure
+        # endpoint from its resource segment and append it as automatic failover:
+        # fail-open, so a gateway outage degrades to direct Azure instead of
+        # taking down every CroweLM request. The failover request is not logged
+        # by the gateway -- the acceptable cost of staying available.
+        eps = [self._endpoint]
+        marker = "/azure-openai/"
+        if marker in self._endpoint:
+            # The resource name is the first path segment after the marker.
+            resource = self._endpoint.split(marker, 1)[1].split("/", 1)[0]
+            if resource:
+                eps.append(f"https://{resource}.cognitiveservices.azure.com")
+        return eps
+    @property
+    def _url(self) -> str:
+        """Chat-completions URL on the configured (primary) endpoint."""
+        return self._build_url(self._endpoint)
+    @property
+    def _headers(self) -> dict:
+        """Request headers: API key, JSON content type, optional cf-aig-* hints."""
+        # cf-aig-* headers configure AI Gateway per-request (no management API
+        # needed). Cache TTL is operationally tunable via DEEPPARALLEL_CACHE_TTL
+        # (seconds; "0" or empty disables caching for this agentic workload).
+        # Ignored by direct Azure on the failover path.
+        headers = {
+            "api-key": self._api_key,
+            "content-type": "application/json",
+        }
+        # An unset variable falls back to "300".
+        ttl = os.getenv("DEEPPARALLEL_CACHE_TTL", "300").strip()
+        if ttl and ttl != "0":
+            headers["cf-aig-cache-ttl"] = ttl
+            # NOTE(review): the metadata header is only sent when caching is
+            # enabled -- confirm that coupling is intended.
+            headers["cf-aig-metadata"] = '{"via":"deepparallel-gateway"}'
+        return headers
+    def check(self) -> tuple[bool, str]:
+        """Return (ok, detail): requires endpoint and key to be set and the
+        endpoint host to answer a GET within _CHECK_TIMEOUT. The response
+        status is not inspected; only an exception counts as unreachable."""
+        if not self._endpoint or not self._api_key:
+            return False, "Azure endpoint or API key not configured."
+        try:
+            httpx.get(_host(self._endpoint), timeout=_CHECK_TIMEOUT)
+        except Exception as e:  # noqa: BLE001 - reachability probe
+            return False, f"Azure endpoint unreachable ({e.__class__.__name__})"
+        return True, f"Azure @ {_host(self._endpoint)}"
+    def _payload(self, messages, stream, temperature, max_tokens) -> dict:
+        """Build the request body in the parameter shape this deployment accepts."""
+        # GPT-5 family deployments require `max_completion_tokens` and reject a
+        # custom `temperature` (only the default is accepted) -> they 400 on the
+        # legacy `max_tokens`/`temperature` shape. Everything else uses the
+        # classic params.
+        payload = {"messages": messages, "stream": stream}
+        dep = self._deployment.lower()
+        if dep.startswith("gpt-5") or dep.startswith("gpt-chat"):
+            # GPT-5 family + gpt-chat-latest require max_completion_tokens and
+            # reject a custom temperature (only the default is accepted).
+            payload["max_completion_tokens"] = max_tokens
+        else:
+            payload["temperature"] = temperature
+            payload["max_tokens"] = max_tokens
+        return payload
+    def stream_chat(self, messages, temperature, max_tokens):
+        """Stream a completion as (channel, text) chunks, trying each endpoint in turn.
+        Fails over to the next endpoint only if nothing has been yielded yet
+        and _should_failover() accepts the error; otherwise the error is re-raised.
+        """
+        payload = self._payload(messages, True, temperature, max_tokens)
+        urls = [self._build_url(e) for e in self._endpoints()]
+        for i, url in enumerate(urls):
+            last = i == len(urls) - 1
+            # Once a chunk has reached the caller a retry would duplicate output.
+            started = False
+            try:
+                with httpx.stream(
+                    "POST", url, json=payload, headers=self._headers, timeout=_STREAM_TIMEOUT
+                ) as r:
+                    r.raise_for_status()
+                    for chunk in parse_sse_lines(r.iter_lines()):
+                        started = True
+                        yield chunk
+                return
+            except (httpx.TransportError, httpx.HTTPStatusError) as e:
+                if last or started or not _should_failover(e):
+                    raise
+                _log_failover(self.label, e)
+                continue
+    def chat(self, messages, tools, temperature, max_tokens) -> dict:
+        """Run a non-streaming completion (with *tools* when given) and return
+        the first choice's message via _message_from_choice(); tries each
+        endpoint in turn under the same failover rule as stream_chat."""
+        payload = self._payload(messages, False, temperature, max_tokens)
+        if tools:
+            payload["tools"] = tools
+        urls = [self._build_url(e) for e in self._endpoints()]
+        for i, url in enumerate(urls):
+            last = i == len(urls) - 1
+            try:
+                r = httpx.post(url, json=payload, headers=self._headers, timeout=_STREAM_TIMEOUT)
+                r.raise_for_status()
+                return _message_from_choice(r.json()["choices"][0])
+            except (httpx.TransportError, httpx.HTTPStatusError) as e:
+                if last or not _should_failover(e):
+                    raise
+                _log_failover(self.label, e)
+                continue
+    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
+        """Stream a tool-enabled completion.
+        Yields (channel, text) chunks and returns (via StopIteration.value) the
+        assembled assistant message from parse_sse_stream(). Failover applies
+        only before the first chunk has been yielded.
+        """
+        payload = self._payload(messages, True, temperature, max_tokens)
+        if tools:
+            payload["tools"] = tools
+        urls = [self._build_url(e) for e in self._endpoints()]
+        for i, url in enumerate(urls):
+            last = i == len(urls) - 1
+            started = False
+            try:
+                with httpx.stream(
+                    "POST", url, json=payload, headers=self._headers, timeout=_STREAM_TIMEOUT
+                ) as r:
+                    r.raise_for_status()
+                    gen = parse_sse_stream(r.iter_lines())
+                    # Drive the parser by hand so its return value can be
+                    # forwarded while `started` is tracked per chunk.
+                    while True:
+                        try:
+                            chunk = next(gen)
+                        except StopIteration as stop:
+                            return stop.value
+                        started = True
+                        yield chunk
+            except (httpx.TransportError, httpx.HTTPStatusError) as e:
+                if last or started or not _should_failover(e):
+                    raise
+                _log_failover(self.label, e)
+                continue
+class FoundryBackend:
+    """Transport for the Crowe Logic Foundry control plane (/v1/chat/completions)."""
+    label = "Foundry control plane"
+    def __init__(self, base_url: str, api_key: str, model: str, token_provider=None):
+        """Store connection settings; a falsy URL/key becomes "" and the base
+        URL loses any trailing slash."""
+        self._base_url = (base_url or "").rstrip("/")
+        self._api_key = api_key or ""
+        self._model = model
+        # Optional callable returning a fresh bearer (e.g. a Crowe ID
+        # client_credentials token). When set it takes precedence over the static
+        # api_key, so the gateway sees a sovereign agent identity per request.
+        self._token_provider = token_provider
+    @property
+    def _url(self) -> str:
+        """Full chat-completions URL."""
+        return self._base_url + "/v1/chat/completions"
+    def _bearer(self) -> str:
+        """Return the bearer credential: provider token if set, else the static key."""
+        if self._token_provider:
+            return self._token_provider()
+        return self._api_key
+    def _request_body(self, messages, stream, temperature, max_tokens, tools=None) -> dict:
+        """Assemble the JSON body; ``tools`` is included only when truthy."""
+        body = {
+            "model": self._model,
+            "messages": messages,
+            "stream": stream,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+        if tools:
+            body["tools"] = tools
+        return body
+    def _auth_headers(self) -> dict:
+        """Build per-request headers (resolves the bearer once per call)."""
+        return {
+            "authorization": f"Bearer {self._bearer()}",
+            "content-type": "application/json",
+        }
+    def check(self) -> tuple[bool, str]:
+        """Return (ok, detail): needs a base URL plus a key or token provider,
+        and the host must answer a GET within _CHECK_TIMEOUT."""
+        has_credentials = self._api_key or self._token_provider
+        if not (self._base_url and has_credentials):
+            return False, "Foundry base URL or API key not configured."
+        try:
+            httpx.get(_host(self._base_url), timeout=_CHECK_TIMEOUT)
+        except Exception as e:  # noqa: BLE001 - reachability probe
+            return False, f"Foundry endpoint unreachable ({e.__class__.__name__})"
+        return True, f"Foundry @ {_host(self._base_url)}"
+    def stream_chat(self, messages, temperature, max_tokens):
+        """Stream a completion as (channel, text) chunks."""
+        body = self._request_body(messages, True, temperature, max_tokens)
+        headers = self._auth_headers()
+        with httpx.stream(
+            "POST", self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT
+        ) as resp:
+            resp.raise_for_status()
+            yield from parse_sse_lines(resp.iter_lines())
+    def chat(self, messages, tools, temperature, max_tokens) -> dict:
+        """Run a non-streaming completion and return the first choice's message."""
+        body = self._request_body(messages, False, temperature, max_tokens, tools)
+        headers = self._auth_headers()
+        resp = httpx.post(self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT)
+        resp.raise_for_status()
+        first_choice = resp.json()["choices"][0]
+        return _message_from_choice(first_choice)
+    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
+        """Stream a tool-enabled completion; yields chunks and returns the
+        assembled assistant message via StopIteration.value."""
+        body = self._request_body(messages, True, temperature, max_tokens, tools)
+        headers = self._auth_headers()
+        with httpx.stream(
+            "POST", self._url, json=body, headers=headers, timeout=_STREAM_TIMEOUT
+        ) as resp:
+            resp.raise_for_status()
+            message = yield from parse_sse_stream(resp.iter_lines())
+            return message
+class PaymentRequired(Exception):
+    """The agent's wallet can't cover the call — the x402 rail returned HTTP 402.
+    Carries the parsed x402 envelope so callers can see the price + accepted schemes
+    and decide how to fund (top-up) or pay (X-PAYMENT).
+    ``envelope`` is always a dict: a missing or non-object body is stored as
+    ``{}``. Malformed ``accepts`` data degrades to "?" in the message instead of
+    raising while the exception is being built.
+    """
+    def __init__(self, envelope: dict):
+        # A 402 body may be any JSON value; only an object is a usable envelope.
+        self.envelope = envelope if isinstance(envelope, dict) else {}
+        raw_accepts = self.envelope.get("accepts", [])
+        # Keep only well-formed offers so one bad entry can't mask the 402.
+        accepts = (
+            [a for a in raw_accepts if isinstance(a, dict)]
+            if isinstance(raw_accepts, list)
+            else []
+        )
+        price = accepts[0].get("maxAmountRequired", "?") if accepts else "?"
+        schemes = ", ".join(str(a.get("scheme") or "") for a in accepts) or "?"
+        super().__init__(
+            f"payment required: {price} micro-USD via [{schemes}] — fund the agent wallet"
+        )
+class CroweGatewayBackend:
+    """Foundry gateway PAID agent rail (/api/agent/v1/chat by default), Crowe ID auth.
+    Targets the x402 agent endpoint that debits the agent's wallet per call (override
+    with CROWE_AGENT_RESOURCE). Native GatewayResponse shape, not the OpenAI-compat
+    /v1 path (which 404s there); non-streaming with no server-side tool-calls, so we
+    adapt it to DeepParallel's streaming seam by yielding the full completion as a
+    single content chunk. The bearer is a Crowe ID client_credentials token from
+    ``token_provider`` (the agent's sovereign identity).
+    """
+    label = "Crowe ID agent (Foundry gateway)"
+    def __init__(self, base_url: str, model: str, token_provider=None):
+        """Store connection settings; a falsy URL becomes "" and any trailing
+        slash is removed."""
+        self._base_url = (base_url or "").rstrip("/")
+        self._model = model
+        self._token_provider = token_provider
+    @property
+    def _url(self) -> str:
+        """Request URL: base URL plus the agent resource path (read from the
+        environment on every access)."""
+        # The PAID x402 agent rail (debits the agent's wallet). Overridable via
+        # CROWE_AGENT_RESOURCE for the legacy non-paid /api/gateway/chat path.
+        resource = os.environ.get("CROWE_AGENT_RESOURCE", "/api/agent/v1/chat")
+        return f"{self._base_url}{resource}"
+    def _bearer(self) -> str:
+        """Return a token from the provider, or "" when none is configured."""
+        return self._token_provider() if self._token_provider else ""
+    def _headers(self) -> dict:
+        """Build per-request headers (resolves the bearer on each call)."""
+        return {
+            "authorization": f"Bearer {self._bearer()}",
+            "content-type": "application/json",
+        }
+    def check(self) -> tuple[bool, str]:
+        """Return (ok, detail): needs a base URL and a token provider, and the
+        host must answer a GET within _CHECK_TIMEOUT."""
+        if not self._base_url or not self._token_provider:
+            return False, "Crowe gateway URL or Crowe ID credentials not configured."
+        try:
+            httpx.get(_host(self._base_url), timeout=_CHECK_TIMEOUT)
+        except Exception as e:  # noqa: BLE001 - reachability probe
+            return False, f"Crowe gateway unreachable ({e.__class__.__name__})"
+        return True, f"Crowe ID @ {_host(self._base_url)}"
+    def _complete(self, messages, temperature, max_tokens) -> str:
+        """POST one completion request and return its ``content`` text.
+        Raises PaymentRequired on HTTP 402 and httpx.HTTPStatusError on any
+        other error status. Always returns a string: a missing or null
+        ``content`` (or a non-object body) yields "".
+        """
+        payload = {
+            "model": self._model,
+            "messages": messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+        r = httpx.post(self._url, json=payload, headers=self._headers(), timeout=_STREAM_TIMEOUT)
+        if r.status_code == 402:
+            # x402 payment-required: surface the machine-readable envelope as an
+            # actionable error (price + schemes) rather than a raw HTTP error.
+            try:
+                envelope = r.json()
+            except Exception:  # noqa: BLE001 - tolerate a non-JSON 402 body
+                envelope = {}
+            raise PaymentRequired(envelope)
+        r.raise_for_status()
+        body = r.json()
+        # `"content": null` must not leak None into the ("content", text) seam.
+        content = body.get("content") if isinstance(body, dict) else None
+        return content or ""
+    def stream_chat(self, messages, temperature, max_tokens):
+        """Yield the whole completion as a single ("content", text) chunk."""
+        yield ("content", self._complete(messages, temperature, max_tokens))
+    def chat(self, messages, tools, temperature, max_tokens) -> dict:
+        """Return the completion as an assistant message dict."""
+        # Native gateway endpoint has no tool-calling; tools are ignored.
+        return {"role": "assistant", "content": self._complete(messages, temperature, max_tokens)}
+    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
+        """Yield one content chunk, then return the assistant message."""
+        # No server-side tool-calls; yields content and returns the final message
+        # (matches the FoundryBackend generator-return contract used by the agent loop).
+        content = self._complete(messages, temperature, max_tokens)
+        yield ("content", content)
+        return {"role": "assistant", "content": content}
+class ModalBackend:
+    """Gemma 4 Mycelium served on a Modal scale-to-zero GPU (the free base tier).
+    OpenAI-compatible /v1/chat/completions, but the Modal web endpoint requires
+    proxy-auth headers (Modal-Key / Modal-Secret) on every request — which is why
+    the gateway, not OWUI, must own this connection."""
+    label = "Modal (Mycelium)"
+    def __init__(self, endpoint: str, key: str, secret: str, model: str):
+        """Store connection settings; falsy values become "" and the endpoint
+        loses any trailing slash."""
+        self._model = model
+        self._secret = secret or ""
+        self._key = key or ""
+        self._base_url = (endpoint or "").rstrip("/")
+    @property
+    def _url(self) -> str:
+        """Full chat-completions URL."""
+        return self._base_url + "/v1/chat/completions"
+    def _headers(self) -> dict:
+        """Proxy-auth plus JSON content-type headers."""
+        return {
+            "Modal-Key": self._key,
+            "Modal-Secret": self._secret,
+            "content-type": "application/json",
+        }
+    def _request_body(self, messages, stream, temperature, max_tokens, tools=None) -> dict:
+        """Assemble the JSON body; ``tools`` is included only when truthy."""
+        body = {
+            "model": self._model,
+            "messages": messages,
+            "stream": stream,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+        if tools:
+            body["tools"] = tools
+        return body
+    def check(self) -> tuple[bool, str]:
+        """Return (ok, detail): needs endpoint, key and secret, and the host
+        must answer a GET within _CHECK_TIMEOUT."""
+        if not (self._base_url and self._key and self._secret):
+            return False, "Modal Mycelium endpoint or proxy-auth token not configured."
+        try:
+            httpx.get(_host(self._base_url), timeout=_CHECK_TIMEOUT)
+        except Exception as e:  # noqa: BLE001 - reachability probe
+            return False, f"Modal endpoint unreachable ({e.__class__.__name__})"
+        return True, f"Modal @ {_host(self._base_url)}"
+    def stream_chat(self, messages, temperature, max_tokens):
+        """Stream a completion as (channel, text) chunks."""
+        body = self._request_body(messages, True, temperature, max_tokens)
+        with httpx.stream(
+            "POST", self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT
+        ) as resp:
+            resp.raise_for_status()
+            yield from parse_sse_lines(resp.iter_lines())
+    def chat(self, messages, tools, temperature, max_tokens) -> dict:
+        """Run a non-streaming completion and return the first choice's message."""
+        body = self._request_body(messages, False, temperature, max_tokens, tools)
+        resp = httpx.post(self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT)
+        resp.raise_for_status()
+        first_choice = resp.json()["choices"][0]
+        return _message_from_choice(first_choice)
+    def stream_chat_tools(self, messages, tools, temperature, max_tokens):
+        """Stream a tool-enabled completion; yields chunks and returns the
+        assembled assistant message via StopIteration.value."""
+        body = self._request_body(messages, True, temperature, max_tokens, tools)
+        with httpx.stream(
+            "POST", self._url, json=body, headers=self._headers(), timeout=_MODAL_TIMEOUT
+        ) as resp:
+            resp.raise_for_status()
+            message = yield from parse_sse_stream(resp.iter_lines())
+            return message
+# Process-wide cache of Crowe ID token providers, keyed by the credential tuple
+# built in _crowe_token_provider().
+_crowe_providers: dict[tuple, crowe_id.CroweIDTokenProvider] = {}
+def _crowe_token_provider(settings) -> crowe_id.CroweIDTokenProvider:
+    """Return the memoized token provider for these Crowe ID credentials.
+    One provider is shared per (issuer, client_id, client_secret, audience) so
+    fusion's many per-deployment backends reuse a single cached Crowe ID token.
+    Secret and audience are part of the key so that settings differing only in
+    those fields never receive a provider built for the other values.
+    """
+    client_id = settings.crowe_id_client_id or ""
+    client_secret = settings.crowe_id_client_secret or ""
+    key = (
+        settings.crowe_id_issuer,
+        client_id,
+        client_secret,
+        settings.crowe_id_audience,
+    )
+    provider = _crowe_providers.get(key)
+    if provider is None:
+        provider = crowe_id.CroweIDTokenProvider(
+            settings.crowe_id_issuer,
+            client_id,
+            client_secret,
+            audience=settings.crowe_id_audience,
+        )
+        _crowe_providers[key] = provider
+    return provider
+def resolve_backend(settings) -> Backend:
+    """Build the backend selected by ``settings.backend``.
+    "crowe" -> CroweGatewayBackend, "foundry" -> FoundryBackend, anything
+    else -> AzureBackend.
+    """
+    kind = settings.backend
+    if kind == "crowe":
+        gateway = settings.gateway_url or ""
+        model = settings.foundry_model
+        return CroweGatewayBackend(
+            gateway, model, token_provider=_crowe_token_provider(settings)
+        )
+    if kind == "foundry":
+        base_url = settings.foundry_base_url or ""
+        api_key = settings.foundry_api_key or ""
+        return FoundryBackend(base_url, api_key, settings.foundry_model)
+    # Default transport: direct Azure OpenAI deployment.
+    endpoint = settings.azure_endpoint or ""
+    api_key = settings.azure_api_key or ""
+    return AzureBackend(endpoint, api_key, settings.deployment, settings.api_version)
+def backend_for_deployment(settings, deployment: str) -> Backend:
+    """Build a backend for one specific deployment/model id (used by fusion).
+    The transport is the one selected by ``settings.backend``; only the model
+    id differs. Exception: when a Mycelium endpoint is configured and
+    *deployment* is the Mycelium model, a ModalBackend (own endpoint,
+    proxy-auth headers) is returned regardless of the active backend.
+    """
+    is_mycelium = settings.mycelium_endpoint and deployment == settings.mycelium_model
+    if is_mycelium:
+        return ModalBackend(
+            settings.mycelium_endpoint,
+            settings.mycelium_key or "",
+            settings.mycelium_secret or "",
+            deployment,
+        )
+    kind = settings.backend
+    if kind == "crowe":
+        gateway = settings.gateway_url or ""
+        return CroweGatewayBackend(
+            gateway, deployment, token_provider=_crowe_token_provider(settings)
+        )
+    if kind == "foundry":
+        base_url = settings.foundry_base_url or ""
+        api_key = settings.foundry_api_key or ""
+        return FoundryBackend(base_url, api_key, deployment)
+    # Default transport: direct Azure OpenAI deployment.
+    endpoint = settings.azure_endpoint or ""
+    api_key = settings.azure_api_key or ""
+    return AzureBackend(endpoint, api_key, deployment, settings.api_version)

deepparallel 0.4.3__tar.gz → 0.5.1__tar.gz

deepparallel 0.4.3.tar.gz → 0.5.1.tar.gz