opengradient 0.5.8__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opengradient-0.5.8/src/opengradient.egg-info → opengradient-0.5.9}/PKG-INFO +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/pyproject.toml +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/__init__.py +7 -3
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/cli.py +89 -4
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/client.py +317 -33
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/defaults.py +3 -1
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/types.py +191 -1
- opengradient-0.5.9/src/opengradient/x402_auth.py +60 -0
- {opengradient-0.5.8 → opengradient-0.5.9/src/opengradient.egg-info}/PKG-INFO +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/SOURCES.txt +1 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/requires.txt +1 -1
- {opengradient-0.5.8 → opengradient-0.5.9}/LICENSE +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/README.md +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/setup.cfg +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/InferencePrecompile.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/PriceHistoryInference.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/WorkflowScheduler.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/inference.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/account.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/read_workflow_tool.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/run_model_tool.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/types.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/bin/PriceHistoryInference.bin +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/exceptions.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/og_langchain.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/og_openai.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer.proto +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2_grpc.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/utils.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/constants.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/types.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/utils.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/workflow_models.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/dependency_links.txt +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/entry_points.txt +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/top_level.txt +0 -0
--- opengradient-0.5.8/src/opengradient.egg-info/PKG-INFO
+++ opengradient-0.5.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.5.8
+Version: 0.5.9
 Summary: Python SDK for OpenGradient decentralized model management & inference services
 Author-email: OpenGradient <kyle@vannalabs.ai>
 License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.32.3
 Requires-Dist: langchain>=0.3.7
 Requires-Dist: openai>=1.58.1
 Requires-Dist: pydantic>=2.9.2
-Requires-Dist: og-test-x402==0.0.
+Requires-Dist: og-test-x402==0.0.9
 Dynamic: license-file

 # OpenGradient Python SDK
--- opengradient-0.5.8/pyproject.toml
+++ opengradient-0.5.9/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "opengradient"
-version = "0.5.8"
+version = "0.5.9"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "kyle@vannalabs.ai"}]
 readme = "README.md"
@@ -29,7 +29,7 @@ dependencies = [
     "langchain>=0.3.7",
     "openai>=1.58.1",
     "pydantic>=2.9.2",
-    "og-test-x402==0.0.
+    "og-test-x402==0.0.9",
 ]

 [project.scripts]
--- opengradient-0.5.8/src/opengradient/__init__.py
+++ opengradient-0.5.9/src/opengradient/__init__.py
@@ -17,6 +17,7 @@ from .types import (
     InferenceResult,
     LlmInferenceMode,
     TextGenerationOutput,
+    TextGenerationStream,
     ModelOutput,
     ModelRepository,
     FileUploadResult,
@@ -225,7 +226,8 @@ def llm_chat(
     tool_choice: Optional[str] = None,
     max_retries: Optional[int] = None,
     x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
-) -> TextGenerationOutput:
+    stream: Optional[bool] = False,
+) -> Union[TextGenerationOutput, TextGenerationStream]:
     """Have a chat conversation with an LLM.

     Args:
@@ -239,9 +241,10 @@ def llm_chat(
         tool_choice: Optional specific tool to use
         max_retries: Maximum number of retries for failed transactions
         x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode)
+        stream: Optional boolean to enable streaming

     Returns:
-        TextGenerationOutput
+        TextGenerationOutput or TextGenerationStream

     Raises:
         RuntimeError: If SDK is not initialized
@@ -258,7 +261,8 @@ def llm_chat(
         tools=tools,
         tool_choice=tool_choice,
         max_retries=max_retries,
-        x402_settlement_mode=x402_settlement_mode
+        x402_settlement_mode=x402_settlement_mode,
+        stream=stream,
     )


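For orientation, the sketch below shows how the new `stream` parameter on the top-level `llm_chat` is meant to be consumed, based only on the signatures and docstrings added in this diff. It is a hedged illustration, not code from the release: the SDK initialization step is omitted (`llm_chat` raises `RuntimeError` if the SDK is not initialized), and the prompt and `max_tokens` value are placeholders; the model identifier is the one used in the CLI docstring example.

```python
# Hedged usage sketch of the 0.5.9 streaming API; setup and prompt are placeholders.
import opengradient as og

# With stream=True, llm_chat returns a TextGenerationStream instead of a
# TextGenerationOutput; iterating it yields StreamChunk objects.
stream = og.llm_chat(
    model_cid="anthropic/claude-3.5-haiku",  # example model from the CLI docstring
    messages=[{"role": "user", "content": "How are clouds formed?"}],
    max_tokens=250,
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)      # incremental text
    if chunk.is_final and chunk.usage:
        print(f"\n[total tokens: {chunk.usage.total_tokens}]")
```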
--- opengradient-0.5.8/src/opengradient/cli.py
+++ opengradient-0.5.9/src/opengradient/cli.py
@@ -6,6 +6,7 @@ import logging
 import webbrowser
 from pathlib import Path
 from typing import Dict, List, Optional
+import sys

 import click

@@ -557,6 +558,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     default="settle-batch",
     help="Settlement mode for x402 payments: settle (hashes only), settle-batch (batched, default), settle-metadata (full data)",
 )
+@click.option("--stream", is_flag=True, default=False, help="Stream the output from the LLM")
 @click.pass_context
 def chat(
     ctx,
@@ -572,6 +574,7 @@ def chat(
     tool_choice: Optional[str],
     x402_settlement_mode: Optional[str],
     local: bool,
+    stream: bool,
 ):
     """
     Run chat inference on an LLM model (local or external).
@@ -590,6 +593,9 @@ def chat(

     # External Anthropic model
     opengradient chat --model claude-haiku-4-5-20251001 --messages '[{"role":"user","content":"Write a poem"}]' --max-tokens 100
+
+    # Stream output
+    opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream
     """
     client: Client = ctx.obj["client"]

@@ -656,7 +662,7 @@ def chat(
         if not tools and not tools_file:
             parsed_tools = None

-
+        result = client.llm_chat(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
             messages=messages,
@@ -667,11 +673,16 @@ def chat(
             tool_choice=tool_choice,
             local_model=local,
             x402_settlement_mode=x402_settlement_mode,
+            stream=stream,
         )

-
-
-
+        # Handle response based on streaming flag
+        if stream:
+            print_streaming_chat_result(model_cid, result, is_local)
+        else:
+            print_llm_chat_result(
+                model_cid, result.transaction_hash, result.finish_reason, result.chat_output, is_local
+            )

     except Exception as e:
         click.echo(f"Error running LLM chat inference: {str(e)}")
@@ -706,6 +717,80 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_local=True):
     click.echo()


+def print_streaming_chat_result(model_cid, stream, is_local=True):
+    """Handle streaming chat response with typed chunks - prints in real-time"""
+    click.secho("🌊 Streaming LLM Chat", fg="green", bold=True)
+    click.echo("──────────────────────────────────────")
+    click.echo("Model: ", nl=False)
+    click.secho(model_cid, fg="cyan", bold=True)
+
+    if is_local:
+        click.echo("Source: ", nl=False)
+        click.secho("OpenGradient TEE", fg="cyan", bold=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
+    click.echo("──────────────────────────────────────")
+    click.secho("Response:", fg="yellow", bold=True)
+    click.echo()
+
+    try:
+        content_parts = []
+        chunk_count = 0
+
+        for chunk in stream:
+            chunk_count += 1
+
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                sys.stdout.write(content)
+                sys.stdout.flush()
+                content_parts.append(content)
+
+            # Handle tool calls
+            if chunk.choices[0].delta.tool_calls:
+                sys.stdout.write("\n")
+                sys.stdout.flush()
+                click.secho("Tool Calls:", fg="yellow", bold=True)
+                for tool_call in chunk.choices[0].delta.tool_calls:
+                    click.echo(f"  Function: {tool_call['function']['name']}")
+                    click.echo(f"  Arguments: {tool_call['function']['arguments']}")
+
+            # Print final info when stream completes
+            if chunk.is_final:
+                sys.stdout.write("\n\n")
+                sys.stdout.flush()
+                click.echo("──────────────────────────────────────")
+
+                if chunk.usage:
+                    click.secho("Token Usage:", fg="cyan")
+                    click.echo(f"  Prompt tokens: {chunk.usage.prompt_tokens}")
+                    click.echo(f"  Completion tokens: {chunk.usage.completion_tokens}")
+                    click.echo(f"  Total tokens: {chunk.usage.total_tokens}")
+                    click.echo()
+
+                if chunk.choices[0].finish_reason:
+                    click.echo("Finish reason: ", nl=False)
+                    click.secho(chunk.choices[0].finish_reason, fg="green")
+
+                click.echo("──────────────────────────────────────")
+                click.echo(f"Chunks received: {chunk_count}")
+                click.echo(f"Content length: {len(''.join(content_parts))} characters")
+                click.echo()
+
+    except KeyboardInterrupt:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho("Stream interrupted by user", fg="yellow")
+        click.echo()
+    except Exception as e:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho(f"Streaming error: {str(e)}", fg="red", bold=True)
+        click.echo()
+
+
 @cli.command()
 def create_account():
     """Create a new test account for OpenGradient inference and model management"""
--- opengradient-0.5.8/src/opengradient/client.py
+++ opengradient-0.5.9/src/opengradient/client.py
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Union, Callable
 import firebase
 import numpy as np
 import requests
+import httpx
 from eth_account.account import LocalAccount
 from web3 import Web3
 from web3.exceptions import ContractLogicError
@@ -17,7 +18,9 @@ import urllib.parse
 import asyncio
 from x402.clients.httpx import x402HttpxClient
 from x402.clients.base import decode_x_payment_response, x402Client
+from x402.clients.httpx import x402HttpxClient

+from .x402_auth import X402Auth
 from .exceptions import OpenGradientError
 from .proto import infer_pb2, infer_pb2_grpc
 from .types import (
@@ -29,10 +32,12 @@ from .types import (
     LlmInferenceMode,
     ModelOutput,
     TextGenerationOutput,
+    TextGenerationStream,
     SchedulerParams,
     InferenceResult,
     ModelRepository,
     FileUploadResult,
+    StreamChunk,
 )
 from .defaults import (
     DEFAULT_IMAGE_GEN_HOST,
@@ -40,6 +45,8 @@ from .defaults import (
     DEFAULT_SCHEDULER_ADDRESS,
     DEFAULT_LLM_SERVER_URL,
     DEFAULT_OPENGRADIENT_LLM_SERVER_URL,
+    DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL,
+    DEFAULT_NETWORK_FILTER,
 )
 from .utils import convert_array_to_model_output, convert_to_model_input, convert_to_model_output

@@ -66,6 +73,18 @@ PRECOMPILE_CONTRACT_ADDRESS = "0x00000000000000000000000000000000000000F4"
 X402_PROCESSING_HASH_HEADER = "x-processing-hash"
 X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"

+TIMEOUT = httpx.Timeout(
+    timeout=90.0,
+    connect=15.0,
+    read=15.0,
+    write=30.0,
+    pool=10.0,
+)
+LIMITS = httpx.Limits(
+    max_keepalive_connections=100,
+    max_connections=500,
+    keepalive_expiry=60 * 20,  # 20 minutes
+)

 class Client:
     _inference_hub_contract_address: str
@@ -89,6 +108,7 @@ class Client:
         password: Optional[str] = None,
         llm_server_url: Optional[str] = DEFAULT_LLM_SERVER_URL,
         og_llm_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_SERVER_URL,
+        og_llm_streaming_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL,
         openai_api_key: Optional[str] = None,
         anthropic_api_key: Optional[str] = None,
         google_api_key: Optional[str] = None,
@@ -123,6 +143,7 @@ class Client:

         self._llm_server_url = llm_server_url
         self._og_llm_server_url = og_llm_server_url
+        self._og_llm_streaming_server_url = og_llm_streaming_server_url

         self._external_api_keys = {}
         if openai_api_key or os.getenv("OPENAI_API_KEY"):
@@ -421,11 +442,11 @@ class Client:

         return run_with_retry(execute_transaction, max_retries)

-    def _og_payment_selector(self, accepts, network_filter=
-        """Custom payment selector for OpenGradient network
+    def _og_payment_selector(self, accepts, network_filter=DEFAULT_NETWORK_FILTER, scheme_filter=None, max_value=None):
+        """Custom payment selector for OpenGradient network."""
         return x402Client.default_payment_requirements_selector(
             accepts,
-            network_filter=
+            network_filter=network_filter,
             scheme_filter=scheme_filter,
             max_value=max_value,
         )
@@ -652,7 +673,8 @@ class Client:
         max_retries: Optional[int] = None,
         local_model: Optional[bool] = False,
         x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
-    ) -> TextGenerationOutput:
+        stream: bool = False,
+    ) -> Union[TextGenerationOutput, TextGenerationStream]:
         """
         Perform inference on an LLM model using chat.

@@ -672,13 +694,12 @@ class Client:
                 - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient).
                 - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata.
                 Defaults to SETTLE_BATCH.
+            stream (bool, optional): Whether to stream the response. Default is False.

         Returns:
-            TextGenerationOutput:
-                -
-                -
-                - finish_reason: Reason for completion (e.g., "stop", "tool_call")
-                - payment_hash: Payment hash for x402 transactions (when using x402 settlement)
+            Union[TextGenerationOutput, TextGenerationStream]:
+                - If stream=False: TextGenerationOutput with chat_output, transaction_hash, finish_reason, and payment_hash
+                - If stream=True: TextGenerationStream yielding StreamChunk objects with typed deltas (true streaming via threading)

         Raises:
             OpenGradientError: If the inference fails.
@@ -689,16 +710,33 @@ class Client:
             if model_cid not in TEE_LLM:
                 return OpenGradientError("That model CID is not supported yet for TEE inference")

-
-
-
-
-
-
-
-
-
-
+            if stream:
+                # Use threading bridge for true sync streaming
+                return self._external_llm_chat_stream_sync(
+                    model=model_cid.split("/")[1],
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stop_sequence=stop_sequence,
+                    temperature=temperature,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    x402_settlement_mode=x402_settlement_mode,
+                    use_tee=True,
+                )
+            else:
+                # Non-streaming
+                return self._external_llm_chat(
+                    model=model_cid.split("/")[1],
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stop_sequence=stop_sequence,
+                    temperature=temperature,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    x402_settlement_mode=x402_settlement_mode,
+                    stream=False,
+                    use_tee=True,
+                )

         # Original local model logic
         def execute_transaction():
@@ -778,7 +816,9 @@ class Client:
         tools: Optional[List[Dict]] = None,
         tool_choice: Optional[str] = None,
         x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
-    ) -> TextGenerationOutput:
+        stream: bool = False,
+        use_tee: bool = False,
+    ) -> Union[TextGenerationOutput, TextGenerationStream]:
         """
         Route chat request to external LLM server with x402 payments.

@@ -790,18 +830,24 @@ class Client:
             temperature: Sampling temperature
             tools: Function calling tools
             tool_choice: Tool selection strategy
+            stream: Whether to stream the response
+            use_tee: Whether to use TEE

         Returns:
-            TextGenerationOutput
+            Union[TextGenerationOutput, TextGenerationStream]: Chat completion or TextGenerationStream

         Raises:
             OpenGradientError: If request fails
         """
-        api_key = self._get_api_key_for_model(model)
+        api_key = None if use_tee else self._get_api_key_for_model(model)

         if api_key:
-            logging.debug("External LLM
-
+            logging.debug("External LLM chat using API key")
+
+            if stream:
+                url = f"{self._llm_server_url}/v1/chat/completions/stream"
+            else:
+                url = f"{self._llm_server_url}/v1/chat/completions"

             headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

@@ -820,14 +866,23 @@ class Client:
                 payload["tool_choice"] = tool_choice or "auto"

             try:
-
-
+                if stream:
+                    # Return streaming response wrapped in TextGenerationStream
+                    response = requests.post(url, json=payload, headers=headers, timeout=60, stream=True)
+                    response.raise_for_status()
+                    return TextGenerationStream(_iterator=response.iter_lines(decode_unicode=True), _is_async=False)
+                else:
+                    # Non-streaming response
+                    response = requests.post(url, json=payload, headers=headers, timeout=60)
+                    response.raise_for_status()

-
+                    result = response.json()

-
-
-
+                    return TextGenerationOutput(
+                        transaction_hash="external",
+                        finish_reason=result.get("finish_reason"),
+                        chat_output=result.get("message")
+                    )

             except requests.RequestException as e:
                 error_msg = f"External LLM chat failed: {str(e)}"
@@ -840,6 +895,7 @@ class Client:
             logging.error(error_msg)
             raise OpenGradientError(error_msg)

+        # x402 payment path - non-streaming only here
         async def make_request():
             async with x402HttpxClient(
                 account=self._wallet_account,
@@ -867,13 +923,13 @@ class Client:
                     payload["tool_choice"] = tool_choice or "auto"

                 try:
-
+                    # Non-streaming with x402
+                    endpoint = "/v1/chat/completions"
+                    response = await client.post(endpoint, json=payload, headers=headers, timeout=60)

                     # Read the response content
                     content = await response.aread()
                     result = json.loads(content.decode())
-                    # print(f"Response: {response}")
-                    # print(f"Response Headers: {response.headers}")

                     payment_hash = ""
                     if X402_PROCESSING_HASH_HEADER in response.headers:
@@ -909,6 +965,234 @@ class Client:
                     logging.error(error_msg)
                     raise OpenGradientError(error_msg)

+    def _external_llm_chat_stream_sync(
+        self,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: Optional[str] = None,
+        x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
+        use_tee: bool = False,
+    ):
+        """
+        Sync streaming using threading bridge - TRUE real-time streaming.
+
+        Yields StreamChunk objects as they arrive from the background thread.
+        NO buffering, NO conversion, just direct pass-through.
+        """
+        import threading
+        from queue import Queue
+
+        queue = Queue()
+        exception_holder = []
+
+        def _run_async():
+            """Run async streaming in background thread"""
+            loop = None
+            try:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+
+                async def _stream():
+                    try:
+                        async for chunk in self._external_llm_chat_stream_async(
+                            model=model,
+                            messages=messages,
+                            max_tokens=max_tokens,
+                            stop_sequence=stop_sequence,
+                            temperature=temperature,
+                            tools=tools,
+                            tool_choice=tool_choice,
+                            x402_settlement_mode=x402_settlement_mode,
+                            use_tee=use_tee,
+                        ):
+                            queue.put(chunk)  # Put chunk immediately
+                    except Exception as e:
+                        exception_holder.append(e)
+                    finally:
+                        queue.put(None)  # Signal completion
+
+                loop.run_until_complete(_stream())
+            except Exception as e:
+                exception_holder.append(e)
+                queue.put(None)
+            finally:
+                if loop:
+                    try:
+                        pending = asyncio.all_tasks(loop)
+                        for task in pending:
+                            task.cancel()
+                        loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
+                    finally:
+                        loop.close()
+
+        # Start background thread
+        thread = threading.Thread(target=_run_async, daemon=True)
+        thread.start()
+
+        # Yield chunks DIRECTLY as they arrive - NO buffering
+        try:
+            while True:
+                chunk = queue.get()  # Blocks until chunk available
+                if chunk is None:
+                    break
+                yield chunk  # Yield immediately!
+
+            thread.join(timeout=5)
+
+            if exception_holder:
+                raise exception_holder[0]
+        except Exception as e:
+            thread.join(timeout=1)
+            raise
+
+    async def _external_llm_chat_stream_async(
+        self,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: Optional[str] = None,
+        x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
+        use_tee: bool = False,
+    ):
+        """
+        Internal async streaming implementation.
+
+        Yields StreamChunk objects as they arrive from the server.
+        """
+        api_key = None if use_tee else self._get_api_key_for_model(model)
+
+        if api_key:
+            # API key path - streaming to local llm-server
+            url = f"{self._og_llm_streaming_server_url}/v1/chat/completions"
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {api_key}"
+            }
+
+            payload = {
+                "model": model,
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": True,
+            }
+
+            if stop_sequence:
+                payload["stop"] = stop_sequence
+            if tools:
+                payload["tools"] = tools
+                payload["tool_choice"] = tool_choice or "auto"
+
+            async with httpx.AsyncClient(verify=False, timeout=None) as client:
+                async with client.stream("POST", url, json=payload, headers=headers) as response:
+                    buffer = b""
+                    async for chunk in response.aiter_raw():
+                        if not chunk:
+                            continue
+
+                        buffer += chunk
+
+                        # Process all complete lines in buffer
+                        while b"\n" in buffer:
+                            line_bytes, buffer = buffer.split(b"\n", 1)
+
+                            if not line_bytes.strip():
+                                continue
+
+                            try:
+                                line = line_bytes.decode('utf-8').strip()
+                            except UnicodeDecodeError:
+                                continue
+
+                            if not line.startswith("data: "):
+                                continue
+
+                            data_str = line[6:]  # Strip "data: " prefix
+                            if data_str.strip() == "[DONE]":
+                                return
+
+                            try:
+                                data = json.loads(data_str)
+                                yield StreamChunk.from_sse_data(data)
+                            except json.JSONDecodeError:
+                                continue
+        else:
+            # x402 payment path
+            async with httpx.AsyncClient(
+                base_url=self._og_llm_streaming_server_url,
+                headers={"Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}"},
+                timeout=TIMEOUT,
+                limits=LIMITS,
+                http2=False,
+                follow_redirects=False,
+                auth=X402Auth(account=self._wallet_account),  # type: ignore
+            ) as client:
+                headers = {
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}",
+                    "X-SETTLEMENT-TYPE": x402_settlement_mode,
+                }
+
+                payload = {
+                    "model": model,
+                    "messages": messages,
+                    "max_tokens": max_tokens,
+                    "temperature": temperature,
+                    "stream": True,
+                }
+
+                if stop_sequence:
+                    payload["stop"] = stop_sequence
+                if tools:
+                    payload["tools"] = tools
+                    payload["tool_choice"] = tool_choice or "auto"
+
+                async with client.stream(
+                    "POST",
+                    "/v1/chat/completions",
+                    json=payload,
+                    headers=headers,
+                ) as response:
+                    buffer = b""
+                    async for chunk in response.aiter_raw():
+                        if not chunk:
+                            continue
+
+                        buffer += chunk
+
+                        # Process complete lines from buffer
+                        while b"\n" in buffer:
+                            line_bytes, buffer = buffer.split(b"\n", 1)
+
+                            if not line_bytes.strip():
+                                continue
+
+                            try:
+                                line = line_bytes.decode('utf-8').strip()
+                            except UnicodeDecodeError:
+                                continue
+
+                            if not line.startswith("data: "):
+                                continue
+
+                            data_str = line[6:]
+                            if data_str.strip() == "[DONE]":
+                                return
+
+                            try:
+                                data = json.loads(data_str)
+                                yield StreamChunk.from_sse_data(data)
+                            except json.JSONDecodeError:
+                                continue
+
     def list_files(self, model_name: str, version: str) -> List[Dict]:
         """
         List files for a specific version of a model.
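The new `_external_llm_chat_stream_sync` method above bridges the async SSE stream into a synchronous generator with a background thread and a `Queue`. The stripped-down sketch below is not part of the SDK and uses only standard-library names; it isolates the pattern so the control flow is easier to follow: an event loop in a daemon thread pumps chunks into a queue, a `None` sentinel marks completion, and the synchronous caller simply blocks on `queue.get()`.

```python
# Standalone illustration of the async-to-sync streaming bridge; all names are illustrative.
import asyncio
import threading
from queue import Queue
from typing import AsyncIterator, Iterator


async def produce_chunks() -> AsyncIterator[str]:
    """Stand-in for an async SSE stream."""
    for i in range(3):
        await asyncio.sleep(0.1)
        yield f"chunk-{i}"


def iterate_sync(async_gen_factory) -> Iterator[str]:
    """Consume an async generator from synchronous code, chunk by chunk."""
    queue: Queue = Queue()
    errors: list = []

    def run() -> None:
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)

            async def pump() -> None:
                try:
                    async for item in async_gen_factory():
                        queue.put(item)          # hand each chunk to the sync side
                except BaseException as exc:     # propagate failures to the caller
                    errors.append(exc)
                finally:
                    queue.put(None)              # sentinel: stream finished

            loop.run_until_complete(pump())
        finally:
            loop.close()

    threading.Thread(target=run, daemon=True).start()

    while True:
        item = queue.get()                       # blocks until the next chunk arrives
        if item is None:
            break
        yield item

    if errors:
        raise errors[0]


if __name__ == "__main__":
    for chunk in iterate_sync(produce_chunks):
        print(chunk)
```

The trade-off is the same one the SDK makes: the caller keeps a plain `for` loop while the HTTP streaming stays fully async, at the cost of one extra thread per stream.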
--- opengradient-0.5.8/src/opengradient/defaults.py
+++ opengradient-0.5.9/src/opengradient/defaults.py
@@ -9,4 +9,6 @@ DEFAULT_BLOCKCHAIN_EXPLORER = "https://explorer.opengradient.ai/tx/"
 DEFAULT_IMAGE_GEN_HOST = "18.217.25.69"
 DEFAULT_IMAGE_GEN_PORT = 5125
 DEFAULT_LLM_SERVER_URL = "http://35.225.197.84:8000"
-DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://
+DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://llmogevm.opengradient.ai"
+DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://llmogevm.opengradient.ai"
+DEFAULT_NETWORK_FILTER = "og-evm"
--- opengradient-0.5.8/src/opengradient/types.py
+++ opengradient-0.5.9/src/opengradient/types.py
@@ -1,7 +1,7 @@
 import time
 from dataclasses import dataclass
 from enum import Enum, IntEnum, StrEnum
-from typing import Dict, List, Optional, Tuple, Union, DefaultDict
+from typing import Dict, List, Optional, Tuple, Union, DefaultDict, Iterator, AsyncIterator
 import numpy as np


@@ -165,6 +165,196 @@ class InferenceResult:
     model_output: Dict[str, np.ndarray]


+@dataclass
+class StreamDelta:
+    """
+    Represents a delta (incremental change) in a streaming response.
+
+    Attributes:
+        content: Incremental text content (if any)
+        role: Message role (appears in first chunk)
+        tool_calls: Tool call information (if function calling is used)
+    """
+    content: Optional[str] = None
+    role: Optional[str] = None
+    tool_calls: Optional[List[Dict]] = None
+
+
+@dataclass
+class StreamChoice:
+    """
+    Represents a choice in a streaming response.
+
+    Attributes:
+        delta: The incremental changes in this chunk
+        index: Choice index (usually 0)
+        finish_reason: Reason for completion (appears in final chunk)
+    """
+    delta: StreamDelta
+    index: int = 0
+    finish_reason: Optional[str] = None
+
+
+@dataclass
+class StreamUsage:
+    """
+    Token usage information for a streaming response.
+
+    Attributes:
+        prompt_tokens: Number of tokens in the prompt
+        completion_tokens: Number of tokens in the completion
+        total_tokens: Total tokens used
+    """
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclass
+class StreamChunk:
+    """
+    Represents a single chunk in a streaming LLM response.
+
+    This follows the OpenAI streaming format but is provider-agnostic.
+    Each chunk contains incremental data, with the final chunk including
+    usage information.
+
+    Attributes:
+        choices: List of streaming choices (usually contains one choice)
+        model: Model identifier
+        usage: Token usage information (only in final chunk)
+        is_final: Whether this is the final chunk (before [DONE])
+    """
+    choices: List[StreamChoice]
+    model: str
+    usage: Optional[StreamUsage] = None
+    is_final: bool = False
+
+    @classmethod
+    def from_sse_data(cls, data: Dict) -> "StreamChunk":
+        """
+        Parse a StreamChunk from SSE data dictionary.
+
+        Args:
+            data: Dictionary parsed from SSE data line
+
+        Returns:
+            StreamChunk instance
+        """
+        choices = []
+        for choice_data in data.get("choices", []):
+            delta_data = choice_data.get("delta", {})
+            delta = StreamDelta(
+                content=delta_data.get("content"),
+                role=delta_data.get("role"),
+                tool_calls=delta_data.get("tool_calls")
+            )
+            choice = StreamChoice(
+                delta=delta,
+                index=choice_data.get("index", 0),
+                finish_reason=choice_data.get("finish_reason")
+            )
+            choices.append(choice)
+
+        usage = None
+        if "usage" in data:
+            usage_data = data["usage"]
+            usage = StreamUsage(
+                prompt_tokens=usage_data.get("prompt_tokens", 0),
+                completion_tokens=usage_data.get("completion_tokens", 0),
+                total_tokens=usage_data.get("total_tokens", 0)
+            )
+
+        is_final = any(c.finish_reason is not None for c in choices) or usage is not None
+
+        return cls(
+            choices=choices,
+            model=data.get("model", "unknown"),
+            usage=usage,
+            is_final=is_final
+        )
+
+
+@dataclass
+class TextGenerationStream:
+    """
+    Iterator wrapper for streaming text generation responses.
+
+    Provides a clean interface for iterating over stream chunks with
+    automatic parsing of SSE format.
+
+    Usage:
+        stream = client.llm_chat(..., stream=True)
+        for chunk in stream:
+            if chunk.choices[0].delta.content:
+                print(chunk.choices[0].delta.content, end="")
+    """
+    _iterator: Union[Iterator[str], AsyncIterator[str]]
+    _is_async: bool = False
+
+    def __iter__(self):
+        """Iterate over stream chunks."""
+        return self
+
+    def __next__(self) -> StreamChunk:
+        """Get next stream chunk."""
+        import json
+
+        while True:
+            try:
+                line = next(self._iterator)
+            except StopIteration:
+                raise
+
+            if not line or not line.strip():
+                continue
+
+            if not line.startswith("data: "):
+                continue
+
+            data_str = line[6:]  # Remove "data: " prefix
+
+            if data_str.strip() == "[DONE]":
+                raise StopIteration
+
+            try:
+                data = json.loads(data_str)
+                return StreamChunk.from_sse_data(data)
+            except json.JSONDecodeError:
+                # Skip malformed chunks
+                continue
+
+    async def __anext__(self) -> StreamChunk:
+        """Get next stream chunk (async version)."""
+        import json
+
+        if not self._is_async:
+            raise TypeError("Use __next__ for sync iterators")
+
+        while True:
+            try:
+                line = await self._iterator.__anext__()
+            except StopAsyncIteration:
+                raise
+
+            if not line or not line.strip():
+                continue
+
+            if not line.startswith("data: "):
+                continue
+
+            data_str = line[6:]
+
+            if data_str.strip() == "[DONE]":
+                raise StopAsyncIteration
+
+            try:
+                data = json.loads(data_str)
+                return StreamChunk.from_sse_data(data)
+            except json.JSONDecodeError:
+                continue
+
+
 @dataclass
 class TextGenerationOutput:
     """
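To make the new data model concrete, the snippet below shows how an SSE `data:` payload maps onto `StreamChunk` via `from_sse_data`, exactly as defined in the hunk above. The payloads are invented examples in the OpenAI-style shape the parser expects, not output captured from the service.

```python
# Illustrative only: invented OpenAI-style SSE payloads parsed with the 0.5.9 helpers.
from opengradient.types import StreamChunk

# A mid-stream chunk: delta text, no finish_reason, no usage.
sse_payload = {
    "model": "claude-3.5-haiku",
    "choices": [
        {
            "index": 0,
            "delta": {"role": "assistant", "content": "Clouds form when"},
            "finish_reason": None,
        }
    ],
}

chunk = StreamChunk.from_sse_data(sse_payload)
assert chunk.choices[0].delta.content == "Clouds form when"
assert chunk.is_final is False  # no finish_reason and no usage yet

# The final chunk carries a finish_reason and usage, which flips is_final to True.
final_payload = {
    "model": "claude-3.5-haiku",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
    "usage": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
}

final_chunk = StreamChunk.from_sse_data(final_payload)
assert final_chunk.is_final is True
assert final_chunk.usage.total_tokens == 46
```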
--- /dev/null
+++ opengradient-0.5.9/src/opengradient/x402_auth.py
@@ -0,0 +1,60 @@
+import httpx
+import typing
+import logging
+
+from x402.clients.base import x402Client
+from x402.types import x402PaymentRequiredResponse, PaymentRequirements
+
+
+class X402Auth(httpx.Auth):
+    """Auth class for handling x402 payment requirements."""
+
+    def __init__(
+        self,
+        account: typing.Any,
+        max_value: typing.Optional[int] = None,
+        payment_requirements_selector: typing.Optional[
+            typing.Callable[
+                [
+                    list[PaymentRequirements],
+                    typing.Optional[str],
+                    typing.Optional[str],
+                    typing.Optional[int],
+                ],
+                PaymentRequirements,
+            ]
+        ] = None,
+    ):
+        self.x402_client = x402Client(
+            account,
+            max_value=max_value,
+            payment_requirements_selector=payment_requirements_selector,  # type: ignore
+        )
+
+    async def async_auth_flow(
+        self, request: httpx.Request
+    ) -> typing.AsyncGenerator[httpx.Request, httpx.Response]:
+        response = yield request
+
+        if response.status_code == 402:
+            try:
+                await response.aread()
+                data = response.json()
+
+                payment_response = x402PaymentRequiredResponse(**data)
+
+                selected_requirements = self.x402_client.select_payment_requirements(
+                    payment_response.accepts
+                )
+
+                payment_header = self.x402_client.create_payment_header(
+                    selected_requirements, payment_response.x402_version
+                )
+
+                request.headers["X-Payment"] = payment_header
+                request.headers["Access-Control-Expose-Headers"] = "X-Payment-Response"
+                yield request
+
+            except Exception as e:
+                logging.error(f"X402Auth: Error handling payment: {e}")
+                return
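`X402Auth` is a custom `httpx.Auth` implementation: the first request goes out unchanged, and if the server answers 402 Payment Required, the auth flow builds an `X-Payment` header with the x402 client and replays the request once. A hedged wiring sketch follows; the wallet is a throwaway account, the base URL is the default server from defaults.py, the model name is a placeholder, and whether the request actually settles depends on the wallet being funded on the OpenGradient network.

```python
# Hypothetical wiring sketch for X402Auth; account, model, and outcome are illustrative.
import asyncio

import httpx
from eth_account import Account

from opengradient.x402_auth import X402Auth


async def main() -> None:
    wallet = Account.create()  # throwaway LocalAccount; a funded wallet is needed in practice

    async with httpx.AsyncClient(
        base_url="https://llmogevm.opengradient.ai",  # default server URL from defaults.py
        auth=X402Auth(account=wallet),                # retries the request once after a 402
    ) as client:
        response = await client.post(
            "/v1/chat/completions",
            json={"model": "example-model", "messages": [], "stream": False},
        )
        print(response.status_code)


if __name__ == "__main__":
    asyncio.run(main())
```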
--- opengradient-0.5.8/PKG-INFO
+++ opengradient-0.5.9/src/opengradient.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.5.8
+Version: 0.5.9
 Summary: Python SDK for OpenGradient decentralized model management & inference services
 Author-email: OpenGradient <kyle@vannalabs.ai>
 License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.32.3
 Requires-Dist: langchain>=0.3.7
 Requires-Dist: openai>=1.58.1
 Requires-Dist: pydantic>=2.9.2
-Requires-Dist: og-test-x402==0.0.
+Requires-Dist: og-test-x402==0.0.9
 Dynamic: license-file

 # OpenGradient Python SDK
--- opengradient-0.5.8/src/opengradient.egg-info/SOURCES.txt
+++ opengradient-0.5.9/src/opengradient.egg-info/SOURCES.txt
@@ -9,6 +9,7 @@ src/opengradient/defaults.py
 src/opengradient/exceptions.py
 src/opengradient/types.py
 src/opengradient/utils.py
+src/opengradient/x402_auth.py
 src/opengradient.egg-info/PKG-INFO
 src/opengradient.egg-info/SOURCES.txt
 src/opengradient.egg-info/dependency_links.txt