opengradient 0.5.8__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opengradient-0.5.8/src/opengradient.egg-info → opengradient-0.5.9}/PKG-INFO +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/pyproject.toml +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/__init__.py +7 -3
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/cli.py +89 -4
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/client.py +317 -33
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/defaults.py +3 -1
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/types.py +191 -1
- opengradient-0.5.9/src/opengradient/x402_auth.py +60 -0
- {opengradient-0.5.8 → opengradient-0.5.9/src/opengradient.egg-info}/PKG-INFO +2 -2
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/SOURCES.txt +1 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/requires.txt +1 -1
- {opengradient-0.5.8 → opengradient-0.5.9}/LICENSE +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/README.md +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/setup.cfg +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/InferencePrecompile.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/PriceHistoryInference.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/WorkflowScheduler.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/abi/inference.abi +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/account.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/read_workflow_tool.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/run_model_tool.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/alphasense/types.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/bin/PriceHistoryInference.bin +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/exceptions.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/og_langchain.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/llm/og_openai.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer.proto +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2_grpc.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/utils.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/__init__.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/constants.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/types.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/utils.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient/workflow_models/workflow_models.py +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/dependency_links.txt +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/entry_points.txt +0 -0
- {opengradient-0.5.8 → opengradient-0.5.9}/src/opengradient.egg-info/top_level.txt +0 -0
--- opengradient-0.5.8/src/opengradient.egg-info/PKG-INFO
+++ opengradient-0.5.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.5.8
+Version: 0.5.9
 Summary: Python SDK for OpenGradient decentralized model management & inference services
 Author-email: OpenGradient <kyle@vannalabs.ai>
 License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.32.3
 Requires-Dist: langchain>=0.3.7
 Requires-Dist: openai>=1.58.1
 Requires-Dist: pydantic>=2.9.2
-Requires-Dist: og-test-x402==0.0.
+Requires-Dist: og-test-x402==0.0.9
 Dynamic: license-file

 # OpenGradient Python SDK
--- opengradient-0.5.8/pyproject.toml
+++ opengradient-0.5.9/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "opengradient"
-version = "0.5.8"
+version = "0.5.9"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "kyle@vannalabs.ai"}]
 readme = "README.md"
@@ -29,7 +29,7 @@ dependencies = [
     "langchain>=0.3.7",
     "openai>=1.58.1",
     "pydantic>=2.9.2",
-    "og-test-x402==0.0.
+    "og-test-x402==0.0.9",
 ]

 [project.scripts]
--- opengradient-0.5.8/src/opengradient/__init__.py
+++ opengradient-0.5.9/src/opengradient/__init__.py
@@ -17,6 +17,7 @@ from .types import (
     InferenceResult,
     LlmInferenceMode,
     TextGenerationOutput,
+    TextGenerationStream,
     ModelOutput,
     ModelRepository,
     FileUploadResult,
@@ -225,7 +226,8 @@ def llm_chat(
     tool_choice: Optional[str] = None,
     max_retries: Optional[int] = None,
     x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
-) -> TextGenerationOutput:
+    stream: Optional[bool] = False,
+) -> Union[TextGenerationOutput, TextGenerationStream]:
     """Have a chat conversation with an LLM.

     Args:
@@ -239,9 +241,10 @@ def llm_chat(
         tool_choice: Optional specific tool to use
         max_retries: Maximum number of retries for failed transactions
         x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode)
+        stream: Optional boolean to enable streaming

     Returns:
-        TextGenerationOutput
+        TextGenerationOutput or TextGenerationStream

     Raises:
         RuntimeError: If SDK is not initialized
@@ -258,7 +261,8 @@ def llm_chat(
         tools=tools,
         tool_choice=tool_choice,
         max_retries=max_retries,
-        x402_settlement_mode=x402_settlement_mode
+        x402_settlement_mode=x402_settlement_mode,
+        stream=stream,
     )


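For orientation, the sketch below shows how the new `stream` parameter on the top-level `llm_chat` is meant to be consumed, based only on the signatures and docstrings added in this diff. It is a hedged illustration, not code from the release: the SDK initialization step is omitted (`llm_chat` raises `RuntimeError` if the SDK is not initialized), and the prompt and `max_tokens` value are placeholders; the model identifier is the one used in the CLI docstring example.

```python
# Hedged usage sketch of the 0.5.9 streaming API; setup and prompt are placeholders.
import opengradient as og

# With stream=True, llm_chat returns a TextGenerationStream instead of a
# TextGenerationOutput; iterating it yields StreamChunk objects.
stream = og.llm_chat(
    model_cid="anthropic/claude-3.5-haiku",  # example model from the CLI docstring
    messages=[{"role": "user", "content": "How are clouds formed?"}],
    max_tokens=250,
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)      # incremental text
    if chunk.is_final and chunk.usage:
        print(f"\n[total tokens: {chunk.usage.total_tokens}]")
```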
--- opengradient-0.5.8/src/opengradient/cli.py
+++ opengradient-0.5.9/src/opengradient/cli.py
@@ -6,6 +6,7 @@ import logging
 import webbrowser
 from pathlib import Path
 from typing import Dict, List, Optional
+import sys

 import click

@@ -557,6 +558,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     default="settle-batch",
     help="Settlement mode for x402 payments: settle (hashes only), settle-batch (batched, default), settle-metadata (full data)",
 )
+@click.option("--stream", is_flag=True, default=False, help="Stream the output from the LLM")
 @click.pass_context
 def chat(
     ctx,
@@ -572,6 +574,7 @@ def chat(
     tool_choice: Optional[str],
     x402_settlement_mode: Optional[str],
     local: bool,
+    stream: bool,
 ):
     """
     Run chat inference on an LLM model (local or external).
@@ -590,6 +593,9 @@ def chat(

     # External Anthropic model
     opengradient chat --model claude-haiku-4-5-20251001 --messages '[{"role":"user","content":"Write a poem"}]' --max-tokens 100
+
+    # Stream output
+    opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream
     """
     client: Client = ctx.obj["client"]

@@ -656,7 +662,7 @@ def chat(
         if not tools and not tools_file:
             parsed_tools = None

-
+        result = client.llm_chat(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
             messages=messages,
@@ -667,11 +673,16 @@ def chat(
             tool_choice=tool_choice,
             local_model=local,
             x402_settlement_mode=x402_settlement_mode,
+            stream=stream,
         )

-
-
-
+        # Handle response based on streaming flag
+        if stream:
+            print_streaming_chat_result(model_cid, result, is_local)
+        else:
+            print_llm_chat_result(
+                model_cid, result.transaction_hash, result.finish_reason, result.chat_output, is_local
+            )

     except Exception as e:
         click.echo(f"Error running LLM chat inference: {str(e)}")
@@ -706,6 +717,80 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_local=True):
     click.echo()


+def print_streaming_chat_result(model_cid, stream, is_local=True):
+    """Handle streaming chat response with typed chunks - prints in real-time"""
+    click.secho("🌊 Streaming LLM Chat", fg="green", bold=True)
+    click.echo("──────────────────────────────────────")
+    click.echo("Model: ", nl=False)
+    click.secho(model_cid, fg="cyan", bold=True)
+
+    if is_local:
+        click.echo("Source: ", nl=False)
+        click.secho("OpenGradient TEE", fg="cyan", bold=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
+    click.echo("──────────────────────────────────────")
+    click.secho("Response:", fg="yellow", bold=True)
+    click.echo()
+
+    try:
+        content_parts = []
+        chunk_count = 0
+
+        for chunk in stream:
+            chunk_count += 1
+
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                sys.stdout.write(content)
+                sys.stdout.flush()
+                content_parts.append(content)
+
+            # Handle tool calls
+            if chunk.choices[0].delta.tool_calls:
+                sys.stdout.write("\n")
+                sys.stdout.flush()
+                click.secho("Tool Calls:", fg="yellow", bold=True)
+                for tool_call in chunk.choices[0].delta.tool_calls:
+                    click.echo(f"  Function: {tool_call['function']['name']}")
+                    click.echo(f"  Arguments: {tool_call['function']['arguments']}")
+
+            # Print final info when stream completes
+            if chunk.is_final:
+                sys.stdout.write("\n\n")
+                sys.stdout.flush()
+                click.echo("──────────────────────────────────────")
+
+                if chunk.usage:
+                    click.secho("Token Usage:", fg="cyan")
+                    click.echo(f"  Prompt tokens: {chunk.usage.prompt_tokens}")
+                    click.echo(f"  Completion tokens: {chunk.usage.completion_tokens}")
+                    click.echo(f"  Total tokens: {chunk.usage.total_tokens}")
+                    click.echo()
+
+                if chunk.choices[0].finish_reason:
+                    click.echo("Finish reason: ", nl=False)
+                    click.secho(chunk.choices[0].finish_reason, fg="green")
+
+                click.echo("──────────────────────────────────────")
+                click.echo(f"Chunks received: {chunk_count}")
+                click.echo(f"Content length: {len(''.join(content_parts))} characters")
+                click.echo()
+
+    except KeyboardInterrupt:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho("Stream interrupted by user", fg="yellow")
+        click.echo()
+    except Exception as e:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho(f"Streaming error: {str(e)}", fg="red", bold=True)
+        click.echo()
+
+
 @cli.command()
 def create_account():
     """Create a new test account for OpenGradient inference and model management"""
--- opengradient-0.5.8/src/opengradient/client.py
+++ opengradient-0.5.9/src/opengradient/client.py
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Union, Callable
 import firebase
 import numpy as np
 import requests
+import httpx
 from eth_account.account import LocalAccount
 from web3 import Web3
 from web3.exceptions import ContractLogicError
@@ -17,7 +18,9 @@ import urllib.parse
 import asyncio
 from x402.clients.httpx import x402HttpxClient
 from x402.clients.base import decode_x_payment_response, x402Client
+from x402.clients.httpx import x402HttpxClient

+from .x402_auth import X402Auth
 from .exceptions import OpenGradientError
 from .proto import infer_pb2, infer_pb2_grpc
 from .types import (
@@ -29,10 +32,12 @@ from .types import (
     LlmInferenceMode,
     ModelOutput,
     TextGenerationOutput,
+    TextGenerationStream,
     SchedulerParams,
     InferenceResult,
     ModelRepository,
     FileUploadResult,
+    StreamChunk,
 )
 from .defaults import (
     DEFAULT_IMAGE_GEN_HOST,
@@ -40,6 +45,8 @@ from .defaults import (
     DEFAULT_SCHEDULER_ADDRESS,
     DEFAULT_LLM_SERVER_URL,
     DEFAULT_OPENGRADIENT_LLM_SERVER_URL,
+    DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL,
+    DEFAULT_NETWORK_FILTER,
 )
 from .utils import convert_array_to_model_output, convert_to_model_input, convert_to_model_output

@@ -66,6 +73,18 @@ PRECOMPILE_CONTRACT_ADDRESS = "0x00000000000000000000000000000000000000F4"
 X402_PROCESSING_HASH_HEADER = "x-processing-hash"
 X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"

+TIMEOUT = httpx.Timeout(
+    timeout=90.0,
+    connect=15.0,
+    read=15.0,
+    write=30.0,
+    pool=10.0,
+)
+LIMITS = httpx.Limits(
+    max_keepalive_connections=100,
+    max_connections=500,
+    keepalive_expiry=60 * 20,  # 20 minutes
+)

 class Client:
     _inference_hub_contract_address: str
@@ -89,6 +108,7 @@ class Client:
         password: Optional[str] = None,
         llm_server_url: Optional[str] = DEFAULT_LLM_SERVER_URL,
         og_llm_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_SERVER_URL,
+        og_llm_streaming_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL,
         openai_api_key: Optional[str] = None,
         anthropic_api_key: Optional[str] = None,
         google_api_key: Optional[str] = None,
@@ -123,6 +143,7 @@ class Client:

         self._llm_server_url = llm_server_url
         self._og_llm_server_url = og_llm_server_url
+        self._og_llm_streaming_server_url = og_llm_streaming_server_url

         self._external_api_keys = {}
         if openai_api_key or os.getenv("OPENAI_API_KEY"):
@@ -421,11 +442,11 @@ class Client:

         return run_with_retry(execute_transaction, max_retries)

-    def _og_payment_selector(self, accepts, network_filter=
-        """Custom payment selector for OpenGradient network
+    def _og_payment_selector(self, accepts, network_filter=DEFAULT_NETWORK_FILTER, scheme_filter=None, max_value=None):
+        """Custom payment selector for OpenGradient network."""
         return x402Client.default_payment_requirements_selector(
             accepts,
-            network_filter=
+            network_filter=network_filter,
             scheme_filter=scheme_filter,
             max_value=max_value,
         )
@@ -652,7 +673,8 @@ class Client:
         max_retries: Optional[int] = None,
         local_model: Optional[bool] = False,
         x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
-    ) -> TextGenerationOutput:
+        stream: bool = False,
+    ) -> Union[TextGenerationOutput, TextGenerationStream]:
         """
         Perform inference on an LLM model using chat.

@@ -672,13 +694,12 @@ class Client:
                 - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient).
                 - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata.
                 Defaults to SETTLE_BATCH.
+            stream (bool, optional): Whether to stream the response. Default is False.

         Returns:
-            TextGenerationOutput:
-                -
-                -
-                - finish_reason: Reason for completion (e.g., "stop", "tool_call")
-                - payment_hash: Payment hash for x402 transactions (when using x402 settlement)
+            Union[TextGenerationOutput, TextGenerationStream]:
+                - If stream=False: TextGenerationOutput with chat_output, transaction_hash, finish_reason, and payment_hash
+                - If stream=True: TextGenerationStream yielding StreamChunk objects with typed deltas (true streaming via threading)

         Raises:
             OpenGradientError: If the inference fails.
@@ -689,16 +710,33 @@ class Client:
             if model_cid not in TEE_LLM:
                 return OpenGradientError("That model CID is not supported yet for TEE inference")

-
-
-
-
-
-
-
-
-
-
+            if stream:
+                # Use threading bridge for true sync streaming
+                return self._external_llm_chat_stream_sync(
+                    model=model_cid.split("/")[1],
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stop_sequence=stop_sequence,
+                    temperature=temperature,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    x402_settlement_mode=x402_settlement_mode,
+                    use_tee=True,
+                )
+            else:
+                # Non-streaming
+                return self._external_llm_chat(
+                    model=model_cid.split("/")[1],
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stop_sequence=stop_sequence,
+                    temperature=temperature,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    x402_settlement_mode=x402_settlement_mode,
+                    stream=False,
+                    use_tee=True,
+                )

         # Original local model logic
         def execute_transaction():
@@ -778,7 +816,9 @@ class Client:
         tools: Optional[List[Dict]] = None,
         tool_choice: Optional[str] = None,
         x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
-    ) -> TextGenerationOutput:
+        stream: bool = False,
+        use_tee: bool = False,
+    ) -> Union[TextGenerationOutput, TextGenerationStream]:
         """
         Route chat request to external LLM server with x402 payments.

@@ -790,18 +830,24 @@ class Client:
             temperature: Sampling temperature
             tools: Function calling tools
             tool_choice: Tool selection strategy
+            stream: Whether to stream the response
+            use_tee: Whether to use TEE

         Returns:
-            TextGenerationOutput
+            Union[TextGenerationOutput, TextGenerationStream]: Chat completion or TextGenerationStream

         Raises:
             OpenGradientError: If request fails
         """
-        api_key = self._get_api_key_for_model(model)
+        api_key = None if use_tee else self._get_api_key_for_model(model)

         if api_key:
-            logging.debug("External LLM
-
+            logging.debug("External LLM chat using API key")
+
+            if stream:
+                url = f"{self._llm_server_url}/v1/chat/completions/stream"
+            else:
+                url = f"{self._llm_server_url}/v1/chat/completions"

             headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

@@ -820,14 +866,23 @@ class Client:
                 payload["tool_choice"] = tool_choice or "auto"

             try:
-
-
+                if stream:
+                    # Return streaming response wrapped in TextGenerationStream
+                    response = requests.post(url, json=payload, headers=headers, timeout=60, stream=True)
+                    response.raise_for_status()
+                    return TextGenerationStream(_iterator=response.iter_lines(decode_unicode=True), _is_async=False)
+                else:
+                    # Non-streaming response
+                    response = requests.post(url, json=payload, headers=headers, timeout=60)
+                    response.raise_for_status()

-
+                    result = response.json()

-
-
-
+                    return TextGenerationOutput(
+                        transaction_hash="external",
+                        finish_reason=result.get("finish_reason"),
+                        chat_output=result.get("message")
+                    )

             except requests.RequestException as e:
                 error_msg = f"External LLM chat failed: {str(e)}"
@@ -840,6 +895,7 @@ class Client:
             logging.error(error_msg)
             raise OpenGradientError(error_msg)

+        # x402 payment path - non-streaming only here
         async def make_request():
             async with x402HttpxClient(
                 account=self._wallet_account,
@@ -867,13 +923,13 @@ class Client:
                     payload["tool_choice"] = tool_choice or "auto"

                 try:
-
+                    # Non-streaming with x402
+                    endpoint = "/v1/chat/completions"
+                    response = await client.post(endpoint, json=payload, headers=headers, timeout=60)

                     # Read the response content
                     content = await response.aread()
                     result = json.loads(content.decode())
-                    # print(f"Response: {response}")
-                    # print(f"Response Headers: {response.headers}")

                     payment_hash = ""
                     if X402_PROCESSING_HASH_HEADER in response.headers:
@@ -909,6 +965,234 @@ class Client:
                     logging.error(error_msg)
                     raise OpenGradientError(error_msg)

+    def _external_llm_chat_stream_sync(
+        self,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: Optional[str] = None,
+        x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
+        use_tee: bool = False,
+    ):
+        """
+        Sync streaming using threading bridge - TRUE real-time streaming.
+
+        Yields StreamChunk objects as they arrive from the background thread.
+        NO buffering, NO conversion, just direct pass-through.
+        """
+        import threading
+        from queue import Queue
+
+        queue = Queue()
+        exception_holder = []
+
+        def _run_async():
+            """Run async streaming in background thread"""
+            loop = None
+            try:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+
+                async def _stream():
+                    try:
+                        async for chunk in self._external_llm_chat_stream_async(
+                            model=model,
+                            messages=messages,
+                            max_tokens=max_tokens,
+                            stop_sequence=stop_sequence,
+                            temperature=temperature,
+                            tools=tools,
+                            tool_choice=tool_choice,
+                            x402_settlement_mode=x402_settlement_mode,
+                            use_tee=use_tee,
+                        ):
+                            queue.put(chunk)  # Put chunk immediately
+                    except Exception as e:
+                        exception_holder.append(e)
+                    finally:
+                        queue.put(None)  # Signal completion
+
+                loop.run_until_complete(_stream())
+            except Exception as e:
+                exception_holder.append(e)
+                queue.put(None)
+            finally:
+                if loop:
+                    try:
+                        pending = asyncio.all_tasks(loop)
+                        for task in pending:
+                            task.cancel()
+                        loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
+                    finally:
+                        loop.close()
+
+        # Start background thread
+        thread = threading.Thread(target=_run_async, daemon=True)
+        thread.start()
+
+        # Yield chunks DIRECTLY as they arrive - NO buffering
+        try:
+            while True:
+                chunk = queue.get()  # Blocks until chunk available
+                if chunk is None:
+                    break
+                yield chunk  # Yield immediately!
+
+            thread.join(timeout=5)
+
+            if exception_holder:
+                raise exception_holder[0]
+        except Exception as e:
+            thread.join(timeout=1)
+            raise
+
+    async def _external_llm_chat_stream_async(
+        self,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: Optional[str] = None,
+        x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH,
+        use_tee: bool = False,
+    ):
+        """
+        Internal async streaming implementation.
+
+        Yields StreamChunk objects as they arrive from the server.
+        """
+        api_key = None if use_tee else self._get_api_key_for_model(model)
+
+        if api_key:
+            # API key path - streaming to local llm-server
+            url = f"{self._og_llm_streaming_server_url}/v1/chat/completions"
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {api_key}"
+            }
+
+            payload = {
+                "model": model,
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": True,
+            }
+
+            if stop_sequence:
+                payload["stop"] = stop_sequence
+            if tools:
+                payload["tools"] = tools
+                payload["tool_choice"] = tool_choice or "auto"
+
+            async with httpx.AsyncClient(verify=False, timeout=None) as client:
+                async with client.stream("POST", url, json=payload, headers=headers) as response:
+                    buffer = b""
+                    async for chunk in response.aiter_raw():
+                        if not chunk:
+                            continue
+
+                        buffer += chunk
+
+                        # Process all complete lines in buffer
+                        while b"\n" in buffer:
+                            line_bytes, buffer = buffer.split(b"\n", 1)
+
+                            if not line_bytes.strip():
+                                continue
+
+                            try:
+                                line = line_bytes.decode('utf-8').strip()
+                            except UnicodeDecodeError:
+                                continue
+
+                            if not line.startswith("data: "):
+                                continue
+
+                            data_str = line[6:]  # Strip "data: " prefix
+                            if data_str.strip() == "[DONE]":
+                                return
+
+                            try:
+                                data = json.loads(data_str)
+                                yield StreamChunk.from_sse_data(data)
+                            except json.JSONDecodeError:
+                                continue
+        else:
+            # x402 payment path
+            async with httpx.AsyncClient(
+                base_url=self._og_llm_streaming_server_url,
+                headers={"Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}"},
+                timeout=TIMEOUT,
+                limits=LIMITS,
+                http2=False,
+                follow_redirects=False,
+                auth=X402Auth(account=self._wallet_account),  # type: ignore
+            ) as client:
+                headers = {
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}",
+                    "X-SETTLEMENT-TYPE": x402_settlement_mode,
+                }
+
+                payload = {
+                    "model": model,
+                    "messages": messages,
+                    "max_tokens": max_tokens,
+                    "temperature": temperature,
+                    "stream": True,
+                }
+
+                if stop_sequence:
+                    payload["stop"] = stop_sequence
+                if tools:
+                    payload["tools"] = tools
+                    payload["tool_choice"] = tool_choice or "auto"
+
+                async with client.stream(
+                    "POST",
+                    "/v1/chat/completions",
+                    json=payload,
+                    headers=headers,
+                ) as response:
+                    buffer = b""
+                    async for chunk in response.aiter_raw():
+                        if not chunk:
+                            continue
+
+                        buffer += chunk
+
+                        # Process complete lines from buffer
+                        while b"\n" in buffer:
+                            line_bytes, buffer = buffer.split(b"\n", 1)
+
+                            if not line_bytes.strip():
+                                continue
+
+                            try:
+                                line = line_bytes.decode('utf-8').strip()
+                            except UnicodeDecodeError:
+                                continue
+
+                            if not line.startswith("data: "):
+                                continue
+
+                            data_str = line[6:]
+                            if data_str.strip() == "[DONE]":
+                                return
+
+                            try:
+                                data = json.loads(data_str)
+                                yield StreamChunk.from_sse_data(data)
+                            except json.JSONDecodeError:
+                                continue
+
     def list_files(self, model_name: str, version: str) -> List[Dict]:
         """
         List files for a specific version of a model.
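The new `_external_llm_chat_stream_sync` method above bridges the async SSE stream into a synchronous generator with a background thread and a `Queue`. The stripped-down sketch below is not part of the SDK and uses only standard-library names; it isolates the pattern so the control flow is easier to follow: an event loop in a daemon thread pumps chunks into a queue, a `None` sentinel marks completion, and the synchronous caller simply blocks on `queue.get()`.

```python
# Standalone illustration of the async-to-sync streaming bridge; all names are illustrative.
import asyncio
import threading
from queue import Queue
from typing import AsyncIterator, Iterator


async def produce_chunks() -> AsyncIterator[str]:
    """Stand-in for an async SSE stream."""
    for i in range(3):
        await asyncio.sleep(0.1)
        yield f"chunk-{i}"


def iterate_sync(async_gen_factory) -> Iterator[str]:
    """Consume an async generator from synchronous code, chunk by chunk."""
    queue: Queue = Queue()
    errors: list = []

    def run() -> None:
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)

            async def pump() -> None:
                try:
                    async for item in async_gen_factory():
                        queue.put(item)          # hand each chunk to the sync side
                except BaseException as exc:     # propagate failures to the caller
                    errors.append(exc)
                finally:
                    queue.put(None)              # sentinel: stream finished

            loop.run_until_complete(pump())
        finally:
            loop.close()

    threading.Thread(target=run, daemon=True).start()

    while True:
        item = queue.get()                       # blocks until the next chunk arrives
        if item is None:
            break
        yield item

    if errors:
        raise errors[0]


if __name__ == "__main__":
    for chunk in iterate_sync(produce_chunks):
        print(chunk)
```

The trade-off is the same one the SDK makes: the caller keeps a plain `for` loop while the HTTP streaming stays fully async, at the cost of one extra thread per stream.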
--- opengradient-0.5.8/src/opengradient/defaults.py
+++ opengradient-0.5.9/src/opengradient/defaults.py
@@ -9,4 +9,6 @@ DEFAULT_BLOCKCHAIN_EXPLORER = "https://explorer.opengradient.ai/tx/"
 DEFAULT_IMAGE_GEN_HOST = "18.217.25.69"
 DEFAULT_IMAGE_GEN_PORT = 5125
 DEFAULT_LLM_SERVER_URL = "http://35.225.197.84:8000"
-DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://
+DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://llmogevm.opengradient.ai"
+DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://llmogevm.opengradient.ai"
+DEFAULT_NETWORK_FILTER = "og-evm"
--- opengradient-0.5.8/src/opengradient/types.py
+++ opengradient-0.5.9/src/opengradient/types.py
@@ -1,7 +1,7 @@
 import time
 from dataclasses import dataclass
 from enum import Enum, IntEnum, StrEnum
-from typing import Dict, List, Optional, Tuple, Union, DefaultDict
+from typing import Dict, List, Optional, Tuple, Union, DefaultDict, Iterator, AsyncIterator
 import numpy as np


@@ -165,6 +165,196 @@ class InferenceResult:
     model_output: Dict[str, np.ndarray]


+@dataclass
+class StreamDelta:
+    """
+    Represents a delta (incremental change) in a streaming response.
+
+    Attributes:
+        content: Incremental text content (if any)
+        role: Message role (appears in first chunk)
+        tool_calls: Tool call information (if function calling is used)
+    """
+    content: Optional[str] = None
+    role: Optional[str] = None
+    tool_calls: Optional[List[Dict]] = None
+
+
+@dataclass
+class StreamChoice:
+    """
+    Represents a choice in a streaming response.
+
+    Attributes:
+        delta: The incremental changes in this chunk
+        index: Choice index (usually 0)
+        finish_reason: Reason for completion (appears in final chunk)
+    """
+    delta: StreamDelta
+    index: int = 0
+    finish_reason: Optional[str] = None
+
+
+@dataclass
+class StreamUsage:
+    """
+    Token usage information for a streaming response.
+
+    Attributes:
+        prompt_tokens: Number of tokens in the prompt
+        completion_tokens: Number of tokens in the completion
+        total_tokens: Total tokens used
+    """
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclass
+class StreamChunk:
+    """
+    Represents a single chunk in a streaming LLM response.
+
+    This follows the OpenAI streaming format but is provider-agnostic.
+    Each chunk contains incremental data, with the final chunk including
+    usage information.
+
+    Attributes:
+        choices: List of streaming choices (usually contains one choice)
+        model: Model identifier
+        usage: Token usage information (only in final chunk)
+        is_final: Whether this is the final chunk (before [DONE])
+    """
+    choices: List[StreamChoice]
+    model: str
+    usage: Optional[StreamUsage] = None
+    is_final: bool = False
+
+    @classmethod
+    def from_sse_data(cls, data: Dict) -> "StreamChunk":
+        """
+        Parse a StreamChunk from SSE data dictionary.
+
+        Args:
+            data: Dictionary parsed from SSE data line
+
+        Returns:
+            StreamChunk instance
+        """
+        choices = []
+        for choice_data in data.get("choices", []):
+            delta_data = choice_data.get("delta", {})
+            delta = StreamDelta(
+                content=delta_data.get("content"),
+                role=delta_data.get("role"),
+                tool_calls=delta_data.get("tool_calls")
+            )
+            choice = StreamChoice(
+                delta=delta,
+                index=choice_data.get("index", 0),
+                finish_reason=choice_data.get("finish_reason")
+            )
+            choices.append(choice)
+
+        usage = None
+        if "usage" in data:
+            usage_data = data["usage"]
+            usage = StreamUsage(
+                prompt_tokens=usage_data.get("prompt_tokens", 0),
+                completion_tokens=usage_data.get("completion_tokens", 0),
+                total_tokens=usage_data.get("total_tokens", 0)
+            )
+
+        is_final = any(c.finish_reason is not None for c in choices) or usage is not None
+
+        return cls(
+            choices=choices,
+            model=data.get("model", "unknown"),
+            usage=usage,
+            is_final=is_final
+        )
+
+
+@dataclass
+class TextGenerationStream:
+    """
+    Iterator wrapper for streaming text generation responses.
+
+    Provides a clean interface for iterating over stream chunks with
+    automatic parsing of SSE format.
+
+    Usage:
+        stream = client.llm_chat(..., stream=True)
+        for chunk in stream:
+            if chunk.choices[0].delta.content:
+                print(chunk.choices[0].delta.content, end="")
+    """
+    _iterator: Union[Iterator[str], AsyncIterator[str]]
+    _is_async: bool = False
+
+    def __iter__(self):
+        """Iterate over stream chunks."""
+        return self
+
+    def __next__(self) -> StreamChunk:
+        """Get next stream chunk."""
+        import json
+
+        while True:
+            try:
+                line = next(self._iterator)
+            except StopIteration:
+                raise
+
+            if not line or not line.strip():
+                continue
+
+            if not line.startswith("data: "):
+                continue
+
+            data_str = line[6:]  # Remove "data: " prefix
+
+            if data_str.strip() == "[DONE]":
+                raise StopIteration
+
+            try:
+                data = json.loads(data_str)
+                return StreamChunk.from_sse_data(data)
+            except json.JSONDecodeError:
+                # Skip malformed chunks
+                continue
+
+    async def __anext__(self) -> StreamChunk:
+        """Get next stream chunk (async version)."""
+        import json
+
+        if not self._is_async:
+            raise TypeError("Use __next__ for sync iterators")
+
+        while True:
+            try:
+                line = await self._iterator.__anext__()
+            except StopAsyncIteration:
+                raise
+
+            if not line or not line.strip():
+                continue
+
+            if not line.startswith("data: "):
+                continue
+
+            data_str = line[6:]
+
+            if data_str.strip() == "[DONE]":
+                raise StopAsyncIteration
+
+            try:
+                data = json.loads(data_str)
+                return StreamChunk.from_sse_data(data)
+            except json.JSONDecodeError:
+                continue
+
+
 @dataclass
 class TextGenerationOutput:
     """
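To make the new data model concrete, the snippet below shows how an SSE `data:` payload maps onto `StreamChunk` via `from_sse_data`, exactly as defined in the hunk above. The payloads are invented examples in the OpenAI-style shape the parser expects, not output captured from the service.

```python
# Illustrative only: invented OpenAI-style SSE payloads parsed with the 0.5.9 helpers.
from opengradient.types import StreamChunk

# A mid-stream chunk: delta text, no finish_reason, no usage.
sse_payload = {
    "model": "claude-3.5-haiku",
    "choices": [
        {
            "index": 0,
            "delta": {"role": "assistant", "content": "Clouds form when"},
            "finish_reason": None,
        }
    ],
}

chunk = StreamChunk.from_sse_data(sse_payload)
assert chunk.choices[0].delta.content == "Clouds form when"
assert chunk.is_final is False  # no finish_reason and no usage yet

# The final chunk carries a finish_reason and usage, which flips is_final to True.
final_payload = {
    "model": "claude-3.5-haiku",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
    "usage": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
}

final_chunk = StreamChunk.from_sse_data(final_payload)
assert final_chunk.is_final is True
assert final_chunk.usage.total_tokens == 46
```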
--- /dev/null
+++ opengradient-0.5.9/src/opengradient/x402_auth.py
@@ -0,0 +1,60 @@
+import httpx
+import typing
+import logging
+
+from x402.clients.base import x402Client
+from x402.types import x402PaymentRequiredResponse, PaymentRequirements
+
+
+class X402Auth(httpx.Auth):
+    """Auth class for handling x402 payment requirements."""
+
+    def __init__(
+        self,
+        account: typing.Any,
+        max_value: typing.Optional[int] = None,
+        payment_requirements_selector: typing.Optional[
+            typing.Callable[
+                [
+                    list[PaymentRequirements],
+                    typing.Optional[str],
+                    typing.Optional[str],
+                    typing.Optional[int],
+                ],
+                PaymentRequirements,
+            ]
+        ] = None,
+    ):
+        self.x402_client = x402Client(
+            account,
+            max_value=max_value,
+            payment_requirements_selector=payment_requirements_selector,  # type: ignore
+        )
+
+    async def async_auth_flow(
+        self, request: httpx.Request
+    ) -> typing.AsyncGenerator[httpx.Request, httpx.Response]:
+        response = yield request
+
+        if response.status_code == 402:
+            try:
+                await response.aread()
+                data = response.json()
+
+                payment_response = x402PaymentRequiredResponse(**data)
+
+                selected_requirements = self.x402_client.select_payment_requirements(
+                    payment_response.accepts
+                )
+
+                payment_header = self.x402_client.create_payment_header(
+                    selected_requirements, payment_response.x402_version
+                )
+
+                request.headers["X-Payment"] = payment_header
+                request.headers["Access-Control-Expose-Headers"] = "X-Payment-Response"
+                yield request
+
+            except Exception as e:
+                logging.error(f"X402Auth: Error handling payment: {e}")
+                return
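`X402Auth` is a custom `httpx.Auth` implementation: the first request goes out unchanged, and if the server answers 402 Payment Required, the auth flow builds an `X-Payment` header with the x402 client and replays the request once. A hedged wiring sketch follows; the wallet is a throwaway account, the base URL is the default server from defaults.py, the model name is a placeholder, and whether the request actually settles depends on the wallet being funded on the OpenGradient network.

```python
# Hypothetical wiring sketch for X402Auth; account, model, and outcome are illustrative.
import asyncio

import httpx
from eth_account import Account

from opengradient.x402_auth import X402Auth


async def main() -> None:
    wallet = Account.create()  # throwaway LocalAccount; a funded wallet is needed in practice

    async with httpx.AsyncClient(
        base_url="https://llmogevm.opengradient.ai",  # default server URL from defaults.py
        auth=X402Auth(account=wallet),                # retries the request once after a 402
    ) as client:
        response = await client.post(
            "/v1/chat/completions",
            json={"model": "example-model", "messages": [], "stream": False},
        )
        print(response.status_code)


if __name__ == "__main__":
    asyncio.run(main())
```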
--- opengradient-0.5.8/PKG-INFO
+++ opengradient-0.5.9/src/opengradient.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.5.8
+Version: 0.5.9
 Summary: Python SDK for OpenGradient decentralized model management & inference services
 Author-email: OpenGradient <kyle@vannalabs.ai>
 License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.32.3
 Requires-Dist: langchain>=0.3.7
 Requires-Dist: openai>=1.58.1
 Requires-Dist: pydantic>=2.9.2
-Requires-Dist: og-test-x402==0.0.
+Requires-Dist: og-test-x402==0.0.9
 Dynamic: license-file

 # OpenGradient Python SDK
--- opengradient-0.5.8/src/opengradient.egg-info/SOURCES.txt
+++ opengradient-0.5.9/src/opengradient.egg-info/SOURCES.txt
@@ -9,6 +9,7 @@ src/opengradient/defaults.py
 src/opengradient/exceptions.py
 src/opengradient/types.py
 src/opengradient/utils.py
+src/opengradient/x402_auth.py
 src/opengradient.egg-info/PKG-INFO
 src/opengradient.egg-info/SOURCES.txt
 src/opengradient.egg-info/dependency_links.txt