opengradient 0.5.7__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opengradient-0.5.7/src/opengradient.egg-info → opengradient-0.5.9}/PKG-INFO +6 -3
- {opengradient-0.5.7 → opengradient-0.5.9}/README.md +4 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/pyproject.toml +2 -3
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/__init__.py +34 -6
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/cli.py +155 -55
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/client.py +429 -146
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/defaults.py +3 -1
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/llm/og_langchain.py +6 -1
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/types.py +229 -11
- opengradient-0.5.9/src/opengradient/x402_auth.py +60 -0
- {opengradient-0.5.7 → opengradient-0.5.9/src/opengradient.egg-info}/PKG-INFO +6 -3
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient.egg-info/SOURCES.txt +1 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient.egg-info/requires.txt +1 -2
- {opengradient-0.5.7 → opengradient-0.5.9}/LICENSE +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/setup.cfg +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/abi/InferencePrecompile.abi +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/abi/PriceHistoryInference.abi +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/abi/WorkflowScheduler.abi +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/abi/inference.abi +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/account.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/alphasense/__init__.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/alphasense/read_workflow_tool.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/alphasense/run_model_tool.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/alphasense/types.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/bin/PriceHistoryInference.bin +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/exceptions.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/llm/__init__.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/llm/og_openai.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/proto/__init__.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/proto/infer.proto +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/proto/infer_pb2_grpc.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/utils.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/workflow_models/__init__.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/workflow_models/constants.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/workflow_models/types.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/workflow_models/utils.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/workflow_models/workflow_models.py +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient.egg-info/dependency_links.txt +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient.egg-info/entry_points.txt +0 -0
- {opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient.egg-info/top_level.txt +0 -0
{opengradient-0.5.7/src/opengradient.egg-info → opengradient-0.5.9}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.5.7
+Version: 0.5.9
 Summary: Python SDK for OpenGradient decentralized model management & inference services
 Author-email: OpenGradient <kyle@vannalabs.ai>
 License-Expression: MIT
@@ -23,8 +23,7 @@ Requires-Dist: requests>=2.32.3
 Requires-Dist: langchain>=0.3.7
 Requires-Dist: openai>=1.58.1
 Requires-Dist: pydantic>=2.9.2
-Requires-Dist: og-test-x402==0.0.
-Requires-Dist: x402==0.2.1
+Requires-Dist: og-test-x402==0.0.9
 Dynamic: license-file
 
 # OpenGradient Python SDK
@@ -133,6 +132,10 @@ For comprehensive documentation, API reference, and examples, visit:
 - [OpenGradient Documentation](https://docs.opengradient.ai/)
 - [API Reference](https://docs.opengradient.ai/api_reference/python_sdk/)
 
+### Claude Code Users
+
+If you use [Claude Code](https://claude.ai/code), copy [docs/CLAUDE_SDK_USERS.md](docs/CLAUDE_SDK_USERS.md) to your project's `CLAUDE.md` to help Claude assist you with OpenGradient SDK development.
+
 ## Support
 
 - Run `opengradient --help` for CLI command reference
```
{opengradient-0.5.7 → opengradient-0.5.9}/README.md

```diff
@@ -104,6 +104,10 @@ For comprehensive documentation, API reference, and examples, visit:
 - [OpenGradient Documentation](https://docs.opengradient.ai/)
 - [API Reference](https://docs.opengradient.ai/api_reference/python_sdk/)
 
+### Claude Code Users
+
+If you use [Claude Code](https://claude.ai/code), copy [docs/CLAUDE_SDK_USERS.md](docs/CLAUDE_SDK_USERS.md) to your project's `CLAUDE.md` to help Claude assist you with OpenGradient SDK development.
+
 ## Support
 
 - Run `opengradient --help` for CLI command reference
```
{opengradient-0.5.7 → opengradient-0.5.9}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opengradient"
-version = "0.5.7"
+version = "0.5.9"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "kyle@vannalabs.ai"}]
 readme = "README.md"
@@ -29,8 +29,7 @@ dependencies = [
     "langchain>=0.3.7",
     "openai>=1.58.1",
     "pydantic>=2.9.2",
-    "og-test-x402==0.0.
-    "x402==0.2.1"
+    "og-test-x402==0.0.9",
 ]
 
 [project.scripts]
```
{opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/__init__.py

```diff
@@ -17,9 +17,11 @@ from .types import (
     InferenceResult,
     LlmInferenceMode,
     TextGenerationOutput,
+    TextGenerationStream,
     ModelOutput,
     ModelRepository,
     FileUploadResult,
+    x402SettlementMode,
 )
 
 from . import llm, alphasense
@@ -47,10 +49,25 @@ def new_client(
         contract_address: Optional inference contract address
     """
 
-    return Client(
+    return Client(
+        email=email,
+        password=password,
+        private_key=private_key,
+        rpc_url=rpc_url,
+        api_url=api_url,
+        contract_address=contract_address,
+        **kwargs,
+    )
 
 
-def init(
+def init(
+    email: str,
+    password: str,
+    private_key: str,
+    rpc_url=DEFAULT_RPC_URL,
+    api_url=DEFAULT_API_URL,
+    contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS,
+):
     """Initialize the OpenGradient SDK with authentication and network settings.
 
     Args:
@@ -62,8 +79,10 @@ def init(email: str, password: str, private_key: str, rpc_url=DEFAULT_RPC_URL, a
         contract_address: Optional inference contract address
     """
     global _client
-
-    _client = Client(
+
+    _client = Client(
+        private_key=private_key, rpc_url=rpc_url, api_url=api_url, email=email, password=password, contract_address=contract_address
+    )
     return _client
 
 
```
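Functionally, nothing changes in `init()` and `new_client()` here; the hunks above only reflow their signatures and `Client(...)` calls onto multiple lines. For orientation, a minimal initialization sketch with placeholder credentials, leaving the RPC, API, and contract arguments at the defaults named in the signature:

```python
import opengradient as og

# Placeholder credentials -- substitute real account values.
client = og.init(
    email="user@example.com",
    password="example-password",
    private_key="0x<hex-encoded-private-key>",
)

# new_client() takes the same arguments (plus **kwargs) but returns a Client
# directly instead of storing it in the module-level global used by
# og.llm_completion and og.llm_chat.
standalone = og.new_client(
    email="user@example.com",
    password="example-password",
    private_key="0x<hex-encoded-private-key>",
)
```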
```diff
@@ -162,6 +181,7 @@ def llm_completion(
     stop_sequence: Optional[List[str]] = None,
     temperature: float = 0.0,
     max_retries: Optional[int] = None,
+    x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
 ) -> TextGenerationOutput:
     """Generate text completion using an LLM.
 
@@ -173,6 +193,7 @@ def llm_completion(
         stop_sequence: Optional list of sequences where generation should stop
         temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
         max_retries: Maximum number of retries for failed transactions
+        x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode)
 
     Returns:
         TextGenerationOutput: Transaction hash and generated text
@@ -190,6 +211,7 @@ def llm_completion(
         stop_sequence=stop_sequence,
         temperature=temperature,
         max_retries=max_retries,
+        x402_settlement_mode=x402_settlement_mode
     )
 
 
```
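The module-level `llm_completion` wrapper now forwards an `x402_settlement_mode` as well. A usage sketch, assuming the SDK has been initialized as above and that the wrapper's earlier parameters (`model_cid`, `prompt`, `max_tokens`) keep the names used in the client and CLI code later in this diff; the model id and prompt are illustrative:

```python
import opengradient as og
from opengradient import x402SettlementMode

result = og.llm_completion(
    model_cid="anthropic/claude-3.5-haiku",
    prompt="Summarize x402 settlement modes in one sentence.",
    max_tokens=100,
    temperature=0.0,
    # Defaults to SETTLE_BATCH; SETTLE and SETTLE_METADATA are the other modes
    # listed by the CLI help text further down in this diff.
    x402_settlement_mode=x402SettlementMode.SETTLE,
)
print(result.transaction_hash)
print(result.completion_output)
```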
```diff
@@ -203,7 +225,9 @@ def llm_chat(
     tools: Optional[List[Dict]] = None,
     tool_choice: Optional[str] = None,
     max_retries: Optional[int] = None,
-) -> TextGenerationOutput:
+    x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
+    stream: Optional[bool] = False,
+) -> Union[TextGenerationOutput, TextGenerationStream]:
     """Have a chat conversation with an LLM.
 
     Args:
@@ -216,9 +240,11 @@ def llm_chat(
         tools: Optional list of tools the model can use
         tool_choice: Optional specific tool to use
         max_retries: Maximum number of retries for failed transactions
+        x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode)
+        stream: Optional boolean to enable streaming
 
     Returns:
-        TextGenerationOutput
+        TextGenerationOutput or TextGenerationStream
 
     Raises:
         RuntimeError: If SDK is not initialized
@@ -235,6 +261,8 @@ def llm_chat(
         tools=tools,
         tool_choice=tool_choice,
         max_retries=max_retries,
+        x402_settlement_mode=x402_settlement_mode,
+        stream=stream,
     )
 
 
```
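The chat wrapper gains the same settlement argument plus `stream`, and its return type widens to `Union[TextGenerationOutput, TextGenerationStream]`. A consumption sketch, modeled on the chunk fields (`choices[0].delta.content`, `is_final`, `usage`) that the CLI's streaming printer added later in this diff relies on; the model id, message payload, and `max_tokens` keyword are illustrative assumptions:

```python
import opengradient as og

messages = [{"role": "user", "content": "How are clouds formed?"}]

# Non-streaming: a single TextGenerationOutput.
reply = og.llm_chat(
    model_cid="anthropic/claude-3.5-haiku",
    messages=messages,
    max_tokens=250,
)
print(reply.finish_reason, reply.chat_output)

# Streaming: a TextGenerationStream, consumed chunk by chunk.
stream = og.llm_chat(
    model_cid="anthropic/claude-3.5-haiku",
    messages=messages,
    max_tokens=250,
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
    if chunk.is_final and chunk.usage:
        print(f"\n[total tokens: {chunk.usage.total_tokens}]")
```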
{opengradient-0.5.7 → opengradient-0.5.9}/src/opengradient/cli.py

```diff
@@ -6,6 +6,7 @@ import logging
 import webbrowser
 from pathlib import Path
 from typing import Dict, List, Optional
+import sys
 
 import click
 
@@ -80,6 +81,7 @@ x402SettlementModes = {
     "settle-metadata": x402SettlementMode.SETTLE_METADATA,
 }
 
+
 def initialize_config(ctx):
     """Interactively initialize OpenGradient config"""
     if ctx.obj:  # Check if config data already exists
@@ -140,7 +142,7 @@ def cli(ctx):
     openai_api_key = ctx.obj.get("openai_api_key")
     anthropic_api_key = ctx.obj.get("anthropic_api_key")
     google_api_key = ctx.obj.get("google_api_key")
-
+
     ctx.obj["client"] = Client(
         private_key=ctx.obj["private_key"],
         rpc_url=DEFAULT_RPC_URL,
@@ -219,9 +221,9 @@ def clear(ctx):
 def set_api_key(ctx, provider: str, key: str):
     """
     Set API key for external LLM providers.
-
+
     Example usage:
-
+
     \b
     opengradient config set-api-key --provider openai --key ..
     opengradient config set-api-key --provider anthropic --key ...
@@ -230,7 +232,7 @@ def set_api_key(ctx, provider: str, key: str):
     config_key = f"{provider}_api_key"
     ctx.obj[config_key] = key
     save_og_config(ctx)
-
+
     click.secho(f"✅ API key for {provider} has been set", fg="green")
     click.echo("You can now use models from this provider in completion and chat commands.")
 
@@ -241,9 +243,9 @@ def set_api_key(ctx, provider: str, key: str):
 def remove_api_key(ctx, provider: str):
     """
     Remove API key for an external LLM provider.
-
+
     Example usage:
-
+
     \b
     opengradient config remove-api-key --provider openai
     """
@@ -417,52 +419,68 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path
     help="Model identifier (local model from LLM enum or external model like 'gpt-4o', 'gemini-2.5-flash-lite', etc.)",
 )
 @click.option(
-    "--mode",
-    "inference_mode",
-    type=click.Choice(LlmInferenceModes.keys()),
-    default="VANILLA",
-    help="Inference mode (only applies to local models, default: VANILLA)"
+    "--mode",
+    "inference_mode",
+    type=click.Choice(LlmInferenceModes.keys()),
+    default="VANILLA",
+    help="Inference mode (only applies to local models, default: VANILLA)",
 )
 @click.option("--prompt", "-p", required=True, help="Input prompt for the LLM completion")
 @click.option("--max-tokens", type=int, default=100, help="Maximum number of tokens for LLM completion output")
 @click.option("--stop-sequence", multiple=True, help="Stop sequences for LLM")
 @click.option("--temperature", type=float, default=0.0, help="Temperature for LLM inference (0.0 to 1.0)")
 @click.option("--local", is_flag=True, help="Force use of local model even if not in LLM enum")
-@click.option(
+@click.option(
+    "--x402-settlement-mode",
+    "x402_settlement_mode",
+    type=click.Choice(x402SettlementModes.keys()),
+    default="settle-batch",
+    help="Settlement mode for x402 payments: settle (hashes only), settle-batch (batched, default), settle-metadata (full data)",
+)
 @click.pass_context
-def completion(
+def completion(
+    ctx,
+    model_cid: str,
+    inference_mode: str,
+    x402_settlement_mode: str,
+    prompt: str,
+    max_tokens: int,
+    stop_sequence: List[str],
+    temperature: float,
+    local: bool,
+):
     """
     Run completion inference on an LLM model (local or external).
 
-    This command supports both local OpenGradient models and external providers
-    (OpenAI, Anthropic, Google, etc.). For external models, make sure to set
+    This command supports both local OpenGradient models and external providers
+    (OpenAI, Anthropic, Google, etc.). For external models, make sure to set
     the appropriate API key using 'opengradient config set-api-key'.
 
     Example usage:
 
     \b
-    #
-    opengradient completion --model
-
+    # TEE model
+    opengradient completion --model anthropic/claude-3.5-haiku --prompt "Hello, how are you?" --max-tokens 50
+
     # External OpenAI model
     opengradient completion --model gpt-4o --prompt "Translate to French: Hello world" --max-tokens 50
-
+
     # External Anthropic model
-    opengradient completion --model claude-haiku-4-5-20251001--prompt "Write a haiku about coding" --max-tokens 100
-
+    opengradient completion --model claude-haiku-4-5-20251001 --prompt "Write a haiku about coding" --max-tokens 100
+
     # External Google model
     opengradient completion --model gemini-2.5-flash-lite --prompt "Explain quantum computing" --max-tokens 200
     """
     client: Client = ctx.obj["client"]
-
+
     try:
         is_local = local or model_cid in [llm.value for llm in LLM]
-
+
         if is_local:
             click.echo(f'Running LLM completion inference for local model "{model_cid}"\n')
         else:
             click.echo(f'Running LLM completion inference for external model "{model_cid}"\n')
-
+
         completion_output = client.llm_completion(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
@@ -475,7 +493,7 @@ def completion(ctx, model_cid: str, inference_mode: str, x402_settlement_mode: s
         )
 
         print_llm_completion_result(model_cid, completion_output.transaction_hash, completion_output.completion_output, is_local)
-
+
     except Exception as e:
         click.echo(f"Error running LLM completion: {str(e)}")
 
@@ -485,7 +503,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     click.echo("──────────────────────────────────────")
     click.echo("Model: ", nl=False)
     click.secho(model_cid, fg="cyan", bold=True)
-
+
     if is_local and tx_hash != "external":
         click.echo("Transaction hash: ", nl=False)
         click.secho(tx_hash, fg="cyan", bold=True)
@@ -495,7 +513,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     else:
         click.echo("Source: ", nl=False)
         click.secho("External Provider", fg="cyan", bold=True)
-
+
     click.echo("──────────────────────────────────────")
     click.secho("LLM Output:", fg="yellow", bold=True)
     click.echo()
@@ -512,11 +530,11 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     help="Model identifier (local model from LLM enum or external model like 'gpt-4o', 'gemini-2.5-flash-lite', etc.)",
 )
 @click.option(
-    "--mode",
-    "inference_mode",
-    type=click.Choice(LlmInferenceModes.keys()),
-    default="VANILLA",
-    help="Inference mode (only applies to local models, default: VANILLA)"
+    "--mode",
+    "inference_mode",
+    type=click.Choice(LlmInferenceModes.keys()),
+    default="VANILLA",
+    help="Inference mode (only applies to local models, default: VANILLA)",
 )
 @click.option("--messages", type=str, required=False, help="Input messages for the chat inference in JSON format")
 @click.option(
@@ -530,14 +548,17 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
 @click.option("--temperature", type=float, default=0.0, help="Temperature for LLM inference (0.0 to 1.0)")
 @click.option("--tools", type=str, default=None, help="Tool configurations in JSON format")
 @click.option(
-    "--tools-file",
-    type=click.Path(exists=True, path_type=Path),
-    required=False,
-    help="Path to JSON file containing tool configurations"
+    "--tools-file", type=click.Path(exists=True, path_type=Path), required=False, help="Path to JSON file containing tool configurations"
 )
 @click.option("--tool-choice", type=str, default="", help="Specific tool choice for the LLM")
 @click.option("--local", is_flag=True, help="Force use of local model even if not in LLM enum")
-@click.option(
+@click.option(
+    "--x402-settlement-mode",
+    type=click.Choice(x402SettlementModes.keys()),
+    default="settle-batch",
+    help="Settlement mode for x402 payments: settle (hashes only), settle-batch (batched, default), settle-metadata (full data)",
+)
+@click.option("--stream", is_flag=True, default=False, help="Stream the output from the LLM")
 @click.pass_context
 def chat(
     ctx,
@@ -553,6 +574,7 @@ def chat(
     tool_choice: Optional[str],
     x402_settlement_mode: Optional[str],
     local: bool,
+    stream: bool,
 ):
     """
     Run chat inference on an LLM model (local or external).
@@ -563,25 +585,28 @@ def chat(
     Example usage:
 
     \b
-    #
-    opengradient chat --model
-
+    # TEE model
+    opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"hello"}]' --max-tokens 50
+
     # External OpenAI model with tools
     opengradient chat --model gpt-4o --messages-file messages.json --tools-file tools.json --max-tokens 200
-
+
     # External Anthropic model
     opengradient chat --model claude-haiku-4-5-20251001 --messages '[{"role":"user","content":"Write a poem"}]' --max-tokens 100
+
+    # Stream output
+    opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream
     """
     client: Client = ctx.obj["client"]
-
+
     try:
         is_local = local or model_cid in [llm.value for llm in LLM]
-
+
         if is_local:
             click.echo(f'Running LLM chat inference for local model "{model_cid}"\n')
         else:
             click.echo(f'Running LLM chat inference for external model "{model_cid}"\n')
-
+
         # Parse messages
         if not messages and not messages_file:
             click.echo("Must specify either messages or messages-file")
@@ -637,7 +662,7 @@ def chat(
         if not tools and not tools_file:
             parsed_tools = None
 
-
+        result = client.llm_chat(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
             messages=messages,
@@ -648,16 +673,17 @@ def chat(
             tool_choice=tool_choice,
             local_model=local,
             x402_settlement_mode=x402_settlement_mode,
+            stream=stream,
         )
 
-
-
-
-
-
-
-
-
+        # Handle response based on streaming flag
+        if stream:
+            print_streaming_chat_result(model_cid, result, is_local)
+        else:
+            print_llm_chat_result(
+                model_cid, result.transaction_hash, result.finish_reason, result.chat_output, is_local
+            )
+
 
     except Exception as e:
         click.echo(f"Error running LLM chat inference: {str(e)}")
 
@@ -667,7 +693,7 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_loc
     click.echo("──────────────────────────────────────")
     click.echo("Model: ", nl=False)
     click.secho(model_cid, fg="cyan", bold=True)
-
+
     if is_local and tx_hash != "external":
         click.echo("Transaction hash: ", nl=False)
         click.secho(tx_hash, fg="cyan", bold=True)
@@ -677,7 +703,7 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_loc
     else:
         click.echo("Source: ", nl=False)
         click.secho("External Provider", fg="cyan", bold=True)
-
+
     click.echo("──────────────────────────────────────")
     click.secho("Finish Reason: ", fg="yellow", bold=True)
     click.echo()
@@ -691,6 +717,80 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_loc
     click.echo()
 
 
+def print_streaming_chat_result(model_cid, stream, is_local=True):
+    """Handle streaming chat response with typed chunks - prints in real-time"""
+    click.secho("🌊 Streaming LLM Chat", fg="green", bold=True)
+    click.echo("──────────────────────────────────────")
+    click.echo("Model: ", nl=False)
+    click.secho(model_cid, fg="cyan", bold=True)
+
+    if is_local:
+        click.echo("Source: ", nl=False)
+        click.secho("OpenGradient TEE", fg="cyan", bold=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
+    click.echo("──────────────────────────────────────")
+    click.secho("Response:", fg="yellow", bold=True)
+    click.echo()
+
+    try:
+        content_parts = []
+        chunk_count = 0
+
+        for chunk in stream:
+            chunk_count += 1
+
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                sys.stdout.write(content)
+                sys.stdout.flush()
+                content_parts.append(content)
+
+            # Handle tool calls
+            if chunk.choices[0].delta.tool_calls:
+                sys.stdout.write("\n")
+                sys.stdout.flush()
+                click.secho("Tool Calls:", fg="yellow", bold=True)
+                for tool_call in chunk.choices[0].delta.tool_calls:
+                    click.echo(f" Function: {tool_call['function']['name']}")
+                    click.echo(f" Arguments: {tool_call['function']['arguments']}")
+
+            # Print final info when stream completes
+            if chunk.is_final:
+                sys.stdout.write("\n\n")
+                sys.stdout.flush()
+                click.echo("──────────────────────────────────────")
+
+                if chunk.usage:
+                    click.secho("Token Usage:", fg="cyan")
+                    click.echo(f" Prompt tokens: {chunk.usage.prompt_tokens}")
+                    click.echo(f" Completion tokens: {chunk.usage.completion_tokens}")
+                    click.echo(f" Total tokens: {chunk.usage.total_tokens}")
+                    click.echo()
+
+                if chunk.choices[0].finish_reason:
+                    click.echo("Finish reason: ", nl=False)
+                    click.secho(chunk.choices[0].finish_reason, fg="green")
+
+                click.echo("──────────────────────────────────────")
+                click.echo(f"Chunks received: {chunk_count}")
+                click.echo(f"Content length: {len(''.join(content_parts))} characters")
+                click.echo()
+
+    except KeyboardInterrupt:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho("Stream interrupted by user", fg="yellow")
+        click.echo()
+    except Exception as e:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho(f"Streaming error: {str(e)}", fg="red", bold=True)
+        click.echo()
+
+
 @cli.command()
 def create_account():
     """Create a new test account for OpenGradient inference and model management"""
```