opengradient 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
opengradient/__init__.py CHANGED
@@ -17,14 +17,19 @@ from .types import (
     InferenceResult,
     LlmInferenceMode,
     TextGenerationOutput,
+    TextGenerationStream,
     ModelOutput,
     ModelRepository,
     FileUploadResult,
     x402SettlementMode,
 )
+from .alpha import _AlphaNamespace
 
 from . import llm, alphasense
 
+# Module-level alpha namespace for workflow/ML execution features (Alpha Testnet only)
+alpha = _AlphaNamespace()
+
 _client = None
 
 
@@ -225,7 +230,8 @@ def llm_chat(
     tool_choice: Optional[str] = None,
     max_retries: Optional[int] = None,
     x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH,
-) -> TextGenerationOutput:
+    stream: Optional[bool] = False,
+) -> Union[TextGenerationOutput, TextGenerationStream]:
     """Have a chat conversation with an LLM.
 
     Args:
@@ -239,9 +245,10 @@ def llm_chat(
         tool_choice: Optional specific tool to use
         max_retries: Maximum number of retries for failed transactions
         x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode)
+        stream: Optional boolean to enable streaming
 
     Returns:
-        TextGenerationOutput
+        TextGenerationOutput or TextGenerationStream
 
     Raises:
         RuntimeError: If SDK is not initialized
@@ -258,7 +265,8 @@ def llm_chat(
         tools=tools,
         tool_choice=tool_choice,
         max_retries=max_retries,
-        x402_settlement_mode=x402_settlement_mode
+        x402_settlement_mode=x402_settlement_mode,
+        stream=stream,
     )
 
 
@@ -280,93 +288,6 @@ def list_files(model_name: str, version: str) -> List[Dict]:
     return _client.list_files(model_name, version)
 
 
-def new_workflow(
-    model_cid: str,
-    input_query: HistoricalInputQuery,
-    input_tensor_name: str,
-    scheduler_params: Optional[SchedulerParams] = None,
-) -> str:
-    """
-    Deploy a new workflow contract with the specified parameters.
-
-    This function deploys a new workflow contract and optionally registers it with
-    the scheduler for automated execution. If scheduler_params is not provided,
-    the workflow will be deployed without automated execution scheduling.
-
-    Args:
-        model_cid: IPFS CID of the model
-        input_query: HistoricalInputQuery containing query parameters
-        input_tensor_name: Name of the input tensor
-        scheduler_params: Optional scheduler configuration as SchedulerParams instance
-            If not provided, the workflow will be deployed without scheduling.
-
-    Returns:
-        str: Deployed contract address. If scheduler_params was provided, the workflow
-            will be automatically executed according to the specified schedule.
-    """
-    if _client is None:
-        raise RuntimeError("OpenGradient client not initialized. Call og.init(...) first.")
-
-    return _client.new_workflow(
-        model_cid=model_cid, input_query=input_query, input_tensor_name=input_tensor_name, scheduler_params=scheduler_params
-    )
-
-
-def read_workflow_result(contract_address: str) -> ModelOutput:
-    """
-    Reads the latest inference result from a deployed workflow contract.
-
-    This function retrieves the most recent output from a deployed model executor contract.
-    It includes built-in retry logic to handle blockchain state delays.
-
-    Args:
-        contract_address (str): Address of the deployed workflow contract
-
-    Returns:
-        Dict[str, Union[str, Dict]]: A dictionary containing:
-            - status: "success" or "error"
-            - result: The model output data if successful
-            - error: Error message if status is "error"
-
-    Raises:
-        RuntimeError: If OpenGradient client is not initialized
-    """
-    if _client is None:
-        raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
-    return _client.read_workflow_result(contract_address)
-
-
-def run_workflow(contract_address: str) -> ModelOutput:
-    """
-    Executes the workflow by calling run() on the contract to pull latest data and perform inference.
-
-    Args:
-        contract_address (str): Address of the deployed workflow contract
-
-    Returns:
-        Dict[str, Union[str, Dict]]: Status of the run operation
-    """
-    if _client is None:
-        raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
-    return _client.run_workflow(contract_address)
-
-
-def read_workflow_history(contract_address: str, num_results: int) -> List[ModelOutput]:
-    """
-    Gets historical inference results from a workflow contract.
-
-    Args:
-        contract_address (str): Address of the deployed workflow contract
-        num_results (int): Number of historical results to retrieve
-
-    Returns:
-        List[Dict]: List of historical inference results
-    """
-    if _client is None:
-        raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
-    return _client.read_workflow_history(contract_address, num_results)
-
-
 __all__ = [
     "list_files",
     "login",
@@ -379,10 +300,7 @@ __all__ = [
     "init",
     "LLM",
     "TEE_LLM",
-    "new_workflow",
-    "read_workflow_result",
-    "run_workflow",
-    "read_workflow_history",
+    "alpha",
     "InferenceMode",
     "LlmInferenceMode",
     "HistoricalInputQuery",
opengradient/alpha.py ADDED
@@ -0,0 +1,375 @@
+"""
+Alpha Testnet features for OpenGradient SDK.
+
+This module contains features that are only available on the Alpha Testnet,
+including workflow management and ML model execution.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Optional
+
+from web3 import Web3
+from web3.exceptions import ContractLogicError
+
+from .defaults import DEFAULT_SCHEDULER_ADDRESS
+from .types import HistoricalInputQuery, ModelOutput, SchedulerParams
+from .utils import convert_array_to_model_output
+
+if TYPE_CHECKING:
+    from .client import Client
+
+# How much time we wait for txn to be included in chain
+INFERENCE_TX_TIMEOUT = 120
+REGULAR_TX_TIMEOUT = 30
+
+
+class Alpha:
+    """
+    Alpha Testnet features namespace.
+
+    This class provides access to features that are only available on the Alpha Testnet,
+    including workflow deployment and execution.
+
+    Usage:
+        client = og.new_client(...)
+        result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name)
+    """
+
+    def __init__(self, client: "Client"):
+        self._client = client
+
+    def _get_abi(self, abi_name: str) -> dict:
+        """Returns the ABI for the requested contract."""
+        abi_path = Path(__file__).parent / "abi" / abi_name
+        with open(abi_path, "r") as f:
+            return json.load(f)
+
+    def _get_bin(self, bin_name: str) -> str:
+        """Returns the bin for the requested contract."""
+        bin_path = Path(__file__).parent / "bin" / bin_name
+        with open(bin_path, "r", encoding="utf-8") as f:
+            bytecode = f.read().strip()
+        if not bytecode.startswith("0x"):
+            bytecode = "0x" + bytecode
+        return bytecode
+
+    def new_workflow(
+        self,
+        model_cid: str,
+        input_query: HistoricalInputQuery,
+        input_tensor_name: str,
+        scheduler_params: Optional[SchedulerParams] = None,
+    ) -> str:
+        """
+        Deploy a new workflow contract with the specified parameters.
+
+        This function deploys a new workflow contract on OpenGradient that connects
+        an AI model with its required input data. When executed, the workflow will fetch
+        the specified model, evaluate the input query to get data, and perform inference.
+
+        The workflow can be set to execute manually or automatically via a scheduler.
+
+        Args:
+            model_cid (str): CID of the model to be executed from the Model Hub
+            input_query (HistoricalInputQuery): Input definition for the model inference,
+                will be evaluated at runtime for each inference
+            input_tensor_name (str): Name of the input tensor expected by the model
+            scheduler_params (Optional[SchedulerParams]): Scheduler configuration for automated execution:
+                - frequency: Execution frequency in seconds
+                - duration_hours: How long the schedule should live for
+
+        Returns:
+            str: Deployed contract address. If scheduler_params was provided, the workflow
+                will be automatically executed according to the specified schedule.
+
+        Raises:
+            Exception: If transaction fails or gas estimation fails
+        """
+        from .client import run_with_retry
+
+        # Get contract ABI and bytecode
+        abi = self._get_abi("PriceHistoryInference.abi")
+        bytecode = self._get_bin("PriceHistoryInference.bin")
+
+        def deploy_transaction():
+            contract = self._client._blockchain.eth.contract(abi=abi, bytecode=bytecode)
+            query_tuple = input_query.to_abi_format()
+            constructor_args = [model_cid, input_tensor_name, query_tuple]
+
+            try:
+                # Estimate gas needed
+                estimated_gas = contract.constructor(*constructor_args).estimate_gas(
+                    {"from": self._client._wallet_account.address}
+                )
+                gas_limit = int(estimated_gas * 1.2)
+            except Exception as e:
+                print(f"⚠️ Gas estimation failed: {str(e)}")
+                gas_limit = 5000000  # Conservative fallback
+                print(f"📊 Using fallback gas limit: {gas_limit}")
+
+            transaction = contract.constructor(*constructor_args).build_transaction(
+                {
+                    "from": self._client._wallet_account.address,
+                    "nonce": self._client._blockchain.eth.get_transaction_count(
+                        self._client._wallet_account.address, "pending"
+                    ),
+                    "gas": gas_limit,
+                    "gasPrice": self._client._blockchain.eth.gas_price,
+                    "chainId": self._client._blockchain.eth.chain_id,
+                }
+            )
+
+            signed_txn = self._client._wallet_account.sign_transaction(transaction)
+            tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction)
+
+            tx_receipt = self._client._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=60)
+
+            if tx_receipt["status"] == 0:
+                raise Exception(f"❌ Contract deployment failed, transaction hash: {tx_hash.hex()}")
+
+            return tx_receipt.contractAddress
+
+        contract_address = run_with_retry(deploy_transaction)
+
+        if scheduler_params:
+            self._register_with_scheduler(contract_address, scheduler_params)
+
+        return contract_address
+
+    def _register_with_scheduler(self, contract_address: str, scheduler_params: SchedulerParams) -> None:
+        """
+        Register the deployed workflow contract with the scheduler for automated execution.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+            scheduler_params (SchedulerParams): Scheduler configuration containing:
+                - frequency: Execution frequency in seconds
+                - duration_hours: How long to run in hours
+                - end_time: Unix timestamp when scheduling should end
+
+        Raises:
+            Exception: If registration with scheduler fails. The workflow contract will
+                still be deployed and can be executed manually.
+        """
+        scheduler_abi = self._get_abi("WorkflowScheduler.abi")
+
+        # Scheduler contract address
+        scheduler_address = DEFAULT_SCHEDULER_ADDRESS
+        scheduler_contract = self._client._blockchain.eth.contract(address=scheduler_address, abi=scheduler_abi)
+
+        try:
+            # Register the workflow with the scheduler
+            scheduler_tx = scheduler_contract.functions.registerTask(
+                contract_address, scheduler_params.end_time, scheduler_params.frequency
+            ).build_transaction(
+                {
+                    "from": self._client._wallet_account.address,
+                    "gas": 300000,
+                    "gasPrice": self._client._blockchain.eth.gas_price,
+                    "nonce": self._client._blockchain.eth.get_transaction_count(
+                        self._client._wallet_account.address, "pending"
+                    ),
+                    "chainId": self._client._blockchain.eth.chain_id,
+                }
+            )
+
+            signed_scheduler_tx = self._client._wallet_account.sign_transaction(scheduler_tx)
+            scheduler_tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_scheduler_tx.raw_transaction)
+            self._client._blockchain.eth.wait_for_transaction_receipt(scheduler_tx_hash, timeout=REGULAR_TX_TIMEOUT)
+        except Exception as e:
+            print(f"❌ Error registering contract with scheduler: {str(e)}")
+            print(" The workflow contract is still deployed and can be executed manually.")
+
+    def read_workflow_result(self, contract_address: str) -> ModelOutput:
+        """
+        Reads the latest inference result from a deployed workflow contract.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+
+        Returns:
+            ModelOutput: The inference result from the contract
+
+        Raises:
+            ContractLogicError: If the transaction fails
+            Web3Error: If there are issues with the web3 connection or contract interaction
+        """
+        # Get the contract interface
+        contract = self._client._blockchain.eth.contract(
+            address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi")
+        )
+
+        # Get the result
+        result = contract.functions.getInferenceResult().call()
+
+        return convert_array_to_model_output(result)
+
+    def run_workflow(self, contract_address: str) -> ModelOutput:
+        """
+        Triggers the run() function on a deployed workflow contract and returns the result.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+
+        Returns:
+            ModelOutput: The inference result from the contract
+
+        Raises:
+            ContractLogicError: If the transaction fails
+            Web3Error: If there are issues with the web3 connection or contract interaction
+        """
+        # Get the contract interface
+        contract = self._client._blockchain.eth.contract(
+            address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi")
+        )
+
+        # Call run() function
+        nonce = self._client._blockchain.eth.get_transaction_count(self._client._wallet_account.address, "pending")
+
+        run_function = contract.functions.run()
+        transaction = run_function.build_transaction(
+            {
+                "from": self._client._wallet_account.address,
+                "nonce": nonce,
+                "gas": 30000000,
+                "gasPrice": self._client._blockchain.eth.gas_price,
+                "chainId": self._client._blockchain.eth.chain_id,
+            }
+        )
+
+        signed_txn = self._client._wallet_account.sign_transaction(transaction)
+        tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction)
+        tx_receipt = self._client._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT)
+
+        if tx_receipt.status == 0:
+            raise ContractLogicError(f"Run transaction failed. Receipt: {tx_receipt}")
+
+        # Get the inference result from the contract
+        result = contract.functions.getInferenceResult().call()
+
+        return convert_array_to_model_output(result)
+
+    def read_workflow_history(self, contract_address: str, num_results: int) -> List[ModelOutput]:
+        """
+        Gets historical inference results from a workflow contract.
+
+        Retrieves the specified number of most recent inference results from the contract's
+        storage, with the most recent result first.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+            num_results (int): Number of historical results to retrieve
+
+        Returns:
+            List[ModelOutput]: List of historical inference results
+        """
+        contract = self._client._blockchain.eth.contract(
+            address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi")
+        )
+
+        results = contract.functions.getLastInferenceResults(num_results).call()
+        return [convert_array_to_model_output(result) for result in results]
+
+
+class _AlphaNamespace:
+    """
+    Module-level alpha namespace for use with og.init().
+
+    Usage:
+        og.init(...)
+        result = og.alpha.new_workflow(model_cid, input_query, input_tensor_name)
+    """
+
+    def new_workflow(
+        self,
+        model_cid: str,
+        input_query: HistoricalInputQuery,
+        input_tensor_name: str,
+        scheduler_params: Optional[SchedulerParams] = None,
+    ) -> str:
+        """
+        Deploy a new workflow contract with the specified parameters.
+
+        This function deploys a new workflow contract and optionally registers it with
+        the scheduler for automated execution. If scheduler_params is not provided,
+        the workflow will be deployed without automated execution scheduling.
+
+        Args:
+            model_cid: IPFS CID of the model
+            input_query: HistoricalInputQuery containing query parameters
+            input_tensor_name: Name of the input tensor
+            scheduler_params: Optional scheduler configuration as SchedulerParams instance
+                If not provided, the workflow will be deployed without scheduling.
+
+        Returns:
+            str: Deployed contract address. If scheduler_params was provided, the workflow
+                will be automatically executed according to the specified schedule.
+        """
+        from . import _client
+
+        if _client is None:
+            raise RuntimeError("OpenGradient client not initialized. Call og.init(...) first.")
+
+        return _client.alpha.new_workflow(
+            model_cid=model_cid,
+            input_query=input_query,
+            input_tensor_name=input_tensor_name,
+            scheduler_params=scheduler_params,
+        )
+
+    def read_workflow_result(self, contract_address: str) -> ModelOutput:
+        """
+        Reads the latest inference result from a deployed workflow contract.
+
+        This function retrieves the most recent output from a deployed model executor contract.
+        It includes built-in retry logic to handle blockchain state delays.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+
+        Returns:
+            ModelOutput: The inference result from the contract
+
+        Raises:
+            RuntimeError: If OpenGradient client is not initialized
+        """
+        from . import _client
+
+        if _client is None:
+            raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
+        return _client.alpha.read_workflow_result(contract_address)
+
+    def run_workflow(self, contract_address: str) -> ModelOutput:
+        """
+        Executes the workflow by calling run() on the contract to pull latest data and perform inference.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+
+        Returns:
+            ModelOutput: The inference result from the contract
+        """
+        from . import _client
+
+        if _client is None:
+            raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
+        return _client.alpha.run_workflow(contract_address)
+
+    def read_workflow_history(self, contract_address: str, num_results: int) -> List[ModelOutput]:
+        """
+        Gets historical inference results from a workflow contract.
+
+        Args:
+            contract_address (str): Address of the deployed workflow contract
+            num_results (int): Number of historical results to retrieve
+
+        Returns:
+            List[ModelOutput]: List of historical inference results
+        """
+        from . import _client

+        if _client is None:
+            raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
+        return _client.alpha.read_workflow_history(contract_address, num_results)
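
The `deploy_transaction` closure above uses a common web3.py pattern: estimate gas, pad the estimate by 20%, and fall back to a fixed limit when estimation fails. Below is a condensed standalone sketch of just that pattern, assuming a generic `w3` connection and transaction dict rather than the SDK's internals:

```python
from web3 import Web3


def padded_gas_limit(w3: Web3, tx_params: dict, pad: float = 1.2, fallback: int = 5_000_000) -> int:
    """Estimate gas for a transaction, pad it for headroom, and fall back on failure."""
    try:
        # eth_estimateGas simulates the call; pad the result by 20% for safety.
        return int(w3.eth.estimate_gas(tx_params) * pad)
    except Exception:
        # Estimation can revert (e.g. constructor logic fails in simulation);
        # the fallback mirrors the conservative 5,000,000 limit used above.
        return fallback
```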
@@ -64,7 +64,7 @@ def create_read_workflow_tool(
 
     # define runnable
     def read_workflow():
-        output = og.read_workflow_result(contract_address=workflow_contract_address)
+        output = og.alpha.read_workflow_result(contract_address=workflow_contract_address)
         return output_formatter(output)
 
     if tool_type == ToolType.LANGCHAIN:
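
With the tool wiring above switched to `og.alpha.read_workflow_result`, the whole workflow lifecycle now runs through the alpha namespace end to end. A sketch of that deploy/run/read flow follows; the `HistoricalInputQuery` constructor is not part of this diff, so it is left as a stand-in, and the CID and tensor name are placeholders:

```python
import opengradient as og

og.init(private_key="<PRIVATE_KEY>")  # assumed credential parameter

# Build a HistoricalInputQuery per opengradient.types (its fields are not shown in this diff).
input_query = ...

# 1. Deploy a workflow contract; pass SchedulerParams to register automated runs.
address = og.alpha.new_workflow(
    model_cid="Qm...",            # placeholder Model Hub CID
    input_query=input_query,
    input_tensor_name="input",    # placeholder tensor name
)

# 2. Trigger run() on-chain: pulls the latest data and performs inference.
latest = og.alpha.run_workflow(address)

# 3. Read stored results without re-running.
same_result = og.alpha.read_workflow_result(address)
history = og.alpha.read_workflow_history(address, num_results=5)
```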
opengradient/cli.py CHANGED
@@ -6,6 +6,7 @@ import logging
 import webbrowser
 from pathlib import Path
 from typing import Dict, List, Optional
+import sys
 
 import click
 
@@ -557,6 +558,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     default="settle-batch",
     help="Settlement mode for x402 payments: settle (hashes only), settle-batch (batched, default), settle-metadata (full data)",
 )
+@click.option("--stream", is_flag=True, default=False, help="Stream the output from the LLM")
 @click.pass_context
 def chat(
     ctx,
@@ -572,6 +574,7 @@ def chat(
     tool_choice: Optional[str],
     x402_settlement_mode: Optional[str],
     local: bool,
+    stream: bool,
 ):
     """
     Run chat inference on an LLM model (local or external).
@@ -590,6 +593,9 @@ def chat(
 
     # External Anthropic model
     opengradient chat --model claude-haiku-4-5-20251001 --messages '[{"role":"user","content":"Write a poem"}]' --max-tokens 100
+
+    # Stream output
+    opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream
     """
     client: Client = ctx.obj["client"]
 
@@ -656,7 +662,7 @@ def chat(
         if not tools and not tools_file:
             parsed_tools = None
 
-        completion_output = client.llm_chat(
+        result = client.llm_chat(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
             messages=messages,
@@ -667,11 +673,16 @@ def chat(
             tool_choice=tool_choice,
             local_model=local,
             x402_settlement_mode=x402_settlement_mode,
+            stream=stream,
         )
 
-        print_llm_chat_result(
-            model_cid, completion_output.transaction_hash, completion_output.finish_reason, completion_output.chat_output, is_local
-        )
+        # Handle response based on streaming flag
+        if stream:
+            print_streaming_chat_result(model_cid, result, is_local)
+        else:
+            print_llm_chat_result(
+                model_cid, result.transaction_hash, result.finish_reason, result.chat_output, is_local
+            )
 
     except Exception as e:
         click.echo(f"Error running LLM chat inference: {str(e)}")
@@ -706,6 +717,80 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_loc
     click.echo()
 
 
+def print_streaming_chat_result(model_cid, stream, is_local=True):
+    """Handle streaming chat response with typed chunks - prints in real-time"""
+    click.secho("🌊 Streaming LLM Chat", fg="green", bold=True)
+    click.echo("──────────────────────────────────────")
+    click.echo("Model: ", nl=False)
+    click.secho(model_cid, fg="cyan", bold=True)
+
+    if is_local:
+        click.echo("Source: ", nl=False)
+        click.secho("OpenGradient TEE", fg="cyan", bold=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
+    click.echo("──────────────────────────────────────")
+    click.secho("Response:", fg="yellow", bold=True)
+    click.echo()
+
+    try:
+        content_parts = []
+        chunk_count = 0
+
+        for chunk in stream:
+            chunk_count += 1
+
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                sys.stdout.write(content)
+                sys.stdout.flush()
+                content_parts.append(content)
+
+            # Handle tool calls
+            if chunk.choices[0].delta.tool_calls:
+                sys.stdout.write("\n")
+                sys.stdout.flush()
+                click.secho("Tool Calls:", fg="yellow", bold=True)
+                for tool_call in chunk.choices[0].delta.tool_calls:
+                    click.echo(f" Function: {tool_call['function']['name']}")
+                    click.echo(f" Arguments: {tool_call['function']['arguments']}")
+
+            # Print final info when stream completes
+            if chunk.is_final:
+                sys.stdout.write("\n\n")
+                sys.stdout.flush()
+                click.echo("──────────────────────────────────────")
+
+                if chunk.usage:
+                    click.secho("Token Usage:", fg="cyan")
+                    click.echo(f" Prompt tokens: {chunk.usage.prompt_tokens}")
+                    click.echo(f" Completion tokens: {chunk.usage.completion_tokens}")
+                    click.echo(f" Total tokens: {chunk.usage.total_tokens}")
+                    click.echo()
+
+                if chunk.choices[0].finish_reason:
+                    click.echo("Finish reason: ", nl=False)
+                    click.secho(chunk.choices[0].finish_reason, fg="green")
+
+                click.echo("──────────────────────────────────────")
+                click.echo(f"Chunks received: {chunk_count}")
+                click.echo(f"Content length: {len(''.join(content_parts))} characters")
+                click.echo()
+
+    except KeyboardInterrupt:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho("Stream interrupted by user", fg="yellow")
+        click.echo()
+    except Exception as e:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        click.secho(f"Streaming error: {str(e)}", fg="red", bold=True)
+        click.echo()
+
+
 @cli.command()
 def create_account():
     """Create a new test account for OpenGradient inference and model management"""