opengradient 0.3.15__tar.gz → 0.3.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {opengradient-0.3.15 → opengradient-0.3.17}/PKG-INFO +1 -1
  2. {opengradient-0.3.15 → opengradient-0.3.17}/pyproject.toml +1 -1
  3. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/__init__.py +46 -16
  4. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/cli.py +49 -16
  5. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/client.py +75 -128
  6. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/types.py +7 -0
  7. {opengradient-0.3.15 → opengradient-0.3.17}/.gitignore +0 -0
  8. {opengradient-0.3.15 → opengradient-0.3.17}/LICENSE +0 -0
  9. {opengradient-0.3.15 → opengradient-0.3.17}/README.md +0 -0
  10. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/abi/inference.abi +0 -0
  11. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/account.py +0 -0
  12. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/defaults.py +0 -0
  13. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/exceptions.py +0 -0
  14. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/llm/__init__.py +0 -0
  15. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/llm/chat.py +0 -0
  16. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/proto/__init__.py +0 -0
  17. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/proto/infer.proto +0 -0
  18. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/proto/infer_pb2.py +0 -0
  19. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/proto/infer_pb2_grpc.py +0 -0
  20. {opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/utils.py +0 -0
{opengradient-0.3.15 → opengradient-0.3.17}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: opengradient
- Version: 0.3.15
+ Version: 0.3.17
  Summary: Python SDK for OpenGradient decentralized model management & inference services
  Project-URL: Homepage, https://opengradient.ai
  Author-email: OpenGradient <oliver@opengradient.ai>
{opengradient-0.3.15 → opengradient-0.3.17}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "opengradient"
- version = "0.3.15"
+ version = "0.3.17"
  description = "Python SDK for OpenGradient decentralized model management & inference services"
  authors = [{name = "OpenGradient", email = "oliver@opengradient.ai"}]
  license = {file = "LICENSE"}
{opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/__init__.py
@@ -2,10 +2,10 @@ from typing import Dict, List, Optional, Tuple

  from .client import Client
  from .defaults import DEFAULT_INFERENCE_CONTRACT_ADDRESS, DEFAULT_RPC_URL
- from .types import InferenceMode, LLM
+ from .types import InferenceMode, LlmInferenceMode, LLM, TEE_LLM
  from . import llm

- __version__ = "0.3.15"
+ __version__ = "0.3.17"

  _client = None

@@ -40,30 +40,60 @@ def create_version(model_name, notes=None, is_major=False):
  raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
  return _client.create_version(model_name, notes, is_major)

- def infer(model_cid, inference_mode, model_input):
+ def infer(model_cid, inference_mode, model_input, max_retries: Optional[int] = None):
+ """
+ Perform inference on a model.
+
+ Args:
+ model_cid: Model CID to use for inference
+ inference_mode: Mode of inference (e.g. VANILLA)
+ model_input: Input data for the model
+ max_retries: Optional maximum number of retry attempts for transaction errors
+
+ Returns:
+ Tuple of (transaction hash, model output)
+ """
  if _client is None:
  raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
- return _client.infer(model_cid, inference_mode, model_input)
+ return _client.infer(model_cid, inference_mode, model_input, max_retries=max_retries)

  def llm_completion(model_cid: LLM,
- prompt: str,
- max_tokens: int = 100,
- stop_sequence: Optional[List[str]] = None,
- temperature: float = 0.0) -> Tuple[str, str]:
+ prompt: str,
+ inference_mode: str = LlmInferenceMode.VANILLA,
+ max_tokens: int = 100,
+ stop_sequence: Optional[List[str]] = None,
+ temperature: float = 0.0,
+ max_retries: Optional[int] = None) -> Tuple[str, str]:
  if _client is None:
  raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
- return _client.llm_completion(model_cid, prompt, max_tokens, stop_sequence, temperature)
+ return _client.llm_completion(model_cid=model_cid,
+ inference_mode=inference_mode,
+ prompt=prompt,
+ max_tokens=max_tokens,
+ stop_sequence=stop_sequence,
+ temperature=temperature,
+ max_retries=max_retries)

  def llm_chat(model_cid: LLM,
- messages: List[Dict],
- max_tokens: int = 100,
- stop_sequence: Optional[List[str]] = None,
- temperature: float = 0.0,
- tools: Optional[List[Dict]] = None,
- tool_choice: Optional[str] = None):
+ messages: List[Dict],
+ inference_mode: str = LlmInferenceMode.VANILLA,
+ max_tokens: int = 100,
+ stop_sequence: Optional[List[str]] = None,
+ temperature: float = 0.0,
+ tools: Optional[List[Dict]] = None,
+ tool_choice: Optional[str] = None,
+ max_retries: Optional[int] = None) -> Tuple[str, str, Dict]:
  if _client is None:
  raise RuntimeError("OpenGradient client not initialized. Call og.init() first.")
- return _client.llm_chat(model_cid, messages, max_tokens, stop_sequence, temperature, tools, tool_choice)
+ return _client.llm_chat(model_cid=model_cid,
+ inference_mode=inference_mode,
+ messages=messages,
+ max_tokens=max_tokens,
+ stop_sequence=stop_sequence,
+ temperature=temperature,
+ tools=tools,
+ tool_choice=tool_choice,
+ max_retries=max_retries)

  def login(email: str, password: str):
  if _client is None:
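
The restructured module-level functions above simply forward the new inference_mode and max_retries arguments to the underlying Client. A minimal usage sketch of the 0.3.17 surface follows; the og.init() credentials and the model CID are illustrative assumptions, not values taken from this diff:

    import opengradient as og

    # Assumed initialization; the exact og.init() signature is not shown in this diff.
    og.init(private_key="<PRIVATE_KEY>", email="you@example.com", password="<PASSWORD>")

    # Inference with the new optional retry knob: nonce conflicts are retried,
    # other transaction errors are raised immediately (see run_with_retry in client.py).
    tx_hash, model_output = og.infer(
        model_cid="Qm...",                        # placeholder model CID
        inference_mode=og.InferenceMode.VANILLA,
        model_input={"input": [1.0, 2.0, 3.0]},
        max_retries=3,
    )
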
{opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/cli.py
@@ -20,7 +20,7 @@ from .defaults import (
  DEFAULT_OG_FAUCET_URL,
  DEFAULT_RPC_URL,
  )
- from .types import InferenceMode
+ from .types import InferenceMode, LlmInferenceMode

  OG_CONFIG_FILE = Path.home() / '.opengradient_config.json'

@@ -65,11 +65,17 @@ InferenceModes = {
  "TEE": InferenceMode.TEE,
  }

+ LlmInferenceModes = {
+ "VANILLA": LlmInferenceMode.VANILLA,
+ "TEE": LlmInferenceMode.TEE,
+ }
+
  # Supported LLMs
  LlmModels = {
  "meta-llama/Meta-Llama-3-8B-Instruct",
  "meta-llama/Llama-3.2-3B-Instruct",
- "mistralai/Mistral-7B-Instruct-v0.3"
+ "mistralai/Mistral-7B-Instruct-v0.3",
+ "meta-llama/Llama-3.1-70B-Instruct",
  }

  def initialize_config(ctx):
@@ -339,13 +345,15 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path
  click.echo(f"Error running inference: {str(e)}")

  @cli.command()
- @click.option('--model', '-m', 'model_cid', type=click.Choice(LlmModels), required=True, help='CID of the LLM model to run inference on')
+ @click.option('--model', '-m', 'model_cid', type=click.Choice([e.value for e in types.LLM]), required=True, help='CID of the LLM model to run inference on')
+ @click.option('--mode', 'inference_mode', type=click.Choice(LlmInferenceModes.keys()), default="VANILLA",
+ help='Inference mode (default: VANILLA)')
  @click.option('--prompt', '-p', required=True, help='Input prompt for the LLM completion')
  @click.option('--max-tokens', type=int, default=100, help='Maximum number of tokens for LLM completion output')
  @click.option('--stop-sequence', multiple=True, help='Stop sequences for LLM')
  @click.option('--temperature', type=float, default=0.0, help='Temperature for LLM inference (0.0 to 1.0)')
  @click.pass_context
- def completion(ctx, model_cid: str, prompt: str, max_tokens: int, stop_sequence: List[str], temperature: float):
+ def completion(ctx, model_cid: str, inference_mode: str, prompt: str, max_tokens: int, stop_sequence: List[str], temperature: float):
  """
  Run completion inference on an LLM model.

@@ -355,13 +363,14 @@ def completion(ctx, model_cid: str, prompt: str, max_tokens: int, stop_sequence:

  \b
  opengradient completion --model meta-llama/Meta-Llama-3-8B-Instruct --prompt "Hello, how are you?" --max-tokens 50 --temperature 0.7
- opengradient completion -m meta-llama/Meta-Llama-3-8B-Instruct -p "Translate to French: Hello world" --stop-sequence "." --stop-sequence "\n"
+ opengradient completion -m meta-llama/Meta-Llama-3-8B-Instruct -p "Translate to French: Hello world" --stop-sequence "." --stop-sequence "\\n"
  """
  client: Client = ctx.obj['client']
  try:
  click.echo(f"Running LLM completion inference for model \"{model_cid}\"\n")
  tx_hash, llm_output = client.llm_completion(
  model_cid=model_cid,
+ inference_mode=LlmInferenceModes[inference_mode],
  prompt=prompt,
  max_tokens=max_tokens,
  stop_sequence=list(stop_sequence),
@@ -394,6 +403,9 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output):
  type=click.Choice([e.value for e in types.LLM]),
  required=True,
  help='CID of the LLM model to run inference on')
+ @click.option('--mode', 'inference_mode', type=click.Choice(LlmInferenceModes.keys()),
+ default="VANILLA",
+ help='Inference mode (default: VANILLA)')
  @click.option('--messages',
  type=str,
  required=False,
@@ -431,6 +443,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output):
  def chat(
  ctx,
  model_cid: str,
+ inference_mode: str,
  messages: Optional[str],
  messages_file: Optional[Path],
  max_tokens: int,
@@ -444,11 +457,13 @@ def chat(

  This command runs a chat inference on the specified LLM model using the provided messages and parameters.

+ Tool call formatting is based on OpenAI documentation tool calls (see here: https://platform.openai.com/docs/guides/function-calling).
+
  Example usage:

  \b
  opengradient chat --model meta-llama/Meta-Llama-3-8B-Instruct --messages '[{"role":"user","content":"hello"}]' --max-tokens 50 --temperature 0.7
- opengradient chat -m mistralai/Mistral-7B-Instruct-v0.3 --messages-file messages.json --stop-sequence "." --stop-sequence "\n"
+ opengradient chat --model mistralai/Mistral-7B-Instruct-v0.3 --messages-file messages.json --tools-file tools.json --max-tokens 200 --stop-sequence "." --stop-sequence "\\n"
  """
  client: Client = ctx.obj['client']
  try:
@@ -458,7 +473,7 @@ def chat(
  ctx.exit(1)
  return
  if messages and messages_file:
- click.echo("Cannot have both messages and messages_file")
+ click.echo("Cannot have both messages and messages-file")
  ctx.exit(1)
  return

@@ -473,9 +488,9 @@ def chat(
  messages = json.load(file)

  # Parse tools if provided
- if tools is not None and tools != "[]" and tools_file:
- click.echo("Cannot have both tools and tools_file")
- ctx.exit(1)
+ if (tools and tools != '[]') and tools_file:
+ click.echo("Cannot have both tools and tools-file")
+ click.exit(1)
  return

  parsed_tools=[]
@@ -509,6 +524,7 @@ def chat(

  tx_hash, finish_reason, llm_chat_output = client.llm_chat(
  model_cid=model_cid,
+ inference_mode=LlmInferenceModes[inference_mode],
  messages=messages,
  max_tokens=max_tokens,
  stop_sequence=list(stop_sequence),
@@ -517,15 +533,32 @@
  tool_choice=tool_choice,
  )

- # TODO (Kyle): Make this prettier
- print("TX Hash: ", tx_hash)
- print("Finish reason: ", finish_reason)
- print("Chat output: ", llm_chat_output)
+ print_llm_chat_result(model_cid, tx_hash, finish_reason, llm_chat_output)
  except Exception as e:
  click.echo(f"Error running LLM chat inference: {str(e)}")

- def print_llm_chat_result():
- pass
+ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output):
+ click.secho("✅ LLM Chat Successful", fg="green", bold=True)
+ click.echo("──────────────────────────────────────")
+ click.echo("Model CID: ", nl=False)
+ click.secho(model_cid, fg="cyan", bold=True)
+ click.echo("Transaction hash: ", nl=False)
+ click.secho(tx_hash, fg="cyan", bold=True)
+ block_explorer_link = f"{DEFAULT_BLOCKCHAIN_EXPLORER}0x{tx_hash}"
+ click.echo("Block explorer link: ", nl=False)
+ click.secho(block_explorer_link, fg="blue", underline=True)
+ click.echo("──────────────────────────────────────")
+ click.secho("Finish Reason: ", fg="yellow", bold=True)
+ click.echo()
+ click.echo(finish_reason)
+ click.echo()
+ click.secho("Chat Output:", fg="yellow", bold=True)
+ click.echo()
+ for key, value in chat_output.items():
+ # If the value doesn't give any information, don't print it
+ if value != None and value != "" and value != '[]' and value != []:
+ click.echo(f"{key}: {value}")
+ click.echo()

  @cli.command()
  def create_account():
{opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/client.py
@@ -1,6 +1,7 @@
  import json
  import logging
  import os
+ import random
  from typing import Dict, List, Optional, Tuple, Union

  import firebase
@@ -12,7 +13,7 @@ from web3.logs import DISCARD

  from opengradient import utils
  from opengradient.exceptions import OpenGradientError
- from opengradient.types import InferenceMode, LLM
+ from opengradient.types import InferenceMode, LlmInferenceMode, LLM, TEE_LLM

  import grpc
  import time
@@ -23,6 +24,31 @@ from opengradient.proto import infer_pb2
  from opengradient.proto import infer_pb2_grpc
  from .defaults import DEFAULT_IMAGE_GEN_HOST, DEFAULT_IMAGE_GEN_PORT

+ from functools import wraps
+
+ def run_with_retry(txn_function, max_retries=5):
+ """
+ Execute a blockchain transaction with retry logic.
+
+ Args:
+ txn_function: Function that executes the transaction
+ max_retries (int): Maximum number of retry attempts
+ """
+ last_error = None
+ for attempt in range(max_retries):
+ try:
+ return txn_function()
+ except Exception as e:
+ last_error = e
+ if attempt < max_retries - 1:
+ if "nonce too low" in str(e) or "nonce too high" in str(e):
+ time.sleep(1) # Wait before retry
+ continue
+ # If it's not a nonce error, raise immediately
+ raise
+ # If we've exhausted all retries, raise the last error
+ raise OpenGradientError(f"Transaction failed after {max_retries} attempts: {str(last_error)}")
+
  class Client:
  FIREBASE_CONFIG = {
  "apiKey": "AIzaSyDUVckVtfl-hiteBzPopy1pDD8Uvfncs7w",
@@ -311,7 +337,8 @@
  self,
  model_cid: str,
  inference_mode: InferenceMode,
- model_input: Dict[str, Union[str, int, float, List, np.ndarray]]
+ model_input: Dict[str, Union[str, int, float, List, np.ndarray]],
+ max_retries: Optional[int] = None
  ) -> Tuple[str, Dict[str, np.ndarray]]:
  """
  Perform inference on a model.
@@ -320,6 +347,7 @@
  model_cid (str): The unique content identifier for the model from IPFS.
  inference_mode (InferenceMode): The inference mode.
  model_input (Dict[str, Union[str, int, float, List, np.ndarray]]): The input data for the model.
+ max_retries (int, optional): Maximum number of retry attempts. Defaults to 5.

  Returns:
  Tuple[str, Dict[str, np.ndarray]]: The transaction hash and the model output.
@@ -327,46 +355,22 @@
  Raises:
  OpenGradientError: If the inference fails.
  """
- # TODO (Kyle): Add input support for JSON tensors
- try:
- logging.debug("Entering infer method")
+ def execute_transaction():
  self._initialize_web3()
- logging.debug(f"Web3 initialized. Connected: {self._w3.is_connected()}")
-
- logging.debug(f"Creating contract instance. Address: {self.contract_address}")
  contract = self._w3.eth.contract(address=self.contract_address, abi=self.abi)
- logging.debug("Contract instance created successfully")
-
- logging.debug(f"Model ID: {model_cid}")
- logging.debug(f"Inference Mode: {inference_mode}")
- logging.debug(f"Model Input: {model_input}")
-
- # Convert InferenceMode to uint8
+
  inference_mode_uint8 = int(inference_mode)
-
- # Prepare ModelInput tuple
  converted_model_input = utils.convert_to_model_input(model_input)
- logging.debug(f"Prepared model input tuple: {converted_model_input}")
-
- logging.debug("Preparing run function")
+
  run_function = contract.functions.run(
  model_cid,
  inference_mode_uint8,
  converted_model_input
  )
- logging.debug("Run function prepared successfully")

- # Build transaction
- nonce = self._w3.eth.get_transaction_count(self.wallet_address)
- logging.debug(f"Nonce: {nonce}")
-
- # Estimate gas
+ nonce = self._w3.eth.get_transaction_count(self.wallet_address, 'pending')
  estimated_gas = run_function.estimate_gas({'from': self.wallet_address})
- logging.debug(f"Estimated gas: {estimated_gas}")
-
- # Increase gas limit by 20%
  gas_limit = int(estimated_gas * 3)
- logging.debug(f"Gas limit set to: {gas_limit}")

  transaction = run_function.build_transaction({
  'from': self.wallet_address,
@@ -375,62 +379,36 @@ class Client:
  'gasPrice': self._w3.eth.gas_price,
  })

- logging.debug(f"Transaction built: {transaction}")
-
- # Sign transaction
  signed_tx = self._w3.eth.account.sign_transaction(transaction, self.private_key)
- logging.debug("Transaction signed successfully")
-
- # Send transaction
  tx_hash = self._w3.eth.send_raw_transaction(signed_tx.raw_transaction)
- logging.debug(f"Transaction sent. Hash: {tx_hash.hex()}")
-
- # Wait for transaction receipt
  tx_receipt = self._w3.eth.wait_for_transaction_receipt(tx_hash)
- logging.debug(f"Transaction receipt received: {tx_receipt}")

- # Check if the transaction was successful
  if tx_receipt['status'] == 0:
  raise ContractLogicError(f"Transaction failed. Receipt: {tx_receipt}")

- # Process the InferenceResult event
  parsed_logs = contract.events.InferenceResult().process_receipt(tx_receipt, errors=DISCARD)
-
  if len(parsed_logs) < 1:
  raise OpenGradientError("InferenceResult event not found in transaction logs")
- inference_result = parsed_logs[0]
-
- # Extract the ModelOutput from the event
- event_data = inference_result['args']
- logging.debug(f"Raw event data: {event_data}")
-
- try:
- model_output = utils.convert_to_model_output(event_data)
- logging.debug(f"Parsed ModelOutput: {model_output}")
- except Exception as e:
- logging.error(f"Error parsing event data: {str(e)}", exc_info=True)
- raise OpenGradientError(f"Failed to parse event data: {str(e)}")

+ model_output = utils.convert_to_model_output(parsed_logs[0]['args'])
  return tx_hash.hex(), model_output

- except ContractLogicError as e:
- logging.error(f"Contract logic error: {str(e)}", exc_info=True)
- raise OpenGradientError(f"Inference failed due to contract logic error: {str(e)}")
- except Exception as e:
- logging.error(f"Error in infer method: {str(e)}", exc_info=True)
- raise OpenGradientError(f"Inference failed: {str(e)}")
-
+ return run_with_retry(execute_transaction, max_retries or 5)
+
  def llm_completion(self,
  model_cid: LLM,
+ inference_mode: InferenceMode,
  prompt: str,
  max_tokens: int = 100,
  stop_sequence: Optional[List[str]] = None,
- temperature: float = 0.0) -> Tuple[str, str]:
+ temperature: float = 0.0,
+ max_retries: Optional[int] = None) -> Tuple[str, str]:
  """
  Perform inference on an LLM model using completions.

  Args:
  model_cid (LLM): The unique content identifier for the model.
+ inference_mode (InferenceMode): The inference mode.
  prompt (str): The input prompt for the LLM.
  max_tokens (int): Maximum number of tokens for LLM output. Default is 100.
  stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None.
@@ -442,17 +420,20 @@
  Raises:
  OpenGradientError: If the inference fails.
  """
- try:
- self._initialize_web3()
+ def execute_transaction():
+ # Check inference mode and supported model
+ if inference_mode != LlmInferenceMode.VANILLA and inference_mode != LlmInferenceMode.TEE:
+ raise OpenGradientError("Invalid inference mode %s: Inference mode must be VANILLA or TEE" % inference_mode)

- abi_path = os.path.join(os.path.dirname(__file__), 'abi', 'inference.abi')
- with open(abi_path, 'r') as abi_file:
- llm_abi = json.load(abi_file)
- contract = self._w3.eth.contract(address=self.contract_address, abi=llm_abi)
+ if inference_mode == LlmInferenceMode.TEE and model_cid not in TEE_LLM:
+ raise OpenGradientError("That model CID is not supported yet supported for TEE inference")
+
+ self._initialize_web3()
+ contract = self._w3.eth.contract(address=self.contract_address, abi=self.abi)

  # Prepare LLM input
  llm_request = {
- "mode": InferenceMode.VANILLA,
+ "mode": inference_mode,
  "modelCID": model_cid,
  "prompt": prompt,
  "max_tokens": max_tokens,
@@ -461,11 +442,9 @@
  }
  logging.debug(f"Prepared LLM request: {llm_request}")

- # Prepare run function
  run_function = contract.functions.runLLMCompletion(llm_request)

- # Build transaction
- nonce = self._w3.eth.get_transaction_count(self.wallet_address)
+ nonce = self._w3.eth.get_transaction_count(self.wallet_address, 'pending')
  estimated_gas = run_function.estimate_gas({'from': self.wallet_address})
  gas_limit = int(estimated_gas * 1.2)

@@ -476,47 +455,38 @@
  'gasPrice': self._w3.eth.gas_price,
  })

- # Sign and send transaction
  signed_tx = self._w3.eth.account.sign_transaction(transaction, self.private_key)
  tx_hash = self._w3.eth.send_raw_transaction(signed_tx.raw_transaction)
- logging.debug(f"Transaction sent. Hash: {tx_hash.hex()}")
-
- # Wait for transaction receipt
  tx_receipt = self._w3.eth.wait_for_transaction_receipt(tx_hash)

  if tx_receipt['status'] == 0:
  raise ContractLogicError(f"Transaction failed. Receipt: {tx_receipt}")

- # Process the LLMResult event
  parsed_logs = contract.events.LLMCompletionResult().process_receipt(tx_receipt, errors=DISCARD)
-
  if len(parsed_logs) < 1:
  raise OpenGradientError("LLM completion result event not found in transaction logs")
- llm_result = parsed_logs[0]

- llm_answer = llm_result['args']['response']['answer']
+ llm_answer = parsed_logs[0]['args']['response']['answer']
  return tx_hash.hex(), llm_answer

- except ContractLogicError as e:
- logging.error(f"Contract logic error: {str(e)}", exc_info=True)
- raise OpenGradientError(f"LLM inference failed due to contract logic error: {str(e)}")
- except Exception as e:
- logging.error(f"Error in infer completion method: {str(e)}", exc_info=True)
- raise OpenGradientError(f"LLM inference failed: {str(e)}")
-
+ return run_with_retry(execute_transaction, max_retries or 5)
+
  def llm_chat(self,
  model_cid: str,
+ inference_mode: InferenceMode,
  messages: List[Dict],
  max_tokens: int = 100,
  stop_sequence: Optional[List[str]] = None,
  temperature: float = 0.0,
  tools: Optional[List[Dict]] = [],
- tool_choice: Optional[str] = None) -> Tuple[str, str]:
+ tool_choice: Optional[str] = None,
+ max_retries: Optional[int] = None) -> Tuple[str, str]:
  """
  Perform inference on an LLM model using chat.

  Args:
  model_cid (LLM): The unique content identifier for the model.
+ inference_mode (InferenceMode): The inference mode.
  messages (dict): The messages that will be passed into the chat.
  This should be in OpenAI API format (https://platform.openai.com/docs/api-reference/chat/create)
  Example:
@@ -567,13 +537,16 @@
  Raises:
  OpenGradientError: If the inference fails.
  """
- try:
- self._initialize_web3()
+ def execute_transaction():
+ # Check inference mode and supported model
+ if inference_mode != LlmInferenceMode.VANILLA and inference_mode != LlmInferenceMode.TEE:
+ raise OpenGradientError("Invalid inference mode %s: Inference mode must be VANILLA or TEE" % inference_mode)
+
+ if inference_mode == LlmInferenceMode.TEE and model_cid not in TEE_LLM:
+ raise OpenGradientError("That model CID is not supported yet supported for TEE inference")

- abi_path = os.path.join(os.path.dirname(__file__), 'abi', 'inference.abi')
- with open(abi_path, 'r') as abi_file:
- llm_abi = json.load(abi_file)
- contract = self._w3.eth.contract(address=self.contract_address, abi=llm_abi)
+ self._initialize_web3()
+ contract = self._w3.eth.contract(address=self.contract_address, abi=self.abi)

  # For incoming chat messages, tool_calls can be empty. Add an empty array so that it will fit the ABI.
  for message in messages:
@@ -585,17 +558,10 @@
  message['name'] = ""

  # Create simplified tool structure for smart contract
- #
- # struct ToolDefinition {
- # string description;
- # string name;
- # string parameters; // This must be a JSON
- # }
  converted_tools = []
  if tools is not None:
  for tool in tools:
  function = tool['function']
-
  converted_tool = {}
  converted_tool['name'] = function['name']
  converted_tool['description'] = function['description']
@@ -604,12 +570,11 @@
  converted_tool['parameters'] = json.dumps(parameters)
  except Exception as e:
  raise OpenGradientError("Chat LLM failed to convert parameters into JSON: %s", e)
-
  converted_tools.append(converted_tool)

  # Prepare LLM input
  llm_request = {
- "mode": InferenceMode.VANILLA,
+ "mode": inference_mode,
  "modelCID": model_cid,
  "messages": messages,
  "max_tokens": max_tokens,
@@ -620,11 +585,9 @@
  }
  logging.debug(f"Prepared LLM request: {llm_request}")

- # Prepare run function
  run_function = contract.functions.runLLMChat(llm_request)

- # Build transaction
- nonce = self._w3.eth.get_transaction_count(self.wallet_address)
+ nonce = self._w3.eth.get_transaction_count(self.wallet_address, 'pending')
  estimated_gas = run_function.estimate_gas({'from': self.wallet_address})
  gas_limit = int(estimated_gas * 1.2)

@@ -635,41 +598,25 @@
  'gasPrice': self._w3.eth.gas_price,
  })

- # Sign and send transaction
  signed_tx = self._w3.eth.account.sign_transaction(transaction, self.private_key)
  tx_hash = self._w3.eth.send_raw_transaction(signed_tx.raw_transaction)
- logging.debug(f"Transaction sent. Hash: {tx_hash.hex()}")
-
- # Wait for transaction receipt
  tx_receipt = self._w3.eth.wait_for_transaction_receipt(tx_hash)

  if tx_receipt['status'] == 0:
  raise ContractLogicError(f"Transaction failed. Receipt: {tx_receipt}")

- # Process the LLMResult event
  parsed_logs = contract.events.LLMChatResult().process_receipt(tx_receipt, errors=DISCARD)
-
  if len(parsed_logs) < 1:
  raise OpenGradientError("LLM chat result event not found in transaction logs")
- llm_result = parsed_logs[0]['args']['response']

- # Turn tool calls into normal dicts
+ llm_result = parsed_logs[0]['args']['response']
  message = dict(llm_result['message'])
- if (tool_calls := message.get('tool_calls')) != None:
- new_tool_calls = []
- for tool_call in tool_calls:
- new_tool_calls.append(dict(tool_call))
- message['tool_calls'] = new_tool_calls
-
- return (tx_hash.hex(), llm_result['finish_reason'], message)
+ if (tool_calls := message.get('tool_calls')) is not None:
+ message['tool_calls'] = [dict(tool_call) for tool_call in tool_calls]

- except ContractLogicError as e:
- logging.error(f"Contract logic error: {str(e)}", exc_info=True)
- raise OpenGradientError(f"LLM inference failed due to contract logic error: {str(e)}")
- except Exception as e:
- logging.error(f"Error in infer chat method: {str(e)}", exc_info=True)
- raise OpenGradientError(f"LLM inference failed: {str(e)}")
+ return tx_hash.hex(), llm_result['finish_reason'], message

+ return run_with_retry(execute_transaction, max_retries or 5)

  def list_files(self, model_name: str, version: str) -> List[Dict]:
  """
{opengradient-0.3.15 → opengradient-0.3.17}/src/opengradient/types.py
@@ -27,6 +27,10 @@ class InferenceMode:
  ZKML = 1
  TEE = 2

+ class LlmInferenceMode:
+ VANILLA = 0
+ TEE = 1
+
  @dataclass
  class ModelOutput:
  numbers: List[NumberTensor]
@@ -79,4 +83,7 @@ class LLM(str, Enum):
  LLAMA_3_2_3B_INSTRUCT = "meta-llama/Llama-3.2-3B-Instruct"
  MISTRAL_7B_INSTRUCT_V3 = "mistralai/Mistral-7B-Instruct-v0.3"
  HERMES_3_LLAMA_3_1_70B = "NousResearch/Hermes-3-Llama-3.1-70B"
+ META_LLAMA_3_1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct"
+
+ class TEE_LLM(str, Enum):
  META_LLAMA_3_1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct"
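
The new LlmInferenceMode and TEE_LLM types back the TEE path added in 0.3.17: the client rejects LlmInferenceMode.TEE for any model CID that is not a member of TEE_LLM. A hedged sketch of selecting the TEE-enabled model through the module-level API (assumes og.init() has already been called; passing the enum member keeps the TEE_LLM membership check straightforward):

    import opengradient as og

    tx_hash, answer = og.llm_completion(
        model_cid=og.TEE_LLM.META_LLAMA_3_1_70B_INSTRUCT,  # str-valued enum member
        inference_mode=og.LlmInferenceMode.TEE,
        prompt="Summarize the OpenGradient SDK in one sentence.",
        max_tokens=50,
    )
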