opengradient 0.4.14__tar.gz → 0.5.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opengradient-0.4.14/src/opengradient.egg-info → opengradient-0.5.0a1}/PKG-INFO +3 -25
- {opengradient-0.4.14 → opengradient-0.5.0a1}/pyproject.toml +5 -6
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/cli.py +158 -41
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/client.py +262 -67
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/defaults.py +2 -1
- {opengradient-0.4.14 → opengradient-0.5.0a1/src/opengradient.egg-info}/PKG-INFO +3 -25
- {opengradient-0.4.14 → opengradient-0.5.0a1}/LICENSE +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/README.md +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/setup.cfg +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/__init__.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/abi/InferencePrecompile.abi +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/abi/PriceHistoryInference.abi +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/abi/WorkflowScheduler.abi +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/abi/inference.abi +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/account.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/alphasense/__init__.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/alphasense/read_workflow_tool.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/alphasense/run_model_tool.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/alphasense/types.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/bin/PriceHistoryInference.bin +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/exceptions.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/llm/__init__.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/llm/og_langchain.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/llm/og_openai.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/proto/__init__.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/proto/infer.proto +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/proto/infer_pb2.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/proto/infer_pb2_grpc.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/types.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/utils.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/workflow_models/__init__.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/workflow_models/constants.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/workflow_models/types.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/workflow_models/utils.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/workflow_models/workflow_models.py +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient.egg-info/SOURCES.txt +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient.egg-info/dependency_links.txt +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient.egg-info/entry_points.txt +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient.egg-info/requires.txt +0 -0
- {opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient.egg-info/top_level.txt +0 -0

{opengradient-0.4.14/src/opengradient.egg-info → opengradient-0.5.0a1}/PKG-INFO
@@ -1,34 +1,12 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.4.14
+Version: 0.5.0a1
 Summary: Python SDK for OpenGradient decentralized model management & inference services
-Author-email: OpenGradient <
-License: MIT
-
-Copyright (c) 2024 OpenGradient
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
+Author-email: OpenGradient <kyle@vannalabs.ai>
+License-Expression: MIT
 Project-URL: Homepage, https://opengradient.ai
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12

{opengradient-0.4.14 → opengradient-0.5.0a1}/pyproject.toml
@@ -1,19 +1,18 @@
 [build-system]
-requires = ["setuptools>=
+requires = ["setuptools>=77.0.0"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "opengradient"
-version = "0.4.14"
+version = "0.5.0a1"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
-authors = [{name = "OpenGradient", email = "
-license = {file = "LICENSE"}
+authors = [{name = "OpenGradient", email = "kyle@vannalabs.ai"}]
 readme = "README.md"
 requires-python = ">=3.10"
+license = "MIT"
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -47,7 +46,7 @@ include-package-data = true
 
 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["opengradient*"]
+include = ["opengradient*"]
 exclude = ["tests*", "stresstest*"]
 
 [tool.setuptools.package-data]

{opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/cli.py
@@ -18,6 +18,7 @@ from .defaults import (
     DEFAULT_OG_FAUCET_URL,
     DEFAULT_RPC_URL,
     DEFAULT_API_URL,
+    DEFAULT_LLM_SERVER_URL,
 )
 from .types import InferenceMode, LlmInferenceMode, LLM, TEE_LLM
 
@@ -119,17 +120,21 @@ def cli(ctx):
 
     Visit https://docs.opengradient.ai/developers/python_sdk/ for more documentation.
     """
-    # Load existing config
     ctx.obj = load_og_config()
 
     no_client_commands = ["config", "create-account", "version"]
 
-    # Only create client if this is not a config management command
     if ctx.invoked_subcommand in no_client_commands:
         return
 
     if all(key in ctx.obj for key in ["private_key"]):
         try:
+            # Extract API keys from config
+            llm_server_url = ctx.obj.get("llm_server_url", DEFAULT_LLM_SERVER_URL)
+            openai_api_key = ctx.obj.get("openai_api_key")
+            anthropic_api_key = ctx.obj.get("anthropic_api_key")
+            google_api_key = ctx.obj.get("google_api_key")
+
             ctx.obj["client"] = Client(
                 private_key=ctx.obj["private_key"],
                 rpc_url=DEFAULT_RPC_URL,
@@ -137,6 +142,10 @@ def cli(ctx):
                 contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS,
                 email=ctx.obj.get("email"),
                 password=ctx.obj.get("password"),
+                llm_server_url=llm_server_url,
+                openai_api_key=openai_api_key,
+                anthropic_api_key=anthropic_api_key,
+                google_api_key=google_api_key,
             )
         except Exception as e:
             click.echo(f"Failed to create OpenGradient client: {str(e)}")
@@ -197,6 +206,50 @@ def clear(ctx):
         click.echo("Config clear cancelled.")
 
 
+@config.command()
+@click.option("--provider", type=click.Choice(["openai", "anthropic", "google"]), required=True)
+@click.option("--key", required=True, help="API key for the provider")
+@click.pass_context
+def set_api_key(ctx, provider: str, key: str):
+    """
+    Set API key for external LLM providers.
+
+    Example usage:
+
+    \b
+    opengradient config set-api-key --provider openai --key ..
+    opengradient config set-api-key --provider anthropic --key ...
+    opengradient config set-api-key --provider google --key ...
+    """
+    config_key = f"{provider}_api_key"
+    ctx.obj[config_key] = key
+    save_og_config(ctx)
+
+    click.secho(f"✅ API key for {provider} has been set", fg="green")
+    click.echo("You can now use models from this provider in completion and chat commands.")
+
+
+@config.command()
+@click.option("--provider", type=click.Choice(["openai", "anthropic", "google"]), required=True)
+@click.pass_context
+def remove_api_key(ctx, provider: str):
+    """
+    Remove API key for an external LLM provider.
+
+    Example usage:
+
+    \b
+    opengradient config remove-api-key --provider openai
+    """
+    config_key = f"{provider}_api_key"
+    if config_key in ctx.obj:
+        del ctx.obj[config_key]
+        save_og_config(ctx)
+        click.secho(f"✅ API key for {provider} has been removed", fg="green")
+    else:
+        click.echo(f"No API key found for {provider}")
+
+
 @cli.command()
 @click.option("--repo", "-r", "--name", "repo_name", required=True, help="Name of the new model repository")
 @click.option("--description", "-d", required=True, help="Description of the model")
@@ -354,33 +407,55 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path
     "--model",
     "-m",
     "model_cid",
-    type=click.Choice([e.value for e in LLM]),
     required=True,
-    help="
+    help="Model identifier (local model from LLM enum or external model like 'gpt-4o', 'gemini-2.5-flash-lite', etc.)",
 )
 @click.option(
-    "--mode",
+    "--mode",
+    "inference_mode",
+    type=click.Choice(LlmInferenceModes.keys()),
+    default="VANILLA",
+    help="Inference mode (only applies to local models, default: VANILLA)"
 )
 @click.option("--prompt", "-p", required=True, help="Input prompt for the LLM completion")
 @click.option("--max-tokens", type=int, default=100, help="Maximum number of tokens for LLM completion output")
 @click.option("--stop-sequence", multiple=True, help="Stop sequences for LLM")
 @click.option("--temperature", type=float, default=0.0, help="Temperature for LLM inference (0.0 to 1.0)")
+@click.option("--local", is_flag=True, help="Force use of local model even if not in LLM enum")
 @click.pass_context
-def completion(ctx, model_cid: str, inference_mode: str, prompt: str, max_tokens: int, stop_sequence: List[str], temperature: float):
+def completion(ctx, model_cid: str, inference_mode: str, prompt: str, max_tokens: int, stop_sequence: List[str], temperature: float, local: bool):
     """
-    Run completion inference on an LLM model.
+    Run completion inference on an LLM model (local or external).
 
-    This command
+    This command supports both local OpenGradient models and external providers
+    (OpenAI, Anthropic, Google, etc.). For external models, make sure to set
+    the appropriate API key using 'opengradient config set-api-key'.
 
     Example usage:
 
     \b
-
-    opengradient completion
+    # Local model
+    opengradient completion --model meta-llama/Meta-Llama-3-8B-Instruct --prompt "Hello, how are you?" --max-tokens 50
+
+    # External OpenAI model
+    opengradient completion --model gpt-4o --prompt "Translate to French: Hello world" --max-tokens 50
+
+    # External Anthropic model
+    opengradient completion --model claude-haiku-4-5-20251001--prompt "Write a haiku about coding" --max-tokens 100
+
+    # External Google model
+    opengradient completion --model gemini-2.5-flash-lite --prompt "Explain quantum computing" --max-tokens 200
     """
     client: Client = ctx.obj["client"]
+
     try:
-
+        is_local = local or model_cid in [llm.value for llm in LLM]
+
+        if is_local:
+            click.echo(f'Running LLM completion inference for local model "{model_cid}"\n')
+        else:
+            click.echo(f'Running LLM completion inference for external model "{model_cid}"\n')
+
         completion_output = client.llm_completion(
             model_cid=model_cid,
             inference_mode=LlmInferenceModes[inference_mode],
@@ -388,23 +463,31 @@ def completion(ctx, model_cid: str, inference_mode: str, prompt: str, max_tokens
             max_tokens=max_tokens,
             stop_sequence=list(stop_sequence),
             temperature=temperature,
+            local_model=local,
         )
 
-        print_llm_completion_result(model_cid, completion_output.transaction_hash, completion_output.completion_output)
+        print_llm_completion_result(model_cid, completion_output.transaction_hash, completion_output.completion_output, is_local)
+
     except Exception as e:
         click.echo(f"Error running LLM completion: {str(e)}")
 
 
-def print_llm_completion_result(model_cid, tx_hash, llm_output):
+def print_llm_completion_result(model_cid, tx_hash, llm_output, is_local=True):
     click.secho("✅ LLM completion Successful", fg="green", bold=True)
     click.echo("──────────────────────────────────────")
-    click.echo("Model
+    click.echo("Model: ", nl=False)
     click.secho(model_cid, fg="cyan", bold=True)
-
-
-
-
-
+
+    if is_local and tx_hash != "external":
+        click.echo("Transaction hash: ", nl=False)
+        click.secho(tx_hash, fg="cyan", bold=True)
+        block_explorer_link = f"{DEFAULT_BLOCKCHAIN_EXPLORER}0x{tx_hash}"
+        click.echo("Block explorer link: ", nl=False)
+        click.secho(block_explorer_link, fg="blue", underline=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
     click.echo("──────────────────────────────────────")
     click.secho("LLM Output:", fg="yellow", bold=True)
     click.echo()
@@ -417,12 +500,15 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output):
     "--model",
     "-m",
     "model_cid",
-    type=click.Choice([e.value for e in LLM]),
     required=True,
-    help="
+    help="Model identifier (local model from LLM enum or external model like 'gpt-4o', 'gemini-2.5-flash-lite', etc.)",
 )
 @click.option(
-    "--mode",
+    "--mode",
+    "inference_mode",
+    type=click.Choice(LlmInferenceModes.keys()),
+    default="VANILLA",
+    help="Inference mode (only applies to local models, default: VANILLA)"
 )
 @click.option("--messages", type=str, required=False, help="Input messages for the chat inference in JSON format")
 @click.option(
@@ -436,9 +522,13 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output):
 @click.option("--temperature", type=float, default=0.0, help="Temperature for LLM inference (0.0 to 1.0)")
 @click.option("--tools", type=str, default=None, help="Tool configurations in JSON format")
 @click.option(
-    "--tools-file",
+    "--tools-file",
+    type=click.Path(exists=True, path_type=Path),
+    required=False,
+    help="Path to JSON file containing tool configurations"
 )
 @click.option("--tool-choice", type=str, default="", help="Specific tool choice for the LLM")
+@click.option("--local", is_flag=True, help="Force use of local model even if not in LLM enum")
 @click.pass_context
 def chat(
     ctx,
@@ -452,23 +542,37 @@ def chat(
     tools: Optional[str],
     tools_file: Optional[Path],
     tool_choice: Optional[str],
+    local: bool,
 ):
     """
-    Run chat inference on an LLM model.
+    Run chat inference on an LLM model (local or external).
 
-    This command
-
-    Tool call formatting is based on OpenAI documentation tool calls (see here: https://platform.openai.com/docs/guides/function-calling).
+    This command supports both local OpenGradient models and external providers.
+    Tool calling is supported for compatible models.
 
     Example usage:
 
     \b
-
-    opengradient chat --model
+    # Local model
+    opengradient chat --model meta-llama/Meta-Llama-3-8B-Instruct --messages '[{"role":"user","content":"hello"}]' --max-tokens 50
+
+    # External OpenAI model with tools
+    opengradient chat --model gpt-4o --messages-file messages.json --tools-file tools.json --max-tokens 200
+
+    # External Anthropic model
+    opengradient chat --model claude-haiku-4-5-20251001 --messages '[{"role":"user","content":"Write a poem"}]' --max-tokens 100
     """
     client: Client = ctx.obj["client"]
+
     try:
-
+        is_local = local or model_cid in [llm.value for llm in LLM]
+
+        if is_local:
+            click.echo(f'Running LLM chat inference for local model "{model_cid}"\n')
+        else:
+            click.echo(f'Running LLM chat inference for external model "{model_cid}"\n')
+
+        # Parse messages
         if not messages and not messages_file:
             click.echo("Must specify either messages or messages-file")
             ctx.exit(1)
@@ -488,10 +592,10 @@ def chat(
         with messages_file.open("r") as file:
             messages = json.load(file)
 
-        # Parse tools
+        # Parse tools
         if (tools and tools != "[]") and tools_file:
             click.echo("Cannot have both tools and tools-file")
-
+            ctx.exit(1)
             return
 
         parsed_tools = []
@@ -532,23 +636,37 @@ def chat(
             temperature=temperature,
             tools=parsed_tools,
             tool_choice=tool_choice,
+            local_model=local,
         )
 
-        print_llm_chat_result(
+        print_llm_chat_result(
+            model_cid,
+            completion_output.transaction_hash,
+            completion_output.finish_reason,
+            completion_output.chat_output,
+            is_local
+        )
+
     except Exception as e:
         click.echo(f"Error running LLM chat inference: {str(e)}")
 
 
-def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output):
+def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_local=True):
     click.secho("✅ LLM Chat Successful", fg="green", bold=True)
     click.echo("──────────────────────────────────────")
-    click.echo("Model
+    click.echo("Model: ", nl=False)
     click.secho(model_cid, fg="cyan", bold=True)
-
-
-
-
-
+
+    if is_local and tx_hash != "external":
+        click.echo("Transaction hash: ", nl=False)
+        click.secho(tx_hash, fg="cyan", bold=True)
+        block_explorer_link = f"{DEFAULT_BLOCKCHAIN_EXPLORER}0x{tx_hash}"
+        click.echo("Block explorer link: ", nl=False)
+        click.secho(block_explorer_link, fg="blue", underline=True)
+    else:
+        click.echo("Source: ", nl=False)
+        click.secho("External Provider", fg="cyan", bold=True)
+
     click.echo("──────────────────────────────────────")
     click.secho("Finish Reason: ", fg="yellow", bold=True)
     click.echo()
@@ -557,7 +675,6 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output):
     click.secho("Chat Output:", fg="yellow", bold=True)
     click.echo()
     for key, value in chat_output.items():
-        # If the value doesn't give any information, don't print it
        if value != None and value != "" and value != "[]" and value != []:
             click.echo(f"{key}: {value}")
     click.echo()
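
Read together, the new config commands and the --local flag give the CLI a flow roughly like the following (illustrative only; these are the commands defined above, with the key value left as a placeholder):

    opengradient config set-api-key --provider openai --key <your-openai-key>
    opengradient completion --model gpt-4o --prompt "Hello" --max-tokens 50
    opengradient config remove-api-key --provider openai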

{opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/client.py
@@ -30,7 +30,7 @@ from .types import (
     ModelRepository,
     FileUploadResult,
 )
-from .defaults import DEFAULT_IMAGE_GEN_HOST, DEFAULT_IMAGE_GEN_PORT, DEFAULT_SCHEDULER_ADDRESS
+from .defaults import DEFAULT_IMAGE_GEN_HOST, DEFAULT_IMAGE_GEN_PORT, DEFAULT_SCHEDULER_ADDRESS, DEFAULT_LLM_SERVER_URL
 from .utils import convert_array_to_model_output, convert_to_model_input, convert_to_model_output
 
 _FIREBASE_CONFIG = {
@@ -62,7 +62,21 @@ class Client:
     _api_url: str
     _inference_abi: Dict
     _precompile_abi: Dict
-
+    _llm_server_url: str
+    _external_api_keys: Dict[str, str]
+    def __init__(
+        self,
+        private_key: str,
+        rpc_url: str,
+        api_url: str,
+        contract_address: str,
+        email: Optional[str] = None,
+        password: Optional[str] = None,
+        llm_server_url: Optional[str] = DEFAULT_LLM_SERVER_URL,
+        openai_api_key: Optional[str] = None,
+        anthropic_api_key: Optional[str] = None,
+        google_api_key: Optional[str] = None,
+    ):
         """
         Initialize the Client with private key, RPC URL, and contract address.
 
@@ -91,6 +105,70 @@ class Client:
         else:
             self._hub_user = None
 
+        self._llm_server_url = llm_server_url
+
+        self._external_api_keys = {}
+        if openai_api_key or os.getenv("OPENAI_API_KEY"):
+            self._external_api_keys["openai"] = openai_api_key or os.getenv("OPENAI_API_KEY")
+        if anthropic_api_key or os.getenv("ANTHROPIC_API_KEY"):
+            self._external_api_keys["anthropic"] = anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")
+        if google_api_key or os.getenv("GOOGLE_API_KEY"):
+            self._external_api_keys["google"] = google_api_key or os.getenv("GOOGLE_API_KEY")
+
+    def set_api_key(self, provider: str, api_key: str):
+        """
+        Set or update API key for an external provider.
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'anthropic', 'google')
+            api_key: The API key for the provider
+        """
+        self._external_api_keys[provider] = api_key
+
+    def _is_local_model(self, model_cid: str) -> bool:
+        """
+        Check if a model is hosted locally on OpenGradient.
+
+        Args:
+            model_cid: Model identifier
+
+        Returns:
+            True if model is local, False if it should use external provider
+        """
+        # Check if it's in our local LLM enum
+        try:
+            return model_cid in [llm.value for llm in LLM]
+        except:
+            return False
+
+    def _get_provider_from_model(self, model: str) -> str:
+        """Infer provider from model name."""
+        model_lower = model.lower()
+
+        if "gpt" in model_lower or model.startswith("openai/"):
+            return "openai"
+        elif "claude" in model_lower or model.startswith("anthropic/"):
+            return "anthropic"
+        elif "gemini" in model_lower or "palm" in model_lower or model.startswith("google/"):
+            return "google"
+        elif "command" in model_lower or model.startswith("cohere/"):
+            return "cohere"
+        else:
+            return "openai"
+
+    def _get_api_key_for_model(self, model: str) -> Optional[str]:
+        """
+        Get the appropriate API key for a model.
+
+        Args:
+            model: Model identifier
+
+        Returns:
+            API key string or None
+        """
+        provider = self._get_provider_from_model(model)
+        return self._external_api_keys.get(provider)
+
     def _login_to_hub(self, email, password):
         try:
             firebase_app = firebase.initialize_app(_FIREBASE_CONFIG)
@@ -328,36 +406,48 @@ class Client:
 
     def llm_completion(
         self,
-        model_cid: LLM
-        inference_mode: LlmInferenceMode,
+        model_cid: str, # Changed from LLM to str to accept any model
         prompt: str,
         max_tokens: int = 100,
         stop_sequence: Optional[List[str]] = None,
         temperature: float = 0.0,
+        inference_mode: LlmInferenceMode = LlmInferenceMode.VANILLA,
         max_retries: Optional[int] = None,
+        local_model: Optional[bool] = False,
     ) -> TextGenerationOutput:
         """
         Perform inference on an LLM model using completions.
 
         Args:
-            model_cid (
-            inference_mode (
+            model_cid (str): The unique content identifier for the model.
+            inference_mode (LlmInferenceMode): The inference mode (only used for local models).
             prompt (str): The input prompt for the LLM.
             max_tokens (int): Maximum number of tokens for LLM output. Default is 100.
             stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None.
             temperature (float): Temperature for LLM inference, between 0 and 1. Default is 0.0.
+            max_retries (int, optional): Maximum number of retry attempts for blockchain transactions.
+            local_model (bool, optional): Force use of local model even if not in LLM enum.
 
         Returns:
             TextGenerationOutput: Generated text results including:
-                - Transaction hash
+                - Transaction hash (or "external" for external providers)
                 - String of completion output
 
         Raises:
             OpenGradientError: If the inference fails.
         """
-
+        # Check if this is a local model or external
+        if not local_model and not self._is_local_model(model_cid):
+            return self._external_llm_completion(
+                model=model_cid,
+                prompt=prompt,
+                max_tokens=max_tokens,
+                stop_sequence=stop_sequence,
+                temperature=temperature,
+            )
+
+        # Original local model logic
         def execute_transaction():
-            # Check inference mode and supported model
             if inference_mode != LlmInferenceMode.VANILLA and inference_mode != LlmInferenceMode.TEE:
                 raise OpenGradientError("Invalid inference mode %s: Inference mode must be VANILLA or TEE" % inference_mode)
 
@@ -366,14 +456,13 @@ class Client:
 
             contract = self._blockchain.eth.contract(address=self._inference_hub_contract_address, abi=self._inference_abi)
 
-            # Prepare LLM input
             llm_request = {
                 "mode": inference_mode.value,
                 "modelCID": model_cid,
                 "prompt": prompt,
                 "max_tokens": max_tokens,
                 "stop_sequence": stop_sequence or [],
-                "temperature": int(temperature * 100),
+                "temperature": int(temperature * 100),
             }
             logging.debug(f"Prepared LLM request: {llm_request}")
 
@@ -390,80 +479,117 @@ class Client:
 
         return run_with_retry(execute_transaction, max_retries)
 
+    def _external_llm_completion(
+        self,
+        model: str,
+        prompt: str,
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+    ) -> TextGenerationOutput:
+        """
+        Route completion request to external LLM server.
+
+        Args:
+            model: Model identifier
+            prompt: Input prompt
+            max_tokens: Maximum tokens to generate
+            stop_sequence: Stop sequences
+            temperature: Sampling temperature
+
+        Returns:
+            TextGenerationOutput with completion
+
+        Raises:
+            OpenGradientError: If request fails
+        """
+        url = f"{self._llm_server_url}/v1/completions"
+
+        headers = {"Content-Type": "application/json"}
+        api_key = self._get_api_key_for_model(model)
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+        }
+
+        if stop_sequence:
+            payload["stop"] = stop_sequence
+
+        try:
+            response = requests.post(url, json=payload, headers=headers, timeout=60)
+            response.raise_for_status()
+
+            result = response.json()
+
+            return TextGenerationOutput(
+                transaction_hash="external", # No blockchain transaction for external
+                completion_output=result["completion"]
+            )
+
+        except requests.RequestException as e:
+            error_msg = f"External LLM completion failed: {str(e)}"
+            if hasattr(e, 'response') and e.response is not None:
+                try:
+                    error_detail = e.response.json()
+                    error_msg += f" - {error_detail}"
+                except:
+                    error_msg += f" - {e.response.text}"
+            logging.error(error_msg)
+            raise OpenGradientError(error_msg)
+
     def llm_chat(
         self,
-        model_cid: LLM
-        inference_mode: LlmInferenceMode,
+        model_cid: str, # Changed from LLM to str
         messages: List[Dict],
+        inference_mode: LlmInferenceMode = LlmInferenceMode.VANILLA,
         max_tokens: int = 100,
         stop_sequence: Optional[List[str]] = None,
         temperature: float = 0.0,
         tools: Optional[List[Dict]] = [],
         tool_choice: Optional[str] = None,
         max_retries: Optional[int] = None,
+        local_model: Optional[bool] = False,
     ) -> TextGenerationOutput:
         """
         Perform inference on an LLM model using chat.
 
         Args:
-            model_cid (
-            inference_mode (
-            messages (
-                This should be in OpenAI API format (https://platform.openai.com/docs/api-reference/chat/create)
-                Example:
-                [
-                    {
-                        "role": "system",
-                        "content": "You are a helpful assistant."
-                    },
-                    {
-                        "role": "user",
-                        "content": "Hello!"
-                    }
-                ]
+            model_cid (str): The unique content identifier for the model.
+            inference_mode (LlmInferenceMode): The inference mode (only used for local models).
+            messages (List[Dict]): The messages that will be passed into the chat.
             max_tokens (int): Maximum number of tokens for LLM output. Default is 100.
-            stop_sequence (List[str], optional): List of stop sequences for LLM.
-            temperature (float): Temperature for LLM inference, between 0 and 1.
-            tools (List[dict], optional): Set of tools
-
-
-
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "get_current_weather",
-                        "description": "Get the current weather in a given location",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "location": {
-                                    "type": "string",
-                                    "description": "The city and state, e.g. San Francisco, CA"
-                                },
-                                "unit": {
-                                    "type": "string",
-                                    "enum": ["celsius", "fahrenheit"]
-                                }
-                            },
-                            "required": ["location"]
-                        }
-                    }
-                }
-            ]
-            tool_choice (str, optional): Sets a specific tool to choose. Default value is "auto".
+            stop_sequence (List[str], optional): List of stop sequences for LLM.
+            temperature (float): Temperature for LLM inference, between 0 and 1.
+            tools (List[dict], optional): Set of tools for function calling.
+            tool_choice (str, optional): Sets a specific tool to choose.
+            max_retries (int, optional): Maximum number of retry attempts.
+            local_model (bool, optional): Force use of local model.
 
         Returns:
-            TextGenerationOutput: Generated text results
-                - Transaction hash
-                - Finish reason (tool_call, stop, etc.)
-                - Dictionary of chat message output (role, content, tool_call, etc.)
+            TextGenerationOutput: Generated text results.
 
         Raises:
             OpenGradientError: If the inference fails.
         """
-
+        # Check if this is a local model or external
+        if not local_model and not self._is_local_model(model_cid):
+            return self._external_llm_chat(
+                model=model_cid,
+                messages=messages,
+                max_tokens=max_tokens,
+                stop_sequence=stop_sequence,
+                temperature=temperature,
+                tools=tools,
+                tool_choice=tool_choice,
+            )
+
+        # Original local model logic
         def execute_transaction():
-            # Check inference mode and supported model
             if inference_mode != LlmInferenceMode.VANILLA and inference_mode != LlmInferenceMode.TEE:
                 raise OpenGradientError("Invalid inference mode %s: Inference mode must be VANILLA or TEE" % inference_mode)
 
@@ -472,7 +598,6 @@ class Client:
 
             contract = self._blockchain.eth.contract(address=self._inference_hub_contract_address, abi=self._inference_abi)
 
-            # For incoming chat messages, tool_calls can be empty. Add an empty array so that it will fit the ABI.
             for message in messages:
                 if "tool_calls" not in message:
                     message["tool_calls"] = []
@@ -481,7 +606,6 @@ class Client:
                 if "name" not in message:
                     message["name"] = ""
 
-            # Create simplified tool structure for smart contract
             converted_tools = []
             if tools is not None:
                 for tool in tools:
@@ -496,14 +620,13 @@ class Client:
                         raise OpenGradientError("Chat LLM failed to convert parameters into JSON: %s", e)
                     converted_tools.append(converted_tool)
 
-            # Prepare LLM input
             llm_request = {
                 "mode": inference_mode.value,
                 "modelCID": model_cid,
                 "messages": messages,
                 "max_tokens": max_tokens,
                 "stop_sequence": stop_sequence or [],
-                "temperature": int(temperature * 100),
+                "temperature": int(temperature * 100),
                 "tools": converted_tools or [],
                 "tool_choice": tool_choice if tool_choice else ("" if tools is None else "auto"),
             }
@@ -529,6 +652,78 @@ class Client:
 
         return run_with_retry(execute_transaction, max_retries)
 
+    def _external_llm_chat(
+        self,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 100,
+        stop_sequence: Optional[List[str]] = None,
+        temperature: float = 0.0,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: Optional[str] = None,
+    ) -> TextGenerationOutput:
+        """
+        Route chat request to external LLM server.
+
+        Args:
+            model: Model identifier
+            messages: List of chat messages
+            max_tokens: Maximum tokens to generate
+            stop_sequence: Stop sequences
+            temperature: Sampling temperature
+            tools: Function calling tools
+            tool_choice: Tool selection strategy
+
+        Returns:
+            TextGenerationOutput with chat completion
+
+        Raises:
+            OpenGradientError: If request fails
+        """
+        url = f"{self._llm_server_url}/v1/chat/completions"
+
+        headers = {"Content-Type": "application/json"}
+        api_key = self._get_api_key_for_model(model)
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        payload = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+        }
+
+        if stop_sequence:
+            payload["stop"] = stop_sequence
+
+        if tools:
+            payload["tools"] = tools
+            payload["tool_choice"] = tool_choice or "auto"
+
+        try:
+            response = requests.post(url, json=payload, headers=headers, timeout=60)
+            response.raise_for_status()
+
+            result = response.json()
+
+            return TextGenerationOutput(
+                transaction_hash="external", # No blockchain transaction for external
+                finish_reason=result["finish_reason"],
+                chat_output=result["message"]
+            )
+
+        except requests.RequestException as e:
+            error_msg = f"External LLM chat failed: {str(e)}"
+            if hasattr(e, 'response') and e.response is not None:
+                try:
+                    error_detail = e.response.json()
+                    error_msg += f" - {error_detail}"
+                except:
+                    error_msg += f" - {e.response.text}"
+            logging.error(error_msg)
+            raise OpenGradientError(error_msg)
+
     def list_files(self, model_name: str, version: str) -> List[Dict]:
         """
         List files for a specific version of a model.
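
The client-side changes above route any model_cid that is not in the LLM enum (and not forced local via local_model) to the external LLM server, attaching whichever provider key is configured. A minimal SDK-level sketch of that path, assuming only the constructor parameters and methods added in this diff (the private key and API key values are placeholders, not real credentials):

    from opengradient.client import Client
    from opengradient.defaults import DEFAULT_RPC_URL, DEFAULT_API_URL, DEFAULT_INFERENCE_CONTRACT_ADDRESS

    client = Client(
        private_key="<hex-private-key>",       # placeholder
        rpc_url=DEFAULT_RPC_URL,
        api_url=DEFAULT_API_URL,
        contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS,
        openai_api_key="<openai-api-key>",     # or set OPENAI_API_KEY in the environment
    )

    # "gpt-4o" is not in the LLM enum, so this call goes through
    # _external_llm_completion() and returns transaction_hash == "external".
    result = client.llm_completion(model_cid="gpt-4o", prompt="Say hello", max_tokens=50)
    print(result.completion_output)

Models from the LLM enum still go through the on-chain inference contract exactly as before.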

{opengradient-0.4.14 → opengradient-0.5.0a1}/src/opengradient/defaults.py
@@ -7,4 +7,5 @@ DEFAULT_INFERENCE_CONTRACT_ADDRESS = "0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE
 DEFAULT_SCHEDULER_ADDRESS = "0x7179724De4e7FF9271FA40C0337c7f90C0508eF6"
 DEFAULT_BLOCKCHAIN_EXPLORER = "https://explorer.opengradient.ai/tx/"
 DEFAULT_IMAGE_GEN_HOST = "18.217.25.69"
-DEFAULT_IMAGE_GEN_PORT = 5125
+DEFAULT_IMAGE_GEN_PORT = 5125
+DEFAULT_LLM_SERVER_URL = "http://35.225.197.84:8000"

{opengradient-0.4.14 → opengradient-0.5.0a1/src/opengradient.egg-info}/PKG-INFO
@@ -1,34 +1,12 @@
 Metadata-Version: 2.4
 Name: opengradient
-Version: 0.4.14
+Version: 0.5.0a1
 Summary: Python SDK for OpenGradient decentralized model management & inference services
-Author-email: OpenGradient <
-License: MIT
-
-Copyright (c) 2024 OpenGradient
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
+Author-email: OpenGradient <kyle@vannalabs.ai>
+License-Expression: MIT
 Project-URL: Homepage, https://opengradient.ai
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12

All remaining files listed above with +0 -0 (LICENSE, README.md, setup.cfg, and the rest of src/opengradient, including src/opengradient/alphasense/read_workflow_tool.py and src/opengradient/workflow_models/workflow_models.py) are unchanged between the two versions; they differ only in the versioned directory prefix.