autobyteus 1.1.8__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +6 -2
- autobyteus/agent/handlers/inter_agent_message_event_handler.py +17 -19
- autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +6 -3
- autobyteus/agent/handlers/tool_result_event_handler.py +61 -18
- autobyteus/agent/handlers/user_input_message_event_handler.py +19 -10
- autobyteus/agent/hooks/base_phase_hook.py +17 -0
- autobyteus/agent/hooks/hook_registry.py +15 -27
- autobyteus/agent/input_processor/base_user_input_processor.py +17 -1
- autobyteus/agent/input_processor/processor_registry.py +15 -27
- autobyteus/agent/llm_response_processor/base_processor.py +17 -1
- autobyteus/agent/llm_response_processor/processor_registry.py +15 -24
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +14 -0
- autobyteus/agent/message/agent_input_user_message.py +15 -2
- autobyteus/agent/message/send_message_to.py +1 -1
- autobyteus/agent/processor_option.py +17 -0
- autobyteus/agent/sender_type.py +1 -0
- autobyteus/agent/system_prompt_processor/base_processor.py +17 -1
- autobyteus/agent/system_prompt_processor/processor_registry.py +15 -27
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +10 -0
- autobyteus/agent/tool_execution_result_processor/base_processor.py +17 -1
- autobyteus/agent/tool_execution_result_processor/processor_registry.py +15 -1
- autobyteus/agent/workspace/base_workspace.py +1 -1
- autobyteus/agent/workspace/workspace_definition.py +1 -1
- autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +1 -1
- autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +2 -2
- autobyteus/agent_team/task_notification/__init__.py +4 -0
- autobyteus/agent_team/task_notification/activation_policy.py +70 -0
- autobyteus/agent_team/task_notification/system_event_driven_agent_task_notifier.py +56 -122
- autobyteus/agent_team/task_notification/task_activator.py +66 -0
- autobyteus/cli/agent_team_tui/state.py +17 -20
- autobyteus/cli/agent_team_tui/widgets/focus_pane.py +1 -1
- autobyteus/cli/agent_team_tui/widgets/task_board_panel.py +1 -1
- autobyteus/clients/__init__.py +10 -0
- autobyteus/clients/autobyteus_client.py +318 -0
- autobyteus/clients/cert_utils.py +105 -0
- autobyteus/clients/certificates/cert.pem +34 -0
- autobyteus/events/event_types.py +2 -2
- autobyteus/llm/api/autobyteus_llm.py +1 -1
- autobyteus/llm/api/gemini_llm.py +45 -54
- autobyteus/llm/api/qwen_llm.py +25 -0
- autobyteus/llm/api/zhipu_llm.py +26 -0
- autobyteus/llm/autobyteus_provider.py +9 -3
- autobyteus/llm/llm_factory.py +39 -0
- autobyteus/llm/ollama_provider_resolver.py +1 -0
- autobyteus/llm/providers.py +1 -0
- autobyteus/llm/token_counter/token_counter_factory.py +3 -0
- autobyteus/llm/token_counter/zhipu_token_counter.py +24 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +5 -2
- autobyteus/multimedia/audio/api/gemini_audio_client.py +84 -153
- autobyteus/multimedia/audio/audio_client_factory.py +47 -22
- autobyteus/multimedia/audio/audio_model.py +13 -6
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +9 -3
- autobyteus/multimedia/audio/base_audio_client.py +3 -1
- autobyteus/multimedia/image/api/autobyteus_image_client.py +13 -6
- autobyteus/multimedia/image/api/gemini_image_client.py +72 -130
- autobyteus/multimedia/image/api/openai_image_client.py +4 -2
- autobyteus/multimedia/image/autobyteus_image_provider.py +9 -3
- autobyteus/multimedia/image/base_image_client.py +6 -2
- autobyteus/multimedia/image/image_client_factory.py +20 -19
- autobyteus/multimedia/image/image_model.py +13 -6
- autobyteus/multimedia/providers.py +1 -0
- autobyteus/task_management/__init__.py +10 -10
- autobyteus/task_management/base_task_board.py +14 -6
- autobyteus/task_management/converters/__init__.py +0 -2
- autobyteus/task_management/converters/task_board_converter.py +7 -16
- autobyteus/task_management/events.py +6 -6
- autobyteus/task_management/in_memory_task_board.py +48 -38
- autobyteus/task_management/schemas/__init__.py +2 -2
- autobyteus/task_management/schemas/{plan_definition.py → task_definition.py} +6 -7
- autobyteus/task_management/schemas/task_status_report.py +1 -2
- autobyteus/task_management/task.py +60 -0
- autobyteus/task_management/tools/__init__.py +6 -2
- autobyteus/task_management/tools/assign_task_to.py +125 -0
- autobyteus/task_management/tools/get_my_tasks.py +80 -0
- autobyteus/task_management/tools/get_task_board_status.py +3 -3
- autobyteus/task_management/tools/publish_task.py +77 -0
- autobyteus/task_management/tools/publish_tasks.py +74 -0
- autobyteus/task_management/tools/update_task_status.py +5 -5
- autobyteus/tools/__init__.py +54 -16
- autobyteus/tools/base_tool.py +4 -4
- autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +1 -1
- autobyteus/tools/browser/standalone/navigate_to.py +1 -1
- autobyteus/tools/browser/standalone/web_page_pdf_generator.py +1 -1
- autobyteus/tools/browser/standalone/webpage_image_downloader.py +1 -1
- autobyteus/tools/browser/standalone/webpage_reader.py +1 -1
- autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +1 -1
- autobyteus/tools/download_media_tool.py +136 -0
- autobyteus/tools/file/file_editor.py +200 -0
- autobyteus/tools/functional_tool.py +1 -1
- autobyteus/tools/google_search.py +1 -1
- autobyteus/tools/mcp/factory.py +1 -1
- autobyteus/tools/mcp/schema_mapper.py +1 -1
- autobyteus/tools/mcp/tool.py +1 -1
- autobyteus/tools/multimedia/__init__.py +2 -0
- autobyteus/tools/multimedia/audio_tools.py +10 -20
- autobyteus/tools/multimedia/image_tools.py +21 -22
- autobyteus/tools/multimedia/media_reader_tool.py +117 -0
- autobyteus/tools/pydantic_schema_converter.py +1 -1
- autobyteus/tools/registry/tool_definition.py +1 -1
- autobyteus/tools/timer.py +1 -1
- autobyteus/tools/tool_meta.py +1 -1
- autobyteus/tools/usage/formatters/default_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/default_xml_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +59 -3
- autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/google_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/openai_json_example_formatter.py +1 -1
- autobyteus/tools/usage/parsers/_string_decoders.py +18 -0
- autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +9 -1
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +15 -1
- autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +4 -1
- autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +4 -1
- autobyteus/{tools → utils}/parameter_schema.py +1 -1
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/METADATA +4 -3
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/RECORD +122 -108
- examples/run_poem_writer.py +1 -1
- autobyteus/task_management/converters/task_plan_converter.py +0 -48
- autobyteus/task_management/task_plan.py +0 -110
- autobyteus/task_management/tools/publish_task_plan.py +0 -101
- autobyteus/tools/image_downloader.py +0 -99
- autobyteus/tools/pdf_downloader.py +0 -89
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/top_level.txt +0 -0
autobyteus/llm/api/gemini_llm.py
CHANGED
@@ -1,6 +1,7 @@
 import logging
-from typing import Dict, List, AsyncGenerator, Any
-
+from typing import Dict, Optional, List, AsyncGenerator, Any
+from google import genai
+from google.genai import types as genai_types
 import os
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.base_llm import BaseLLM
@@ -13,66 +14,60 @@ from autobyteus.llm.user_message import LLMUserMessage
 logger = logging.getLogger(__name__)
 
 def _format_gemini_history(messages: List[Message]) -> List[Dict[str, Any]]:
-    """
-    Formats internal message history for the Gemini API.
-    This function remains compatible with the older library.
-    """
+    """Formats internal message history for the Gemini API."""
     history = []
-    # System message is handled separately in the
+    # System message is handled separately in the new API
     for msg in messages:
         if msg.role in [MessageRole.USER, MessageRole.ASSISTANT]:
+            # NOTE: This history conversion will need to be updated for multimodal messages
             role = 'model' if msg.role == MessageRole.ASSISTANT else 'user'
+            # The `parts` must be a list of dictionaries (Part objects), not a list of strings.
             history.append({"role": role, "parts": [{"text": msg.content}]})
     return history
 
 class GeminiLLM(BaseLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
+        self.generation_config_dict = {
+            "response_mime_type": "text/plain",
+        }
+
         if model is None:
-            model = LLMModel['gemini-2.5-flash']
+            model = LLMModel['gemini-2.5-flash']
         if llm_config is None:
             llm_config = LLMConfig()
-
+
         super().__init__(model=model, llm_config=llm_config)
-
-
-        self.initialize()
-
-        system_instruction = self.system_message if self.system_message else None
-
-        self.model = genai.GenerativeModel(
-            model_name=self.model.value,
-            system_instruction=system_instruction
-        )
+        self.client = self.initialize()
+        self.async_client = self.client.aio
 
-    @
-    def initialize():
-        """
-        CHANGED: This method now configures the genai library with the API key
-        instead of creating a client instance.
-        """
+    @classmethod
+    def initialize(cls) -> genai.client.Client:
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            logger.error("GEMINI_API_KEY environment variable is not set.")
            raise ValueError("GEMINI_API_KEY environment variable is not set.")
        try:
-            genai.
+            return genai.Client()
        except Exception as e:
-            logger.error(f"Failed to
-            raise ValueError(f"Failed to
+            logger.error(f"Failed to initialize Gemini client: {str(e)}")
+            raise ValueError(f"Failed to initialize Gemini client: {str(e)}")
 
-    def _get_generation_config(self) ->
-        """
-
-
-
-        ""
-
-
-
-
-
-
+    def _get_generation_config(self) -> genai_types.GenerateContentConfig:
+        """Builds the generation config, handling special cases like 'thinking'."""
+        config = self.generation_config_dict.copy()
+
+        thinking_config = None
+        if "flash" in self.model.value:
+            thinking_config = genai_types.ThinkingConfig(thinking_budget=0)
+
+        # System instruction is now part of the config
+        system_instruction = self.system_message if self.system_message else None
+
+        return genai_types.GenerateContentConfig(
+            **config,
+            thinking_config=thinking_config,
+            system_instruction=system_instruction
+        )
 
     async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
@@ -81,20 +76,19 @@ class GeminiLLM(BaseLLM):
         history = _format_gemini_history(self.messages)
         generation_config = self._get_generation_config()
 
-
-
+        response = await self.async_client.models.generate_content(
+            model=f"models/{self.model.value}",
             contents=history,
-
+            config=generation_config,
         )
 
         assistant_message = response.text
         self.add_assistant_message(assistant_message)
 
-        # CHANGED: Token usage is extracted from 'usage_metadata'.
         token_usage = TokenUsage(
-            prompt_tokens=
-            completion_tokens=
-            total_tokens=
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0
        )
 
        return CompleteResponse(
@@ -113,11 +107,10 @@ class GeminiLLM(BaseLLM):
         history = _format_gemini_history(self.messages)
         generation_config = self._get_generation_config()
 
-
-
+        response_stream = await self.async_client.models.generate_content_stream(
+            model=f"models/{self.model.value}",
             contents=history,
-
-            stream=True
+            config=generation_config,
         )
 
         async for chunk in response_stream:
@@ -130,8 +123,6 @@ class GeminiLLM(BaseLLM):
 
         self.add_assistant_message(complete_response)
 
-        # NOTE: The old library's async stream does not easily expose token usage.
-        # Keeping it at 0, consistent with your original implementation.
         token_usage = TokenUsage(
             prompt_tokens=0,
             completion_tokens=0,
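The rewrite above migrates GeminiLLM from the legacy google.generativeai module-level configuration to the newer google-genai client, moving the system instruction and the "thinking" budget into a per-request GenerateContentConfig. A minimal standalone sketch of the call pattern the new code adopts (prompt and model string are illustrative; assumes the google-genai package is installed and GEMINI_API_KEY is set):

import asyncio
from google import genai
from google.genai import types as genai_types

async def main() -> None:
    # genai.Client() reads GEMINI_API_KEY from the environment.
    client = genai.Client()
    config = genai_types.GenerateContentConfig(
        response_mime_type="text/plain",
        system_instruction="You are a helpful assistant.",
        # The diff zeroes the thinking budget for "flash" models.
        thinking_config=genai_types.ThinkingConfig(thinking_budget=0),
    )
    response = await client.aio.models.generate_content(
        model="models/gemini-2.5-flash",
        contents=[{"role": "user", "parts": [{"text": "Say hello."}]}],
        config=config,
    )
    print(response.text)

asyncio.run(main())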
autobyteus/llm/api/qwen_llm.py
ADDED
@@ -0,0 +1,25 @@
+import logging
+from typing import Optional
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
+
+logger = logging.getLogger(__name__)
+
+class QwenLLM(OpenAICompatibleLLM):
+    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
+        if model is None:
+            model = LLMModel['qwen3-max-preview']
+        if llm_config is None:
+            llm_config = LLMConfig()
+
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="DASHSCOPE_API_KEY",
+            base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
+        )
+        logger.info(f"QwenLLM initialized with model: {self.model}")
+
+    async def cleanup(self):
+        await super().cleanup()
autobyteus/llm/api/zhipu_llm.py
ADDED
@@ -0,0 +1,26 @@
+import logging
+from typing import Optional
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
+
+logger = logging.getLogger(__name__)
+
+class ZhipuLLM(OpenAICompatibleLLM):
+    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
+        # Provide defaults if not specified
+        if model is None:
+            model = LLMModel['glm-4.6']
+        if llm_config is None:
+            llm_config = LLMConfig()
+
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="ZHIPU_API_KEY",
+            base_url="https://open.bigmodel.cn/api/paas/v4/"
+        )
+        logger.info(f"ZhipuLLM initialized with model: {self.model}")
+
+    async def cleanup(self):
+        await super().cleanup()
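Both new files follow the same thin-wrapper pattern: pin a default model, name the API-key environment variable, and point at an OpenAI-compatible base URL, deferring all request handling to OpenAICompatibleLLM. A hedged sketch of how a further provider could be added the same way (the class name, model key, env var, and URL below are hypothetical):

import logging
from autobyteus.llm.models import LLMModel
from autobyteus.llm.utils.llm_config import LLMConfig
from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM

logger = logging.getLogger(__name__)

class ExampleLLM(OpenAICompatibleLLM):
    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
        if model is None:
            model = LLMModel['example-model']  # hypothetical registered model
        if llm_config is None:
            llm_config = LLMConfig()

        super().__init__(
            model=model,
            llm_config=llm_config,
            api_key_env_var="EXAMPLE_API_KEY",        # hypothetical env var
            base_url="https://api.example.com/v1"     # hypothetical endpoint
        )
        logger.info(f"ExampleLLM initialized with model: {self.model}")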
autobyteus/llm/autobyteus_provider.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Dict, Any, TYPE_CHECKING, List, Optional
 import os
 import logging
 from urllib.parse import urlparse
-from
+from autobyteus.clients import AutobyteusClient
 
 if TYPE_CHECKING:
     from autobyteus.llm.llm_factory import LLMFactory
@@ -19,7 +19,9 @@ class AutobyteusModelProvider:
 
     @staticmethod
     def _get_hosts() -> List[str]:
-        """
+        """
+        Gets Autobyteus LLM server hosts from env vars. Skips discovery if no host is configured.
+        """
         hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
         if hosts_str:
             return [host.strip() for host in hosts_str.split(',')]
@@ -28,7 +30,7 @@ class AutobyteusModelProvider:
         if legacy_host:
             return [legacy_host]
 
-        return [
+        return []
 
     @staticmethod
     def discover_and_register():
@@ -37,6 +39,10 @@ class AutobyteusModelProvider:
         from autobyteus.llm.llm_factory import LLMFactory
 
         hosts = AutobyteusModelProvider._get_hosts()
+        if not hosts:
+            logger.info("No Autobyteus LLM server hosts configured. Skipping Autobyteus LLM model discovery.")
+            return
+
         total_registered_count = 0
 
         for host_url in hosts:
autobyteus/llm/llm_factory.py
CHANGED
@@ -17,6 +17,8 @@ from autobyteus.llm.api.deepseek_llm import DeepSeekLLM
 from autobyteus.llm.api.gemini_llm import GeminiLLM
 from autobyteus.llm.api.grok_llm import GrokLLM
 from autobyteus.llm.api.kimi_llm import KimiLLM
+from autobyteus.llm.api.qwen_llm import QwenLLM
+from autobyteus.llm.api.zhipu_llm import ZhipuLLM
 from autobyteus.llm.ollama_provider import OllamaModelProvider
 from autobyteus.llm.lmstudio_provider import LMStudioModelProvider
 from autobyteus.utils.singleton import SingletonMeta
@@ -344,6 +346,43 @@ class LLMFactory(metaclass=SingletonMeta):
                 pricing_config=TokenPricingConfig(27.59, 27.59)
             )
         ),
+        # QWEN Provider Models
+        LLMModel(
+            name="qwen3-max",
+            value="qwen-max",
+            provider=LLMProvider.QWEN,
+            llm_class=QwenLLM,
+            canonical_name="qwen3-max",
+            default_config=LLMConfig(
+                token_limit=262144,
+                pricing_config=TokenPricingConfig(
+                    input_token_pricing=2.4,
+                    output_token_pricing=12.0
+                )
+            )
+        ),
+        # ZHIPU Provider Models
+        LLMModel(
+            name="glm-4.6",
+            value="glm-4.6",
+            provider=LLMProvider.ZHIPU,
+            llm_class=ZhipuLLM,
+            canonical_name="glm-4.6",
+            default_config=LLMConfig(
+                pricing_config=TokenPricingConfig(13.8, 13.8)
+            )
+        ),
+        LLMModel(
+            name="glm-4.6-thinking",
+            value="glm-4.6",
+            provider=LLMProvider.ZHIPU,
+            llm_class=ZhipuLLM,
+            canonical_name="glm-4.6-thinking",
+            default_config=LLMConfig(
+                pricing_config=TokenPricingConfig(13.8, 13.8),
+                extra_params={ "extra_body": { "thinking": { "type": "enabled" } } }
+            )
+        ),
     ]
     for model in supported_models:
         LLMFactory.register_model(model)
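Note that the two ZHIPU entries share value="glm-4.6" (the model string sent upstream) but register under different names; the thinking variant differs only in extra_params, which carry the OpenAI-compatible extra_body flag. A sketch of what this implies (assumes LLMModel['<name>'] indexing resolves registered names, as it does elsewhere in this diff):

from autobyteus.llm.models import LLMModel

standard = LLMModel['glm-4.6']
thinking = LLMModel['glm-4.6-thinking']  # assumed to resolve like other registered names

# Both entries target the same upstream model string...
assert standard.value == thinking.value == "glm-4.6"
# ...and differ only in their default config: the thinking variant carries
# extra_params={"extra_body": {"thinking": {"type": "enabled"}}}, which an
# OpenAI-compatible client forwards in the request body.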
autobyteus/llm/token_counter/token_counter_factory.py
CHANGED
@@ -4,6 +4,7 @@ from autobyteus.llm.token_counter.claude_token_counter import ClaudeTokenCounter
 from autobyteus.llm.token_counter.mistral_token_counter import MistralTokenCounter
 from autobyteus.llm.token_counter.deepseek_token_counter import DeepSeekTokenCounter
 from autobyteus.llm.token_counter.kimi_token_counter import KimiTokenCounter
+from autobyteus.llm.token_counter.zhipu_token_counter import ZhipuTokenCounter
 from autobyteus.llm.token_counter.base_token_counter import BaseTokenCounter
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.providers import LLMProvider
@@ -42,6 +43,8 @@ def get_token_counter(model: LLMModel, llm: 'BaseLLM') -> BaseTokenCounter:
         return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.GEMINI:
         return OpenAITokenCounter(model, llm)
+    elif model.provider == LLMProvider.ZHIPU:
+        return ZhipuTokenCounter(model, llm)
     else:
         # For models that do not have a specialized counter, raise a NotImplementedError
         raise NotImplementedError(f"No token counter available for model {model.value}")
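With the new branch in place, ZHIPU-provider models resolve to the dedicated counter. A short sketch (the import path mirrors the file shown in this diff; passing llm=None relies on the counter's optional-llm default):

from autobyteus.llm.models import LLMModel
from autobyteus.llm.token_counter.token_counter_factory import get_token_counter

counter = get_token_counter(LLMModel['glm-4.6'], llm=None)
# -> ZhipuTokenCounter, which reuses OpenAITokenCounter's counting logic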
autobyteus/llm/token_counter/zhipu_token_counter.py
ADDED
@@ -0,0 +1,24 @@
+from typing import TYPE_CHECKING
+from autobyteus.llm.token_counter.openai_token_counter import OpenAITokenCounter
+from autobyteus.llm.models import LLMModel
+
+if TYPE_CHECKING:
+    from autobyteus.llm.base_llm import BaseLLM
+
+class ZhipuTokenCounter(OpenAITokenCounter):
+    """
+    Token counter for Zhipu models. Uses the same token counting implementation as OpenAI.
+
+    This implementation inherits from OpenAITokenCounter as Zhipu uses a similar tokenization
+    approach as OpenAI's models.
+    """
+
+    def __init__(self, model: LLMModel, llm: 'BaseLLM' = None):
+        """
+        Initialize the Zhipu token counter.
+
+        Args:
+            model (LLMModel): The Zhipu model to count tokens for.
+            llm (BaseLLM, optional): The LLM instance. Defaults to None.
+        """
+        super().__init__(model, llm)
autobyteus/multimedia/audio/api/autobyteus_audio_client.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 from typing import Optional, List, Dict, Any, TYPE_CHECKING
-from
+from autobyteus.clients import AutobyteusClient
 from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
 from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
 
@@ -26,7 +26,8 @@ class AutobyteusAudioClient(BaseAudioClient):
     async def generate_speech(
         self,
         prompt: str,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> SpeechGenerationResponse:
         """
         Generates speech by calling the generate_speech endpoint on the remote Autobyteus server.
@@ -36,6 +37,8 @@ class AutobyteusAudioClient(BaseAudioClient):
 
         model_name_for_server = self.model.name
 
+        # Note: The underlying autobyteus_client.generate_speech does not currently accept **kwargs.
+        # They are accepted here for interface consistency and future-proofing.
         response_data = await self.autobyteus_client.generate_speech(
             model_name=model_name_for_server,
             prompt=prompt,