autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  3. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  4. autobyteus/agent/message/__init__.py +7 -5
  5. autobyteus/agent/message/agent_input_user_message.py +6 -16
  6. autobyteus/agent/message/context_file.py +24 -24
  7. autobyteus/agent/message/context_file_type.py +29 -8
  8. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  9. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  10. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  11. autobyteus/agent/tool_invocation.py +2 -1
  12. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  13. autobyteus/agent_team/context/agent_team_config.py +1 -0
  14. autobyteus/llm/api/autobyteus_llm.py +33 -33
  15. autobyteus/llm/api/bedrock_llm.py +13 -5
  16. autobyteus/llm/api/claude_llm.py +13 -27
  17. autobyteus/llm/api/gemini_llm.py +108 -42
  18. autobyteus/llm/api/groq_llm.py +4 -3
  19. autobyteus/llm/api/mistral_llm.py +97 -51
  20. autobyteus/llm/api/nvidia_llm.py +6 -5
  21. autobyteus/llm/api/ollama_llm.py +37 -12
  22. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  23. autobyteus/llm/autobyteus_provider.py +1 -1
  24. autobyteus/llm/base_llm.py +42 -139
  25. autobyteus/llm/extensions/base_extension.py +6 -6
  26. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  27. autobyteus/llm/llm_factory.py +106 -4
  28. autobyteus/llm/token_counter/token_counter_factory.py +1 -1
  29. autobyteus/llm/user_message.py +43 -35
  30. autobyteus/llm/utils/llm_config.py +34 -18
  31. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  32. autobyteus/llm/utils/messages.py +32 -25
  33. autobyteus/llm/utils/response_types.py +9 -3
  34. autobyteus/llm/utils/token_usage.py +6 -5
  35. autobyteus/multimedia/__init__.py +31 -0
  36. autobyteus/multimedia/audio/__init__.py +11 -0
  37. autobyteus/multimedia/audio/api/__init__.py +4 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  40. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  41. autobyteus/multimedia/audio/audio_model.py +96 -0
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  43. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  44. autobyteus/multimedia/image/__init__.py +11 -0
  45. autobyteus/multimedia/image/api/__init__.py +9 -0
  46. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  47. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  48. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  49. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  50. autobyteus/multimedia/image/base_image_client.py +67 -0
  51. autobyteus/multimedia/image/image_client_factory.py +118 -0
  52. autobyteus/multimedia/image/image_model.py +96 -0
  53. autobyteus/multimedia/providers.py +5 -0
  54. autobyteus/multimedia/runtimes.py +8 -0
  55. autobyteus/multimedia/utils/__init__.py +10 -0
  56. autobyteus/multimedia/utils/api_utils.py +19 -0
  57. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  58. autobyteus/multimedia/utils/response_types.py +13 -0
  59. autobyteus/tools/__init__.py +3 -0
  60. autobyteus/tools/multimedia/__init__.py +8 -0
  61. autobyteus/tools/multimedia/audio_tools.py +116 -0
  62. autobyteus/tools/multimedia/image_tools.py +186 -0
  63. autobyteus/tools/tool_category.py +1 -0
  64. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  65. autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
  66. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  67. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  68. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
  69. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
  70. examples/run_browser_agent.py +1 -1
  71. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  72. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
  73. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
  74. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,120 @@
1
+ import logging
2
+ from typing import Dict, Optional
3
+
4
+ from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
5
+ from autobyteus.multimedia.audio.audio_model import AudioModel
6
+ from autobyteus.multimedia.providers import MultimediaProvider
7
+ from autobyteus.multimedia.audio.api.gemini_audio_client import GeminiAudioClient
8
+ from autobyteus.multimedia.audio.autobyteus_audio_provider import AutobyteusAudioModelProvider
9
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
10
+ from autobyteus.utils.singleton import SingletonMeta
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Voice names offered as the `allowed_values` of the Gemini TTS model's
# `voice_name` parameter (see the schema built in `_initialize_registry`).
GEMINI_TTS_VOICES = [
    "Zephyr",
    "Puck",
    "Charon",
    "Kore",
    "Fenrir",
    "Leda",
    "Orus",
    "Aoede",
    "Callirrhoe",
    "Autonoe",
    "Enceladus",
    "Iapetus",
    "Umbriel",
    "Algieba",
    "Despina",
    "Erinome",
    "Algenib",
    "Rasalgethi",
    "Laomedeia",
    "Achernar",
    "Alnilam",
    "Schedar",
    "Gacrux",
    "Pulcherrima",
    "Achird",
    "Zubenelgenubi",
    "Vindemiatrix",
    "Sadachbia",
    "Sadaltager",
    "Sulafat",
]
21
+
22
class AudioClientFactory(metaclass=SingletonMeta):
    """
    A factory for creating instances of audio clients based on registered AudioModels.

    The registry is class-level state, keyed by ``AudioModel.model_identifier``.
    It is populated lazily on first use: built-in models are registered first,
    then models discovered through ``AutobyteusAudioModelProvider``.
    """
    # Registry of known models, keyed by their unique model identifier.
    _models_by_identifier: Dict[str, AudioModel] = {}
    # Flipped to True once `_initialize_registry` has completed.
    _initialized = False

    @staticmethod
    def ensure_initialized():
        """Ensures the factory is initialized before use (no-op after the first call)."""
        if not AudioClientFactory._initialized:
            AudioClientFactory._initialize_registry()
            AudioClientFactory._initialized = True

    @staticmethod
    def reinitialize():
        """Reinitializes the model registry, clearing all models and re-discovering them."""
        logger.info("Reinitializing Audio model registry...")
        AudioClientFactory._initialized = False
        AudioClientFactory._models_by_identifier.clear()
        AudioClientFactory.ensure_initialized()
        logger.info("Audio model registry reinitialized successfully.")

    @staticmethod
    def _initialize_registry():
        """Initializes the registry with built-in audio models, then runs remote discovery."""

        # Google Gemini Audio Models
        gemini_tts_model = AudioModel(
            name="gemini-2.5-flash-tts",
            value="gemini-2.5-flash-preview-tts",
            provider=MultimediaProvider.GOOGLE,
            client_class=GeminiAudioClient,
            parameter_schema={
                "mode": {
                    "type": "string",
                    "default": "single-speaker",
                    "allowed_values": ["single-speaker", "multi-speaker"],
                    "description": "The speech generation mode. 'single-speaker' for a consistent voice, or 'multi-speaker' to assign different voices to speakers identified in the prompt."
                },
                "voice_name": {
                    "type": "string",
                    "default": "Kore",
                    "allowed_values": GEMINI_TTS_VOICES,
                    "description": "The voice to use for single-speaker generation."
                },
                "style_instructions": {
                    "type": "string",
                    "description": "Optional instructions on the style of speech, e.g., 'Say this in a dramatic whisper'."
                },
                "speaker_mapping": {
                    "type": "object",
                    "description": "Required for multi-speaker mode. An object mapping speaker names from the prompt (e.g., 'Joe') to a voice name (e.g., 'Puck')."
                }
            }
        )

        models_to_register = [
            gemini_tts_model,
        ]

        for model in models_to_register:
            AudioClientFactory.register_model(model)

        logger.info("Default API-based audio models registered.")

        # Discover models from remote Autobyteus servers
        AutobyteusAudioModelProvider.discover_and_register()

    @staticmethod
    def register_model(model: AudioModel):
        """
        Registers a new audio model under its ``model_identifier``.

        A provider that is not already a ``MultimediaProvider`` is coerced to
        one; if that fails the model is rejected (an error is logged and the
        registry is left untouched). An existing entry with the same identifier
        is replaced, with a warning.
        """
        identifier = model.model_identifier

        # Validate the provider first so that a model which ends up rejected
        # never produces a misleading "Overwriting" warning.
        if not isinstance(model.provider, MultimediaProvider):
            try:
                model.provider = MultimediaProvider(model.provider)
            except ValueError:
                logger.error(
                    "Cannot register model '%s' with unknown provider '%s'.",
                    identifier, model.provider,
                )
                return

        if identifier in AudioClientFactory._models_by_identifier:
            logger.warning("Audio model '%s' is already registered. Overwriting.", identifier)

        AudioClientFactory._models_by_identifier[identifier] = model

    @staticmethod
    def create_audio_client(model_identifier: str, config_override: Optional[MultimediaConfig] = None) -> BaseAudioClient:
        """
        Creates an instance of a registered audio client for a specific model.

        Args:
            model_identifier: Registry key of the model to instantiate.
            config_override: Optional configuration passed through to
                ``AudioModel.create_client``.

        Raises:
            ValueError: If no model is registered under ``model_identifier``.
        """
        AudioClientFactory.ensure_initialized()

        model = AudioClientFactory._models_by_identifier.get(model_identifier)
        if model is None:
            raise ValueError(f"No audio model registered with the name '{model_identifier}'. "
                             f"Available models: {list(AudioClientFactory._models_by_identifier.keys())}")

        logger.info("Creating instance of audio client for model '%s'.", model_identifier)
        return model.create_client(config_override)
119
+
120
+ audio_client_factory = AudioClientFactory()
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any
4
+ from urllib.parse import urlparse
5
+
6
+ from autobyteus.multimedia.providers import MultimediaProvider
7
+ from autobyteus.multimedia.runtimes import MultimediaRuntime
8
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
9
+
10
+ if TYPE_CHECKING:
11
+ from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class AudioModelMeta(type):
    """
    Metaclass for AudioModel to allow discovery and access like an Enum.

    Iteration, indexing and ``len()`` on the class are served from the
    ``AudioClientFactory`` registry, which is initialized on demand. The
    factory is imported inside each method rather than at module level.
    """

    def __iter__(cls) -> "Iterator[AudioModel]":
        """Yields every model currently held in the factory registry."""
        from autobyteus.multimedia.audio.audio_client_factory import AudioClientFactory
        AudioClientFactory.ensure_initialized()
        yield from AudioClientFactory._models_by_identifier.values()

    def __getitem__(cls, name_or_identifier: str) -> "AudioModel":
        """
        Looks a model up by its unique identifier or, failing that, by name.

        Raises:
            KeyError: If neither an identifier nor a name matches.
        """
        from autobyteus.multimedia.audio.audio_client_factory import AudioClientFactory
        AudioClientFactory.ensure_initialized()
        registry = AudioClientFactory._models_by_identifier

        # Exact identifier match always wins.
        model = registry.get(name_or_identifier)
        if model is not None:
            return model

        # Fallback: identifiers of Autobyteus-hosted models are "name@host",
        # so a plain name would otherwise never match them. The first
        # registered model with that name is returned.
        for candidate in registry.values():
            if candidate.name == name_or_identifier:
                return candidate

        raise KeyError(f"Audio model '{name_or_identifier}' not found.")

    def __len__(cls) -> int:
        """Returns the number of registered models."""
        from autobyteus.multimedia.audio.audio_client_factory import AudioClientFactory
        AudioClientFactory.ensure_initialized()
        return len(AudioClientFactory._models_by_identifier)
37
+
38
+
39
class AudioModel(metaclass=AudioModelMeta):
    """
    Represents a single audio model's metadata.

    Besides the descriptive fields, each instance carries a ``default_config``
    derived from the ``default`` entries of its ``parameter_schema``.
    """
    def __init__(
        self,
        name: str,
        value: str,
        provider: MultimediaProvider,
        client_class: Type["BaseAudioClient"],
        parameter_schema: Optional[Dict[str, Any]] = None,
        runtime: MultimediaRuntime = MultimediaRuntime.API,
        host_url: Optional[str] = None
    ):
        """
        Args:
            name: Model name; also the identifier for non-Autobyteus runtimes.
            value: Second model name field, stored as given.
            provider: Provider of the model.
            client_class: Client class instantiated by ``create_client``.
            parameter_schema: Optional mapping of parameter name to a metadata
                dict; entries with a ``"default"`` key seed ``default_config``.
            runtime: Runtime the model is served from.
            host_url: Server URL; used to build the identifier for the
                AUTOBYTEUS runtime.
        """
        self.name = name
        self.value = value
        self.provider = provider
        self.client_class = client_class
        self.runtime = runtime
        self.host_url = host_url
        self.parameter_schema = parameter_schema or {}

        # Automatically build default_config from the schema's default values
        default_params = {
            key: meta["default"]
            for key, meta in self.parameter_schema.items()
            if "default" in meta
        }
        self.default_config = MultimediaConfig(params=default_params)

    @property
    def model_identifier(self) -> str:
        """
        Returns the unique identifier for the model.

        For the AUTOBYTEUS runtime with a host URL this is ``"<name>@<hostname>"``;
        when no hostname can be extracted the raw ``host_url`` is used instead.
        Otherwise the identifier is just ``name``.
        """
        if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
            try:
                host = urlparse(self.host_url).hostname
            except Exception:
                # Unparseable URL: fall through to the raw host_url below.
                host = None
            # `hostname` is None for URLs without a network location; using the
            # raw URL avoids every such model collapsing onto "<name>@None".
            return f"{self.name}@{host or self.host_url}"
        return self.name

    def create_client(self, config_override: Optional[MultimediaConfig] = None) -> "BaseAudioClient":
        """
        Instantiates the client class for this model.

        When ``config_override`` is given, it is merged into a deep copy of
        ``default_config`` so the model's shared defaults are never mutated.
        """
        config_to_use = self.default_config
        if config_override:
            from copy import deepcopy
            config_to_use = deepcopy(self.default_config)
            config_to_use.merge_with(config_override)

        return self.client_class(model=self, config=config_to_use)

    def __repr__(self):
        # The provider may still be a raw value (the factory coerces it to the
        # enum only at registration time), so do not assume enum attributes.
        provider_name = getattr(self.provider, "name", self.provider)
        runtime_value = getattr(self.runtime, "value", self.runtime)
        return (
            f"AudioModel(identifier='{self.model_identifier}', "
            f"provider='{provider_name}', runtime='{runtime_value}')"
        )
@@ -0,0 +1,108 @@
1
+ import logging
2
+ from typing import List
3
+ import os
4
+ from urllib.parse import urlparse
5
+
6
+ from autobyteus_llm_client import AutobyteusClient
7
+ from autobyteus.multimedia.audio.api.autobyteus_audio_client import AutobyteusAudioClient
8
+ from autobyteus.multimedia.audio.audio_model import AudioModel
9
+ from autobyteus.multimedia.providers import MultimediaProvider
10
+ from autobyteus.multimedia.runtimes import MultimediaRuntime
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class AutobyteusAudioModelProvider:
    """
    Discovers and registers audio models from remote Autobyteus server instances.
    """
    # Host used when neither environment variable is set.
    DEFAULT_SERVER_URL = 'http://localhost:8000'

    @staticmethod
    def _get_hosts() -> List[str]:
        """
        Gets Autobyteus server hosts from env vars.

        Precedence: ``AUTOBYTEUS_LLM_SERVER_HOSTS`` (comma-separated), then the
        legacy ``AUTOBYTEUS_LLM_SERVER_URL``, then ``DEFAULT_SERVER_URL``.
        """
        hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
        if hosts_str:
            # Drop blank entries (stray or trailing commas) so they are not
            # reported later as invalid host URLs.
            stripped = (host.strip() for host in hosts_str.split(','))
            return [host for host in stripped if host]

        legacy_host = os.getenv('AUTOBYTEUS_LLM_SERVER_URL')
        if legacy_host:
            return [legacy_host]

        return [AutobyteusAudioModelProvider.DEFAULT_SERVER_URL]

    @staticmethod
    def discover_and_register():
        """
        Discover and register audio models from all configured hosts.

        Failures are logged rather than raised: an unreachable host or a bad
        model entry is skipped, and any unexpected error ends discovery with an
        error log.
        """
        try:
            # Imported here rather than at module level; the factory module
            # imports this provider.
            from autobyteus.multimedia.audio.audio_client_factory import AudioClientFactory

            hosts = AutobyteusAudioModelProvider._get_hosts()
            total_registered_count = 0

            for host_url in hosts:
                if not AutobyteusAudioModelProvider.is_valid_url(host_url):
                    logger.error("Invalid Autobyteus host URL for audio model discovery: %s, skipping.", host_url)
                    continue

                logger.info("Discovering audio models from host: %s", host_url)
                client = None
                try:
                    client = AutobyteusClient(server_url=host_url)
                    response = client.get_available_audio_models_sync()
                except Exception as e:
                    logger.warning("Could not fetch audio models from Autobyteus server at %s: %s", host_url, e)
                    continue
                finally:
                    # Always release the sync client, even on failure.
                    if client:
                        client.sync_client.close()

                models = response.get('models')
                if not models:
                    logger.info("No audio models found on host %s.", host_url)
                    continue

                host_registered_count = 0
                for model_info in models:
                    try:
                        if not all(k in model_info for k in ["name", "value", "provider"]):
                            logger.warning("Skipping malformed audio model from %s: %s", host_url, model_info)
                            continue

                        if "parameter_schema" not in model_info:
                            logger.debug(
                                "Skipping model from %s as it lacks a parameter schema, likely not an audio model: %s",
                                host_url, model_info.get('name'),
                            )
                            continue

                        audio_model = AudioModel(
                            name=model_info["name"],
                            value=model_info["value"],
                            provider=MultimediaProvider(model_info["provider"]),
                            client_class=AutobyteusAudioClient,
                            runtime=MultimediaRuntime.AUTOBYTEUS,
                            host_url=host_url,
                            parameter_schema=model_info.get("parameter_schema")
                        )

                        AudioClientFactory.register_model(audio_model)
                        host_registered_count += 1

                    except Exception as e:
                        logger.error(
                            "Failed to register audio model '%s' from %s: %s",
                            model_info.get('name'), host_url, e,
                        )

                if host_registered_count > 0:
                    logger.info("Registered %s audio models from Autobyteus host %s", host_registered_count, host_url)
                    total_registered_count += host_registered_count

            if total_registered_count > 0:
                logger.info(
                    "Finished Autobyteus audio model discovery. Total models registered: %s",
                    total_registered_count,
                )

        except Exception as e:
            logger.error("An unexpected error occurred during Autobyteus audio model discovery: %s", e, exc_info=True)

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """Validate URL format: both a scheme and a network location must be present."""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional, Dict, Any, List, TYPE_CHECKING
4
+ from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
5
+
6
+ if TYPE_CHECKING:
7
+ from autobyteus.multimedia.audio.audio_model import AudioModel
8
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
9
+
10
+
11
class BaseAudioClient(ABC):
    """
    Common interface for clients that produce audio through a model.

    Concrete clients must implement `generate_speech`; `cleanup` is an
    optional hook that does nothing by default.
    """

    def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
        # Keep the model metadata and the effective configuration for subclasses.
        self.model, self.config = model, config

    @abstractmethod
    async def generate_speech(
        self,
        prompt: str,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> SpeechGenerationResponse:
        """
        Turn text into spoken audio (Text-to-Speech).

        Args:
            prompt (str): Text that should be spoken.
            generation_config (Optional[Dict[str, Any]]): Provider-specific
                options (e.g., voice_name, speaker_mapping).

        Returns:
            SpeechGenerationResponse: An object containing URLs or paths to
                the generated audio files.
        """
        return None

    async def cleanup(self):
        """Release resources such as network clients; no-op in the base class."""
        return None
@@ -0,0 +1,11 @@
1
+ from .image_client_factory import image_client_factory, ImageClientFactory
2
+ from .image_model import ImageModel
3
+ from .base_image_client import BaseImageClient
4
+ from .api import *
5
+
6
+ __all__ = [
7
+ "image_client_factory",
8
+ "ImageClientFactory",
9
+ "ImageModel",
10
+ "BaseImageClient",
11
+ ]
@@ -0,0 +1,9 @@
1
+ from .autobyteus_image_client import AutobyteusImageClient
2
+ from .gemini_image_client import GeminiImageClient
3
+ from .openai_image_client import OpenAIImageClient
4
+
5
+ __all__ = [
6
+ "AutobyteusImageClient",
7
+ "GeminiImageClient",
8
+ "OpenAIImageClient",
9
+ ]
@@ -0,0 +1,97 @@
1
+ import logging
2
+ from typing import Optional, List, Dict, Any, TYPE_CHECKING
3
+ from autobyteus_llm_client import AutobyteusClient
4
+ from autobyteus.multimedia.image.base_image_client import BaseImageClient
5
+ from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
6
+
7
+ if TYPE_CHECKING:
8
+ from autobyteus.multimedia.image.image_model import ImageModel
9
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class AutobyteusImageClient(BaseImageClient):
    """
    Image client backed by a remote Autobyteus LLM server.

    Generation and editing are both forwarded to the server's single
    generate_image endpoint via `_call_remote_generate`.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        super().__init__(model, config)

        host = model.host_url
        if not host:
            raise ValueError("AutobyteusImageClient requires a host_url in its ImageModel.")

        self.autobyteus_client = AutobyteusClient(server_url=host)
        logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' on host '{model.host_url}'.")

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generate an image on the remote server; no mask is sent on this path.
        """
        response = await self._call_remote_generate(
            prompt=prompt,
            input_image_urls=input_image_urls,
            mask_url=None,
            generation_config=generation_config,
        )
        return response

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edit an image on the remote server, forwarding the optional mask URL.
        """
        response = await self._call_remote_generate(
            prompt=prompt,
            input_image_urls=input_image_urls,
            mask_url=mask_url,
            generation_config=generation_config,
        )
        return response

    async def _call_remote_generate(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]],
        mask_url: Optional[str],
        generation_config: Optional[Dict[str, Any]]
    ) -> ImageGenerationResponse:
        """
        Send the request to the remote server and wrap the returned URLs.

        Any failure, including an empty URL list, is logged and re-raised.
        """
        try:
            logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url}")

            # NOTE(review): `name` is what gets sent as the server-side model
            # name (not `value` or `model_identifier`) - confirm this is the
            # intended field.
            payload = await self.autobyteus_client.generate_image(
                model_name=self.model.name,
                prompt=prompt,
                input_image_urls=input_image_urls,
                mask_url=mask_url,
                generation_config=generation_config,
            )

            urls = payload.get("image_urls", [])
            if urls:
                return ImageGenerationResponse(image_urls=urls)
            raise ValueError("Remote Autobyteus server did not return any image URLs.")

        except Exception as e:
            logger.error(f"Error calling Autobyteus server for image generation: {e}")
            raise

    async def cleanup(self):
        """Close the underlying AutobyteusClient, if there is one."""
        client = self.autobyteus_client
        if client:
            await client.close()
        logger.debug("AutobyteusImageClient cleaned up.")
@@ -0,0 +1,188 @@
1
+ import asyncio
2
+ import base64
3
+ import logging
4
+ import mimetypes
5
+ import os
6
+ from typing import Optional, List, Dict, Any, TYPE_CHECKING
7
+
8
+ # ✅ Legacy Gemini SDK (as requested)
9
+ import google.generativeai as genai
10
+ import requests
11
+
12
+ from autobyteus.multimedia.image.base_image_client import BaseImageClient
13
+ from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
14
+
15
+ if TYPE_CHECKING:
16
+ from autobyteus.multimedia.image.image_model import ImageModel
17
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ def _data_uri(mime_type: str, raw: bytes) -> str:
23
+ """Convert raw bytes to a data URI."""
24
+ b64 = base64.b64encode(raw).decode("utf-8")
25
+ return f"data:{mime_type};base64,{b64}"
26
+
27
+
28
+ def _guess_mime_from_url(url: str) -> str:
29
+ """Best-effort MIME guess from URL; fall back to image/jpeg."""
30
+ mime, _ = mimetypes.guess_type(url)
31
+ return mime or "image/jpeg"
32
+
33
+
34
def _fetch_image_part(url: str) -> Dict[str, Any]:
    """
    Download `url` and wrap the payload as an inline-data Part for the legacy SDK:
        { "mime_type": "...", "data": <bytes> }

    The MIME type comes from the Content-Type header (parameters after ';'
    dropped), or from the URL when the header is missing. HTTP error statuses
    raise via `raise_for_status`.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    content_type = response.headers.get("Content-Type")
    if not content_type:
        content_type = _guess_mime_from_url(url)

    bare_mime = content_type.partition(";")[0]
    return {"mime_type": bare_mime, "data": response.content}
43
+
44
+
45
+ def _extract_inline_images(response) -> List[Dict[str, bytes]]:
46
+ """
47
+ Collect inline image parts from the legacy SDK response.
48
+ Returns list of { "mime_type": str, "data": bytes }.
49
+ """
50
+ images = []
51
+ try:
52
+ candidates = getattr(response, "candidates", []) or []
53
+ if not candidates:
54
+ return images
55
+
56
+ parts = candidates[0].content.parts if candidates[0].content else []
57
+ for p in parts:
58
+ inline = getattr(p, "inline_data", None)
59
+ if not inline:
60
+ continue
61
+ mime = getattr(inline, "mime_type", "") or ""
62
+ if not mime.startswith("image/"):
63
+ continue
64
+
65
+ data = getattr(inline, "data", None)
66
+ if isinstance(data, bytes):
67
+ images.append({"mime_type": mime, "data": data})
68
+ elif isinstance(data, str):
69
+ # Some bindings expose base64 text
70
+ images.append({"mime_type": mime, "data": base64.b64decode(data)})
71
+ except Exception as e:
72
+ logger.error("Failed to parse inline image(s): %s", e)
73
+ raise
74
+ return images
75
+
76
+
77
class GeminiImageClient(BaseImageClient):
    """
    Image generation client using Google's legacy SDK (`google.generativeai`).

    Notes:
      - `response_mime_type` defaults to 'image/png' to request image output.
      - Generation can be guided with input images passed as URLs; each one is
        downloaded and added as an inline image Part.
      - Blocking work (image downloads and the SDK call) runs in worker
        threads so the event loop is not stalled.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """
        Configure the SDK from the GEMINI_API_KEY environment variable.

        Raises:
            ValueError: If GEMINI_API_KEY is not set.
            RuntimeError: If the SDK cannot be configured or the model created.
        """
        super().__init__(model, config)

        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("Please set the GEMINI_API_KEY environment variable.")

        try:
            genai.configure(api_key=api_key)
            # `self.model.value` should be an image-capable model.
            # Examples (subject to availability): "imagen-3.0-generate", "imagen-3.0-fast",
            # or Gemini image-preview models that support image output.
            model_name = self.model.value or "imagen-3.0-generate"
            self._model = genai.GenerativeModel(model_name)
            logger.info("GeminiImageClient (legacy SDK) initialized for model '%s'.", model_name)
        except Exception as e:
            logger.error("Failed to initialize Gemini image client: %s", e)
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to initialize Gemini image client: {e}") from e

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generate an image (text-to-image or image-guided).

        Args:
            prompt: Text prompt; always the first element of the request contents.
            input_image_urls: Optional guidance images. A URL that fails to
                download is logged and skipped rather than failing the request.
            generation_config: Optional SDK generation settings. A copy is used;
                `response_mime_type` is set to 'image/png' unless already present.

        Returns:
            ImageGenerationResponse whose `image_urls` are base64 data URIs.

        Raises:
            ValueError: On any failure (safety block, no images returned, SDK error).
        """
        try:
            logger.info("Generating image with model '%s'...", self._model.model_name)

            # Build contents array: [text, (optional) image parts...]
            contents: List[Any] = [prompt]

            if input_image_urls:
                logger.info("Loading %d input image(s) for guidance...", len(input_image_urls))
                for url in input_image_urls:
                    try:
                        # The download is a blocking HTTP call; run it in a
                        # worker thread so the event loop stays responsive.
                        part = await asyncio.to_thread(_fetch_image_part, url)
                    except Exception as e:
                        logger.error("Skipping image '%s' due to error: %s", url, e)
                    else:
                        contents.append(part)

            # Merge config and default to image output
            gen_cfg: Dict[str, Any] = dict(generation_config or {})
            gen_cfg.setdefault("response_mime_type", "image/png")

            # Call the (sync) SDK in a worker thread
            response = await asyncio.to_thread(
                self._model.generate_content,
                contents,
                generation_config=gen_cfg,
            )

            # Handle safety blocks if present
            feedback = getattr(response, "prompt_feedback", None)
            block_reason = getattr(feedback, "block_reason", None)
            if block_reason:
                reason = getattr(block_reason, "name", str(block_reason))
                logger.error("Image generation blocked by safety settings: %s", reason)
                raise ValueError(f"Image generation failed due to safety settings: {reason}")

            images = _extract_inline_images(response)
            if not images:
                logger.warning("No image parts returned for prompt: '%.100s...'", prompt)
                raise ValueError("Gemini API did not return any images.")

            image_urls = [_data_uri(img["mime_type"], img["data"]) for img in images]
            logger.info("Successfully generated %d image(s).", len(image_urls))

            return ImageGenerationResponse(
                image_urls=image_urls,
                revised_prompt=None  # legacy SDK does not provide a revised prompt here
            )

        except Exception as e:
            logger.error("Error during Gemini image generation (legacy SDK): %s", e)
            message = str(e)
            # Region support / feature gating errors sometimes include 'Unsupported' hints.
            if "Unsupported" in message and "location" in message:
                raise ValueError(
                    "Image generation may not be supported in your configured region or project. "
                    "Check your API access and region settings."
                ) from e
            raise ValueError(f"Google Gemini image generation failed: {message}") from e

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Image editing/redraw with masks is not exposed via this legacy path.

        Raises:
            NotImplementedError: Always.
        """
        logger.error("Image editing is not supported by the GeminiImageClient (legacy SDK).")
        raise NotImplementedError("The GeminiImageClient does not support the edit_image method.")

    async def cleanup(self):
        """No resources to release; only logs that cleanup was called."""
        logger.debug("GeminiImageClient cleanup called (legacy SDK; nothing to release).")