autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  3. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  4. autobyteus/agent/message/__init__.py +7 -5
  5. autobyteus/agent/message/agent_input_user_message.py +6 -16
  6. autobyteus/agent/message/context_file.py +24 -24
  7. autobyteus/agent/message/context_file_type.py +29 -8
  8. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  9. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  10. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  11. autobyteus/agent/tool_invocation.py +2 -1
  12. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  13. autobyteus/agent_team/context/agent_team_config.py +1 -0
  14. autobyteus/llm/api/autobyteus_llm.py +33 -33
  15. autobyteus/llm/api/bedrock_llm.py +13 -5
  16. autobyteus/llm/api/claude_llm.py +13 -27
  17. autobyteus/llm/api/gemini_llm.py +108 -42
  18. autobyteus/llm/api/groq_llm.py +4 -3
  19. autobyteus/llm/api/mistral_llm.py +97 -51
  20. autobyteus/llm/api/nvidia_llm.py +6 -5
  21. autobyteus/llm/api/ollama_llm.py +37 -12
  22. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  23. autobyteus/llm/autobyteus_provider.py +1 -1
  24. autobyteus/llm/base_llm.py +42 -139
  25. autobyteus/llm/extensions/base_extension.py +6 -6
  26. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  27. autobyteus/llm/llm_factory.py +106 -4
  28. autobyteus/llm/token_counter/token_counter_factory.py +1 -1
  29. autobyteus/llm/user_message.py +43 -35
  30. autobyteus/llm/utils/llm_config.py +34 -18
  31. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  32. autobyteus/llm/utils/messages.py +32 -25
  33. autobyteus/llm/utils/response_types.py +9 -3
  34. autobyteus/llm/utils/token_usage.py +6 -5
  35. autobyteus/multimedia/__init__.py +31 -0
  36. autobyteus/multimedia/audio/__init__.py +11 -0
  37. autobyteus/multimedia/audio/api/__init__.py +4 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  40. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  41. autobyteus/multimedia/audio/audio_model.py +96 -0
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  43. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  44. autobyteus/multimedia/image/__init__.py +11 -0
  45. autobyteus/multimedia/image/api/__init__.py +9 -0
  46. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  47. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  48. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  49. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  50. autobyteus/multimedia/image/base_image_client.py +67 -0
  51. autobyteus/multimedia/image/image_client_factory.py +118 -0
  52. autobyteus/multimedia/image/image_model.py +96 -0
  53. autobyteus/multimedia/providers.py +5 -0
  54. autobyteus/multimedia/runtimes.py +8 -0
  55. autobyteus/multimedia/utils/__init__.py +10 -0
  56. autobyteus/multimedia/utils/api_utils.py +19 -0
  57. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  58. autobyteus/multimedia/utils/response_types.py +13 -0
  59. autobyteus/tools/__init__.py +3 -0
  60. autobyteus/tools/multimedia/__init__.py +8 -0
  61. autobyteus/tools/multimedia/audio_tools.py +116 -0
  62. autobyteus/tools/multimedia/image_tools.py +186 -0
  63. autobyteus/tools/tool_category.py +1 -0
  64. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  65. autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
  66. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  67. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  68. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
  69. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
  70. examples/run_browser_agent.py +1 -1
  71. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  72. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
  73. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
  74. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,142 @@
1
+ import logging
2
+ import os
3
+ from typing import Optional, List, Dict, Any, TYPE_CHECKING
4
+ from openai import OpenAI
5
+ from autobyteus.multimedia.image.base_image_client import BaseImageClient
6
+ from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
7
+
8
+ if TYPE_CHECKING:
9
+ from autobyteus.multimedia.image.image_model import ImageModel
10
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class OpenAIImageClient(BaseImageClient):
    """
    An image client that uses OpenAI's DALL-E models.

    The OpenAI SDK client created here is synchronous. To keep the ``async``
    methods of this class from stalling the event loop while a request is in
    flight, every SDK call is dispatched to the loop's default executor.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """
        Create the underlying OpenAI client.

        Args:
            model: The image model this client serves.
            config: Default generation parameters for this client.

        Raises:
            ValueError: If the OPENAI_API_KEY environment variable is not set.
        """
        super().__init__(model, config)
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logger.error("OPENAI_API_KEY environment variable is not set.")
            raise ValueError("OPENAI_API_KEY environment variable is not set.")

        self.client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")
        logger.info(f"OpenAIImageClient initialized for model '{self.model.name}'.")

    @staticmethod
    async def _run_blocking(func, **kwargs):
        """Run a blocking callable in the default executor and await its result."""
        # Imported locally so the helper does not depend on module-level imports.
        import asyncio
        import functools

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, functools.partial(func, **kwargs))

    def _build_config(self, generation_config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a copy of the client's default config with per-call overrides applied."""
        final_config = self.config.to_dict().copy()
        if generation_config:
            final_config.update(generation_config)
        return final_config

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.
        Note: This endpoint does not support image inputs, even for multimodal models like gpt-image-1.

        Args:
            prompt: Text description of the image to generate.
            input_image_urls: Ignored by this client; a warning is logged when provided.
            generation_config: Per-call overrides for ``n``, ``size``, ``quality`` and ``style``.

        Returns:
            ImageGenerationResponse: The generated image URLs and, when the API
            returns one, the revised prompt of the first image.

        Raises:
            ValueError: If the request fails or no image URL is returned. The
                original exception is attached as ``__cause__``.
        """
        if input_image_urls:
            logger.warning(
                f"The OpenAI `images.generate` API used by this client does not support input images. "
                f"The images provided for model '{self.model.value}' will be ignored. "
                f"To use image inputs, a client based on the Chat Completions API is required."
            )

        try:
            image_model = self.model.value
            logger.info(f"Generating image with OpenAI model '{image_model}' and prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self._build_config(generation_config)

            # The SDK call blocks on network I/O, so run it off the event loop.
            response = await self._run_blocking(
                self.client.images.generate,
                model=image_model,
                prompt=prompt,
                n=final_config.get("n", 1),
                size=final_config.get("size", "1024x1024"),
                quality=final_config.get("quality", "standard"),
                style=final_config.get("style", "vivid"),
                response_format="url"
            )

            image_urls_list: List[str] = [img.url for img in response.data if img.url]
            revised_prompt: Optional[str] = (
                getattr(response.data[0], 'revised_prompt', None) if response.data else None
            )

            if not image_urls_list:
                raise ValueError("OpenAI API did not return any image URLs.")

            logger.info(f"Successfully generated {len(image_urls_list)} image(s).")

            return ImageGenerationResponse(
                image_urls=image_urls_list,
                revised_prompt=revised_prompt
            )
        except Exception as e:
            logger.error(f"Error during OpenAI image generation: {str(e)}")
            # Chain the cause so the original traceback is not lost.
            raise ValueError(f"OpenAI image generation failed: {str(e)}") from e

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edits an image using an OpenAI model that supports the v1/images/edits endpoint.

        Args:
            prompt: Text description of the desired edit.
            input_image_urls: Local file paths; only the first entry is used
                (it is opened with ``open()``), the rest are ignored with a warning.
            mask_url: Optional local path to a mask image.
            generation_config: Per-call overrides for ``n`` and ``size``.

        Returns:
            ImageGenerationResponse: The URLs of the edited image(s).

        Raises:
            FileNotFoundError: If the source image or the mask file cannot be found.
            ValueError: If no input image is given, the request fails, or no
                image URL is returned.
        """
        if not input_image_urls:
            raise ValueError("At least one input image URL must be provided for editing.")

        source_image_url = input_image_urls[0]
        if len(input_image_urls) > 1:
            logger.warning(f"OpenAI edit endpoint only supports one input image. Using '{source_image_url}' and ignoring the rest.")

        try:
            from contextlib import ExitStack

            logger.info(f"Editing image '{source_image_url}' with prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self._build_config(generation_config)

            # ExitStack closes both handles on every exit path, including when
            # opening the mask fails after the source image is already open.
            with ExitStack() as stack:
                image_file = stack.enter_context(open(source_image_url, "rb"))
                mask_file = stack.enter_context(open(mask_url, "rb")) if mask_url else None
                # Awaited inside the ``with`` so the files stay open for the
                # whole duration of the off-loop request.
                response = await self._run_blocking(
                    self.client.images.edit,
                    image=image_file,
                    mask=mask_file,
                    prompt=prompt,
                    model=self.model.value,
                    n=final_config.get("n", 1),
                    size=final_config.get("size", "1024x1024"),
                    response_format="url"
                )

            image_urls_list: List[str] = [img.url for img in response.data if img.url]
            if not image_urls_list:
                raise ValueError("OpenAI API did not return any edited image URLs.")

            logger.info(f"Successfully edited image, generated {len(image_urls_list)} version(s).")
            return ImageGenerationResponse(image_urls=image_urls_list)

        except FileNotFoundError as e:
            logger.error(f"Image file not found for editing: {e.filename}")
            raise
        except Exception as e:
            logger.error(f"Error during OpenAI image editing: {str(e)}")
            # The API might return a 400 Bad Request if the model doesn't support edits
            if "does not support image editing" in str(e):
                raise ValueError(f"The model '{self.model.value}' does not support the image editing endpoint.") from e
            raise ValueError(f"OpenAI image editing failed: {str(e)}") from e

    async def cleanup(self):
        """Log that cleanup was requested; no resources are released here."""
        # The OpenAI client does not require explicit cleanup of a session.
        logger.debug("OpenAIImageClient cleanup called.")
@@ -0,0 +1,109 @@
1
+ import logging
2
+ from typing import Dict, Any, List
3
+ import os
4
+ from urllib.parse import urlparse
5
+
6
+ from autobyteus_llm_client import AutobyteusClient
7
+ from autobyteus.multimedia.image.api.autobyteus_image_client import AutobyteusImageClient
8
+ from autobyteus.multimedia.image.image_model import ImageModel
9
+ from autobyteus.multimedia.providers import MultimediaProvider
10
+ from autobyteus.multimedia.runtimes import MultimediaRuntime
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class AutobyteusImageModelProvider:
    """
    Discovers and registers image models from remote Autobyteus server instances.
    """
    DEFAULT_SERVER_URL = 'http://localhost:8000'

    @staticmethod
    def _get_hosts() -> List[str]:
        """
        Gets Autobyteus server hosts from env vars.

        Lookup order: the comma-separated ``AUTOBYTEUS_LLM_SERVER_HOSTS``, then
        the single-host ``AUTOBYTEUS_LLM_SERVER_URL``, then ``DEFAULT_SERVER_URL``.
        Blank entries (stray or trailing commas) are dropped; a value made up
        only of blanks is treated the same as an unset variable.
        """
        hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
        if hosts_str:
            hosts = [host.strip() for host in hosts_str.split(',') if host.strip()]
            if hosts:
                return hosts

        legacy_host = os.getenv('AUTOBYTEUS_LLM_SERVER_URL')
        if legacy_host:
            return [legacy_host]

        return [AutobyteusImageModelProvider.DEFAULT_SERVER_URL]

    @staticmethod
    def discover_and_register():
        """
        Discover and register image models from all configured hosts.

        Hosts with an invalid URL, hosts that cannot be reached and hosts that
        report no models are skipped. Any other unexpected error is logged and
        swallowed, so this method does not raise.
        """
        try:
            # Imported here rather than at module level; the factory module
            # imports this provider, so a top-level import would be circular.
            from autobyteus.multimedia.image.image_client_factory import ImageClientFactory

            hosts = AutobyteusImageModelProvider._get_hosts()
            total_registered_count = 0

            for host_url in hosts:
                if not AutobyteusImageModelProvider.is_valid_url(host_url):
                    logger.error(f"Invalid Autobyteus host URL for image model discovery: {host_url}, skipping.")
                    continue

                logger.info(f"Discovering image models from host: {host_url}")
                client = None
                try:
                    client = AutobyteusClient(server_url=host_url)
                    response = client.get_available_image_models_sync()
                except Exception as e:
                    logger.warning(f"Could not fetch models from Autobyteus server at {host_url}: {e}")
                    continue
                finally:
                    # Close the sync client whether or not the fetch succeeded.
                    if client:
                        client.sync_client.close()

                if not response.get('models'):
                    logger.info(f"No image models found on host {host_url}.")
                    continue

                models = response.get('models', [])
                host_registered_count = 0
                for model_info in models:
                    try:
                        if not all(k in model_info for k in ["name", "value", "provider"]):
                            logger.warning(f"Skipping malformed image model from {host_url}: {model_info}")
                            continue

                        # Heuristic to ensure it's an image model if the server doesn't specify modality
                        if "parameter_schema" not in model_info:
                            logger.debug(f"Skipping model from {host_url} as it lacks a parameter schema, likely not an image model: {model_info.get('name')}")
                            continue

                        image_model = ImageModel(
                            name=model_info["name"],
                            value=model_info["value"],
                            provider=MultimediaProvider(model_info["provider"]),
                            client_class=AutobyteusImageClient,
                            runtime=MultimediaRuntime.AUTOBYTEUS,
                            host_url=host_url,
                            parameter_schema=model_info.get("parameter_schema")
                        )

                        ImageClientFactory.register_model(image_model)
                        host_registered_count += 1

                    except Exception as e:
                        # One bad model entry must not abort the rest of the host.
                        logger.error(f"Failed to register image model '{model_info.get('name')}' from {host_url}: {e}")

                if host_registered_count > 0:
                    logger.info(f"Registered {host_registered_count} image models from Autobyteus host {host_url}")
                    total_registered_count += host_registered_count

            if total_registered_count > 0:
                logger.info(f"Finished Autobyteus image model discovery. Total models registered: {total_registered_count}")

        except Exception as e:
            logger.error(f"An unexpected error occurred during Autobyteus image model discovery: {e}", exc_info=True)

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """Validate URL format: both a scheme and a network location must be present."""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional, Dict, Any, List, TYPE_CHECKING
4
+ from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
5
+
6
+ if TYPE_CHECKING:
7
+ from autobyteus.multimedia.image.image_model import ImageModel
8
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
9
+
10
+
11
class BaseImageClient(ABC):
    """
    Common interface for clients that talk to an image model.

    Concrete subclasses must implement both image generation and image
    editing; ``cleanup`` is an optional hook with a no-op default.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        # Keep both objects on the instance so subclasses can read them.
        self.config = config
        self.model = model

    @abstractmethod
    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Create one or more images from a text prompt.

        Args:
            prompt (str): Description of the image to produce.
            input_image_urls (Optional[List[str]]): URLs or local paths of input
                images for image-to-image generation.
            generation_config (Optional[Dict[str, Any]]): Provider-specific
                parameters that override the defaults (e.g., n, size, quality, style).

        Returns:
            ImageGenerationResponse: An object containing URLs to the generated images.
        """
        ...

    @abstractmethod
    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Modify an existing image according to a text prompt.

        Args:
            prompt (str): Description of the desired edits.
            input_image_urls (List[str]): Path(s) or URL(s) of the source image(s).
            mask_url (Optional[str]): Path to a mask image whose transparent
                areas indicate where the image should be edited.
            generation_config (Optional[Dict[str, Any]]): Provider-specific parameters.

        Returns:
            ImageGenerationResponse: An object containing URLs to the edited images.
        """
        ...

    async def cleanup(self):
        """Optional hook for releasing resources such as network clients."""
        return None
@@ -0,0 +1,118 @@
1
+ import logging
2
+ from typing import Dict, Optional
3
+ from autobyteus.multimedia.image.autobyteus_image_provider import AutobyteusImageModelProvider
4
+ from autobyteus.multimedia.image.base_image_client import BaseImageClient
5
+ from autobyteus.multimedia.image.image_model import ImageModel
6
+ from autobyteus.multimedia.providers import MultimediaProvider
7
+ from autobyteus.multimedia.image.api.openai_image_client import OpenAIImageClient
8
+ from autobyteus.multimedia.image.api.gemini_image_client import GeminiImageClient
9
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
10
+ from autobyteus.utils.singleton import SingletonMeta
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class ImageClientFactory(metaclass=SingletonMeta):
    """
    Registry-backed factory that builds image clients for registered ImageModels.

    All state lives on the class: a dict of models keyed by their identifier
    and a flag recording whether the registry has been populated.
    """
    _models_by_identifier: Dict[str, ImageModel] = {}
    _initialized = False

    @staticmethod
    def ensure_initialized():
        """Populate the registry on first use; later calls do nothing."""
        if ImageClientFactory._initialized:
            return
        ImageClientFactory._initialize_registry()
        ImageClientFactory._initialized = True

    @staticmethod
    def reinitialize():
        """Drop every registered model and run discovery again from scratch."""
        logger.info("Reinitializing Image model registry...")
        ImageClientFactory._initialized = False
        ImageClientFactory._models_by_identifier.clear()
        ImageClientFactory.ensure_initialized()
        logger.info("Image model registry reinitialized successfully.")

    @staticmethod
    def _initialize_registry():
        """Register the built-in API models, then discover models on remote servers."""
        builtin_models = [
            # OpenAI Models
            # NOTE(review): registered under the name "gpt-image-1" but the API
            # model value sent is "dall-e-3" — confirm this mapping is intended.
            ImageModel(
                name="gpt-image-1",
                value="dall-e-3",
                provider=MultimediaProvider.OPENAI,
                client_class=OpenAIImageClient,
                parameter_schema={
                    "n": {"type": "integer", "default": 1, "allowed_values": [1], "description": "The number of images to generate."},
                    "size": {"type": "string", "default": "1024x1024", "allowed_values": ["1024x1024", "1792x1024", "1024x1792"], "description": "The size of the generated images."},
                    "quality": {"type": "string", "default": "hd", "allowed_values": ["standard", "hd"], "description": "The quality of the image that will be generated."},
                    "style": {"type": "string", "default": "vivid", "allowed_values": ["vivid", "natural"], "description": "The style of the generated images."}
                }
            ),
            ImageModel(
                name="dall-e-2",
                value="dall-e-2",
                provider=MultimediaProvider.OPENAI,
                client_class=OpenAIImageClient,
                parameter_schema={
                    "n": {"type": "integer", "default": 1, "description": "The number of images to generate."},
                    "size": {"type": "string", "default": "1024x1024", "allowed_values": ["256x256", "512x512", "1024x1024"], "description": "The size of the generated images."}
                }
            ),
            # Google Imagen Models (via Gemini API)
            ImageModel(
                name="imagen-4",
                value="imagen-4.0-generate-001",
                provider=MultimediaProvider.GOOGLE,
                client_class=GeminiImageClient,
                parameter_schema={}  # The genai library doesn't expose these as simple params
            ),
        ]

        for builtin in builtin_models:
            ImageClientFactory.register_model(builtin)

        logger.info("Default API-based image models registered.")

        # Discover models from remote Autobyteus servers
        AutobyteusImageModelProvider.discover_and_register()

    @staticmethod
    def register_model(model: ImageModel):
        """
        Add an image model to the registry, replacing any entry with the same identifier.

        A provider that is not already a MultimediaProvider is converted to
        one; if that conversion fails the model is not registered.
        """
        registry = ImageClientFactory._models_by_identifier
        identifier = model.model_identifier

        if identifier in registry:
            logger.warning(f"Image model '{identifier}' is already registered. Overwriting.")

        provider_is_enum = isinstance(model.provider, MultimediaProvider)
        if not provider_is_enum:
            try:
                model.provider = MultimediaProvider(model.provider)
            except ValueError:
                logger.error(f"Cannot register model '{identifier}' with unknown provider '{model.provider}'.")
                return

        registry[identifier] = model

    @staticmethod
    def create_image_client(model_identifier: str, config_override: Optional[MultimediaConfig] = None) -> BaseImageClient:
        """
        Build a client for the model registered under ``model_identifier``.

        Raises:
            ValueError: If no model is registered under that identifier.
        """
        ImageClientFactory.ensure_initialized()

        registry = ImageClientFactory._models_by_identifier
        model = registry.get(model_identifier)
        if model:
            logger.info(f"Creating instance of image client for model '{model_identifier}'.")
            return model.create_client(config_override)

        raise ValueError(f"No image model registered with the name '{model_identifier}'. "
                         f"Available models: {list(registry)}")


# Module-level singleton instance of the factory.
image_client_factory = ImageClientFactory()
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any
4
+ from urllib.parse import urlparse
5
+
6
+ from autobyteus.multimedia.providers import MultimediaProvider
7
+ from autobyteus.multimedia.runtimes import MultimediaRuntime
8
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
9
+
10
+ if TYPE_CHECKING:
11
+ from autobyteus.multimedia.image.base_image_client import BaseImageClient
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class ImageModelMeta(type):
    """
    Metaclass that lets the ImageModel class itself be iterated, indexed and
    measured with ``len()``, similar to an Enum.

    Each operation reads from the ImageClientFactory registry. The factory is
    imported inside each method rather than at module level.
    """

    def __iter__(cls) -> Iterator[ImageModel]:
        """Yield every registered image model."""
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        yield from ImageClientFactory._models_by_identifier.values()

    def __getitem__(cls, name_or_identifier: str) -> ImageModel:
        """Return the model registered under the given key, or raise KeyError."""
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        found = ImageClientFactory._models_by_identifier.get(name_or_identifier)
        if not found:
            raise KeyError(f"Image model '{name_or_identifier}' not found.")
        return found

    def __len__(cls) -> int:
        """Return the number of registered image models."""
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        registry = ImageClientFactory._models_by_identifier
        return len(registry)
37
+
38
+
39
class ImageModel(metaclass=ImageModelMeta):
    """
    Represents a single image model's metadata.

    Holds the model's display name, the value sent to the backend, its
    provider, the client class used to talk to it, and a parameter schema
    from which the default configuration is derived.
    """
    def __init__(
        self,
        name: str,
        value: str,
        provider: MultimediaProvider,
        client_class: Type["BaseImageClient"],
        parameter_schema: Optional[Dict[str, Any]] = None,
        runtime: MultimediaRuntime = MultimediaRuntime.API,
        host_url: Optional[str] = None
    ):
        """
        Args:
            name: Name used to build the model identifier.
            value: Model value handed to the client/backend.
            provider: Provider the model belongs to.
            client_class: Client class instantiated by ``create_client``.
            parameter_schema: Mapping of parameter name to metadata; entries
                with a ``"default"`` key seed the default configuration.
            runtime: Execution environment of the model.
            host_url: Server URL, used for the identifier of AUTOBYTEUS-runtime models.
        """
        self.name = name
        self.value = value
        self.provider = provider
        self.client_class = client_class
        self.runtime = runtime
        self.host_url = host_url
        self.parameter_schema = parameter_schema if parameter_schema else {}

        # Automatically build default_config from the schema's default values
        default_params = {
            key: meta.get("default")
            for key, meta in self.parameter_schema.items()
            if "default" in meta
        }
        self.default_config = MultimediaConfig(params=default_params)

    @property
    def model_identifier(self) -> str:
        """
        Returns the unique identifier for the model.

        For AUTOBYTEUS-runtime models with a host URL this is
        ``"<name>@<hostname>"``; otherwise it is just the name. When no
        hostname can be extracted from ``host_url`` (parsing fails, or the URL
        has no scheme so ``urlparse`` reports no hostname), the raw
        ``host_url`` is used instead, so the result is never ``"<name>@None"``.
        """
        if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
            try:
                host = urlparse(self.host_url).hostname
            except Exception:
                host = None
            # NOTE(review): the hostname excludes the port, so two servers on
            # the same host but different ports share an identifier — confirm
            # whether that is acceptable.
            return f"{self.name}@{host or self.host_url}"
        return self.name

    def create_client(self, config_override: Optional[MultimediaConfig] = None) -> "BaseImageClient":
        """
        Instantiates the client class for this model.

        Args:
            config_override: Optional parameters merged on top of a copy of
                the model's default configuration.

        Returns:
            A new ``client_class`` instance bound to this model.
        """
        config_to_use = self.default_config
        if config_override:
            # Copy first so the override never mutates the shared defaults.
            from copy import deepcopy
            config_to_use = deepcopy(self.default_config)
            config_to_use.merge_with(config_override)

        return self.client_class(model=self, config=config_to_use)

    def __repr__(self):
        return (
            f"ImageModel(identifier='{self.model_identifier}', "
            f"provider='{self.provider.name}', runtime='{self.runtime.value}')"
        )
@@ -0,0 +1,5 @@
1
+ from enum import Enum
2
+
3
class MultimediaProvider(Enum):
    """Providers that a multimedia model can belong to."""

    # Each member's value is the same string as its name.
    OPENAI = "OPENAI"
    GOOGLE = "GOOGLE"
@@ -0,0 +1,8 @@
1
+ from enum import Enum
2
+
3
class MultimediaRuntime(Enum):
    """Execution environment in which a multimedia model runs."""

    # Member values are the lower-case form of the member names.
    API = "api"
    AUTOBYTEUS = "autobyteus"
@@ -0,0 +1,10 @@
1
+ from .multimedia_config import MultimediaConfig
2
+ from .response_types import ImageGenerationResponse, SpeechGenerationResponse
3
+ from .api_utils import load_image_from_url
4
+
5
+ __all__ = [
6
+ "MultimediaConfig",
7
+ "ImageGenerationResponse",
8
+ "SpeechGenerationResponse",
9
+ "load_image_from_url",
10
+ ]
@@ -0,0 +1,19 @@
1
+ import logging
2
+ from PIL import Image
3
+ import requests
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
def load_image_from_url(url: str, timeout: float = 30.0) -> Image.Image:
    """
    Loads an image from a URL (http, https, or file path).

    Args:
        url: An ``http://`` / ``https://`` URL, or otherwise a local file path.
        timeout: Seconds to wait for the HTTP server before giving up. Not
            used for local paths.

    Returns:
        The opened PIL image.

    Raises:
        Exception: Any error raised while downloading or opening the image is
            logged and re-raised unchanged.
    """
    try:
        if url.startswith(('http://', 'https://')):
            from io import BytesIO

            # A timeout keeps an unresponsive server from hanging the caller.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            # Use the fully downloaded, content-decoded body instead of the raw
            # socket stream: `response.raw` skips gzip/deflate decoding and
            # keeps the connection open while Pillow reads from it lazily.
            return Image.open(BytesIO(response.content))
        # Assume it's a local file path
        return Image.open(url)
    except Exception as e:
        logger.error(f"Failed to load image from URL/path '{url}': {e}")
        raise
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Optional, Dict, Any
3
+
4
@dataclass
class MultimediaConfig:
    """
    Container for multimedia generation settings, stored as a free-form
    dictionary of parameters.
    """
    # Parameter name -> value; each instance gets its own empty dict by default.
    params: Dict[str, Any] = field(default_factory=dict)

    def merge_with(self, override_config: Optional['MultimediaConfig']):
        """
        Copy every parameter of ``override_config`` into this config, in place.

        Nothing happens when the override is missing or carries no parameters.
        """
        if not override_config:
            return
        incoming = override_config.params
        if incoming:
            self.params.update(incoming)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'MultimediaConfig':
        """
        Build a MultimediaConfig around the given parameter dictionary.

        The dictionary is stored as-is (not copied); ``None`` yields an empty config.
        """
        if data is None:
            return cls(params={})
        return cls(params=data)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the parameter dictionary.

        This is the config's own dictionary, not a copy.
        """
        return self.params
@@ -0,0 +1,13 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional, List
3
+
4
@dataclass
class ImageGenerationResponse:
    """Result of an image generation or editing request."""
    # URLs of the produced images.
    image_urls: List[str]
    # Revised prompt, when one is available; defaults to None.
    revised_prompt: Optional[str] = None
9
+
10
@dataclass
class SpeechGenerationResponse:
    """Result of a speech generation (Text-to-Speech) request."""
    # URLs of the produced audio.
    audio_urls: List[str]
@@ -25,6 +25,7 @@ from .file.file_writer import file_writer
25
25
  # General Class-based tools
26
26
  from .image_downloader import ImageDownloader
27
27
  from .timer import Timer
28
+ from .multimedia.image_tools import GenerateImageTool, EditImageTool
28
29
 
29
30
  # Standalone Browser tools
30
31
  from .browser.standalone.google_search_ui import GoogleSearch
@@ -62,6 +63,8 @@ __all__ = [
62
63
  # Re-exported general class-based tools
63
64
  "ImageDownloader",
64
65
  "Timer",
66
+ "GenerateImageTool",
67
+ "EditImageTool",
65
68
 
66
69
  # Re-exported Standalone Browser tools
67
70
  "GoogleSearch",
@@ -0,0 +1,8 @@
1
+ from .image_tools import GenerateImageTool, EditImageTool
2
+ from .audio_tools import GenerateSpeechTool
3
+
4
+ __all__ = [
5
+ "GenerateImageTool",
6
+ "EditImageTool",
7
+ "GenerateSpeechTool",
8
+ ]