autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/context/agent_config.py +6 -1
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
- autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
- autobyteus/agent/message/__init__.py +7 -5
- autobyteus/agent/message/agent_input_user_message.py +6 -16
- autobyteus/agent/message/context_file.py +24 -24
- autobyteus/agent/message/context_file_type.py +29 -8
- autobyteus/agent/message/multimodal_message_builder.py +47 -0
- autobyteus/agent/streaming/stream_event_payloads.py +23 -4
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
- autobyteus/agent/tool_invocation.py +2 -1
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
- autobyteus/agent_team/context/agent_team_config.py +1 -0
- autobyteus/llm/api/autobyteus_llm.py +33 -33
- autobyteus/llm/api/bedrock_llm.py +13 -5
- autobyteus/llm/api/claude_llm.py +13 -27
- autobyteus/llm/api/gemini_llm.py +108 -42
- autobyteus/llm/api/groq_llm.py +4 -3
- autobyteus/llm/api/mistral_llm.py +97 -51
- autobyteus/llm/api/nvidia_llm.py +6 -5
- autobyteus/llm/api/ollama_llm.py +37 -12
- autobyteus/llm/api/openai_compatible_llm.py +91 -91
- autobyteus/llm/autobyteus_provider.py +1 -1
- autobyteus/llm/base_llm.py +42 -139
- autobyteus/llm/extensions/base_extension.py +6 -6
- autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
- autobyteus/llm/llm_factory.py +106 -4
- autobyteus/llm/token_counter/token_counter_factory.py +1 -1
- autobyteus/llm/user_message.py +43 -35
- autobyteus/llm/utils/llm_config.py +34 -18
- autobyteus/llm/utils/media_payload_formatter.py +99 -0
- autobyteus/llm/utils/messages.py +32 -25
- autobyteus/llm/utils/response_types.py +9 -3
- autobyteus/llm/utils/token_usage.py +6 -5
- autobyteus/multimedia/__init__.py +31 -0
- autobyteus/multimedia/audio/__init__.py +11 -0
- autobyteus/multimedia/audio/api/__init__.py +4 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
- autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
- autobyteus/multimedia/audio/audio_client_factory.py +120 -0
- autobyteus/multimedia/audio/audio_model.py +96 -0
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
- autobyteus/multimedia/audio/base_audio_client.py +40 -0
- autobyteus/multimedia/image/__init__.py +11 -0
- autobyteus/multimedia/image/api/__init__.py +9 -0
- autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
- autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
- autobyteus/multimedia/image/api/openai_image_client.py +142 -0
- autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
- autobyteus/multimedia/image/base_image_client.py +67 -0
- autobyteus/multimedia/image/image_client_factory.py +118 -0
- autobyteus/multimedia/image/image_model.py +96 -0
- autobyteus/multimedia/providers.py +5 -0
- autobyteus/multimedia/runtimes.py +8 -0
- autobyteus/multimedia/utils/__init__.py +10 -0
- autobyteus/multimedia/utils/api_utils.py +19 -0
- autobyteus/multimedia/utils/multimedia_config.py +29 -0
- autobyteus/multimedia/utils/response_types.py +13 -0
- autobyteus/tools/__init__.py +3 -0
- autobyteus/tools/multimedia/__init__.py +8 -0
- autobyteus/tools/multimedia/audio_tools.py +116 -0
- autobyteus/tools/multimedia/image_tools.py +186 -0
- autobyteus/tools/tool_category.py +1 -0
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
- autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
- autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
- examples/run_browser_agent.py +1 -1
- autobyteus/llm/utils/image_payload_formatter.py +0 -89
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
4
|
+
from openai import OpenAI
|
|
5
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
6
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
10
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class OpenAIImageClient(BaseImageClient):
    """
    An image client that uses OpenAI's DALL-E models.

    The client wraps the synchronous ``openai.OpenAI`` SDK client.
    NOTE(review): the SDK calls in ``generate_image`` and ``edit_image`` are
    plain (non-awaited) calls, so they block the running event loop until the
    HTTP request completes.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """
        Creates the underlying OpenAI SDK client.

        Args:
            model: Image model metadata; ``model.value`` is sent as the API model name.
            config: Default generation parameters for this client.

        Raises:
            ValueError: If the OPENAI_API_KEY environment variable is not set.
        """
        super().__init__(model, config)
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logger.error("OPENAI_API_KEY environment variable is not set.")
            raise ValueError("OPENAI_API_KEY environment variable is not set.")

        self.client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")
        logger.info(f"OpenAIImageClient initialized for model '{self.model.name}'.")

    def _merged_config(self, generation_config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Returns a fresh dict of the client's default parameters overlaid with per-call overrides."""
        merged = dict(self.config.to_dict())
        if generation_config:
            merged.update(generation_config)
        return merged

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.
        Note: This endpoint does not support image inputs, even for multimodal models like gpt-image-1.

        Args:
            prompt: The text prompt describing the image to generate.
            input_image_urls: Ignored (a warning is logged when provided).
            generation_config: Per-call overrides for ``n``, ``size``, ``quality`` and ``style``.

        Returns:
            ImageGenerationResponse: The generated image URLs and, when the API
            returns one, the revised prompt of the first image.

        Raises:
            ValueError: If the request fails or no image URL is returned. The
                original exception is attached as ``__cause__``.
        """
        if input_image_urls:
            logger.warning(
                f"The OpenAI `images.generate` API used by this client does not support input images. "
                f"The images provided for model '{self.model.value}' will be ignored. "
                f"To use image inputs, a client based on the Chat Completions API is required."
            )

        try:
            image_model = self.model.value
            logger.info(f"Generating image with OpenAI model '{image_model}' and prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self._merged_config(generation_config)

            response = self.client.images.generate(
                model=image_model,
                prompt=prompt,
                n=final_config.get("n", 1),
                size=final_config.get("size", "1024x1024"),
                quality=final_config.get("quality", "standard"),
                style=final_config.get("style", "vivid"),
                response_format="url"
            )

            # `data` may be missing/None; treat that the same as an empty result
            # so the caller gets the explicit "no image URLs" error below.
            images = response.data or []
            image_urls_list: List[str] = [img.url for img in images if img.url]
            revised_prompt: Optional[str] = getattr(images[0], "revised_prompt", None) if images else None

            if not image_urls_list:
                raise ValueError("OpenAI API did not return any image URLs.")

            logger.info(f"Successfully generated {len(image_urls_list)} image(s).")

            return ImageGenerationResponse(
                image_urls=image_urls_list,
                revised_prompt=revised_prompt
            )
        except Exception as e:
            logger.error(f"Error during OpenAI image generation: {str(e)}")
            # Chain the cause so the original traceback is preserved for callers.
            raise ValueError(f"OpenAI image generation failed: {str(e)}") from e

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edits an image using an OpenAI model that supports the v1/images/edits endpoint.

        Args:
            prompt: A text prompt describing the desired edits.
            input_image_urls: Source images; only the first entry is used and it
                is opened as a local file path.
            mask_url: Optional local file path of a mask image.
            generation_config: Per-call overrides for ``n`` and ``size``.

        Returns:
            ImageGenerationResponse: The URLs of the edited image(s).

        Raises:
            ValueError: If no input image is given, the request fails, or no
                image URL is returned.
            FileNotFoundError: If the source image or the mask file does not exist.
        """
        if not input_image_urls:
            raise ValueError("At least one input image URL must be provided for editing.")

        source_image_url = input_image_urls[0]
        if len(input_image_urls) > 1:
            logger.warning(f"OpenAI edit endpoint only supports one input image. Using '{source_image_url}' and ignoring the rest.")

        try:
            logger.info(f"Editing image '{source_image_url}' with prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self._merged_config(generation_config)

            with open(source_image_url, "rb") as image_file:
                mask_file = open(mask_url, "rb") if mask_url else None
                try:
                    response = self.client.images.edit(
                        image=image_file,
                        mask=mask_file,
                        prompt=prompt,
                        model=self.model.value,
                        n=final_config.get("n", 1),
                        size=final_config.get("size", "1024x1024"),
                        response_format="url"
                    )
                finally:
                    # The mask is optional, so it is closed manually rather than via `with`.
                    if mask_file:
                        mask_file.close()

            images = response.data or []
            image_urls_list: List[str] = [img.url for img in images if img.url]
            if not image_urls_list:
                raise ValueError("OpenAI API did not return any edited image URLs.")

            logger.info(f"Successfully edited image, generated {len(image_urls_list)} version(s).")
            return ImageGenerationResponse(image_urls=image_urls_list)

        except FileNotFoundError as e:
            logger.error(f"Image file not found for editing: {e.filename}")
            raise
        except Exception as e:
            logger.error(f"Error during OpenAI image editing: {str(e)}")
            # The API might return a 400 Bad Request if the model doesn't support edits
            if "does not support image editing" in str(e):
                raise ValueError(f"The model '{self.model.value}' does not support the image editing endpoint.") from e
            raise ValueError(f"OpenAI image editing failed: {str(e)}") from e

    async def cleanup(self):
        """Logs the call; no resources are released here."""
        # The OpenAI client does not require explicit cleanup of a session.
        logger.debug("OpenAIImageClient cleanup called.")
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Any, List
|
|
3
|
+
import os
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
from autobyteus_llm_client import AutobyteusClient
|
|
7
|
+
from autobyteus.multimedia.image.api.autobyteus_image_client import AutobyteusImageClient
|
|
8
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
9
|
+
from autobyteus.multimedia.providers import MultimediaProvider
|
|
10
|
+
from autobyteus.multimedia.runtimes import MultimediaRuntime
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class AutobyteusImageModelProvider:
    """
    Discovers and registers image models from remote Autobyteus server instances.
    """
    DEFAULT_SERVER_URL = 'http://localhost:8000'

    @staticmethod
    def _get_hosts() -> List[str]:
        """
        Gets Autobyteus server hosts from env vars.

        Lookup order: the comma-separated AUTOBYTEUS_LLM_SERVER_HOSTS, then the
        single-value AUTOBYTEUS_LLM_SERVER_URL, then DEFAULT_SERVER_URL.

        Returns:
            List[str]: Host URLs with surrounding whitespace stripped. Empty
            entries (e.g. from a trailing comma) are dropped.
        """
        hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
        if hosts_str:
            # Filter blanks so "a,b," or "a,,b" does not yield '' hosts that
            # would only be reported later as invalid URLs.
            stripped = (host.strip() for host in hosts_str.split(','))
            return [host for host in stripped if host]

        legacy_host = os.getenv('AUTOBYTEUS_LLM_SERVER_URL')
        if legacy_host:
            return [legacy_host]

        return [AutobyteusImageModelProvider.DEFAULT_SERVER_URL]

    @staticmethod
    def discover_and_register():
        """
        Discover and register image models from all configured hosts.

        Hosts that are invalid or unreachable are logged and skipped. Any
        unexpected error is logged and swallowed, so this method does not raise.
        """
        try:
            # Imported here rather than at module level; the factory module
            # imports this provider at its top level.
            from autobyteus.multimedia.image.image_client_factory import ImageClientFactory

            hosts = AutobyteusImageModelProvider._get_hosts()
            total_registered_count = 0

            for host_url in hosts:
                if not AutobyteusImageModelProvider.is_valid_url(host_url):
                    logger.error(f"Invalid Autobyteus host URL for image model discovery: {host_url}, skipping.")
                    continue

                logger.info(f"Discovering image models from host: {host_url}")
                client = None
                try:
                    client = AutobyteusClient(server_url=host_url)
                    response = client.get_available_image_models_sync()
                except Exception as e:
                    logger.warning(f"Could not fetch models from Autobyteus server at {host_url}: {e}")
                    continue
                finally:
                    # Close the sync client whether or not the fetch succeeded.
                    if client:
                        client.sync_client.close()

                models = response.get('models')
                if not models:
                    logger.info(f"No image models found on host {host_url}.")
                    continue

                host_registered_count = 0
                for model_info in models:
                    try:
                        if not all(k in model_info for k in ["name", "value", "provider"]):
                            logger.warning(f"Skipping malformed image model from {host_url}: {model_info}")
                            continue

                        # Heuristic to ensure it's an image model if the server doesn't specify modality
                        if "parameter_schema" not in model_info:
                            logger.debug(f"Skipping model from {host_url} as it lacks a parameter schema, likely not an image model: {model_info.get('name')}")
                            continue

                        image_model = ImageModel(
                            name=model_info["name"],
                            value=model_info["value"],
                            provider=MultimediaProvider(model_info["provider"]),
                            client_class=AutobyteusImageClient,
                            runtime=MultimediaRuntime.AUTOBYTEUS,
                            host_url=host_url,
                            parameter_schema=model_info.get("parameter_schema")
                        )

                        ImageClientFactory.register_model(image_model)
                        host_registered_count += 1

                    except Exception as e:
                        # One bad model entry must not stop the remaining models.
                        logger.error(f"Failed to register image model '{model_info.get('name')}' from {host_url}: {e}")

                if host_registered_count > 0:
                    logger.info(f"Registered {host_registered_count} image models from Autobyteus host {host_url}")
                    total_registered_count += host_registered_count

            if total_registered_count > 0:
                logger.info(f"Finished Autobyteus image model discovery. Total models registered: {total_registered_count}")

        except Exception as e:
            logger.error(f"An unexpected error occurred during Autobyteus image model discovery: {e}", exc_info=True)

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """
        Validate URL format.

        Returns:
            bool: True when the parsed URL has both a scheme and a network
            location; False otherwise or when parsing raises.
        """
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Optional, Dict, Any, List, TYPE_CHECKING
|
|
4
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
8
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseImageClient(ABC):
    """
    Common interface for image clients.

    Concrete subclasses talk to a specific backend and must implement both
    image generation and image editing.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        # Stored as-is; subclasses read them when building requests.
        self.model, self.config = model, config

    @abstractmethod
    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Produce one or more images from a text prompt.

        Args:
            prompt (str): Description of the image to create.
            input_image_urls (Optional[List[str]]): URLs or local paths of
                reference images for image-to-image generation.
            generation_config (Optional[Dict[str, Any]]): Provider-specific
                overrides of the default parameters (e.g., n, size, quality, style).

        Returns:
            ImageGenerationResponse: Holds the URLs of the generated images.
        """
        ...

    @abstractmethod
    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Modify an existing image according to a text prompt.

        Args:
            prompt (str): Description of the edits to apply.
            input_image_urls (List[str]): Path(s) or URL(s) of the source image(s).
            mask_url (Optional[str]): Path of a mask image whose transparent
                areas mark where the source should be edited.
            generation_config (Optional[Dict[str, Any]]): Provider-specific parameters.

        Returns:
            ImageGenerationResponse: Holds the URLs of the edited images.
        """
        ...

    async def cleanup(self):
        """Hook for releasing resources such as network clients; no-op by default."""
        return None
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
from autobyteus.multimedia.image.autobyteus_image_provider import AutobyteusImageModelProvider
|
|
4
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
5
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
6
|
+
from autobyteus.multimedia.providers import MultimediaProvider
|
|
7
|
+
from autobyteus.multimedia.image.api.openai_image_client import OpenAIImageClient
|
|
8
|
+
from autobyteus.multimedia.image.api.gemini_image_client import GeminiImageClient
|
|
9
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
10
|
+
from autobyteus.utils.singleton import SingletonMeta
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class ImageClientFactory(metaclass=SingletonMeta):
    """
    Registry of ImageModels plus a factory that builds the matching image
    client for a given model identifier.
    """
    _models_by_identifier: Dict[str, ImageModel] = {}
    _initialized = False

    @staticmethod
    def ensure_initialized():
        """Populates the registry on first use; later calls return immediately."""
        if ImageClientFactory._initialized:
            return
        ImageClientFactory._initialize_registry()
        ImageClientFactory._initialized = True

    @staticmethod
    def reinitialize():
        """Drops every registered model and runs registration and discovery again."""
        logger.info("Reinitializing Image model registry...")
        ImageClientFactory._initialized = False
        ImageClientFactory._models_by_identifier.clear()
        ImageClientFactory.ensure_initialized()
        logger.info("Image model registry reinitialized successfully.")

    @staticmethod
    def _initialize_registry():
        """Registers the built-in image models, then discovers models on remote Autobyteus servers."""
        builtin_models = [
            # OpenAI Models
            # NOTE(review): registered under the name "gpt-image-1" but sends
            # "dall-e-3" as the API model value - confirm this is intentional.
            ImageModel(
                name="gpt-image-1",
                value="dall-e-3",
                provider=MultimediaProvider.OPENAI,
                client_class=OpenAIImageClient,
                parameter_schema={
                    "n": {"type": "integer", "default": 1, "allowed_values": [1], "description": "The number of images to generate."},
                    "size": {"type": "string", "default": "1024x1024", "allowed_values": ["1024x1024", "1792x1024", "1024x1792"], "description": "The size of the generated images."},
                    "quality": {"type": "string", "default": "hd", "allowed_values": ["standard", "hd"], "description": "The quality of the image that will be generated."},
                    "style": {"type": "string", "default": "vivid", "allowed_values": ["vivid", "natural"], "description": "The style of the generated images."}
                }
            ),
            ImageModel(
                name="dall-e-2",
                value="dall-e-2",
                provider=MultimediaProvider.OPENAI,
                client_class=OpenAIImageClient,
                parameter_schema={
                    "n": {"type": "integer", "default": 1, "description": "The number of images to generate."},
                    "size": {"type": "string", "default": "1024x1024", "allowed_values": ["256x256", "512x512", "1024x1024"], "description": "The size of the generated images."}
                }
            ),
            # Google Imagen Models (via Gemini API)
            ImageModel(
                name="imagen-4",
                value="imagen-4.0-generate-001",
                provider=MultimediaProvider.GOOGLE,
                client_class=GeminiImageClient,
                parameter_schema={}  # The genai library doesn't expose these as simple params
            ),
        ]

        for builtin in builtin_models:
            ImageClientFactory.register_model(builtin)

        logger.info("Default API-based image models registered.")

        # Discover models from remote Autobyteus servers
        AutobyteusImageModelProvider.discover_and_register()

    @staticmethod
    def register_model(model: ImageModel):
        """
        Adds a model to the registry under its ``model_identifier``.

        An existing entry with the same identifier is overwritten (a warning is
        logged). A provider that is not a MultimediaProvider is converted to
        one; when the conversion fails the model is not registered.
        """
        registry = ImageClientFactory._models_by_identifier
        identifier = model.model_identifier
        if identifier in registry:
            logger.warning(f"Image model '{identifier}' is already registered. Overwriting.")

        if not isinstance(model.provider, MultimediaProvider):
            try:
                model.provider = MultimediaProvider(model.provider)
            except ValueError:
                logger.error(f"Cannot register model '{identifier}' with unknown provider '{model.provider}'.")
                return

        registry[identifier] = model

    @staticmethod
    def create_image_client(model_identifier: str, config_override: Optional[MultimediaConfig] = None) -> BaseImageClient:
        """
        Builds a client for the model registered under ``model_identifier``.

        Raises:
            ValueError: If no model is registered under that identifier.
        """
        ImageClientFactory.ensure_initialized()

        registry = ImageClientFactory._models_by_identifier
        model = registry.get(model_identifier)
        if not model:
            raise ValueError(f"No image model registered with the name '{model_identifier}'. "
                             f"Available models: {list(registry.keys())}")

        logger.info(f"Creating instance of image client for model '{model_identifier}'.")
        return model.create_client(config_override)
|
|
117
|
+
|
|
118
|
+
# Module-level factory instance created at import time
# (ImageClientFactory is declared with metaclass=SingletonMeta).
image_client_factory = ImageClientFactory()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import logging
|
|
3
|
+
from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
from autobyteus.multimedia.providers import MultimediaProvider
|
|
7
|
+
from autobyteus.multimedia.runtimes import MultimediaRuntime
|
|
8
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
class ImageModelMeta(type):
    """
    Metaclass that lets the ImageModel class itself be iterated, indexed and
    measured, with the registry held by ImageClientFactory as backing store.
    """

    def __iter__(cls) -> Iterator[ImageModel]:
        # Function-scope import: the factory module imports this module at its top level.
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        yield from ImageClientFactory._models_by_identifier.values()

    def __getitem__(cls, name_or_identifier: str) -> ImageModel:
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        found = ImageClientFactory._models_by_identifier.get(name_or_identifier)
        if not found:
            raise KeyError(f"Image model '{name_or_identifier}' not found.")
        return found

    def __len__(cls) -> int:
        from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
        ImageClientFactory.ensure_initialized()
        registry = ImageClientFactory._models_by_identifier
        return len(registry)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ImageModel(metaclass=ImageModelMeta):
    """
    Represents a single image model's metadata.
    """
    def __init__(
        self,
        name: str,
        value: str,
        provider: MultimediaProvider,
        client_class: Type["BaseImageClient"],
        parameter_schema: Optional[Dict[str, Any]] = None,
        runtime: MultimediaRuntime = MultimediaRuntime.API,
        host_url: Optional[str] = None
    ):
        """
        Args:
            name: Name used to build the registry identifier.
            value: Model value stored for use by the client class.
            provider: The provider the model belongs to.
            client_class: Client class instantiated by ``create_client``.
            parameter_schema: Mapping of parameter name to a metadata dict; any
                "default" entries seed ``default_config``.
            runtime: Runtime the model is served from.
            host_url: URL of the serving host; used in the identifier for the
                AUTOBYTEUS runtime.
        """
        self.name = name
        self.value = value
        self.provider = provider
        self.client_class = client_class
        self.runtime = runtime
        self.host_url = host_url
        self.parameter_schema = parameter_schema if parameter_schema else {}

        # Automatically build default_config from the schema's default values
        default_params = {
            key: meta.get("default")
            for key, meta in self.parameter_schema.items()
            if "default" in meta
        }
        self.default_config = MultimediaConfig(params=default_params)

    @property
    def model_identifier(self) -> str:
        """
        Returns the unique identifier for the model.

        For the AUTOBYTEUS runtime with a host URL this is ``name@hostname``;
        otherwise it is just ``name``.
        NOTE(review): only the hostname is used (the port is not included), so
        two servers on the same host would produce the same identifier -
        confirm whether that matters.
        """
        if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
            try:
                host = urlparse(self.host_url).hostname
                return f"{self.name}@{host}"
            except Exception:
                return f"{self.name}@{self.host_url}" # Fallback
        return self.name

    def create_client(self, config_override: Optional[MultimediaConfig] = None) -> "BaseImageClient":
        """
        Instantiates the client class for this model.

        Args:
            config_override: Optional parameters merged over the model defaults.

        Returns:
            BaseImageClient: A client holding its own copy of the configuration.
        """
        from copy import deepcopy

        # Always hand out a private copy: passing `default_config` itself would
        # let any client that mutates its config silently change the defaults
        # used by every later client of this model.
        config_to_use = deepcopy(self.default_config)
        if config_override:
            config_to_use.merge_with(config_override)

        return self.client_class(model=self, config=config_to_use)

    def __repr__(self):
        return (
            f"ImageModel(identifier='{self.model_identifier}', "
            f"provider='{self.provider.name}', runtime='{self.runtime.value}')"
        )
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from .multimedia_config import MultimediaConfig
|
|
2
|
+
from .response_types import ImageGenerationResponse, SpeechGenerationResponse
|
|
3
|
+
from .api_utils import load_image_from_url
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"MultimediaConfig",
|
|
7
|
+
"ImageGenerationResponse",
|
|
8
|
+
"SpeechGenerationResponse",
|
|
9
|
+
"load_image_from_url",
|
|
10
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from PIL import Image
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
def load_image_from_url(url: str, timeout: float = 30.0) -> Image.Image:
    """
    Loads an image from a URL (http, https, or file path).

    Args:
        url: An http(s) URL, or anything else, which is treated as a local file path.
        timeout: Seconds to wait for the HTTP server before giving up. Only
            used for http(s) URLs.

    Returns:
        Image.Image: The opened image.

    Raises:
        Exception: Any error raised while fetching or opening the image is
            logged and re-raised unchanged.
    """
    from io import BytesIO

    try:
        if url.startswith(('http://', 'https://')):
            # A timeout prevents hanging forever on an unresponsive server.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            # Use the fully-read, content-decoded body rather than the raw
            # stream: `response.raw` is not decoded (e.g. gzip) and keeps the
            # connection open for as long as the image is lazily read.
            return Image.open(BytesIO(response.content))
        # Assume it's a local file path
        return Image.open(url)
    except Exception as e:
        logger.error(f"Failed to load image from URL/path '{url}': {e}")
        raise
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Optional, Dict, Any
|
|
3
|
+
|
|
4
|
+
@dataclass
class MultimediaConfig:
    """
    Configuration for multimedia generation, using a flexible dictionary for parameters.
    """
    # Free-form parameters, keyed by parameter name.
    params: Dict[str, Any] = field(default_factory=dict)

    def merge_with(self, override_config: Optional['MultimediaConfig']):
        """
        Merges parameters from an override config into this one.

        Keys present in ``override_config`` replace this config's values in
        place. A ``None`` or empty override leaves this config unchanged.
        """
        if override_config and override_config.params:
            self.params.update(override_config.params)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'MultimediaConfig':
        """
        Creates a MultimediaConfig instance from a dictionary of parameters.

        The dictionary is shallow-copied so that later ``merge_with`` calls on
        the new config do not mutate the caller's ``data``. ``None`` yields an
        empty config.
        """
        return cls(params=dict(data) if data is not None else {})

    def to_dict(self) -> Dict[str, Any]:
        """
        Returns the configuration parameters as a dictionary.

        This is the live ``params`` dict, not a copy; callers that modify the
        result should copy it first.
        """
        return self.params
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
|
|
4
|
+
@dataclass
class ImageGenerationResponse:
    """Result of an image generation or image editing request."""
    # Locations of the produced images.
    image_urls: List[str]
    # Prompt as rewritten by the provider, when one is reported.
    revised_prompt: Optional[str] = None
|
|
9
|
+
|
|
10
|
+
@dataclass
class SpeechGenerationResponse:
    """Result of a speech generation (Text-to-Speech) request."""
    # Locations of the produced audio.
    audio_urls: List[str]
|
autobyteus/tools/__init__.py
CHANGED
|
@@ -25,6 +25,7 @@ from .file.file_writer import file_writer
|
|
|
25
25
|
# General Class-based tools
|
|
26
26
|
from .image_downloader import ImageDownloader
|
|
27
27
|
from .timer import Timer
|
|
28
|
+
from .multimedia.image_tools import GenerateImageTool, EditImageTool
|
|
28
29
|
|
|
29
30
|
# Standalone Browser tools
|
|
30
31
|
from .browser.standalone.google_search_ui import GoogleSearch
|
|
@@ -62,6 +63,8 @@ __all__ = [
|
|
|
62
63
|
# Re-exported general class-based tools
|
|
63
64
|
"ImageDownloader",
|
|
64
65
|
"Timer",
|
|
66
|
+
"GenerateImageTool",
|
|
67
|
+
"EditImageTool",
|
|
65
68
|
|
|
66
69
|
# Re-exported Standalone Browser tools
|
|
67
70
|
"GoogleSearch",
|