autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/context/agent_config.py +6 -1
- autobyteus/agent/context/agent_runtime_state.py +7 -1
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
- autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
- autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
- autobyteus/agent/message/__init__.py +7 -5
- autobyteus/agent/message/agent_input_user_message.py +6 -16
- autobyteus/agent/message/context_file.py +24 -24
- autobyteus/agent/message/context_file_type.py +29 -8
- autobyteus/agent/message/multimodal_message_builder.py +47 -0
- autobyteus/agent/streaming/stream_event_payloads.py +23 -4
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
- autobyteus/agent/tool_invocation.py +27 -2
- autobyteus/agent_team/agent_team_builder.py +22 -1
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
- autobyteus/agent_team/context/agent_team_config.py +1 -0
- autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
- autobyteus/llm/api/autobyteus_llm.py +33 -33
- autobyteus/llm/api/bedrock_llm.py +13 -5
- autobyteus/llm/api/claude_llm.py +13 -27
- autobyteus/llm/api/gemini_llm.py +108 -42
- autobyteus/llm/api/groq_llm.py +4 -3
- autobyteus/llm/api/mistral_llm.py +97 -51
- autobyteus/llm/api/nvidia_llm.py +6 -5
- autobyteus/llm/api/ollama_llm.py +37 -12
- autobyteus/llm/api/openai_compatible_llm.py +91 -91
- autobyteus/llm/autobyteus_provider.py +1 -1
- autobyteus/llm/base_llm.py +42 -139
- autobyteus/llm/extensions/base_extension.py +6 -6
- autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
- autobyteus/llm/llm_factory.py +131 -61
- autobyteus/llm/ollama_provider_resolver.py +1 -0
- autobyteus/llm/providers.py +1 -0
- autobyteus/llm/token_counter/token_counter_factory.py +3 -1
- autobyteus/llm/user_message.py +43 -35
- autobyteus/llm/utils/llm_config.py +34 -18
- autobyteus/llm/utils/media_payload_formatter.py +99 -0
- autobyteus/llm/utils/messages.py +32 -25
- autobyteus/llm/utils/response_types.py +9 -3
- autobyteus/llm/utils/token_usage.py +6 -5
- autobyteus/multimedia/__init__.py +31 -0
- autobyteus/multimedia/audio/__init__.py +11 -0
- autobyteus/multimedia/audio/api/__init__.py +4 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
- autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
- autobyteus/multimedia/audio/audio_client_factory.py +120 -0
- autobyteus/multimedia/audio/audio_model.py +97 -0
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
- autobyteus/multimedia/audio/base_audio_client.py +40 -0
- autobyteus/multimedia/image/__init__.py +11 -0
- autobyteus/multimedia/image/api/__init__.py +9 -0
- autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
- autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
- autobyteus/multimedia/image/api/openai_image_client.py +142 -0
- autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
- autobyteus/multimedia/image/base_image_client.py +67 -0
- autobyteus/multimedia/image/image_client_factory.py +118 -0
- autobyteus/multimedia/image/image_model.py +97 -0
- autobyteus/multimedia/providers.py +5 -0
- autobyteus/multimedia/runtimes.py +8 -0
- autobyteus/multimedia/utils/__init__.py +10 -0
- autobyteus/multimedia/utils/api_utils.py +19 -0
- autobyteus/multimedia/utils/multimedia_config.py +29 -0
- autobyteus/multimedia/utils/response_types.py +13 -0
- autobyteus/task_management/tools/publish_task_plan.py +4 -16
- autobyteus/task_management/tools/update_task_status.py +4 -19
- autobyteus/tools/__init__.py +5 -4
- autobyteus/tools/base_tool.py +98 -29
- autobyteus/tools/browser/standalone/__init__.py +0 -1
- autobyteus/tools/google_search.py +149 -0
- autobyteus/tools/mcp/schema_mapper.py +29 -71
- autobyteus/tools/multimedia/__init__.py +8 -0
- autobyteus/tools/multimedia/audio_tools.py +116 -0
- autobyteus/tools/multimedia/image_tools.py +186 -0
- autobyteus/tools/parameter_schema.py +82 -89
- autobyteus/tools/pydantic_schema_converter.py +81 -0
- autobyteus/tools/tool_category.py +1 -0
- autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
- autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
- autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
- autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
- autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
- autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
- autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
- autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
- autobyteus-1.1.7.dist-info/METADATA +204 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
- examples/run_browser_agent.py +1 -1
- examples/run_google_slides_agent.py +2 -2
- examples/run_mcp_google_slides_client.py +1 -1
- examples/run_sqlite_agent.py +1 -1
- autobyteus/llm/utils/image_payload_formatter.py +0 -89
- autobyteus/tools/ask_user_input.py +0 -40
- autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
- autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
- autobyteus-1.1.5.dist-info/METADATA +0 -161
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
3
|
+
from autobyteus_llm_client import AutobyteusClient
|
|
4
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
5
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
9
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
class AutobyteusImageClient(BaseImageClient):
    """
    An image client that connects to an Autobyteus LLM server instance for image tasks.

    Generation and editing funnel into one remote endpoint; editing is simply
    a generation request that also carries a mask.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """Validate the model's host_url and open a client to the remote server."""
        super().__init__(model, config)
        if not model.host_url:
            raise ValueError("AutobyteusImageClient requires a host_url in its ImageModel.")

        self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
        logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' on host '{model.host_url}'.")

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generates an image by calling the generate_image endpoint on the remote Autobyteus server.
        """
        return await self._call_remote_generate(
            prompt=prompt,
            input_image_urls=input_image_urls,
            mask_url=None,  # a mask only applies to editing
            generation_config=generation_config
        )

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edits an image by calling the generate_image endpoint on the remote Autobyteus server.
        """
        return await self._call_remote_generate(
            prompt=prompt,
            input_image_urls=input_image_urls,
            mask_url=mask_url,
            generation_config=generation_config
        )

    async def _call_remote_generate(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]],
        mask_url: Optional[str],
        generation_config: Optional[Dict[str, Any]]
    ) -> ImageGenerationResponse:
        """Send one generate-image request to the remote server and unwrap the result.

        Raises:
            ValueError: if the server responds without any image URLs.
        """
        try:
            logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url}")

            # NOTE(review): an earlier comment claimed the server keys on the
            # model's `value` rather than its name, yet `name` is what gets
            # sent — confirm which field the remote endpoint expects.
            target_model = self.model.name

            response_data = await self.autobyteus_client.generate_image(
                model_name=target_model,
                prompt=prompt,
                input_image_urls=input_image_urls,
                mask_url=mask_url,
                generation_config=generation_config
            )

            image_urls = response_data.get("image_urls", [])
            if not image_urls:
                raise ValueError("Remote Autobyteus server did not return any image URLs.")

            return ImageGenerationResponse(image_urls=image_urls)

        except Exception as e:
            logger.error(f"Error calling Autobyteus server for image generation: {e}")
            raise

    async def cleanup(self):
        """Closes the underlying AutobyteusClient."""
        if self.autobyteus_client:
            await self.autobyteus_client.close()
            logger.debug("AutobyteusImageClient cleaned up.")
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import base64
|
|
3
|
+
import logging
|
|
4
|
+
import mimetypes
|
|
5
|
+
import os
|
|
6
|
+
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
# ✅ Legacy Gemini SDK (as requested)
|
|
9
|
+
import google.generativeai as genai
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
13
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
17
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _data_uri(mime_type: str, raw: bytes) -> str:
|
|
23
|
+
"""Convert raw bytes to a data URI."""
|
|
24
|
+
b64 = base64.b64encode(raw).decode("utf-8")
|
|
25
|
+
return f"data:{mime_type};base64,{b64}"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _guess_mime_from_url(url: str) -> str:
|
|
29
|
+
"""Best-effort MIME guess from URL; fall back to image/jpeg."""
|
|
30
|
+
mime, _ = mimetypes.guess_type(url)
|
|
31
|
+
return mime or "image/jpeg"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _fetch_image_part(url: str) -> Dict[str, Any]:
    """
    Download an image and return an inline-data Part in the shape the legacy
    SDK accepts: ``{"mime_type": "...", "data": <bytes>}``.
    """
    reply = requests.get(url, timeout=30)
    reply.raise_for_status()
    content_type = reply.headers.get("Content-Type")
    if not content_type:
        # No header from the server; fall back to guessing from the URL.
        content_type = _guess_mime_from_url(url)
    # Drop any charset/parameter suffix such as "; charset=utf-8".
    return {"mime_type": content_type.split(";")[0], "data": reply.content}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _extract_inline_images(response) -> List[Dict[str, bytes]]:
|
|
46
|
+
"""
|
|
47
|
+
Collect inline image parts from the legacy SDK response.
|
|
48
|
+
Returns list of { "mime_type": str, "data": bytes }.
|
|
49
|
+
"""
|
|
50
|
+
images = []
|
|
51
|
+
try:
|
|
52
|
+
candidates = getattr(response, "candidates", []) or []
|
|
53
|
+
if not candidates:
|
|
54
|
+
return images
|
|
55
|
+
|
|
56
|
+
parts = candidates[0].content.parts if candidates[0].content else []
|
|
57
|
+
for p in parts:
|
|
58
|
+
inline = getattr(p, "inline_data", None)
|
|
59
|
+
if not inline:
|
|
60
|
+
continue
|
|
61
|
+
mime = getattr(inline, "mime_type", "") or ""
|
|
62
|
+
if not mime.startswith("image/"):
|
|
63
|
+
continue
|
|
64
|
+
|
|
65
|
+
data = getattr(inline, "data", None)
|
|
66
|
+
if isinstance(data, bytes):
|
|
67
|
+
images.append({"mime_type": mime, "data": data})
|
|
68
|
+
elif isinstance(data, str):
|
|
69
|
+
# Some bindings expose base64 text
|
|
70
|
+
images.append({"mime_type": mime, "data": base64.b64decode(data)})
|
|
71
|
+
except Exception as e:
|
|
72
|
+
logger.error("Failed to parse inline image(s): %s", e)
|
|
73
|
+
raise
|
|
74
|
+
return images
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GeminiImageClient(BaseImageClient):
    """
    Image generation client using Google's legacy SDK (`google.generativeai`).

    Notes:
        - We configure `response_mime_type='image/png'` to request image output.
        - You can guide generation with input images by passing URLs; they're added as inline image Parts.
        - This runs the blocking SDK call in a worker thread to keep your async API.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """Configure the legacy SDK and build the underlying GenerativeModel.

        Raises:
            ValueError: if the GEMINI_API_KEY environment variable is missing.
            RuntimeError: if SDK configuration or model construction fails.
        """
        super().__init__(model, config)

        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("Please set the GEMINI_API_KEY environment variable.")

        try:
            genai.configure(api_key=api_key)
            # `self.model.value` should be an image-capable model.
            # Examples (subject to availability): "imagen-3.0-generate", "imagen-3.0-fast",
            # or Gemini image-preview models that support image output.
            model_name = self.model.value or "imagen-3.0-generate"
            self._model = genai.GenerativeModel(model_name)
            logger.info("GeminiImageClient (legacy SDK) initialized for model '%s'.", model_name)
        except Exception as e:
            logger.error("Failed to initialize Gemini image client: %s", e)
            # FIX: chain the original exception so the root cause is preserved.
            raise RuntimeError(f"Failed to initialize Gemini image client: {e}") from e

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generate an image (text-to-image or image-guided).

        `generation_config` supports common fields; we always ensure
        `response_mime_type='image/png'` so the SDK returns inline image bytes.

        Raises:
            ValueError: on safety blocks, empty results, or wrapped SDK failures.
        """
        try:
            logger.info("Generating image with model '%s'...", self._model.model_name)

            # Build contents array: [text, (optional) image parts...]
            contents: List[Any] = [prompt]

            if input_image_urls:
                logger.info("Loading %d input image(s) for guidance...", len(input_image_urls))
                for url in input_image_urls:
                    try:
                        contents.append(_fetch_image_part(url))
                    except Exception as e:
                        # Best-effort: a bad guidance image should not abort the request.
                        logger.error("Skipping image '%s' due to error: %s", url, e)

            # Merge config and force image output
            gen_cfg: Dict[str, Any] = (generation_config or {}).copy()
            gen_cfg.setdefault("response_mime_type", "image/png")

            # Call the (sync) SDK in a worker thread
            response = await asyncio.to_thread(
                self._model.generate_content,
                contents,
                generation_config=gen_cfg,
            )

            # Handle safety blocks if present
            feedback = getattr(response, "prompt_feedback", None)
            block_reason = getattr(feedback, "block_reason", None)
            if block_reason:
                reason = getattr(block_reason, "name", str(block_reason))
                logger.error("Image generation blocked by safety settings: %s", reason)
                raise ValueError(f"Image generation failed due to safety settings: {reason}")

            images = _extract_inline_images(response)
            if not images:
                logger.warning("No image parts returned for prompt: '%.100s...'", prompt)
                raise ValueError("Gemini API did not return any images.")

            image_urls = [_data_uri(img["mime_type"], img["data"]) for img in images]
            logger.info("Successfully generated %d image(s).", len(image_urls))

            return ImageGenerationResponse(
                image_urls=image_urls,
                revised_prompt=None  # legacy SDK does not provide a revised prompt here
            )

        except ValueError:
            # FIX: safety-block and empty-result errors raised above are already
            # descriptive and already logged; re-raise unchanged instead of
            # double-wrapping them in the generic failure message below.
            raise
        except Exception as e:
            logger.error("Error during Gemini image generation (legacy SDK): %s", e)
            # Region support / feature gating errors sometimes include 'Unsupported' hints.
            if "Unsupported" in str(e) and "location" in str(e):
                raise ValueError(
                    "Image generation may not be supported in your configured region or project. "
                    "Check your API access and region settings."
                ) from e
            # FIX: chain the original exception for debuggability.
            raise ValueError(f"Google Gemini image generation failed: {str(e)}") from e

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Image editing/redraw with masks isn't exposed via this legacy path here.

        Raises:
            NotImplementedError: always.
        """
        logger.error("Image editing is not supported by the GeminiImageClient (legacy SDK).")
        raise NotImplementedError("The GeminiImageClient does not support the edit_image method.")

    async def cleanup(self):
        """No-op: the legacy SDK holds no per-client resources to release."""
        logger.debug("GeminiImageClient cleanup called (legacy SDK; nothing to release).")
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
4
|
+
from openai import OpenAI
|
|
5
|
+
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
6
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
10
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class OpenAIImageClient(BaseImageClient):
    """
    An image client that uses OpenAI's DALL-E models.

    The OpenAI SDK is synchronous, so every API call is dispatched to a
    worker thread via `asyncio.to_thread` to avoid blocking the event loop
    (consistent with GeminiImageClient).
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        """Read OPENAI_API_KEY from the environment and build the SDK client.

        Raises:
            ValueError: if OPENAI_API_KEY is not set.
        """
        super().__init__(model, config)
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logger.error("OPENAI_API_KEY environment variable is not set.")
            raise ValueError("OPENAI_API_KEY environment variable is not set.")

        self.client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")
        logger.info(f"OpenAIImageClient initialized for model '{self.model.name}'.")

    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.
        Note: This endpoint does not support image inputs, even for multimodal models like gpt-image-1.

        Raises:
            ValueError: if the API returns no URLs or the call fails.
        """
        import asyncio  # local import: keeps the module's import block untouched

        if input_image_urls:
            logger.warning(
                f"The OpenAI `images.generate` API used by this client does not support input images. "
                f"The images provided for model '{self.model.value}' will be ignored. "
                f"To use image inputs, a client based on the Chat Completions API is required."
            )

        try:
            image_model = self.model.value
            logger.info(f"Generating image with OpenAI model '{image_model}' and prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self.config.to_dict().copy()
            if generation_config:
                final_config.update(generation_config)

            # FIX: the SDK call is blocking; run it off the event loop.
            response = await asyncio.to_thread(
                self.client.images.generate,
                model=image_model,
                prompt=prompt,
                n=final_config.get("n", 1),
                size=final_config.get("size", "1024x1024"),
                quality=final_config.get("quality", "standard"),
                style=final_config.get("style", "vivid"),
                response_format="url"
            )

            image_urls_list: List[str] = [img.url for img in response.data if img.url]
            revised_prompt: Optional[str] = response.data[0].revised_prompt if response.data and hasattr(response.data[0], 'revised_prompt') else None

            if not image_urls_list:
                raise ValueError("OpenAI API did not return any image URLs.")

            logger.info(f"Successfully generated {len(image_urls_list)} image(s).")

            return ImageGenerationResponse(
                image_urls=image_urls_list,
                revised_prompt=revised_prompt
            )
        except ValueError:
            # FIX: our own "no image URLs" error is already descriptive; don't re-wrap.
            raise
        except Exception as e:
            logger.error(f"Error during OpenAI image generation: {str(e)}")
            # FIX: chain the original exception for debuggability.
            raise ValueError(f"OpenAI image generation failed: {str(e)}") from e

    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edits an image using an OpenAI model that supports the v1/images/edits endpoint.

        NOTE(review): despite the parameter names, `input_image_urls[0]` and
        `mask_url` are opened as *local file paths* — confirm callers pass
        paths, not remote URLs.

        Raises:
            ValueError: if no input image is given, no URLs come back, or the call fails.
            FileNotFoundError: if the source or mask file path does not exist.
        """
        import asyncio  # local import: keeps the module's import block untouched

        if not input_image_urls:
            raise ValueError("At least one input image URL must be provided for editing.")

        source_image_url = input_image_urls[0]
        if len(input_image_urls) > 1:
            logger.warning(f"OpenAI edit endpoint only supports one input image. Using '{source_image_url}' and ignoring the rest.")

        try:
            logger.info(f"Editing image '{source_image_url}' with prompt: '{prompt[:50]}...'")

            # Combine default config with any overrides
            final_config = self.config.to_dict().copy()
            if generation_config:
                final_config.update(generation_config)

            with open(source_image_url, "rb") as image_file:
                mask_file = open(mask_url, "rb") if mask_url else None
                try:
                    # FIX: blocking SDK call; run it in a worker thread. The
                    # files stay open until the await completes.
                    response = await asyncio.to_thread(
                        self.client.images.edit,
                        image=image_file,
                        mask=mask_file,
                        prompt=prompt,
                        model=self.model.value,
                        n=final_config.get("n", 1),
                        size=final_config.get("size", "1024x1024"),
                        response_format="url"
                    )
                finally:
                    if mask_file:
                        mask_file.close()

            image_urls_list: List[str] = [img.url for img in response.data if img.url]
            if not image_urls_list:
                raise ValueError("OpenAI API did not return any edited image URLs.")

            logger.info(f"Successfully edited image, generated {len(image_urls_list)} version(s).")
            return ImageGenerationResponse(image_urls=image_urls_list)

        except FileNotFoundError as e:
            logger.error(f"Image file not found for editing: {e.filename}")
            raise
        except ValueError:
            # FIX: internally raised errors are already descriptive; don't re-wrap.
            raise
        except Exception as e:
            logger.error(f"Error during OpenAI image editing: {str(e)}")
            # The API might return a 400 Bad Request if the model doesn't support edits
            if "does not support image editing" in str(e):
                raise ValueError(f"The model '{self.model.value}' does not support the image editing endpoint.") from e
            raise ValueError(f"OpenAI image editing failed: {str(e)}") from e

    async def cleanup(self):
        """The OpenAI client does not require explicit cleanup of a session."""
        logger.debug("OpenAIImageClient cleanup called.")
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Any, List
|
|
3
|
+
import os
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
from autobyteus_llm_client import AutobyteusClient
|
|
7
|
+
from autobyteus.multimedia.image.api.autobyteus_image_client import AutobyteusImageClient
|
|
8
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
9
|
+
from autobyteus.multimedia.providers import MultimediaProvider
|
|
10
|
+
from autobyteus.multimedia.runtimes import MultimediaRuntime
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class AutobyteusImageModelProvider:
    """
    Discovers and registers image models from remote Autobyteus server instances.
    """
    DEFAULT_SERVER_URL = 'http://localhost:8000'

    @staticmethod
    def _get_hosts() -> List[str]:
        """Gets Autobyteus server hosts from env vars."""
        configured = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
        if configured:
            return [entry.strip() for entry in configured.split(',')]

        single = os.getenv('AUTOBYTEUS_LLM_SERVER_URL')
        return [single] if single else [AutobyteusImageModelProvider.DEFAULT_SERVER_URL]

    @staticmethod
    def _register_one(model_info: Dict[str, Any], host_url: str, factory) -> bool:
        """Validate one model payload from a host and register it; True on success."""
        try:
            if not all(field in model_info for field in ("name", "value", "provider")):
                logger.warning(f"Skipping malformed image model from {host_url}: {model_info}")
                return False

            # Heuristic to ensure it's an image model if the server doesn't specify modality
            if "parameter_schema" not in model_info:
                logger.debug(f"Skipping model from {host_url} as it lacks a parameter schema, likely not an image model: {model_info.get('name')}")
                return False

            factory.register_model(ImageModel(
                name=model_info["name"],
                value=model_info["value"],
                provider=MultimediaProvider(model_info["provider"]),
                client_class=AutobyteusImageClient,
                runtime=MultimediaRuntime.AUTOBYTEUS,
                host_url=host_url,
                parameter_schema=model_info.get("parameter_schema")
            ))
            return True
        except Exception as e:
            logger.error(f"Failed to register image model '{model_info.get('name')}' from {host_url}: {e}")
            return False

    @staticmethod
    def discover_and_register():
        """Discover and register image models from all configured hosts."""
        try:
            from autobyteus.multimedia.image.image_client_factory import ImageClientFactory

            total_registered_count = 0
            for host_url in AutobyteusImageModelProvider._get_hosts():
                if not AutobyteusImageModelProvider.is_valid_url(host_url):
                    logger.error(f"Invalid Autobyteus host URL for image model discovery: {host_url}, skipping.")
                    continue

                logger.info(f"Discovering image models from host: {host_url}")
                client = None
                try:
                    client = AutobyteusClient(server_url=host_url)
                    response = client.get_available_image_models_sync()
                except Exception as e:
                    logger.warning(f"Could not fetch models from Autobyteus server at {host_url}: {e}")
                    continue
                finally:
                    if client:
                        client.sync_client.close()

                models = response.get('models')
                if not models:
                    logger.info(f"No image models found on host {host_url}.")
                    continue

                host_registered_count = sum(
                    1 for info in models
                    if AutobyteusImageModelProvider._register_one(info, host_url, ImageClientFactory)
                )

                if host_registered_count > 0:
                    logger.info(f"Registered {host_registered_count} image models from Autobyteus host {host_url}")
                total_registered_count += host_registered_count

            if total_registered_count > 0:
                logger.info(f"Finished Autobyteus image model discovery. Total models registered: {total_registered_count}")

        except Exception as e:
            logger.error(f"An unexpected error occurred during Autobyteus image model discovery: {e}", exc_info=True)

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """Validate URL format"""
        try:
            parsed = urlparse(url)
            return bool(parsed.scheme and parsed.netloc)
        except Exception:
            return False
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Optional, Dict, Any, List, TYPE_CHECKING
|
|
4
|
+
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from autobyteus.multimedia.image.image_model import ImageModel
|
|
8
|
+
from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseImageClient(ABC):
    """
    Abstract base class for image clients that connect to models for image generation and editing.

    Concrete subclasses receive the model descriptor and default configuration
    at construction time and implement the async generation/editing entry points.
    """

    def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
        # Kept around for subclasses to consult.
        self.model = model
        self.config = config

    @abstractmethod
    async def generate_image(
        self,
        prompt: str,
        input_image_urls: Optional[List[str]] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Generates an image based on a textual prompt.

        Args:
            prompt (str): The text prompt describing the image to generate.
            input_image_urls (Optional[List[str]]): URLs or local paths of input
                images for image-to-image generation.
            generation_config (Optional[Dict[str, Any]]): Provider-specific
                overrides of the defaults (e.g., n, size, quality, style).

        Returns:
            ImageGenerationResponse: An object containing URLs to the generated images.
        """

    @abstractmethod
    async def edit_image(
        self,
        prompt: str,
        input_image_urls: List[str],
        mask_url: Optional[str] = None,
        generation_config: Optional[Dict[str, Any]] = None
    ) -> ImageGenerationResponse:
        """
        Edits an existing image based on a textual prompt.

        Args:
            prompt (str): A text prompt describing the desired edits.
            input_image_urls (List[str]): Path(s) or URL(s) of the source image(s) to edit.
            mask_url (Optional[str]): Path to a mask image; its transparent
                areas indicate where the image should be edited.
            generation_config (Optional[Dict[str, Any]]): Provider-specific parameters.

        Returns:
            ImageGenerationResponse: An object containing URLs to the edited images.
        """

    async def cleanup(self):
        """Optional cleanup hook for resources such as network clients."""
|