autobyteus 1.1.8__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +6 -2
- autobyteus/agent/handlers/inter_agent_message_event_handler.py +17 -19
- autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +6 -3
- autobyteus/agent/handlers/tool_result_event_handler.py +61 -18
- autobyteus/agent/handlers/user_input_message_event_handler.py +19 -10
- autobyteus/agent/hooks/base_phase_hook.py +17 -0
- autobyteus/agent/hooks/hook_registry.py +15 -27
- autobyteus/agent/input_processor/base_user_input_processor.py +17 -1
- autobyteus/agent/input_processor/processor_registry.py +15 -27
- autobyteus/agent/llm_response_processor/base_processor.py +17 -1
- autobyteus/agent/llm_response_processor/processor_registry.py +15 -24
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +14 -0
- autobyteus/agent/message/agent_input_user_message.py +15 -2
- autobyteus/agent/message/send_message_to.py +1 -1
- autobyteus/agent/processor_option.py +17 -0
- autobyteus/agent/sender_type.py +1 -0
- autobyteus/agent/system_prompt_processor/base_processor.py +17 -1
- autobyteus/agent/system_prompt_processor/processor_registry.py +15 -27
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +10 -0
- autobyteus/agent/tool_execution_result_processor/base_processor.py +17 -1
- autobyteus/agent/tool_execution_result_processor/processor_registry.py +15 -1
- autobyteus/agent/workspace/base_workspace.py +1 -1
- autobyteus/agent/workspace/workspace_definition.py +1 -1
- autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +1 -1
- autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +2 -2
- autobyteus/agent_team/task_notification/__init__.py +4 -0
- autobyteus/agent_team/task_notification/activation_policy.py +70 -0
- autobyteus/agent_team/task_notification/system_event_driven_agent_task_notifier.py +56 -122
- autobyteus/agent_team/task_notification/task_activator.py +66 -0
- autobyteus/cli/agent_team_tui/state.py +17 -20
- autobyteus/cli/agent_team_tui/widgets/focus_pane.py +1 -1
- autobyteus/cli/agent_team_tui/widgets/task_board_panel.py +1 -1
- autobyteus/clients/__init__.py +10 -0
- autobyteus/clients/autobyteus_client.py +318 -0
- autobyteus/clients/cert_utils.py +105 -0
- autobyteus/clients/certificates/cert.pem +34 -0
- autobyteus/events/event_types.py +2 -2
- autobyteus/llm/api/autobyteus_llm.py +1 -1
- autobyteus/llm/api/gemini_llm.py +45 -54
- autobyteus/llm/api/qwen_llm.py +25 -0
- autobyteus/llm/api/zhipu_llm.py +26 -0
- autobyteus/llm/autobyteus_provider.py +9 -3
- autobyteus/llm/llm_factory.py +39 -0
- autobyteus/llm/ollama_provider_resolver.py +1 -0
- autobyteus/llm/providers.py +1 -0
- autobyteus/llm/token_counter/token_counter_factory.py +3 -0
- autobyteus/llm/token_counter/zhipu_token_counter.py +24 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +5 -2
- autobyteus/multimedia/audio/api/gemini_audio_client.py +84 -153
- autobyteus/multimedia/audio/audio_client_factory.py +47 -22
- autobyteus/multimedia/audio/audio_model.py +13 -6
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +9 -3
- autobyteus/multimedia/audio/base_audio_client.py +3 -1
- autobyteus/multimedia/image/api/autobyteus_image_client.py +13 -6
- autobyteus/multimedia/image/api/gemini_image_client.py +72 -130
- autobyteus/multimedia/image/api/openai_image_client.py +4 -2
- autobyteus/multimedia/image/autobyteus_image_provider.py +9 -3
- autobyteus/multimedia/image/base_image_client.py +6 -2
- autobyteus/multimedia/image/image_client_factory.py +20 -19
- autobyteus/multimedia/image/image_model.py +13 -6
- autobyteus/multimedia/providers.py +1 -0
- autobyteus/task_management/__init__.py +10 -10
- autobyteus/task_management/base_task_board.py +14 -6
- autobyteus/task_management/converters/__init__.py +0 -2
- autobyteus/task_management/converters/task_board_converter.py +7 -16
- autobyteus/task_management/events.py +6 -6
- autobyteus/task_management/in_memory_task_board.py +48 -38
- autobyteus/task_management/schemas/__init__.py +2 -2
- autobyteus/task_management/schemas/{plan_definition.py → task_definition.py} +6 -7
- autobyteus/task_management/schemas/task_status_report.py +1 -2
- autobyteus/task_management/task.py +60 -0
- autobyteus/task_management/tools/__init__.py +6 -2
- autobyteus/task_management/tools/assign_task_to.py +125 -0
- autobyteus/task_management/tools/get_my_tasks.py +80 -0
- autobyteus/task_management/tools/get_task_board_status.py +3 -3
- autobyteus/task_management/tools/publish_task.py +77 -0
- autobyteus/task_management/tools/publish_tasks.py +74 -0
- autobyteus/task_management/tools/update_task_status.py +5 -5
- autobyteus/tools/__init__.py +54 -16
- autobyteus/tools/base_tool.py +4 -4
- autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +1 -1
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +1 -1
- autobyteus/tools/browser/standalone/navigate_to.py +1 -1
- autobyteus/tools/browser/standalone/web_page_pdf_generator.py +1 -1
- autobyteus/tools/browser/standalone/webpage_image_downloader.py +1 -1
- autobyteus/tools/browser/standalone/webpage_reader.py +1 -1
- autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +1 -1
- autobyteus/tools/download_media_tool.py +136 -0
- autobyteus/tools/file/file_editor.py +200 -0
- autobyteus/tools/functional_tool.py +1 -1
- autobyteus/tools/google_search.py +1 -1
- autobyteus/tools/mcp/factory.py +1 -1
- autobyteus/tools/mcp/schema_mapper.py +1 -1
- autobyteus/tools/mcp/tool.py +1 -1
- autobyteus/tools/multimedia/__init__.py +2 -0
- autobyteus/tools/multimedia/audio_tools.py +10 -20
- autobyteus/tools/multimedia/image_tools.py +21 -22
- autobyteus/tools/multimedia/media_reader_tool.py +117 -0
- autobyteus/tools/pydantic_schema_converter.py +1 -1
- autobyteus/tools/registry/tool_definition.py +1 -1
- autobyteus/tools/timer.py +1 -1
- autobyteus/tools/tool_meta.py +1 -1
- autobyteus/tools/usage/formatters/default_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/default_xml_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +59 -3
- autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/google_json_example_formatter.py +1 -1
- autobyteus/tools/usage/formatters/openai_json_example_formatter.py +1 -1
- autobyteus/tools/usage/parsers/_string_decoders.py +18 -0
- autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +9 -1
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +15 -1
- autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +4 -1
- autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +4 -1
- autobyteus/{tools → utils}/parameter_schema.py +1 -1
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/METADATA +4 -3
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/RECORD +122 -108
- examples/run_poem_writer.py +1 -1
- autobyteus/task_management/converters/task_plan_converter.py +0 -48
- autobyteus/task_management/task_plan.py +0 -110
- autobyteus/task_management/tools/publish_task_plan.py +0 -101
- autobyteus/tools/image_downloader.py +0 -99
- autobyteus/tools/pdf_downloader.py +0 -89
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.8.dist-info → autobyteus-1.2.0.dist-info}/top_level.txt +0 -0
autobyteus/multimedia/image/api/gemini_image_client.py

@@ -1,16 +1,14 @@
-import asyncio
-import base64
 import logging
-import
+import base64
 import os
 from typing import Optional, List, Dict, Any, TYPE_CHECKING
-
-
-import google.generativeai as genai
+from google import genai
+from PIL import Image
 import requests

 from autobyteus.multimedia.image.base_image_client import BaseImageClient
 from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
+from autobyteus.multimedia.utils.api_utils import load_image_from_url

 if TYPE_CHECKING:
     from autobyteus.multimedia.image.image_model import ImageModel

@@ -18,157 +16,86 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-
-def _data_uri(mime_type: str, raw: bytes) -> str:
-    """Convert raw bytes to a data URI."""
-    b64 = base64.b64encode(raw).decode("utf-8")
-    return f"data:{mime_type};base64,{b64}"
-
-
-def _guess_mime_from_url(url: str) -> str:
-    """Best-effort MIME guess from URL; fall back to image/jpeg."""
-    mime, _ = mimetypes.guess_type(url)
-    return mime or "image/jpeg"
-
-
-def _fetch_image_part(url: str) -> Dict[str, Any]:
-    """
-    Download an image and return an inline-data Part compatible with the legacy SDK:
-    { "mime_type": "...", "data": <bytes> }
-    """
-    resp = requests.get(url, timeout=30)
-    resp.raise_for_status()
-    mime = resp.headers.get("Content-Type") or _guess_mime_from_url(url)
-    return {"mime_type": mime.split(";")[0], "data": resp.content}
-
-
-def _extract_inline_images(response) -> List[Dict[str, bytes]]:
-    """
-    Collect inline image parts from the legacy SDK response.
-    Returns list of { "mime_type": str, "data": bytes }.
-    """
-    images = []
-    try:
-        candidates = getattr(response, "candidates", []) or []
-        if not candidates:
-            return images
-
-        parts = candidates[0].content.parts if candidates[0].content else []
-        for p in parts:
-            inline = getattr(p, "inline_data", None)
-            if not inline:
-                continue
-            mime = getattr(inline, "mime_type", "") or ""
-            if not mime.startswith("image/"):
-                continue
-
-            data = getattr(inline, "data", None)
-            if isinstance(data, bytes):
-                images.append({"mime_type": mime, "data": data})
-            elif isinstance(data, str):
-                # Some bindings expose base64 text
-                images.append({"mime_type": mime, "data": base64.b64decode(data)})
-    except Exception as e:
-        logger.error("Failed to parse inline image(s): %s", e)
-        raise
-    return images
-
-
 class GeminiImageClient(BaseImageClient):
     """
-
+    An image client that uses Google's Gemini models for image generation tasks.

-
-
-    - You can guide generation with input images by passing URLs; they’re added as inline image Parts.
-    - This runs the blocking SDK call in a worker thread to keep your async API.
+    **Setup Requirements:**
+    1. **Authentication:** Set the `GEMINI_API_KEY` environment variable with your API key.
     """

     def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
         super().__init__(model, config)
-
         api_key = os.getenv("GEMINI_API_KEY")
         if not api_key:
             raise ValueError("Please set the GEMINI_API_KEY environment variable.")
-
+
         try:
-            genai.
-
-
-            # or Gemini image-preview models that support image output.
-            model_name = self.model.value or "imagen-3.0-generate"
-            self._model = genai.GenerativeModel(model_name)
-            logger.info("GeminiImageClient (legacy SDK) initialized for model '%s'.", model_name)
+            self.client = genai.Client()
+            self.async_client = self.client.aio
+            logger.info(f"GeminiImageClient initialized for model '{self.model.name}'.")
         except Exception as e:
-            logger.error("Failed to initialize Gemini
-            raise RuntimeError(f"Failed to initialize Gemini
+            logger.error(f"Failed to initialize Gemini client for images: {e}")
+            raise RuntimeError(f"Failed to initialize Gemini client for images: {e}")

     async def generate_image(
         self,
         prompt: str,
         input_image_urls: Optional[List[str]] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
-
-
-        `generation_config` supports common fields; we always ensure
-        `response_mime_type='image/png'` so the SDK returns inline image bytes.
+        Generates an image using a Google Gemini model. Can be text-to-image or image-to-image.
         """
         try:
-            logger.info("Generating image with model '
-
-            # Build contents array: [text, (optional) image parts...]
-            contents: List[Any] = [prompt]
+            logger.info(f"Generating image with Google Gemini model '{self.model.value}'...")

+            content = [prompt]
             if input_image_urls:
-                logger.info("Loading
+                logger.info(f"Loading {len(input_image_urls)} input image(s) for generation.")
                 for url in input_image_urls:
                     try:
-
+                        content.append(load_image_from_url(url))
                     except Exception as e:
-                        logger.error("Skipping image '
-
-                        #
-        [… roughly 24 further removed lines of the legacy implementation are not legible in this diff view …]
-            logger.info("Successfully generated
+                        logger.error(f"Skipping image at '{url}' due to loading error: {e}")
+
+            # Note: The google-genai library uses the synchronous client for the `.generate_content` method on a model
+            # even in an async context, as there isn't a direct async equivalent exposed for this specific call on the model object.
+            # We use the top-level async client for other potential future calls if the library API changes.
+            model_instance = self.client.get_generative_model(model_name=f"models/{self.model.value}")
+            response = await model_instance.generate_content_async(contents=content)
+
+            image_urls = []
+            for part in response.parts:
+                if part.inline_data and "image" in part.inline_data.mime_type:
+                    image_bytes = part.inline_data.data
+                    base64_image = base64.b64encode(image_bytes).decode("utf-8")
+                    data_uri = f"data:{part.inline_data.mime_type};base64,{base64_image}"
+                    image_urls.append(data_uri)
+
+            if not image_urls:
+                # Check for a safety-related refusal to generate content
+                if response.prompt_feedback.block_reason:
+                    reason = response.prompt_feedback.block_reason.name
+                    logger.error(f"Image generation blocked due to safety settings. Reason: {reason}")
+                    raise ValueError(f"Image generation failed due to safety settings: {reason}")
+
+                logger.warning(f"Gemini API did not return any images for the prompt: '{prompt[:100]}...'")
+                raise ValueError("Gemini API did not return any processable images.")
+
+            logger.info(f"Successfully generated {len(image_urls)} image(s) with Gemini.")

             return ImageGenerationResponse(
                 image_urls=image_urls,
-                revised_prompt=None #
+                revised_prompt=None # genai library does not provide a revised prompt for images
             )
-
         except Exception as e:
-            logger.error("Error during Gemini image generation (
-            #
+            logger.error(f"Error during Google Gemini image generation: {str(e)}")
+            # Re-raise with a more specific message if it's a known type of error
             if "Unsupported" in str(e) and "location" in str(e):
-
-                    "Image generation may not be supported in your configured region or project. "
-                    "Check your API access and region settings."
-                )
+                raise ValueError(f"Image generation is not supported in your configured region. Please check your Google Cloud project settings.")
             raise ValueError(f"Google Gemini image generation failed: {str(e)}")

     async def edit_image(

@@ -176,13 +103,28 @@ class GeminiImageClient(BaseImageClient):
         prompt: str,
         input_image_urls: List[str],
         mask_url: Optional[str] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
-
+        Edits an image using a Google Gemini model by providing the image(s) as context.
+        This method leverages the same underlying 'generate_content' call as generate_image.
+        Note: The Gemini API via the google-genai library does not support explicit masking.
         """
-
-
+        if mask_url:
+            logger.warning(
+                f"The GeminiImageClient for model '{self.model.name}' received a 'mask_url' but does not support "
+                "explicit masking. The mask will be ignored. The model will perform a general edit based on the prompt."
+            )
+
+        # For Gemini, editing is the same as generating with an input image.
+        # The generate_image method already handles this logic correctly.
+        return await self.generate_image(
+            prompt=prompt,
+            input_image_urls=input_image_urls,
+            generation_config=generation_config,
+            **kwargs
+        )

     async def cleanup(self):
-        logger.debug("GeminiImageClient cleanup called
+        logger.debug("GeminiImageClient cleanup called.")
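Because the rewritten client now returns inline results as base64 data URIs (see the `data:{mime};base64,...` construction above) rather than hosted URLs, a caller typically has to decode them before writing to disk. A minimal stdlib-only sketch; the `client`/`response` names in the commented usage are illustrative, not part of the package API.

import base64

def save_data_uri(data_uri: str, path: str) -> str:
    """Decode a 'data:<mime>;base64,<payload>' URI and write the bytes to `path`."""
    header, _, payload = data_uri.partition(",")
    if not header.startswith("data:") or ";base64" not in header:
        raise ValueError("Expected a base64-encoded data URI.")
    with open(path, "wb") as f:
        f.write(base64.b64decode(payload))
    return path

# Illustrative usage (inside an async context, with a constructed image client):
# response = await client.generate_image(prompt="a lighthouse at dusk")
# save_data_uri(response.image_urls[0], "lighthouse.png")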
autobyteus/multimedia/image/api/openai_image_client.py

@@ -30,7 +30,8 @@ class OpenAIImageClient(BaseImageClient):
         self,
         prompt: str,
         input_image_urls: Optional[List[str]] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
         Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.

@@ -83,7 +84,8 @@ class OpenAIImageClient(BaseImageClient):
         prompt: str,
         input_image_urls: List[str],
         mask_url: Optional[str] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
         Edits an image using an OpenAI model that supports the v1/images/edits endpoint.
autobyteus/multimedia/image/autobyteus_image_provider.py

@@ -3,7 +3,7 @@ from typing import Dict, Any, List
 import os
 from urllib.parse import urlparse

-from
+from autobyteus.clients import AutobyteusClient
 from autobyteus.multimedia.image.api.autobyteus_image_client import AutobyteusImageClient
 from autobyteus.multimedia.image.image_model import ImageModel
 from autobyteus.multimedia.providers import MultimediaProvider

@@ -19,7 +19,9 @@ class AutobyteusImageModelProvider:

     @staticmethod
     def _get_hosts() -> List[str]:
-        """
+        """
+        Gets Autobyteus server hosts from env vars. Skips discovery if no host is configured.
+        """
         hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
         if hosts_str:
             return [host.strip() for host in hosts_str.split(',')]

@@ -28,7 +30,7 @@ class AutobyteusImageModelProvider:
         if legacy_host:
             return [legacy_host]

-        return [
+        return []

     @staticmethod
     def discover_and_register():

@@ -37,6 +39,10 @@ class AutobyteusImageModelProvider:
         from autobyteus.multimedia.image.image_client_factory import ImageClientFactory

         hosts = AutobyteusImageModelProvider._get_hosts()
+        if not hosts:
+            logger.info("No Autobyteus server hosts configured. Skipping Autobyteus image model discovery.")
+            return
+
         total_registered_count = 0

         for host_url in hosts:
autobyteus/multimedia/image/base_image_client.py

@@ -21,7 +21,8 @@ class BaseImageClient(ABC):
         self,
         prompt: str,
         input_image_urls: Optional[List[str]] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
         Generates an image based on a textual prompt.

@@ -33,6 +34,7 @@ class BaseImageClient(ABC):
             generation_config (Optional[Dict[str, Any]]): Provider-specific parameters for image generation
                                                           to override defaults.
                                                           (e.g., n, size, quality, style).
+            **kwargs: Additional keyword arguments for extensibility.

         Returns:
             ImageGenerationResponse: An object containing URLs to the generated images.

@@ -45,7 +47,8 @@ class BaseImageClient(ABC):
         prompt: str,
         input_image_urls: List[str],
         mask_url: Optional[str] = None,
-        generation_config: Optional[Dict[str, Any]] = None
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs
     ) -> ImageGenerationResponse:
         """
         Edits an existing image based on a textual prompt.

@@ -56,6 +59,7 @@ class BaseImageClient(ABC):
             mask_url (Optional[str]): The path to a mask image. The transparent areas of the mask
                                       indicate where the image should be edited.
             generation_config (Optional[Dict[str, Any]]): Provider-specific parameters.
+            **kwargs: Additional keyword arguments for extensibility.

         Returns:
             ImageGenerationResponse: An object containing URLs to the edited images.
autobyteus/multimedia/image/image_client_factory.py

@@ -8,6 +8,7 @@ from autobyteus.multimedia.image.api.openai_image_client import OpenAIImageClient
 from autobyteus.multimedia.image.api.gemini_image_client import GeminiImageClient
 from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
 from autobyteus.utils.singleton import SingletonMeta
+from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType

 logger = logging.getLogger(__name__)


@@ -39,28 +40,19 @@ class ImageClientFactory(metaclass=SingletonMeta):
         """Initializes the registry with built-in image models and discovers remote ones."""

         # OpenAI Models
+        gpt_image_1_schema = ParameterSchema(parameters=[
+            ParameterDefinition(name="n", param_type=ParameterType.INTEGER, default_value=1, enum_values=[1], description="The number of images to generate."),
+            ParameterDefinition(name="size", param_type=ParameterType.ENUM, default_value="1024x1024", enum_values=["1024x1024", "1792x1024", "1024x1792"], description="The size of the generated images."),
+            ParameterDefinition(name="quality", param_type=ParameterType.ENUM, default_value="hd", enum_values=["standard", "hd"], description="The quality of the image that will be generated."),
+            ParameterDefinition(name="style", param_type=ParameterType.ENUM, default_value="vivid", enum_values=["vivid", "natural"], description="The style of the generated images.")
+        ])
+
         gpt_image_1_model = ImageModel(
             name="gpt-image-1",
             value="dall-e-3",
             provider=MultimediaProvider.OPENAI,
             client_class=OpenAIImageClient,
-            parameter_schema=
-                "n": {"type": "integer", "default": 1, "allowed_values": [1], "description": "The number of images to generate."},
-                "size": {"type": "string", "default": "1024x1024", "allowed_values": ["1024x1024", "1792x1024", "1024x1792"], "description": "The size of the generated images."},
-                "quality": {"type": "string", "default": "hd", "allowed_values": ["standard", "hd"], "description": "The quality of the image that will be generated."},
-                "style": {"type": "string", "default": "vivid", "allowed_values": ["vivid", "natural"], "description": "The style of the generated images."}
-            }
-        )
-
-        dall_e_2_model = ImageModel(
-            name="dall-e-2",
-            value="dall-e-2",
-            provider=MultimediaProvider.OPENAI,
-            client_class=OpenAIImageClient,
-            parameter_schema={
-                "n": {"type": "integer", "default": 1, "description": "The number of images to generate."},
-                "size": {"type": "string", "default": "1024x1024", "allowed_values": ["256x256", "512x512", "1024x1024"], "description": "The size of the generated images."}
-            }
+            parameter_schema=gpt_image_1_schema
         )

         # Google Imagen Models (via Gemini API)

@@ -69,13 +61,22 @@ class ImageClientFactory(metaclass=SingletonMeta):
             value="imagen-4.0-generate-001",
             provider=MultimediaProvider.GOOGLE,
             client_class=GeminiImageClient,
-            parameter_schema=
+            parameter_schema=None # The genai library doesn't expose these as simple params
+        )
+
+        # Google Gemini Flash Image Model (aka "Nano Banana")
+        gemini_flash_image_model = ImageModel(
+            name="gemini-2.5-flash-image-preview",
+            value="gemini-2.5-flash-image-preview",
+            provider=MultimediaProvider.GOOGLE,
+            client_class=GeminiImageClient,
+            parameter_schema=None # Parameters are not exposed for this model via the genai library.
         )

         models_to_register = [
             gpt_image_1_model,
-            dall_e_2_model,
             imagen_model,
+            gemini_flash_image_model,
         ]

         for model in models_to_register:
autobyteus/multimedia/image/image_model.py

@@ -1,11 +1,12 @@
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any
+from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any, Union
 from urllib.parse import urlparse

 from autobyteus.multimedia.providers import MultimediaProvider
 from autobyteus.multimedia.runtimes import MultimediaRuntime
 from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
+from autobyteus.utils.parameter_schema import ParameterSchema

 if TYPE_CHECKING:
     from autobyteus.multimedia.image.base_image_client import BaseImageClient

@@ -47,7 +48,7 @@ class ImageModel(metaclass=ImageModelMeta):
         value: str,
         provider: MultimediaProvider,
         client_class: Type["BaseImageClient"],
-        parameter_schema: Optional[Dict[str, Any]] = None,
+        parameter_schema: Optional[Union[Dict[str, Any], ParameterSchema]] = None,
         runtime: MultimediaRuntime = MultimediaRuntime.API,
         host_url: Optional[str] = None
     ):

@@ -57,13 +58,19 @@ class ImageModel(metaclass=ImageModelMeta):
         self.client_class = client_class
         self.runtime = runtime
         self.host_url = host_url
-
+
+        if isinstance(parameter_schema, dict):
+            self.parameter_schema = ParameterSchema.from_dict(parameter_schema)
+        elif parameter_schema is None:
+            self.parameter_schema = ParameterSchema()
+        else:
+            self.parameter_schema = parameter_schema

         # Automatically build default_config from the schema's default values
         default_params = {
-
-            for
-            if
+            param.name: param.default_value
+            for param in self.parameter_schema.parameters
+            if param.default_value is not None
         }
         self.default_config = MultimediaConfig(params=default_params)

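Since `parameter_schema` now accepts either the legacy dict or a typed `ParameterSchema`, registration code can migrate incrementally. A sketch based only on the constructor arguments and imports visible in the hunks above; it has not been run against the released package, and the model name is illustrative.

from autobyteus.multimedia.image.image_model import ImageModel
from autobyteus.multimedia.image.api.openai_image_client import OpenAIImageClient
from autobyteus.multimedia.providers import MultimediaProvider
from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType

# Typed form, mirroring what image_client_factory.py now does.
schema = ParameterSchema(parameters=[
    ParameterDefinition(
        name="size",
        param_type=ParameterType.ENUM,
        default_value="1024x1024",
        enum_values=["1024x1024", "1792x1024", "1024x1792"],
        description="The size of the generated images.",
    ),
])

custom_model = ImageModel(
    name="my-dalle-variant",      # illustrative registration name
    value="dall-e-3",
    provider=MultimediaProvider.OPENAI,
    client_class=OpenAIImageClient,
    parameter_schema=schema,       # a plain dict would instead be converted via ParameterSchema.from_dict()
)
# default_config is built in __init__ from the schema's declared defaults (here: size='1024x1024').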
autobyteus/task_management/__init__.py

@@ -4,15 +4,15 @@ This package defines components for task management and state tracking,
 including task plans and live task boards. It is designed to be a general-purpose
 module usable by various components, such as agents or agent teams.
 """
-from .
-from .schemas import (
+from .task import Task
+from .schemas import (TasksDefinitionSchema, TaskDefinitionSchema, TaskStatusReportSchema,
                       TaskStatusReportItemSchema, FileDeliverableSchema)
 from .base_task_board import BaseTaskBoard, TaskStatus
 from .in_memory_task_board import InMemoryTaskBoard
 from .deliverable import FileDeliverable
-from .tools import GetTaskBoardStatus,
-from .converters import TaskBoardConverter
-from .events import BaseTaskBoardEvent,
+from .tools import GetTaskBoardStatus, PublishTasks, PublishTask, UpdateTaskStatus, AssignTaskTo
+from .converters import TaskBoardConverter
+from .events import BaseTaskBoardEvent, TasksAddedEvent, TaskStatusUpdatedEvent

 # For convenience, we can alias InMemoryTaskBoard as the default TaskBoard.
 # This allows other parts of the code to import `TaskBoard` without needing

@@ -20,9 +20,8 @@ from .events import BaseTaskBoardEvent, TaskPlanPublishedEvent, TaskStatusUpdatedEvent
 TaskBoard = InMemoryTaskBoard

 __all__ = [
-    "TaskPlan",
     "Task",
-    "
+    "TasksDefinitionSchema",
     "TaskDefinitionSchema",
     "TaskStatusReportSchema",
     "TaskStatusReportItemSchema",

@@ -33,11 +32,12 @@ __all__ = [
     "TaskBoard", # Exposing the alias
     "FileDeliverable",
     "GetTaskBoardStatus",
-    "
+    "PublishTasks",
+    "PublishTask",
     "UpdateTaskStatus",
+    "AssignTaskTo",
     "TaskBoardConverter",
-    "TaskPlanConverter",
     "BaseTaskBoardEvent",
-    "
+    "TasksAddedEvent",
     "TaskStatusUpdatedEvent",
 ]
autobyteus/task_management/base_task_board.py

@@ -8,13 +8,14 @@ from enum import Enum
 from typing import Dict, Any, List, Optional

 from autobyteus.events.event_emitter import EventEmitter
-from .
+from .task import Task

 logger = logging.getLogger(__name__)

 class TaskStatus(str, Enum):
     """Enumerates the possible lifecycle states of a task on the TaskBoard."""
     NOT_STARTED = "not_started"
+    QUEUED = "queued"
     IN_PROGRESS = "in_progress"
     COMPLETED = "completed"
     BLOCKED = "blocked"

@@ -29,20 +30,27 @@ class BaseTaskBoard(ABC, EventEmitter):
     Abstract base class for a TaskBoard.

     This class defines the contract for any component that manages the live state
-    of a
-
-    broadcast state changes.
+    of tasks for a team. It is a dynamic board, not a static plan.
+    It inherits from EventEmitter to broadcast state changes.
     """

     def __init__(self, team_id: str):
         EventEmitter.__init__(self)
         self.team_id = team_id
+        self.tasks: List[Task] = []
         logger.debug(f"BaseTaskBoard initialized for team '{self.team_id}'.")

     @abstractmethod
-    def
+    def add_tasks(self, tasks: List[Task]) -> bool:
         """
-
+        Adds a list of new tasks to the board. This is an additive-only operation.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def add_task(self, task: Task) -> bool:
+        """
+        Adds a single new task to the board.
         """
         raise NotImplementedError

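To make the shift from a static published plan to an additive board concrete, here is a standalone schematic of the contract the new abstract methods describe. It deliberately uses plain dataclasses instead of the real `Task`/`BaseTaskBoard` classes, since their full definitions are not part of this diff.

from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class SketchTask:
    task_id: str
    task_name: str
    dependencies: List[str] = field(default_factory=list)

class SketchTaskBoard:
    """Additive-only board: tasks can be appended over time, never replaced wholesale."""

    def __init__(self, team_id: str):
        self.team_id = team_id
        self.tasks: List[SketchTask] = []
        self.statuses: Dict[str, str] = {}

    def add_task(self, task: SketchTask) -> bool:
        self.tasks.append(task)
        self.statuses[task.task_id] = "queued"   # mirrors the new QUEUED status above
        return True

    def add_tasks(self, tasks: List[SketchTask]) -> bool:
        return all(self.add_task(t) for t in tasks)

board = SketchTaskBoard(team_id="team-1")
board.add_tasks([SketchTask("t1", "Draft outline"), SketchTask("t2", "Write sections", ["t1"])])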
autobyteus/task_management/converters/task_board_converter.py

@@ -23,26 +23,19 @@ class TaskBoardConverter:
             task_board: The task board instance to convert.

         Returns:
-            A TaskStatusReportSchema object if
+            A TaskStatusReportSchema object if there are tasks, otherwise None.
         """
-
-
-
-        if not plan:
-            logger.debug(f"TaskBoard for team '{task_board.team_id}' has no plan loaded. Cannot generate report.")
+        if not task_board.tasks:
+            logger.debug(f"TaskBoard for team '{task_board.team_id}' has no tasks. Cannot generate report.")
             return None

-
+        internal_status = task_board.get_status_overview()

-
-        id_to_name_map = {task.task_id: task.task_name for task in plan.tasks}
+        id_to_name_map = {task.task_id: task.task_name for task in task_board.tasks}

-        # 2. Build the list of LLM-friendly task items
         report_items = []
-        for task in
-
-        # should have been hydrated already.
-            dep_names = [id_to_name_map[dep_id] for dep_id in task.dependencies]
+        for task in task_board.tasks:
+            dep_names = [id_to_name_map.get(dep_id, str(dep_id)) for dep_id in task.dependencies]

             report_item = TaskStatusReportItemSchema(
                 task_name=task.task_name,

@@ -54,9 +47,7 @@ class TaskBoardConverter:
             )
             report_items.append(report_item)

-        # 3. Assemble the final report object
         status_report = TaskStatusReportSchema(
-            overall_goal=plan.overall_goal,
             tasks=report_items
         )
