autobyteus 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +6 -2
  2. autobyteus/agent/handlers/inter_agent_message_event_handler.py +17 -19
  3. autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +6 -3
  4. autobyteus/agent/handlers/tool_result_event_handler.py +86 -23
  5. autobyteus/agent/handlers/user_input_message_event_handler.py +19 -10
  6. autobyteus/agent/hooks/base_phase_hook.py +17 -0
  7. autobyteus/agent/hooks/hook_registry.py +15 -27
  8. autobyteus/agent/input_processor/base_user_input_processor.py +17 -1
  9. autobyteus/agent/input_processor/processor_registry.py +15 -27
  10. autobyteus/agent/llm_response_processor/base_processor.py +17 -1
  11. autobyteus/agent/llm_response_processor/processor_registry.py +15 -24
  12. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +14 -0
  13. autobyteus/agent/message/agent_input_user_message.py +15 -2
  14. autobyteus/agent/message/send_message_to.py +1 -1
  15. autobyteus/agent/processor_option.py +17 -0
  16. autobyteus/agent/sender_type.py +1 -0
  17. autobyteus/agent/system_prompt_processor/base_processor.py +17 -1
  18. autobyteus/agent/system_prompt_processor/processor_registry.py +15 -27
  19. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +10 -0
  20. autobyteus/agent/tool_execution_result_processor/base_processor.py +17 -1
  21. autobyteus/agent/tool_execution_result_processor/processor_registry.py +15 -1
  22. autobyteus/agent/workspace/base_workspace.py +1 -1
  23. autobyteus/agent/workspace/workspace_definition.py +1 -1
  24. autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +1 -1
  25. autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +2 -2
  26. autobyteus/agent_team/task_notification/__init__.py +4 -0
  27. autobyteus/agent_team/task_notification/activation_policy.py +70 -0
  28. autobyteus/agent_team/task_notification/system_event_driven_agent_task_notifier.py +56 -122
  29. autobyteus/agent_team/task_notification/task_activator.py +66 -0
  30. autobyteus/cli/agent_team_tui/state.py +17 -20
  31. autobyteus/cli/agent_team_tui/widgets/focus_pane.py +1 -1
  32. autobyteus/cli/agent_team_tui/widgets/task_board_panel.py +1 -1
  33. autobyteus/events/event_types.py +2 -2
  34. autobyteus/llm/api/gemini_llm.py +45 -54
  35. autobyteus/llm/api/qwen_llm.py +25 -0
  36. autobyteus/llm/autobyteus_provider.py +8 -2
  37. autobyteus/llm/llm_factory.py +16 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +4 -1
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +84 -153
  40. autobyteus/multimedia/audio/audio_client_factory.py +47 -22
  41. autobyteus/multimedia/audio/audio_model.py +13 -6
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +8 -2
  43. autobyteus/multimedia/audio/base_audio_client.py +3 -1
  44. autobyteus/multimedia/image/api/autobyteus_image_client.py +12 -5
  45. autobyteus/multimedia/image/api/gemini_image_client.py +72 -130
  46. autobyteus/multimedia/image/api/openai_image_client.py +4 -2
  47. autobyteus/multimedia/image/autobyteus_image_provider.py +8 -2
  48. autobyteus/multimedia/image/base_image_client.py +6 -2
  49. autobyteus/multimedia/image/image_client_factory.py +20 -19
  50. autobyteus/multimedia/image/image_model.py +13 -6
  51. autobyteus/multimedia/providers.py +1 -0
  52. autobyteus/task_management/__init__.py +9 -10
  53. autobyteus/task_management/base_task_board.py +14 -6
  54. autobyteus/task_management/converters/__init__.py +0 -2
  55. autobyteus/task_management/converters/task_board_converter.py +7 -16
  56. autobyteus/task_management/events.py +6 -6
  57. autobyteus/task_management/in_memory_task_board.py +48 -38
  58. autobyteus/task_management/schemas/__init__.py +2 -2
  59. autobyteus/task_management/schemas/{plan_definition.py → task_definition.py} +5 -6
  60. autobyteus/task_management/schemas/task_status_report.py +0 -1
  61. autobyteus/task_management/task.py +60 -0
  62. autobyteus/task_management/tools/__init__.py +4 -2
  63. autobyteus/task_management/tools/get_my_tasks.py +80 -0
  64. autobyteus/task_management/tools/get_task_board_status.py +3 -3
  65. autobyteus/task_management/tools/publish_task.py +77 -0
  66. autobyteus/task_management/tools/publish_tasks.py +74 -0
  67. autobyteus/task_management/tools/update_task_status.py +5 -5
  68. autobyteus/tools/__init__.py +3 -1
  69. autobyteus/tools/base_tool.py +4 -4
  70. autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +1 -1
  71. autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +1 -1
  72. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +1 -1
  73. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +1 -1
  74. autobyteus/tools/browser/standalone/navigate_to.py +1 -1
  75. autobyteus/tools/browser/standalone/web_page_pdf_generator.py +1 -1
  76. autobyteus/tools/browser/standalone/webpage_image_downloader.py +1 -1
  77. autobyteus/tools/browser/standalone/webpage_reader.py +1 -1
  78. autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +1 -1
  79. autobyteus/tools/functional_tool.py +1 -1
  80. autobyteus/tools/google_search.py +1 -1
  81. autobyteus/tools/image_downloader.py +1 -1
  82. autobyteus/tools/mcp/factory.py +1 -1
  83. autobyteus/tools/mcp/schema_mapper.py +1 -1
  84. autobyteus/tools/mcp/tool.py +1 -1
  85. autobyteus/tools/multimedia/__init__.py +2 -0
  86. autobyteus/tools/multimedia/audio_tools.py +10 -20
  87. autobyteus/tools/multimedia/image_tools.py +21 -22
  88. autobyteus/tools/multimedia/media_reader_tool.py +117 -0
  89. autobyteus/tools/pydantic_schema_converter.py +1 -1
  90. autobyteus/tools/registry/tool_definition.py +1 -1
  91. autobyteus/tools/timer.py +1 -1
  92. autobyteus/tools/tool_meta.py +1 -1
  93. autobyteus/tools/usage/formatters/default_json_example_formatter.py +1 -1
  94. autobyteus/tools/usage/formatters/default_xml_example_formatter.py +1 -1
  95. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +59 -3
  96. autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +1 -1
  97. autobyteus/tools/usage/formatters/google_json_example_formatter.py +1 -1
  98. autobyteus/tools/usage/formatters/openai_json_example_formatter.py +1 -1
  99. autobyteus/{tools → utils}/parameter_schema.py +1 -1
  100. {autobyteus-1.1.7.dist-info → autobyteus-1.1.9.dist-info}/METADATA +2 -2
  101. {autobyteus-1.1.7.dist-info → autobyteus-1.1.9.dist-info}/RECORD +105 -99
  102. examples/run_poem_writer.py +1 -1
  103. autobyteus/task_management/converters/task_plan_converter.py +0 -48
  104. autobyteus/task_management/task_plan.py +0 -110
  105. autobyteus/task_management/tools/publish_task_plan.py +0 -101
  106. {autobyteus-1.1.7.dist-info → autobyteus-1.1.9.dist-info}/WHEEL +0 -0
  107. {autobyteus-1.1.7.dist-info → autobyteus-1.1.9.dist-info}/licenses/LICENSE +0 -0
  108. {autobyteus-1.1.7.dist-info → autobyteus-1.1.9.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,14 @@
1
- import asyncio
2
- import base64
3
1
  import logging
4
- import mimetypes
2
+ import base64
5
3
  import os
6
4
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
7
-
8
- # Legacy Gemini SDK (as requested)
9
- import google.generativeai as genai
5
+ from google import genai
6
+ from PIL import Image
10
7
  import requests
11
8
 
12
9
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
13
10
  from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
11
+ from autobyteus.multimedia.utils.api_utils import load_image_from_url
14
12
 
15
13
  if TYPE_CHECKING:
16
14
  from autobyteus.multimedia.image.image_model import ImageModel
@@ -18,157 +16,86 @@ if TYPE_CHECKING:
18
16
 
19
17
  logger = logging.getLogger(__name__)
20
18
 
21
-
22
- def _data_uri(mime_type: str, raw: bytes) -> str:
23
- """Convert raw bytes to a data URI."""
24
- b64 = base64.b64encode(raw).decode("utf-8")
25
- return f"data:{mime_type};base64,{b64}"
26
-
27
-
28
- def _guess_mime_from_url(url: str) -> str:
29
- """Best-effort MIME guess from URL; fall back to image/jpeg."""
30
- mime, _ = mimetypes.guess_type(url)
31
- return mime or "image/jpeg"
32
-
33
-
34
- def _fetch_image_part(url: str) -> Dict[str, Any]:
35
- """
36
- Download an image and return an inline-data Part compatible with the legacy SDK:
37
- { "mime_type": "...", "data": <bytes> }
38
- """
39
- resp = requests.get(url, timeout=30)
40
- resp.raise_for_status()
41
- mime = resp.headers.get("Content-Type") or _guess_mime_from_url(url)
42
- return {"mime_type": mime.split(";")[0], "data": resp.content}
43
-
44
-
45
- def _extract_inline_images(response) -> List[Dict[str, bytes]]:
46
- """
47
- Collect inline image parts from the legacy SDK response.
48
- Returns list of { "mime_type": str, "data": bytes }.
49
- """
50
- images = []
51
- try:
52
- candidates = getattr(response, "candidates", []) or []
53
- if not candidates:
54
- return images
55
-
56
- parts = candidates[0].content.parts if candidates[0].content else []
57
- for p in parts:
58
- inline = getattr(p, "inline_data", None)
59
- if not inline:
60
- continue
61
- mime = getattr(inline, "mime_type", "") or ""
62
- if not mime.startswith("image/"):
63
- continue
64
-
65
- data = getattr(inline, "data", None)
66
- if isinstance(data, bytes):
67
- images.append({"mime_type": mime, "data": data})
68
- elif isinstance(data, str):
69
- # Some bindings expose base64 text
70
- images.append({"mime_type": mime, "data": base64.b64decode(data)})
71
- except Exception as e:
72
- logger.error("Failed to parse inline image(s): %s", e)
73
- raise
74
- return images
75
-
76
-
77
19
  class GeminiImageClient(BaseImageClient):
78
20
  """
79
- Image generation client using Google's legacy SDK (`google.generativeai`).
21
+ An image client that uses Google's Gemini models for image generation tasks.
80
22
 
81
- Notes:
82
- - We configure `response_mime_type='image/png'` to request image output.
83
- - You can guide generation with input images by passing URLs; they’re added as inline image Parts.
84
- - This runs the blocking SDK call in a worker thread to keep your async API.
23
+ **Setup Requirements:**
24
+ 1. **Authentication:** Set the `GEMINI_API_KEY` environment variable with your API key.
85
25
  """
86
26
 
87
27
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
88
28
  super().__init__(model, config)
89
-
90
29
  api_key = os.getenv("GEMINI_API_KEY")
91
30
  if not api_key:
92
31
  raise ValueError("Please set the GEMINI_API_KEY environment variable.")
93
-
32
+
94
33
  try:
95
- genai.configure(api_key=api_key)
96
- # `self.model.value` should be an image-capable model.
97
- # Examples (subject to availability): "imagen-3.0-generate", "imagen-3.0-fast",
98
- # or Gemini image-preview models that support image output.
99
- model_name = self.model.value or "imagen-3.0-generate"
100
- self._model = genai.GenerativeModel(model_name)
101
- logger.info("GeminiImageClient (legacy SDK) initialized for model '%s'.", model_name)
34
+ self.client = genai.Client()
35
+ self.async_client = self.client.aio
36
+ logger.info(f"GeminiImageClient initialized for model '{self.model.name}'.")
102
37
  except Exception as e:
103
- logger.error("Failed to initialize Gemini image client: %s", e)
104
- raise RuntimeError(f"Failed to initialize Gemini image client: {e}")
38
+ logger.error(f"Failed to initialize Gemini client for images: {e}")
39
+ raise RuntimeError(f"Failed to initialize Gemini client for images: {e}")
105
40
 
106
41
  async def generate_image(
107
42
  self,
108
43
  prompt: str,
109
44
  input_image_urls: Optional[List[str]] = None,
110
- generation_config: Optional[Dict[str, Any]] = None
45
+ generation_config: Optional[Dict[str, Any]] = None,
46
+ **kwargs
111
47
  ) -> ImageGenerationResponse:
112
48
  """
113
- Generate an image (text→image or image-guided).
114
-
115
- `generation_config` supports common fields; we always ensure
116
- `response_mime_type='image/png'` so the SDK returns inline image bytes.
49
+ Generates an image using a Google Gemini model. Can be text-to-image or image-to-image.
117
50
  """
118
51
  try:
119
- logger.info("Generating image with model '%s'...", self._model.model_name)
120
-
121
- # Build contents array: [text, (optional) image parts...]
122
- contents: List[Any] = [prompt]
52
+ logger.info(f"Generating image with Google Gemini model '{self.model.value}'...")
123
53
 
54
+ content = [prompt]
124
55
  if input_image_urls:
125
- logger.info("Loading %d input image(s) for guidance...", len(input_image_urls))
56
+ logger.info(f"Loading {len(input_image_urls)} input image(s) for generation.")
126
57
  for url in input_image_urls:
127
58
  try:
128
- contents.append(_fetch_image_part(url))
59
+ content.append(load_image_from_url(url))
129
60
  except Exception as e:
130
- logger.error("Skipping image '%s' due to error: %s", url, e)
131
-
132
- # Merge config and force image output
133
- gen_cfg: Dict[str, Any] = (generation_config or {}).copy()
134
- gen_cfg["response_mime_type"] = gen_cfg.get("response_mime_type", "image/png")
135
-
136
- # Call the (sync) SDK in a worker thread
137
- response = await asyncio.to_thread(
138
- self._model.generate_content,
139
- contents,
140
- generation_config=gen_cfg,
141
- )
142
-
143
- # Handle safety blocks if present
144
- feedback = getattr(response, "prompt_feedback", None)
145
- block_reason = getattr(feedback, "block_reason", None)
146
- if block_reason:
147
- reason = getattr(block_reason, "name", str(block_reason))
148
- logger.error("Image generation blocked by safety settings: %s", reason)
149
- raise ValueError(f"Image generation failed due to safety settings: {reason}")
150
-
151
- images = _extract_inline_images(response)
152
- if not images:
153
- logger.warning("No image parts returned for prompt: '%.100s...'", prompt)
154
- raise ValueError("Gemini API did not return any images.")
155
-
156
- image_urls = [_data_uri(img["mime_type"], img["data"]) for img in images]
157
- logger.info("Successfully generated %d image(s).", len(image_urls))
61
+ logger.error(f"Skipping image at '{url}' due to loading error: {e}")
62
+
63
+ # Note: The google-genai library uses the synchronous client for the `.generate_content` method on a model
64
+ # even in an async context, as there isn't a direct async equivalent exposed for this specific call on the model object.
65
+ # We use the top-level async client for other potential future calls if the library API changes.
66
+ model_instance = self.client.get_generative_model(model_name=f"models/{self.model.value}")
67
+ response = await model_instance.generate_content_async(contents=content)
68
+
69
+
70
+ image_urls = []
71
+ for part in response.parts:
72
+ if part.inline_data and "image" in part.inline_data.mime_type:
73
+ image_bytes = part.inline_data.data
74
+ base64_image = base64.b64encode(image_bytes).decode("utf-8")
75
+ data_uri = f"data:{part.inline_data.mime_type};base64,{base64_image}"
76
+ image_urls.append(data_uri)
77
+
78
+ if not image_urls:
79
+ # Check for a safety-related refusal to generate content
80
+ if response.prompt_feedback.block_reason:
81
+ reason = response.prompt_feedback.block_reason.name
82
+ logger.error(f"Image generation blocked due to safety settings. Reason: {reason}")
83
+ raise ValueError(f"Image generation failed due to safety settings: {reason}")
84
+
85
+ logger.warning(f"Gemini API did not return any images for the prompt: '{prompt[:100]}...'")
86
+ raise ValueError("Gemini API did not return any processable images.")
87
+
88
+ logger.info(f"Successfully generated {len(image_urls)} image(s) with Gemini.")
158
89
 
159
90
  return ImageGenerationResponse(
160
91
  image_urls=image_urls,
161
- revised_prompt=None # legacy SDK does not provide a revised prompt here
92
+ revised_prompt=None # genai library does not provide a revised prompt for images
162
93
  )
163
-
164
94
  except Exception as e:
165
- logger.error("Error during Gemini image generation (legacy SDK): %s", e)
166
- # Region support / feature gating errors sometimes include 'Unsupported' hints.
95
+ logger.error(f"Error during Google Gemini image generation: {str(e)}")
96
+ # Re-raise with a more specific message if it's a known type of error
167
97
  if "Unsupported" in str(e) and "location" in str(e):
168
- raise ValueError(
169
- "Image generation may not be supported in your configured region or project. "
170
- "Check your API access and region settings."
171
- )
98
+ raise ValueError(f"Image generation is not supported in your configured region. Please check your Google Cloud project settings.")
172
99
  raise ValueError(f"Google Gemini image generation failed: {str(e)}")
173
100
 
174
101
  async def edit_image(
@@ -176,13 +103,28 @@ class GeminiImageClient(BaseImageClient):
176
103
  prompt: str,
177
104
  input_image_urls: List[str],
178
105
  mask_url: Optional[str] = None,
179
- generation_config: Optional[Dict[str, Any]] = None
106
+ generation_config: Optional[Dict[str, Any]] = None,
107
+ **kwargs
180
108
  ) -> ImageGenerationResponse:
181
109
  """
182
- Image editing/redraw with masks isn’t exposed via this legacy path here.
110
+ Edits an image using a Google Gemini model by providing the image(s) as context.
111
+ This method leverages the same underlying 'generate_content' call as generate_image.
112
+ Note: The Gemini API via the google-genai library does not support explicit masking.
183
113
  """
184
- logger.error("Image editing is not supported by the GeminiImageClient (legacy SDK).")
185
- raise NotImplementedError("The GeminiImageClient does not support the edit_image method.")
114
+ if mask_url:
115
+ logger.warning(
116
+ f"The GeminiImageClient for model '{self.model.name}' received a 'mask_url' but does not support "
117
+ "explicit masking. The mask will be ignored. The model will perform a general edit based on the prompt."
118
+ )
119
+
120
+ # For Gemini, editing is the same as generating with an input image.
121
+ # The generate_image method already handles this logic correctly.
122
+ return await self.generate_image(
123
+ prompt=prompt,
124
+ input_image_urls=input_image_urls,
125
+ generation_config=generation_config,
126
+ **kwargs
127
+ )
186
128
 
187
129
  async def cleanup(self):
188
- logger.debug("GeminiImageClient cleanup called (legacy SDK; nothing to release).")
130
+ logger.debug("GeminiImageClient cleanup called.")
@@ -30,7 +30,8 @@ class OpenAIImageClient(BaseImageClient):
30
30
  self,
31
31
  prompt: str,
32
32
  input_image_urls: Optional[List[str]] = None,
33
- generation_config: Optional[Dict[str, Any]] = None
33
+ generation_config: Optional[Dict[str, Any]] = None,
34
+ **kwargs
34
35
  ) -> ImageGenerationResponse:
35
36
  """
36
37
  Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.
@@ -83,7 +84,8 @@ class OpenAIImageClient(BaseImageClient):
83
84
  prompt: str,
84
85
  input_image_urls: List[str],
85
86
  mask_url: Optional[str] = None,
86
- generation_config: Optional[Dict[str, Any]] = None
87
+ generation_config: Optional[Dict[str, Any]] = None,
88
+ **kwargs
87
89
  ) -> ImageGenerationResponse:
88
90
  """
89
91
  Edits an image using an OpenAI model that supports the v1/images/edits endpoint.
@@ -19,7 +19,9 @@ class AutobyteusImageModelProvider:
19
19
 
20
20
  @staticmethod
21
21
  def _get_hosts() -> List[str]:
22
- """Gets Autobyteus server hosts from env vars."""
22
+ """
23
+ Gets Autobyteus server hosts from env vars. Skips discovery if no host is configured.
24
+ """
23
25
  hosts_str = os.getenv('AUTOBYTEUS_LLM_SERVER_HOSTS')
24
26
  if hosts_str:
25
27
  return [host.strip() for host in hosts_str.split(',')]
@@ -28,7 +30,7 @@ class AutobyteusImageModelProvider:
28
30
  if legacy_host:
29
31
  return [legacy_host]
30
32
 
31
- return [AutobyteusImageModelProvider.DEFAULT_SERVER_URL]
33
+ return []
32
34
 
33
35
  @staticmethod
34
36
  def discover_and_register():
@@ -37,6 +39,10 @@ class AutobyteusImageModelProvider:
37
39
  from autobyteus.multimedia.image.image_client_factory import ImageClientFactory
38
40
 
39
41
  hosts = AutobyteusImageModelProvider._get_hosts()
42
+ if not hosts:
43
+ logger.info("No Autobyteus server hosts configured. Skipping Autobyteus image model discovery.")
44
+ return
45
+
40
46
  total_registered_count = 0
41
47
 
42
48
  for host_url in hosts:
@@ -21,7 +21,8 @@ class BaseImageClient(ABC):
21
21
  self,
22
22
  prompt: str,
23
23
  input_image_urls: Optional[List[str]] = None,
24
- generation_config: Optional[Dict[str, Any]] = None
24
+ generation_config: Optional[Dict[str, Any]] = None,
25
+ **kwargs
25
26
  ) -> ImageGenerationResponse:
26
27
  """
27
28
  Generates an image based on a textual prompt.
@@ -33,6 +34,7 @@ class BaseImageClient(ABC):
33
34
  generation_config (Optional[Dict[str, Any]]): Provider-specific parameters for image generation
34
35
  to override defaults.
35
36
  (e.g., n, size, quality, style).
37
+ **kwargs: Additional keyword arguments for extensibility.
36
38
 
37
39
  Returns:
38
40
  ImageGenerationResponse: An object containing URLs to the generated images.
@@ -45,7 +47,8 @@ class BaseImageClient(ABC):
45
47
  prompt: str,
46
48
  input_image_urls: List[str],
47
49
  mask_url: Optional[str] = None,
48
- generation_config: Optional[Dict[str, Any]] = None
50
+ generation_config: Optional[Dict[str, Any]] = None,
51
+ **kwargs
49
52
  ) -> ImageGenerationResponse:
50
53
  """
51
54
  Edits an existing image based on a textual prompt.
@@ -56,6 +59,7 @@ class BaseImageClient(ABC):
56
59
  mask_url (Optional[str]): The path to a mask image. The transparent areas of the mask
57
60
  indicate where the image should be edited.
58
61
  generation_config (Optional[Dict[str, Any]]): Provider-specific parameters.
62
+ **kwargs: Additional keyword arguments for extensibility.
59
63
 
60
64
  Returns:
61
65
  ImageGenerationResponse: An object containing URLs to the edited images.
@@ -8,6 +8,7 @@ from autobyteus.multimedia.image.api.openai_image_client import OpenAIImageClien
8
8
  from autobyteus.multimedia.image.api.gemini_image_client import GeminiImageClient
9
9
  from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
10
10
  from autobyteus.utils.singleton import SingletonMeta
11
+ from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType
11
12
 
12
13
  logger = logging.getLogger(__name__)
13
14
 
@@ -39,28 +40,19 @@ class ImageClientFactory(metaclass=SingletonMeta):
39
40
  """Initializes the registry with built-in image models and discovers remote ones."""
40
41
 
41
42
  # OpenAI Models
43
+ gpt_image_1_schema = ParameterSchema(parameters=[
44
+ ParameterDefinition(name="n", param_type=ParameterType.INTEGER, default_value=1, enum_values=[1], description="The number of images to generate."),
45
+ ParameterDefinition(name="size", param_type=ParameterType.ENUM, default_value="1024x1024", enum_values=["1024x1024", "1792x1024", "1024x1792"], description="The size of the generated images."),
46
+ ParameterDefinition(name="quality", param_type=ParameterType.ENUM, default_value="hd", enum_values=["standard", "hd"], description="The quality of the image that will be generated."),
47
+ ParameterDefinition(name="style", param_type=ParameterType.ENUM, default_value="vivid", enum_values=["vivid", "natural"], description="The style of the generated images.")
48
+ ])
49
+
42
50
  gpt_image_1_model = ImageModel(
43
51
  name="gpt-image-1",
44
52
  value="dall-e-3",
45
53
  provider=MultimediaProvider.OPENAI,
46
54
  client_class=OpenAIImageClient,
47
- parameter_schema={
48
- "n": {"type": "integer", "default": 1, "allowed_values": [1], "description": "The number of images to generate."},
49
- "size": {"type": "string", "default": "1024x1024", "allowed_values": ["1024x1024", "1792x1024", "1024x1792"], "description": "The size of the generated images."},
50
- "quality": {"type": "string", "default": "hd", "allowed_values": ["standard", "hd"], "description": "The quality of the image that will be generated."},
51
- "style": {"type": "string", "default": "vivid", "allowed_values": ["vivid", "natural"], "description": "The style of the generated images."}
52
- }
53
- )
54
-
55
- dall_e_2_model = ImageModel(
56
- name="dall-e-2",
57
- value="dall-e-2",
58
- provider=MultimediaProvider.OPENAI,
59
- client_class=OpenAIImageClient,
60
- parameter_schema={
61
- "n": {"type": "integer", "default": 1, "description": "The number of images to generate."},
62
- "size": {"type": "string", "default": "1024x1024", "allowed_values": ["256x256", "512x512", "1024x1024"], "description": "The size of the generated images."}
63
- }
55
+ parameter_schema=gpt_image_1_schema
64
56
  )
65
57
 
66
58
  # Google Imagen Models (via Gemini API)
@@ -69,13 +61,22 @@ class ImageClientFactory(metaclass=SingletonMeta):
69
61
  value="imagen-4.0-generate-001",
70
62
  provider=MultimediaProvider.GOOGLE,
71
63
  client_class=GeminiImageClient,
72
- parameter_schema={} # The genai library doesn't expose these as simple params
64
+ parameter_schema=None # The genai library doesn't expose these as simple params
65
+ )
66
+
67
+ # Google Gemini Flash Image Model (aka "Nano Banana")
68
+ gemini_flash_image_model = ImageModel(
69
+ name="gemini-2.5-flash-image-preview",
70
+ value="gemini-2.5-flash-image-preview",
71
+ provider=MultimediaProvider.GOOGLE,
72
+ client_class=GeminiImageClient,
73
+ parameter_schema=None # Parameters are not exposed for this model via the genai library.
73
74
  )
74
75
 
75
76
  models_to_register = [
76
77
  gpt_image_1_model,
77
- dall_e_2_model,
78
78
  imagen_model,
79
+ gemini_flash_image_model,
79
80
  ]
80
81
 
81
82
  for model in models_to_register:
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
  import logging
3
- from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any
3
+ from typing import TYPE_CHECKING, Type, Optional, Iterator, Dict, Any, Union
4
4
  from urllib.parse import urlparse
5
5
 
6
6
  from autobyteus.multimedia.providers import MultimediaProvider
7
7
  from autobyteus.multimedia.runtimes import MultimediaRuntime
8
8
  from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
9
+ from autobyteus.utils.parameter_schema import ParameterSchema
9
10
 
10
11
  if TYPE_CHECKING:
11
12
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
@@ -47,7 +48,7 @@ class ImageModel(metaclass=ImageModelMeta):
47
48
  value: str,
48
49
  provider: MultimediaProvider,
49
50
  client_class: Type["BaseImageClient"],
50
- parameter_schema: Optional[Dict[str, Any]] = None,
51
+ parameter_schema: Optional[Union[Dict[str, Any], ParameterSchema]] = None,
51
52
  runtime: MultimediaRuntime = MultimediaRuntime.API,
52
53
  host_url: Optional[str] = None
53
54
  ):
@@ -57,13 +58,19 @@ class ImageModel(metaclass=ImageModelMeta):
57
58
  self.client_class = client_class
58
59
  self.runtime = runtime
59
60
  self.host_url = host_url
60
- self.parameter_schema = parameter_schema if parameter_schema else {}
61
+
62
+ if isinstance(parameter_schema, dict):
63
+ self.parameter_schema = ParameterSchema.from_dict(parameter_schema)
64
+ elif parameter_schema is None:
65
+ self.parameter_schema = ParameterSchema()
66
+ else:
67
+ self.parameter_schema = parameter_schema
61
68
 
62
69
  # Automatically build default_config from the schema's default values
63
70
  default_params = {
64
- key: meta.get("default")
65
- for key, meta in self.parameter_schema.items()
66
- if "default" in meta
71
+ param.name: param.default_value
72
+ for param in self.parameter_schema.parameters
73
+ if param.default_value is not None
67
74
  }
68
75
  self.default_config = MultimediaConfig(params=default_params)
69
76
 
@@ -3,3 +3,4 @@ from enum import Enum
3
3
  class MultimediaProvider(Enum):
4
4
  OPENAI = "OPENAI"
5
5
  GOOGLE = "GOOGLE"
6
+ ALIBABA_QWEN = "ALIBABA_QWEN"
@@ -4,15 +4,15 @@ This package defines components for task management and state tracking,
4
4
  including task plans and live task boards. It is designed to be a general-purpose
5
5
  module usable by various components, such as agents or agent teams.
6
6
  """
7
- from .task_plan import TaskPlan, Task
8
- from .schemas import (TaskPlanDefinitionSchema, TaskDefinitionSchema, TaskStatusReportSchema,
7
+ from .task import Task
8
+ from .schemas import (TasksDefinitionSchema, TaskDefinitionSchema, TaskStatusReportSchema,
9
9
  TaskStatusReportItemSchema, FileDeliverableSchema)
10
10
  from .base_task_board import BaseTaskBoard, TaskStatus
11
11
  from .in_memory_task_board import InMemoryTaskBoard
12
12
  from .deliverable import FileDeliverable
13
- from .tools import GetTaskBoardStatus, PublishTaskPlan, UpdateTaskStatus
14
- from .converters import TaskBoardConverter, TaskPlanConverter
15
- from .events import BaseTaskBoardEvent, TaskPlanPublishedEvent, TaskStatusUpdatedEvent
13
+ from .tools import GetTaskBoardStatus, PublishTasks, PublishTask, UpdateTaskStatus
14
+ from .converters import TaskBoardConverter
15
+ from .events import BaseTaskBoardEvent, TasksAddedEvent, TaskStatusUpdatedEvent
16
16
 
17
17
  # For convenience, we can alias InMemoryTaskBoard as the default TaskBoard.
18
18
  # This allows other parts of the code to import `TaskBoard` without needing
@@ -20,9 +20,8 @@ from .events import BaseTaskBoardEvent, TaskPlanPublishedEvent, TaskStatusUpdate
20
20
  TaskBoard = InMemoryTaskBoard
21
21
 
22
22
  __all__ = [
23
- "TaskPlan",
24
23
  "Task",
25
- "TaskPlanDefinitionSchema",
24
+ "TasksDefinitionSchema",
26
25
  "TaskDefinitionSchema",
27
26
  "TaskStatusReportSchema",
28
27
  "TaskStatusReportItemSchema",
@@ -33,11 +32,11 @@ __all__ = [
33
32
  "TaskBoard", # Exposing the alias
34
33
  "FileDeliverable",
35
34
  "GetTaskBoardStatus",
36
- "PublishTaskPlan",
35
+ "PublishTasks",
36
+ "PublishTask",
37
37
  "UpdateTaskStatus",
38
38
  "TaskBoardConverter",
39
- "TaskPlanConverter",
40
39
  "BaseTaskBoardEvent",
41
- "TaskPlanPublishedEvent",
40
+ "TasksAddedEvent",
42
41
  "TaskStatusUpdatedEvent",
43
42
  ]
@@ -8,13 +8,14 @@ from enum import Enum
8
8
  from typing import Dict, Any, List, Optional
9
9
 
10
10
  from autobyteus.events.event_emitter import EventEmitter
11
- from .task_plan import Task, TaskPlan
11
+ from .task import Task
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
15
  class TaskStatus(str, Enum):
16
16
  """Enumerates the possible lifecycle states of a task on the TaskBoard."""
17
17
  NOT_STARTED = "not_started"
18
+ QUEUED = "queued"
18
19
  IN_PROGRESS = "in_progress"
19
20
  COMPLETED = "completed"
20
21
  BLOCKED = "blocked"
@@ -29,20 +30,27 @@ class BaseTaskBoard(ABC, EventEmitter):
29
30
  Abstract base class for a TaskBoard.
30
31
 
31
32
  This class defines the contract for any component that manages the live state
32
- of a TaskPlan. Implementations could be in-memory, database-backed, or
33
- connected to external services like JIRA. It inherits from EventEmitter to
34
- broadcast state changes.
33
+ of tasks for a team. It is a dynamic board, not a static plan.
34
+ It inherits from EventEmitter to broadcast state changes.
35
35
  """
36
36
 
37
37
  def __init__(self, team_id: str):
38
38
  EventEmitter.__init__(self)
39
39
  self.team_id = team_id
40
+ self.tasks: List[Task] = []
40
41
  logger.debug(f"BaseTaskBoard initialized for team '{self.team_id}'.")
41
42
 
42
43
  @abstractmethod
43
- def load_task_plan(self, plan: TaskPlan) -> bool:
44
+ def add_tasks(self, tasks: List[Task]) -> bool:
44
45
  """
45
- Loads a new plan onto the board, resetting its state.
46
+ Adds a list of new tasks to the board. This is an additive-only operation.
47
+ """
48
+ raise NotImplementedError
49
+
50
+ @abstractmethod
51
+ def add_task(self, task: Task) -> bool:
52
+ """
53
+ Adds a single new task to the board.
46
54
  """
47
55
  raise NotImplementedError
48
56
 
@@ -3,9 +3,7 @@
3
3
  Exposes the public converters for the task management module.
4
4
  """
5
5
  from .task_board_converter import TaskBoardConverter
6
- from .task_plan_converter import TaskPlanConverter
7
6
 
8
7
  __all__ = [
9
8
  "TaskBoardConverter",
10
- "TaskPlanConverter",
11
9
  ]
@@ -23,26 +23,19 @@ class TaskBoardConverter:
23
23
  task_board: The task board instance to convert.
24
24
 
25
25
  Returns:
26
- A TaskStatusReportSchema object if a plan is loaded, otherwise None.
26
+ A TaskStatusReportSchema object if there are tasks, otherwise None.
27
27
  """
28
- internal_status = task_board.get_status_overview()
29
- plan = task_board.current_plan
30
-
31
- if not plan:
32
- logger.debug(f"TaskBoard for team '{task_board.team_id}' has no plan loaded. Cannot generate report.")
28
+ if not task_board.tasks:
29
+ logger.debug(f"TaskBoard for team '{task_board.team_id}' has no tasks. Cannot generate report.")
33
30
  return None
34
31
 
35
- # --- Conversion to LLM-Friendly Format ---
32
+ internal_status = task_board.get_status_overview()
36
33
 
37
- # 1. Create maps for easy lookup
38
- id_to_name_map = {task.task_id: task.task_name for task in plan.tasks}
34
+ id_to_name_map = {task.task_id: task.task_name for task in task_board.tasks}
39
35
 
40
- # 2. Build the list of LLM-friendly task items
41
36
  report_items = []
42
- for task in plan.tasks:
43
- # Convert dependency IDs back to names. This is safe because the plan
44
- # should have been hydrated already.
45
- dep_names = [id_to_name_map[dep_id] for dep_id in task.dependencies]
37
+ for task in task_board.tasks:
38
+ dep_names = [id_to_name_map.get(dep_id, str(dep_id)) for dep_id in task.dependencies]
46
39
 
47
40
  report_item = TaskStatusReportItemSchema(
48
41
  task_name=task.task_name,
@@ -54,9 +47,7 @@ class TaskBoardConverter:
54
47
  )
55
48
  report_items.append(report_item)
56
49
 
57
- # 3. Assemble the final report object
58
50
  status_report = TaskStatusReportSchema(
59
- overall_goal=plan.overall_goal,
60
51
  tasks=report_items
61
52
  )
62
53