autobyteus-1.2.0-py3-none-any.whl → autobyteus-1.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (512):
  1. autobyteus/agent/agent.py +15 -5
  2. autobyteus/agent/bootstrap_steps/__init__.py +1 -3
  3. autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +3 -59
  4. autobyteus/agent/bootstrap_steps/base_bootstrap_step.py +1 -4
  5. autobyteus/agent/bootstrap_steps/mcp_server_prewarming_step.py +1 -3
  6. autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +16 -13
  7. autobyteus/agent/bootstrap_steps/workspace_context_initialization_step.py +2 -4
  8. autobyteus/agent/context/agent_config.py +43 -20
  9. autobyteus/agent/context/agent_context.py +23 -18
  10. autobyteus/agent/context/agent_runtime_state.py +23 -19
  11. autobyteus/agent/events/__init__.py +16 -1
  12. autobyteus/agent/events/agent_events.py +43 -3
  13. autobyteus/agent/events/agent_input_event_queue_manager.py +79 -26
  14. autobyteus/agent/events/event_store.py +57 -0
  15. autobyteus/agent/events/notifiers.py +74 -60
  16. autobyteus/agent/events/worker_event_dispatcher.py +21 -64
  17. autobyteus/agent/factory/agent_factory.py +52 -0
  18. autobyteus/agent/handlers/__init__.py +2 -0
  19. autobyteus/agent/handlers/approved_tool_invocation_event_handler.py +51 -34
  20. autobyteus/agent/handlers/bootstrap_event_handler.py +155 -0
  21. autobyteus/agent/handlers/inter_agent_message_event_handler.py +10 -0
  22. autobyteus/agent/handlers/lifecycle_event_logger.py +19 -11
  23. autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +10 -15
  24. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +188 -48
  25. autobyteus/agent/handlers/tool_execution_approval_event_handler.py +0 -10
  26. autobyteus/agent/handlers/tool_invocation_request_event_handler.py +53 -48
  27. autobyteus/agent/handlers/tool_result_event_handler.py +7 -8
  28. autobyteus/agent/handlers/user_input_message_event_handler.py +10 -3
  29. autobyteus/agent/input_processor/memory_ingest_input_processor.py +40 -0
  30. autobyteus/agent/lifecycle/__init__.py +12 -0
  31. autobyteus/agent/lifecycle/base_processor.py +109 -0
  32. autobyteus/agent/lifecycle/events.py +35 -0
  33. autobyteus/agent/lifecycle/processor_definition.py +36 -0
  34. autobyteus/agent/lifecycle/processor_registry.py +106 -0
  35. autobyteus/agent/llm_request_assembler.py +98 -0
  36. autobyteus/agent/llm_response_processor/__init__.py +1 -8
  37. autobyteus/agent/message/context_file_type.py +1 -1
  38. autobyteus/agent/message/send_message_to.py +5 -4
  39. autobyteus/agent/runtime/agent_runtime.py +29 -21
  40. autobyteus/agent/runtime/agent_worker.py +98 -19
  41. autobyteus/agent/shutdown_steps/__init__.py +2 -0
  42. autobyteus/agent/shutdown_steps/agent_shutdown_orchestrator.py +2 -0
  43. autobyteus/agent/shutdown_steps/tool_cleanup_step.py +58 -0
  44. autobyteus/agent/status/__init__.py +14 -0
  45. autobyteus/agent/status/manager.py +93 -0
  46. autobyteus/agent/status/status_deriver.py +96 -0
  47. autobyteus/agent/{phases/phase_enum.py → status/status_enum.py} +16 -16
  48. autobyteus/agent/status/status_update_utils.py +73 -0
  49. autobyteus/agent/streaming/__init__.py +52 -5
  50. autobyteus/agent/streaming/adapters/__init__.py +18 -0
  51. autobyteus/agent/streaming/adapters/invocation_adapter.py +184 -0
  52. autobyteus/agent/streaming/adapters/tool_call_parsing.py +163 -0
  53. autobyteus/agent/streaming/adapters/tool_syntax_registry.py +67 -0
  54. autobyteus/agent/streaming/agent_event_stream.py +3 -178
  55. autobyteus/agent/streaming/api_tool_call/__init__.py +16 -0
  56. autobyteus/agent/streaming/api_tool_call/file_content_streamer.py +56 -0
  57. autobyteus/agent/streaming/api_tool_call/json_string_field_extractor.py +175 -0
  58. autobyteus/agent/streaming/api_tool_call_streaming_response_handler.py +4 -0
  59. autobyteus/agent/streaming/events/__init__.py +6 -0
  60. autobyteus/agent/streaming/events/stream_event_payloads.py +284 -0
  61. autobyteus/agent/streaming/events/stream_events.py +141 -0
  62. autobyteus/agent/streaming/handlers/__init__.py +15 -0
  63. autobyteus/agent/streaming/handlers/api_tool_call_streaming_response_handler.py +303 -0
  64. autobyteus/agent/streaming/handlers/parsing_streaming_response_handler.py +107 -0
  65. autobyteus/agent/streaming/handlers/pass_through_streaming_response_handler.py +107 -0
  66. autobyteus/agent/streaming/handlers/streaming_handler_factory.py +177 -0
  67. autobyteus/agent/streaming/handlers/streaming_response_handler.py +58 -0
  68. autobyteus/agent/streaming/parser/__init__.py +61 -0
  69. autobyteus/agent/streaming/parser/event_emitter.py +181 -0
  70. autobyteus/agent/streaming/parser/events.py +4 -0
  71. autobyteus/agent/streaming/parser/invocation_adapter.py +4 -0
  72. autobyteus/agent/streaming/parser/json_parsing_strategies/__init__.py +19 -0
  73. autobyteus/agent/streaming/parser/json_parsing_strategies/base.py +32 -0
  74. autobyteus/agent/streaming/parser/json_parsing_strategies/default.py +34 -0
  75. autobyteus/agent/streaming/parser/json_parsing_strategies/gemini.py +31 -0
  76. autobyteus/agent/streaming/parser/json_parsing_strategies/openai.py +64 -0
  77. autobyteus/agent/streaming/parser/json_parsing_strategies/registry.py +75 -0
  78. autobyteus/agent/streaming/parser/parser_context.py +227 -0
  79. autobyteus/agent/streaming/parser/parser_factory.py +132 -0
  80. autobyteus/agent/streaming/parser/sentinel_format.py +7 -0
  81. autobyteus/agent/streaming/parser/state_factory.py +62 -0
  82. autobyteus/agent/streaming/parser/states/__init__.py +1 -0
  83. autobyteus/agent/streaming/parser/states/base_state.py +60 -0
  84. autobyteus/agent/streaming/parser/states/custom_xml_tag_run_bash_parsing_state.py +38 -0
  85. autobyteus/agent/streaming/parser/states/custom_xml_tag_write_file_parsing_state.py +55 -0
  86. autobyteus/agent/streaming/parser/states/delimited_content_state.py +146 -0
  87. autobyteus/agent/streaming/parser/states/json_initialization_state.py +144 -0
  88. autobyteus/agent/streaming/parser/states/json_tool_parsing_state.py +137 -0
  89. autobyteus/agent/streaming/parser/states/sentinel_content_state.py +30 -0
  90. autobyteus/agent/streaming/parser/states/sentinel_initialization_state.py +117 -0
  91. autobyteus/agent/streaming/parser/states/text_state.py +78 -0
  92. autobyteus/agent/streaming/parser/states/xml_patch_file_tool_parsing_state.py +328 -0
  93. autobyteus/agent/streaming/parser/states/xml_run_bash_tool_parsing_state.py +129 -0
  94. autobyteus/agent/streaming/parser/states/xml_tag_initialization_state.py +151 -0
  95. autobyteus/agent/streaming/parser/states/xml_tool_parsing_state.py +63 -0
  96. autobyteus/agent/streaming/parser/states/xml_write_file_tool_parsing_state.py +343 -0
  97. autobyteus/agent/streaming/parser/strategies/__init__.py +17 -0
  98. autobyteus/agent/streaming/parser/strategies/base.py +24 -0
  99. autobyteus/agent/streaming/parser/strategies/json_tool_strategy.py +26 -0
  100. autobyteus/agent/streaming/parser/strategies/registry.py +28 -0
  101. autobyteus/agent/streaming/parser/strategies/sentinel_strategy.py +23 -0
  102. autobyteus/agent/streaming/parser/strategies/xml_tag_strategy.py +21 -0
  103. autobyteus/agent/streaming/parser/stream_scanner.py +167 -0
  104. autobyteus/agent/streaming/parser/streaming_parser.py +212 -0
  105. autobyteus/agent/streaming/parser/tool_call_parsing.py +4 -0
  106. autobyteus/agent/streaming/parser/tool_constants.py +7 -0
  107. autobyteus/agent/streaming/parser/tool_syntax_registry.py +4 -0
  108. autobyteus/agent/streaming/parser/xml_tool_parsing_state_registry.py +55 -0
  109. autobyteus/agent/streaming/parsing_streaming_response_handler.py +4 -0
  110. autobyteus/agent/streaming/pass_through_streaming_response_handler.py +4 -0
  111. autobyteus/agent/streaming/queue_streamer.py +3 -57
  112. autobyteus/agent/streaming/segments/__init__.py +5 -0
  113. autobyteus/agent/streaming/segments/segment_events.py +81 -0
  114. autobyteus/agent/streaming/stream_event_payloads.py +2 -198
  115. autobyteus/agent/streaming/stream_events.py +3 -128
  116. autobyteus/agent/streaming/streaming_handler_factory.py +4 -0
  117. autobyteus/agent/streaming/streaming_response_handler.py +4 -0
  118. autobyteus/agent/streaming/streams/__init__.py +5 -0
  119. autobyteus/agent/streaming/streams/agent_event_stream.py +197 -0
  120. autobyteus/agent/streaming/utils/__init__.py +5 -0
  121. autobyteus/agent/streaming/utils/queue_streamer.py +59 -0
  122. autobyteus/agent/system_prompt_processor/__init__.py +2 -0
  123. autobyteus/agent/system_prompt_processor/available_skills_processor.py +96 -0
  124. autobyteus/agent/system_prompt_processor/base_processor.py +1 -1
  125. autobyteus/agent/system_prompt_processor/processor_meta.py +15 -2
  126. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +39 -58
  127. autobyteus/agent/token_budget.py +56 -0
  128. autobyteus/agent/tool_execution_result_processor/memory_ingest_tool_result_processor.py +29 -0
  129. autobyteus/agent/tool_invocation.py +16 -40
  130. autobyteus/agent/tool_invocation_preprocessor/__init__.py +9 -0
  131. autobyteus/agent/tool_invocation_preprocessor/base_preprocessor.py +45 -0
  132. autobyteus/agent/tool_invocation_preprocessor/processor_definition.py +15 -0
  133. autobyteus/agent/tool_invocation_preprocessor/processor_meta.py +33 -0
  134. autobyteus/agent/tool_invocation_preprocessor/processor_registry.py +60 -0
  135. autobyteus/agent/utils/wait_for_idle.py +12 -14
  136. autobyteus/agent/workspace/base_workspace.py +6 -27
  137. autobyteus/agent_team/agent_team.py +3 -3
  138. autobyteus/agent_team/agent_team_builder.py +1 -41
  139. autobyteus/agent_team/bootstrap_steps/__init__.py +0 -4
  140. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +8 -18
  141. autobyteus/agent_team/bootstrap_steps/agent_team_bootstrapper.py +4 -16
  142. autobyteus/agent_team/bootstrap_steps/base_agent_team_bootstrap_step.py +1 -2
  143. autobyteus/agent_team/bootstrap_steps/coordinator_initialization_step.py +1 -2
  144. autobyteus/agent_team/bootstrap_steps/task_notifier_initialization_step.py +5 -6
  145. autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +15 -15
  146. autobyteus/agent_team/context/agent_team_config.py +6 -3
  147. autobyteus/agent_team/context/agent_team_context.py +25 -3
  148. autobyteus/agent_team/context/agent_team_runtime_state.py +11 -8
  149. autobyteus/agent_team/events/__init__.py +11 -0
  150. autobyteus/agent_team/events/agent_team_event_dispatcher.py +22 -9
  151. autobyteus/agent_team/events/agent_team_events.py +16 -0
  152. autobyteus/agent_team/events/event_store.py +57 -0
  153. autobyteus/agent_team/factory/agent_team_factory.py +8 -0
  154. autobyteus/agent_team/handlers/inter_agent_message_request_event_handler.py +18 -2
  155. autobyteus/agent_team/handlers/lifecycle_agent_team_event_handler.py +21 -5
  156. autobyteus/agent_team/handlers/process_user_message_event_handler.py +17 -8
  157. autobyteus/agent_team/handlers/tool_approval_team_event_handler.py +19 -4
  158. autobyteus/agent_team/runtime/agent_team_runtime.py +41 -10
  159. autobyteus/agent_team/runtime/agent_team_worker.py +69 -5
  160. autobyteus/agent_team/status/__init__.py +14 -0
  161. autobyteus/agent_team/status/agent_team_status.py +18 -0
  162. autobyteus/agent_team/status/agent_team_status_manager.py +33 -0
  163. autobyteus/agent_team/status/status_deriver.py +62 -0
  164. autobyteus/agent_team/status/status_update_utils.py +42 -0
  165. autobyteus/agent_team/streaming/__init__.py +2 -2
  166. autobyteus/agent_team/streaming/agent_team_event_notifier.py +10 -10
  167. autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +7 -7
  168. autobyteus/agent_team/streaming/agent_team_stream_events.py +11 -11
  169. autobyteus/agent_team/system_prompt_processor/__init__.py +6 -0
  170. autobyteus/agent_team/system_prompt_processor/team_manifest_injector_processor.py +76 -0
  171. autobyteus/agent_team/task_notification/activation_policy.py +1 -1
  172. autobyteus/agent_team/task_notification/system_event_driven_agent_task_notifier.py +22 -22
  173. autobyteus/agent_team/task_notification/task_notification_mode.py +20 -1
  174. autobyteus/agent_team/utils/wait_for_idle.py +4 -4
  175. autobyteus/cli/agent_cli.py +18 -10
  176. autobyteus/cli/agent_team_tui/app.py +18 -15
  177. autobyteus/cli/agent_team_tui/state.py +21 -23
  178. autobyteus/cli/agent_team_tui/widgets/agent_list_sidebar.py +15 -15
  179. autobyteus/cli/agent_team_tui/widgets/focus_pane.py +146 -39
  180. autobyteus/cli/agent_team_tui/widgets/renderables.py +1 -1
  181. autobyteus/cli/agent_team_tui/widgets/shared.py +26 -26
  182. autobyteus/cli/agent_team_tui/widgets/{task_board_panel.py → task_plan_panel.py} +5 -5
  183. autobyteus/cli/cli_display.py +193 -44
  184. autobyteus/cli/workflow_tui/app.py +9 -10
  185. autobyteus/cli/workflow_tui/state.py +14 -16
  186. autobyteus/cli/workflow_tui/widgets/agent_list_sidebar.py +15 -15
  187. autobyteus/cli/workflow_tui/widgets/focus_pane.py +137 -35
  188. autobyteus/cli/workflow_tui/widgets/renderables.py +1 -1
  189. autobyteus/cli/workflow_tui/widgets/shared.py +25 -25
  190. autobyteus/clients/autobyteus_client.py +94 -1
  191. autobyteus/events/event_types.py +15 -21
  192. autobyteus/llm/api/autobyteus_llm.py +33 -29
  193. autobyteus/llm/api/claude_llm.py +142 -36
  194. autobyteus/llm/api/gemini_llm.py +163 -59
  195. autobyteus/llm/api/grok_llm.py +1 -1
  196. autobyteus/llm/api/minimax_llm.py +26 -0
  197. autobyteus/llm/api/mistral_llm.py +113 -87
  198. autobyteus/llm/api/ollama_llm.py +9 -42
  199. autobyteus/llm/api/openai_compatible_llm.py +127 -91
  200. autobyteus/llm/api/openai_llm.py +3 -3
  201. autobyteus/llm/api/openai_responses_llm.py +324 -0
  202. autobyteus/llm/api/zhipu_llm.py +21 -2
  203. autobyteus/llm/autobyteus_provider.py +70 -60
  204. autobyteus/llm/base_llm.py +85 -81
  205. autobyteus/llm/converters/__init__.py +14 -0
  206. autobyteus/llm/converters/anthropic_tool_call_converter.py +37 -0
  207. autobyteus/llm/converters/gemini_tool_call_converter.py +57 -0
  208. autobyteus/llm/converters/mistral_tool_call_converter.py +37 -0
  209. autobyteus/llm/converters/openai_tool_call_converter.py +38 -0
  210. autobyteus/llm/extensions/base_extension.py +6 -12
  211. autobyteus/llm/extensions/token_usage_tracking_extension.py +45 -18
  212. autobyteus/llm/llm_factory.py +282 -204
  213. autobyteus/llm/lmstudio_provider.py +60 -49
  214. autobyteus/llm/models.py +35 -2
  215. autobyteus/llm/ollama_provider.py +60 -49
  216. autobyteus/llm/ollama_provider_resolver.py +0 -1
  217. autobyteus/llm/prompt_renderers/__init__.py +19 -0
  218. autobyteus/llm/prompt_renderers/anthropic_prompt_renderer.py +104 -0
  219. autobyteus/llm/prompt_renderers/autobyteus_prompt_renderer.py +19 -0
  220. autobyteus/llm/prompt_renderers/base_prompt_renderer.py +10 -0
  221. autobyteus/llm/prompt_renderers/gemini_prompt_renderer.py +63 -0
  222. autobyteus/llm/prompt_renderers/mistral_prompt_renderer.py +87 -0
  223. autobyteus/llm/prompt_renderers/ollama_prompt_renderer.py +51 -0
  224. autobyteus/llm/prompt_renderers/openai_chat_renderer.py +97 -0
  225. autobyteus/llm/prompt_renderers/openai_responses_renderer.py +101 -0
  226. autobyteus/llm/providers.py +1 -3
  227. autobyteus/llm/token_counter/claude_token_counter.py +56 -25
  228. autobyteus/llm/token_counter/mistral_token_counter.py +12 -8
  229. autobyteus/llm/token_counter/openai_token_counter.py +24 -5
  230. autobyteus/llm/token_counter/token_counter_factory.py +12 -5
  231. autobyteus/llm/utils/llm_config.py +6 -12
  232. autobyteus/llm/utils/media_payload_formatter.py +27 -20
  233. autobyteus/llm/utils/messages.py +55 -3
  234. autobyteus/llm/utils/response_types.py +3 -0
  235. autobyteus/llm/utils/tool_call_delta.py +31 -0
  236. autobyteus/memory/__init__.py +32 -0
  237. autobyteus/memory/active_transcript.py +69 -0
  238. autobyteus/memory/compaction/__init__.py +9 -0
  239. autobyteus/memory/compaction/compaction_result.py +8 -0
  240. autobyteus/memory/compaction/compactor.py +89 -0
  241. autobyteus/memory/compaction/summarizer.py +11 -0
  242. autobyteus/memory/compaction_snapshot_builder.py +84 -0
  243. autobyteus/memory/memory_manager.py +183 -0
  244. autobyteus/memory/models/__init__.py +14 -0
  245. autobyteus/memory/models/episodic_item.py +41 -0
  246. autobyteus/memory/models/memory_types.py +7 -0
  247. autobyteus/memory/models/raw_trace_item.py +79 -0
  248. autobyteus/memory/models/semantic_item.py +41 -0
  249. autobyteus/memory/models/tool_interaction.py +20 -0
  250. autobyteus/memory/policies/__init__.py +5 -0
  251. autobyteus/memory/policies/compaction_policy.py +16 -0
  252. autobyteus/memory/retrieval/__init__.py +7 -0
  253. autobyteus/memory/retrieval/memory_bundle.py +11 -0
  254. autobyteus/memory/retrieval/retriever.py +13 -0
  255. autobyteus/memory/store/__init__.py +7 -0
  256. autobyteus/memory/store/base_store.py +14 -0
  257. autobyteus/memory/store/file_store.py +98 -0
  258. autobyteus/memory/tool_interaction_builder.py +46 -0
  259. autobyteus/memory/turn_tracker.py +9 -0
  260. autobyteus/multimedia/audio/api/__init__.py +3 -2
  261. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +19 -5
  262. autobyteus/multimedia/audio/api/gemini_audio_client.py +108 -16
  263. autobyteus/multimedia/audio/api/openai_audio_client.py +112 -0
  264. autobyteus/multimedia/audio/audio_client_factory.py +84 -9
  265. autobyteus/multimedia/audio/audio_model.py +2 -1
  266. autobyteus/multimedia/image/api/autobyteus_image_client.py +19 -5
  267. autobyteus/multimedia/image/api/gemini_image_client.py +38 -17
  268. autobyteus/multimedia/image/api/openai_image_client.py +125 -43
  269. autobyteus/multimedia/image/autobyteus_image_provider.py +2 -1
  270. autobyteus/multimedia/image/image_client_factory.py +47 -15
  271. autobyteus/multimedia/image/image_model.py +5 -2
  272. autobyteus/multimedia/providers.py +3 -2
  273. autobyteus/skills/loader.py +71 -0
  274. autobyteus/skills/model.py +11 -0
  275. autobyteus/skills/registry.py +70 -0
  276. autobyteus/task_management/__init__.py +43 -20
  277. autobyteus/task_management/{base_task_board.py → base_task_plan.py} +16 -13
  278. autobyteus/task_management/converters/__init__.py +2 -2
  279. autobyteus/task_management/converters/{task_board_converter.py → task_plan_converter.py} +13 -13
  280. autobyteus/task_management/events.py +7 -7
  281. autobyteus/task_management/{in_memory_task_board.py → in_memory_task_plan.py} +34 -22
  282. autobyteus/task_management/schemas/__init__.py +3 -0
  283. autobyteus/task_management/schemas/task_status_report.py +2 -2
  284. autobyteus/task_management/schemas/todo_definition.py +15 -0
  285. autobyteus/task_management/todo.py +29 -0
  286. autobyteus/task_management/todo_list.py +75 -0
  287. autobyteus/task_management/tools/__init__.py +24 -8
  288. autobyteus/task_management/tools/task_tools/__init__.py +19 -0
  289. autobyteus/task_management/tools/{assign_task_to.py → task_tools/assign_task_to.py} +18 -18
  290. autobyteus/task_management/tools/{publish_task.py → task_tools/create_task.py} +16 -18
  291. autobyteus/task_management/tools/{publish_tasks.py → task_tools/create_tasks.py} +19 -19
  292. autobyteus/task_management/tools/{get_my_tasks.py → task_tools/get_my_tasks.py} +15 -15
  293. autobyteus/task_management/tools/{get_task_board_status.py → task_tools/get_task_plan_status.py} +16 -16
  294. autobyteus/task_management/tools/{update_task_status.py → task_tools/update_task_status.py} +16 -16
  295. autobyteus/task_management/tools/todo_tools/__init__.py +18 -0
  296. autobyteus/task_management/tools/todo_tools/add_todo.py +78 -0
  297. autobyteus/task_management/tools/todo_tools/create_todo_list.py +79 -0
  298. autobyteus/task_management/tools/todo_tools/get_todo_list.py +55 -0
  299. autobyteus/task_management/tools/todo_tools/update_todo_status.py +85 -0
  300. autobyteus/tools/__init__.py +43 -52
  301. autobyteus/tools/base_tool.py +7 -0
  302. autobyteus/tools/file/__init__.py +9 -0
  303. autobyteus/tools/file/patch_file.py +149 -0
  304. autobyteus/tools/file/{file_reader.py → read_file.py} +38 -7
  305. autobyteus/tools/file/{file_writer.py → write_file.py} +7 -4
  306. autobyteus/tools/functional_tool.py +53 -14
  307. autobyteus/tools/mcp/__init__.py +2 -0
  308. autobyteus/tools/mcp/config_service.py +5 -1
  309. autobyteus/tools/mcp/server/__init__.py +2 -0
  310. autobyteus/tools/mcp/server/http_managed_mcp_server.py +1 -1
  311. autobyteus/tools/mcp/server/websocket_managed_mcp_server.py +141 -0
  312. autobyteus/tools/mcp/server_instance_manager.py +8 -1
  313. autobyteus/tools/mcp/tool.py +3 -3
  314. autobyteus/tools/mcp/tool_registrar.py +5 -2
  315. autobyteus/tools/mcp/types.py +61 -0
  316. autobyteus/tools/multimedia/__init__.py +2 -1
  317. autobyteus/tools/multimedia/audio_tools.py +72 -19
  318. autobyteus/tools/{download_media_tool.py → multimedia/download_media_tool.py} +21 -7
  319. autobyteus/tools/multimedia/image_tools.py +248 -64
  320. autobyteus/tools/multimedia/media_reader_tool.py +1 -1
  321. autobyteus/tools/operation_executor/journal_manager.py +107 -0
  322. autobyteus/tools/operation_executor/operation_event_buffer.py +57 -0
  323. autobyteus/tools/operation_executor/operation_event_producer.py +29 -0
  324. autobyteus/tools/operation_executor/operation_executor.py +58 -0
  325. autobyteus/tools/registry/tool_definition.py +108 -14
  326. autobyteus/tools/registry/tool_registry.py +29 -0
  327. autobyteus/tools/search/__init__.py +17 -0
  328. autobyteus/tools/search/base_strategy.py +35 -0
  329. autobyteus/tools/search/client.py +24 -0
  330. autobyteus/tools/search/factory.py +81 -0
  331. autobyteus/tools/search/google_cse_strategy.py +68 -0
  332. autobyteus/tools/search/providers.py +10 -0
  333. autobyteus/tools/search/serpapi_strategy.py +65 -0
  334. autobyteus/tools/search/serper_strategy.py +87 -0
  335. autobyteus/tools/search_tool.py +83 -0
  336. autobyteus/tools/skill/load_skill.py +50 -0
  337. autobyteus/tools/terminal/__init__.py +45 -0
  338. autobyteus/tools/terminal/ansi_utils.py +32 -0
  339. autobyteus/tools/terminal/background_process_manager.py +233 -0
  340. autobyteus/tools/terminal/output_buffer.py +105 -0
  341. autobyteus/tools/terminal/prompt_detector.py +63 -0
  342. autobyteus/tools/terminal/pty_session.py +241 -0
  343. autobyteus/tools/terminal/session_factory.py +20 -0
  344. autobyteus/tools/terminal/terminal_session_manager.py +226 -0
  345. autobyteus/tools/terminal/tools/__init__.py +13 -0
  346. autobyteus/tools/terminal/tools/get_process_output.py +81 -0
  347. autobyteus/tools/terminal/tools/run_bash.py +109 -0
  348. autobyteus/tools/terminal/tools/start_background_process.py +104 -0
  349. autobyteus/tools/terminal/tools/stop_background_process.py +67 -0
  350. autobyteus/tools/terminal/types.py +54 -0
  351. autobyteus/tools/terminal/wsl_tmux_session.py +221 -0
  352. autobyteus/tools/terminal/wsl_utils.py +156 -0
  353. autobyteus/tools/tool_meta.py +4 -24
  354. autobyteus/tools/transaction_management/backup_handler.py +48 -0
  355. autobyteus/tools/transaction_management/operation_lifecycle_manager.py +62 -0
  356. autobyteus/tools/usage/__init__.py +1 -2
  357. autobyteus/tools/usage/formatters/__init__.py +17 -1
  358. autobyteus/tools/usage/formatters/base_formatter.py +8 -0
  359. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +2 -2
  360. autobyteus/tools/usage/formatters/mistral_json_schema_formatter.py +18 -0
  361. autobyteus/tools/usage/formatters/patch_file_xml_example_formatter.py +64 -0
  362. autobyteus/tools/usage/formatters/patch_file_xml_schema_formatter.py +31 -0
  363. autobyteus/tools/usage/formatters/run_bash_xml_example_formatter.py +32 -0
  364. autobyteus/tools/usage/formatters/run_bash_xml_schema_formatter.py +36 -0
  365. autobyteus/tools/usage/formatters/write_file_xml_example_formatter.py +53 -0
  366. autobyteus/tools/usage/formatters/write_file_xml_schema_formatter.py +31 -0
  367. autobyteus/tools/usage/providers/tool_manifest_provider.py +10 -10
  368. autobyteus/tools/usage/registries/__init__.py +1 -3
  369. autobyteus/tools/usage/registries/tool_formatting_registry.py +115 -8
  370. autobyteus/tools/usage/tool_schema_provider.py +51 -0
  371. autobyteus/tools/web/__init__.py +4 -0
  372. autobyteus/tools/web/read_url_tool.py +80 -0
  373. autobyteus/utils/diff_utils.py +271 -0
  374. autobyteus/utils/download_utils.py +109 -0
  375. autobyteus/utils/file_utils.py +57 -2
  376. autobyteus/utils/gemini_helper.py +56 -0
  377. autobyteus/utils/gemini_model_mapping.py +71 -0
  378. autobyteus/utils/llm_output_formatter.py +75 -0
  379. autobyteus/utils/tool_call_format.py +36 -0
  380. autobyteus/workflow/agentic_workflow.py +3 -3
  381. autobyteus/workflow/bootstrap_steps/agent_tool_injection_step.py +2 -2
  382. autobyteus/workflow/bootstrap_steps/base_workflow_bootstrap_step.py +2 -2
  383. autobyteus/workflow/bootstrap_steps/coordinator_initialization_step.py +2 -2
  384. autobyteus/workflow/bootstrap_steps/coordinator_prompt_preparation_step.py +4 -11
  385. autobyteus/workflow/bootstrap_steps/workflow_bootstrapper.py +6 -6
  386. autobyteus/workflow/bootstrap_steps/workflow_runtime_queue_initialization_step.py +2 -2
  387. autobyteus/workflow/context/workflow_context.py +3 -3
  388. autobyteus/workflow/context/workflow_runtime_state.py +5 -5
  389. autobyteus/workflow/events/workflow_event_dispatcher.py +5 -5
  390. autobyteus/workflow/handlers/lifecycle_workflow_event_handler.py +3 -3
  391. autobyteus/workflow/handlers/process_user_message_event_handler.py +5 -5
  392. autobyteus/workflow/handlers/tool_approval_workflow_event_handler.py +2 -2
  393. autobyteus/workflow/runtime/workflow_runtime.py +8 -8
  394. autobyteus/workflow/runtime/workflow_worker.py +3 -3
  395. autobyteus/workflow/status/__init__.py +11 -0
  396. autobyteus/workflow/status/workflow_status.py +19 -0
  397. autobyteus/workflow/status/workflow_status_manager.py +48 -0
  398. autobyteus/workflow/streaming/__init__.py +2 -2
  399. autobyteus/workflow/streaming/workflow_event_notifier.py +7 -7
  400. autobyteus/workflow/streaming/workflow_stream_event_payloads.py +4 -4
  401. autobyteus/workflow/streaming/workflow_stream_events.py +3 -3
  402. autobyteus/workflow/utils/wait_for_idle.py +4 -4
  403. autobyteus-1.2.3.dist-info/METADATA +293 -0
  404. autobyteus-1.2.3.dist-info/RECORD +600 -0
  405. {autobyteus-1.2.0.dist-info → autobyteus-1.2.3.dist-info}/WHEEL +1 -1
  406. {autobyteus-1.2.0.dist-info → autobyteus-1.2.3.dist-info}/top_level.txt +0 -1
  407. autobyteus/agent/bootstrap_steps/agent_runtime_queue_initialization_step.py +0 -57
  408. autobyteus/agent/hooks/__init__.py +0 -16
  409. autobyteus/agent/hooks/base_phase_hook.py +0 -78
  410. autobyteus/agent/hooks/hook_definition.py +0 -36
  411. autobyteus/agent/hooks/hook_meta.py +0 -37
  412. autobyteus/agent/hooks/hook_registry.py +0 -106
  413. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +0 -103
  414. autobyteus/agent/phases/__init__.py +0 -18
  415. autobyteus/agent/phases/discover.py +0 -53
  416. autobyteus/agent/phases/manager.py +0 -265
  417. autobyteus/agent/phases/transition_decorator.py +0 -40
  418. autobyteus/agent/phases/transition_info.py +0 -33
  419. autobyteus/agent/remote_agent.py +0 -244
  420. autobyteus/agent/workspace/workspace_definition.py +0 -36
  421. autobyteus/agent/workspace/workspace_meta.py +0 -37
  422. autobyteus/agent/workspace/workspace_registry.py +0 -72
  423. autobyteus/agent_team/bootstrap_steps/agent_team_runtime_queue_initialization_step.py +0 -25
  424. autobyteus/agent_team/bootstrap_steps/coordinator_prompt_preparation_step.py +0 -85
  425. autobyteus/agent_team/phases/__init__.py +0 -11
  426. autobyteus/agent_team/phases/agent_team_operational_phase.py +0 -19
  427. autobyteus/agent_team/phases/agent_team_phase_manager.py +0 -48
  428. autobyteus/llm/api/bedrock_llm.py +0 -92
  429. autobyteus/llm/api/groq_llm.py +0 -94
  430. autobyteus/llm/api/nvidia_llm.py +0 -108
  431. autobyteus/llm/utils/token_pricing_config.py +0 -87
  432. autobyteus/person/examples/sample_persons.py +0 -14
  433. autobyteus/person/examples/sample_roles.py +0 -14
  434. autobyteus/person/person.py +0 -29
  435. autobyteus/person/role.py +0 -14
  436. autobyteus/rpc/__init__.py +0 -73
  437. autobyteus/rpc/client/__init__.py +0 -17
  438. autobyteus/rpc/client/abstract_client_connection.py +0 -124
  439. autobyteus/rpc/client/client_connection_manager.py +0 -153
  440. autobyteus/rpc/client/sse_client_connection.py +0 -306
  441. autobyteus/rpc/client/stdio_client_connection.py +0 -280
  442. autobyteus/rpc/config/__init__.py +0 -13
  443. autobyteus/rpc/config/agent_server_config.py +0 -153
  444. autobyteus/rpc/config/agent_server_registry.py +0 -152
  445. autobyteus/rpc/hosting.py +0 -244
  446. autobyteus/rpc/protocol.py +0 -244
  447. autobyteus/rpc/server/__init__.py +0 -20
  448. autobyteus/rpc/server/agent_server_endpoint.py +0 -181
  449. autobyteus/rpc/server/base_method_handler.py +0 -40
  450. autobyteus/rpc/server/method_handlers.py +0 -259
  451. autobyteus/rpc/server/sse_server_handler.py +0 -182
  452. autobyteus/rpc/server/stdio_server_handler.py +0 -151
  453. autobyteus/rpc/server_main.py +0 -198
  454. autobyteus/rpc/transport_type.py +0 -13
  455. autobyteus/tools/bash/__init__.py +0 -2
  456. autobyteus/tools/bash/bash_executor.py +0 -100
  457. autobyteus/tools/browser/__init__.py +0 -2
  458. autobyteus/tools/browser/session_aware/__init__.py +0 -0
  459. autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +0 -75
  460. autobyteus/tools/browser/session_aware/browser_session_aware_tool.py +0 -30
  461. autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +0 -154
  462. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +0 -89
  463. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +0 -107
  464. autobyteus/tools/browser/session_aware/factory/__init__.py +0 -0
  465. autobyteus/tools/browser/session_aware/factory/browser_session_aware_web_element_trigger_factory.py +0 -14
  466. autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_reader_factory.py +0 -26
  467. autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_screenshot_taker_factory.py +0 -14
  468. autobyteus/tools/browser/session_aware/shared_browser_session.py +0 -11
  469. autobyteus/tools/browser/session_aware/shared_browser_session_manager.py +0 -25
  470. autobyteus/tools/browser/session_aware/web_element_action.py +0 -20
  471. autobyteus/tools/browser/standalone/__init__.py +0 -6
  472. autobyteus/tools/browser/standalone/factory/__init__.py +0 -0
  473. autobyteus/tools/browser/standalone/factory/webpage_reader_factory.py +0 -25
  474. autobyteus/tools/browser/standalone/factory/webpage_screenshot_taker_factory.py +0 -14
  475. autobyteus/tools/browser/standalone/navigate_to.py +0 -80
  476. autobyteus/tools/browser/standalone/web_page_pdf_generator.py +0 -97
  477. autobyteus/tools/browser/standalone/webpage_image_downloader.py +0 -165
  478. autobyteus/tools/browser/standalone/webpage_reader.py +0 -101
  479. autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +0 -101
  480. autobyteus/tools/file/file_editor.py +0 -200
  481. autobyteus/tools/google_search.py +0 -149
  482. autobyteus/tools/timer.py +0 -171
  483. autobyteus/tools/usage/parsers/__init__.py +0 -22
  484. autobyteus/tools/usage/parsers/_json_extractor.py +0 -99
  485. autobyteus/tools/usage/parsers/_string_decoders.py +0 -18
  486. autobyteus/tools/usage/parsers/anthropic_xml_tool_usage_parser.py +0 -10
  487. autobyteus/tools/usage/parsers/base_parser.py +0 -41
  488. autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +0 -83
  489. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +0 -316
  490. autobyteus/tools/usage/parsers/exceptions.py +0 -13
  491. autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +0 -77
  492. autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +0 -149
  493. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +0 -59
  494. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +0 -62
  495. autobyteus/workflow/phases/__init__.py +0 -11
  496. autobyteus/workflow/phases/workflow_operational_phase.py +0 -19
  497. autobyteus/workflow/phases/workflow_phase_manager.py +0 -48
  498. autobyteus-1.2.0.dist-info/METADATA +0 -205
  499. autobyteus-1.2.0.dist-info/RECORD +0 -496
  500. examples/__init__.py +0 -1
  501. examples/agent_team/__init__.py +0 -1
  502. examples/discover_phase_transitions.py +0 -104
  503. examples/run_browser_agent.py +0 -262
  504. examples/run_google_slides_agent.py +0 -287
  505. examples/run_mcp_browser_client.py +0 -174
  506. examples/run_mcp_google_slides_client.py +0 -270
  507. examples/run_mcp_list_tools.py +0 -189
  508. examples/run_poem_writer.py +0 -284
  509. examples/run_sqlite_agent.py +0 -295
  510. /autobyteus/{person → skills}/__init__.py +0 -0
  511. /autobyteus/{person/examples → tools/skill}/__init__.py +0 -0
  512. {autobyteus-1.2.0.dist-info → autobyteus-1.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,112 @@
1
+ import asyncio
2
+ import logging
3
+ import os
4
+ import uuid
5
+ from pathlib import Path
6
+ from typing import Optional, Dict, Any, TYPE_CHECKING
7
+
8
+ from openai import OpenAI
9
+
10
+ from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
11
+ from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
12
+
13
+ if TYPE_CHECKING:
14
+ from autobyteus.multimedia.audio.audio_model import AudioModel
15
+ from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _AUDIO_TEMP_DIR = Path("/tmp/autobyteus_audio")
20
+
21
+
22
+ def _save_audio_bytes(audio_bytes: bytes, file_extension: Optional[str]) -> str:
23
+ """Saves audio bytes to disk with a random file name."""
24
+ _AUDIO_TEMP_DIR.mkdir(parents=True, exist_ok=True)
25
+ suffix = (file_extension or "mp3").lstrip(".")
26
+ file_path = _AUDIO_TEMP_DIR / f"{uuid.uuid4()}.{suffix}"
27
+ file_path.write_bytes(audio_bytes)
28
+ logger.info(f"Successfully saved generated audio to {file_path}")
29
+ return str(file_path)
30
+
31
+
32
+ class OpenAIAudioClient(BaseAudioClient):
33
+ """
34
+ An audio client that uses OpenAI's Text-to-Speech (Speech) API.
35
+
36
+ **Setup Requirements:**
37
+ 1. Set the `OPENAI_API_KEY` environment variable with your OpenAI API key.
38
+ """
39
+
40
+ def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
41
+ super().__init__(model, config)
42
+ api_key = os.getenv("OPENAI_API_KEY")
43
+ if not api_key:
44
+ logger.error("OPENAI_API_KEY environment variable is not set.")
45
+ raise ValueError("OPENAI_API_KEY environment variable is not set.")
46
+
47
+ try:
48
+ self.client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")
49
+ logger.info(f"OpenAIAudioClient initialized for model '{self.model.name}'.")
50
+ except Exception as exc:
51
+ logger.error(f"Failed to configure OpenAI client: {exc}")
52
+ raise RuntimeError(f"Failed to configure OpenAI client: {exc}") from exc
53
+
54
+ async def generate_speech(
55
+ self,
56
+ prompt: str,
57
+ generation_config: Optional[Dict[str, Any]] = None,
58
+ **kwargs
59
+ ) -> SpeechGenerationResponse:
60
+ """
61
+ Generates speech using OpenAI's Speech endpoint and returns a local file path.
62
+ """
63
+ try:
64
+ final_config = self.config.to_dict().copy()
65
+ if generation_config:
66
+ final_config.update(generation_config)
67
+
68
+ voice = final_config.get("voice", "alloy")
69
+ response_format = (
70
+ final_config.get("response_format")
71
+ or final_config.get("format")
72
+ or "mp3"
73
+ )
74
+ instructions = final_config.get("instructions")
75
+
76
+ logger.info(
77
+ "Generating speech with OpenAI TTS model '%s' using voice '%s' and format '%s'.",
78
+ self.model.value,
79
+ voice,
80
+ response_format,
81
+ )
82
+
83
+ request_kwargs = {
84
+ "model": self.model.value,
85
+ "voice": voice,
86
+ "input": prompt,
87
+ }
88
+
89
+ if instructions:
90
+ request_kwargs["instructions"] = instructions
91
+
92
+ if response_format:
93
+ request_kwargs["response_format"] = response_format
94
+
95
+ response = await asyncio.to_thread(
96
+ self.client.audio.speech.create,
97
+ **request_kwargs,
98
+ )
99
+
100
+ audio_bytes = getattr(response, "content", None)
101
+ if not audio_bytes:
102
+ raise ValueError("OpenAI Speech API returned an empty response.")
103
+
104
+ audio_path = _save_audio_bytes(audio_bytes, response_format)
105
+ return SpeechGenerationResponse(audio_urls=[audio_path])
106
+
107
+ except Exception as exc:
108
+ logger.error("Error during OpenAI speech generation: %s", exc)
109
+ raise ValueError(f"OpenAI speech generation failed: {exc}") from exc
110
+
111
+ async def cleanup(self):
112
+ logger.debug("OpenAIAudioClient cleanup called.")
@@ -5,6 +5,7 @@ from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
5
5
  from autobyteus.multimedia.audio.audio_model import AudioModel
6
6
  from autobyteus.multimedia.providers import MultimediaProvider
7
7
  from autobyteus.multimedia.audio.api.gemini_audio_client import GeminiAudioClient
8
+ from autobyteus.multimedia.audio.api.openai_audio_client import OpenAIAudioClient
8
9
  from autobyteus.multimedia.audio.autobyteus_audio_provider import AutobyteusAudioModelProvider
9
10
  from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
10
11
  from autobyteus.utils.singleton import SingletonMeta
@@ -12,12 +13,55 @@ from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefiniti
12
13
 
13
14
  logger = logging.getLogger(__name__)
14
15
 
15
- GEMINI_TTS_VOICES = [
16
- "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
17
- "Callirrhoe", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba",
18
- "Despina", "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
19
- "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
20
- "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat"
16
+ # Enhanced metadata for Google Gemini TTS voices, including gender and description.
17
+ GEMINI_VOICE_DETAILS = {
18
+ "Zephyr": {"gender": "female", "description": "Bright, Higher pitch"},
19
+ "Puck": {"gender": "male", "description": "Upbeat, Middle pitch"},
20
+ "Charon": {"gender": "male", "description": "Informative, Lower pitch"},
21
+ "Kore": {"gender": "female", "description": "Firm, Middle pitch"},
22
+ "Fenrir": {"gender": "male", "description": "Excitable, Lower middle pitch"},
23
+ "Leda": {"gender": "female", "description": "Youthful, Higher pitch"},
24
+ "Orus": {"gender": "male", "description": "Firm, Lower middle pitch"},
25
+ "Aoede": {"gender": "female", "description": "Breezy, Middle pitch"},
26
+ "Callirrhoe": {"gender": "female", "description": "Easy-going, Middle pitch"},
27
+ "Autonoe": {"gender": "female", "description": "Bright, Middle pitch"},
28
+ "Enceladus": {"gender": "male", "description": "Breathy, Lower pitch"},
29
+ "Iapetus": {"gender": "male", "description": "Clear, Lower middle pitch"},
30
+ "Umbriel": {"gender": "male", "description": "Easy-going, Lower middle pitch"},
31
+ "Algieba": {"gender": "male", "description": "Smooth, Lower pitch"},
32
+ "Despina": {"gender": "female", "description": "Smooth, Middle pitch"},
33
+ "Erinome": {"gender": "female", "description": "Clear, Middle pitch"},
34
+ "Algenib": {"gender": "male", "description": "Gravelly, Lower pitch"},
35
+ "Rasalgethi": {"gender": "male", "description": "Informative, Middle pitch"},
36
+ "Laomedeia": {"gender": "female", "description": "Upbeat, Higher pitch"},
37
+ "Achernar": {"gender": "female", "description": "Soft, Higher pitch"},
38
+ "Alnilam": {"gender": "male", "description": "Firm, Lower middle pitch"},
39
+ "Schedar": {"gender": "male", "description": "Even, Lower middle pitch"},
40
+ "Gacrux": {"gender": "female", "description": "Mature, Middle pitch"},
41
+ "Pulcherrima": {"gender": "female", "description": "Forward, Middle pitch"},
42
+ "Achird": {"gender": "male", "description": "Friendly, Lower middle pitch"},
43
+ "Zubenelgenubi": {"gender": "male", "description": "Casual, Lower middle pitch"},
44
+ "Vindemiatrix": {"gender": "female", "description": "Gentle, Middle pitch"},
45
+ "Sadachbia": {"gender": "male", "description": "Lively, Lower pitch"},
46
+ "Sadaltager": {"gender": "male", "description": "Knowledgeable, Middle pitch"},
47
+ "Sulafat": {"gender": "female", "description": "Warm, Middle pitch"},
48
+ }
49
+
50
+ # The list of voice names, derived from the keys of the details dictionary.
51
+ # This is used for the `enum_values` to maintain compatibility.
52
+ GEMINI_TTS_VOICES = list(GEMINI_VOICE_DETAILS.keys())
53
+
54
+ # Generate a formatted string of voice metadata to be appended to parameter descriptions.
55
+ _voice_descriptions_list = [
56
+ f"- {name} ({details['gender']}): {details['description']}"
57
+ for name, details in GEMINI_VOICE_DETAILS.items()
58
+ ]
59
+ GEMINI_VOICE_METADATA_DESC = "\n\nDetailed Voice Options:\n" + "\n".join(_voice_descriptions_list)
60
+
61
+
62
+ OPENAI_TTS_VOICES = [
63
+ "alloy", "ash", "ballad", "coral", "echo", "fable", "onyx",
64
+ "nova", "sage", "shimmer", "verse"
21
65
  ]
22
66
 
23
67
  class AudioClientFactory(metaclass=SingletonMeta):
@@ -58,7 +102,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
58
102
  ParameterDefinition(
59
103
  name="voice",
60
104
  param_type=ParameterType.ENUM,
61
- description="The voice to assign to this speaker.",
105
+ description="The voice to assign to this speaker." + GEMINI_VOICE_METADATA_DESC,
62
106
  enum_values=GEMINI_TTS_VOICES,
63
107
  required=True
64
108
  )
@@ -78,7 +122,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
78
122
  param_type=ParameterType.ENUM,
79
123
  default_value="Kore",
80
124
  enum_values=GEMINI_TTS_VOICES,
81
- description="The voice to use for single-speaker generation."
125
+ description="The voice to use for single-speaker generation." + GEMINI_VOICE_METADATA_DESC
82
126
  ),
83
127
  ParameterDefinition(
84
128
  name="style_instructions",
@@ -96,12 +140,43 @@ class AudioClientFactory(metaclass=SingletonMeta):
96
140
  gemini_tts_model = AudioModel(
97
141
  name="gemini-2.5-flash-tts",
98
142
  value="gemini-2.5-flash-preview-tts",
99
- provider=MultimediaProvider.GOOGLE,
143
+ provider=MultimediaProvider.GEMINI,
100
144
  client_class=GeminiAudioClient,
101
145
  parameter_schema=gemini_tts_schema
102
146
  )
103
147
 
148
+ openai_tts_schema = ParameterSchema(parameters=[
149
+ ParameterDefinition(
150
+ name="voice",
151
+ param_type=ParameterType.ENUM,
152
+ default_value="alloy",
153
+ enum_values=OPENAI_TTS_VOICES,
154
+ description="The OpenAI TTS voice to use for generation."
155
+ ),
156
+ ParameterDefinition(
157
+ name="format",
158
+ param_type=ParameterType.ENUM,
159
+ default_value="mp3",
160
+ enum_values=["mp3", "wav"],
161
+ description="The audio format to generate."
162
+ ),
163
+ ParameterDefinition(
164
+ name="instructions",
165
+ param_type=ParameterType.STRING,
166
+ description="Optional delivery instructions (tone, pacing, accent, etc.)."
167
+ )
168
+ ])
169
+
170
+ openai_tts_model = AudioModel(
171
+ name="gpt-4o-mini-tts",
172
+ value="gpt-4o-mini-tts",
173
+ provider=MultimediaProvider.OPENAI,
174
+ client_class=OpenAIAudioClient,
175
+ parameter_schema=openai_tts_schema
176
+ )
177
+
104
178
  models_to_register = [
179
+ openai_tts_model,
105
180
  gemini_tts_model,
106
181
  ]
107
182
 
@@ -79,7 +79,8 @@ class AudioModel(metaclass=AudioModelMeta):
79
79
  """Returns the unique identifier for the model."""
80
80
  if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
81
81
  try:
82
- host = urlparse(self.host_url).hostname
82
+ parsed = urlparse(self.host_url)
83
+ host = parsed.netloc or parsed.hostname or self.host_url
83
84
  return f"{self.name}@{host}"
84
85
  except Exception:
85
86
  return f"{self.name}@{self.host_url}" # Fallback
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import uuid
2
3
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
3
4
  from autobyteus.clients import AutobyteusClient
4
5
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
13
14
  class AutobyteusImageClient(BaseImageClient):
14
15
  """
15
16
  An image client that connects to an Autobyteus LLM server instance for image tasks.
17
+ Maintains a persistent session ID for stateful interactions (e.g. conversational editing).
16
18
  """
17
19
 
18
20
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
@@ -21,7 +23,9 @@ class AutobyteusImageClient(BaseImageClient):
21
23
  raise ValueError("AutobyteusImageClient requires a host_url in its ImageModel.")
22
24
 
23
25
  self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
24
- logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' on host '{model.host_url}'.")
26
+ self.session_id = str(uuid.uuid4())
27
+ logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' "
28
+ f"on host '{model.host_url}' with session_id '{self.session_id}'.")
25
29
 
26
30
  async def generate_image(
27
31
  self,
@@ -72,7 +76,7 @@ class AutobyteusImageClient(BaseImageClient):
72
76
  ) -> ImageGenerationResponse:
73
77
  """Internal helper to call the remote server."""
74
78
  try:
75
- logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url}")
79
+ logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url} (Session: {self.session_id})")
76
80
 
77
81
  # The model name for the remote server is the `value`, not the unique `model_identifier`
78
82
  model_name_for_server = self.model.name
@@ -84,7 +88,8 @@ class AutobyteusImageClient(BaseImageClient):
84
88
  prompt=prompt,
85
89
  input_image_urls=input_image_urls,
86
90
  mask_url=mask_url,
87
- generation_config=generation_config
91
+ generation_config=generation_config,
92
+ session_id=self.session_id
88
93
  )
89
94
 
90
95
  image_urls = response_data.get("image_urls", [])
@@ -98,7 +103,16 @@ class AutobyteusImageClient(BaseImageClient):
98
103
  raise
99
104
 
100
105
  async def cleanup(self):
101
- """Closes the underlying AutobyteusClient."""
106
+ """
107
+ Notifies the server to cleanup the session, then closes the underlying HTTP client.
108
+ """
102
109
  if self.autobyteus_client:
103
- await self.autobyteus_client.close()
110
+ try:
111
+ logger.info(f"Notifying server to cleanup image session '{self.session_id}'...")
112
+ await self.autobyteus_client.cleanup_image_session(self.session_id)
113
+ except Exception as e:
114
+ logger.error(f"Failed to cleanup remote image session '{self.session_id}': {e}")
115
+ finally:
116
+ await self.autobyteus_client.close()
117
+
104
118
  logger.debug("AutobyteusImageClient cleaned up.")
@@ -1,14 +1,13 @@
1
1
  import logging
2
2
  import base64
3
- import os
4
3
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
5
- from google import genai
6
- from PIL import Image
7
- import requests
4
+ from google.genai import types as genai_types
8
5
 
9
6
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
10
7
  from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
11
8
  from autobyteus.multimedia.utils.api_utils import load_image_from_url
9
+ from autobyteus.utils.gemini_helper import initialize_gemini_client_with_runtime
10
+ from autobyteus.utils.gemini_model_mapping import resolve_model_for_runtime
12
11
 
13
12
  if TYPE_CHECKING:
14
13
  from autobyteus.multimedia.image.image_model import ImageModel
@@ -21,17 +20,15 @@ class GeminiImageClient(BaseImageClient):
21
20
  An image client that uses Google's Gemini models for image generation tasks.
22
21
 
23
22
  **Setup Requirements:**
24
- 1. **Authentication:** Set the `GEMINI_API_KEY` environment variable with your API key.
23
+ 1. **AI Studio Mode:** Set `GEMINI_API_KEY`.
24
+ 2. **Vertex AI Mode:** Set `VERTEX_AI_PROJECT` and `VERTEX_AI_LOCATION`.
25
25
  """
26
26
 
27
27
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
28
28
  super().__init__(model, config)
29
- api_key = os.getenv("GEMINI_API_KEY")
30
- if not api_key:
31
- raise ValueError("Please set the GEMINI_API_KEY environment variable.")
32
29
 
33
30
  try:
34
- self.client = genai.Client()
31
+ self.client, self.runtime_info = initialize_gemini_client_with_runtime()
35
32
  self.async_client = self.client.aio
36
33
  logger.info(f"GeminiImageClient initialized for model '{self.model.name}'.")
37
34
  except Exception as e:
@@ -60,16 +57,40 @@ class GeminiImageClient(BaseImageClient):
60
57
  except Exception as e:
61
58
  logger.error(f"Skipping image at '{url}' due to loading error: {e}")
62
59
 
63
- # Note: The google-genai library uses the synchronous client for the `.generate_content` method on a model
64
- # even in an async context, as there isn't a direct async equivalent exposed for this specific call on the model object.
65
- # We use the top-level async client for other potential future calls if the library API changes.
66
- model_instance = self.client.get_generative_model(model_name=f"models/{self.model.value}")
67
- response = await model_instance.generate_content_async(contents=content)
60
+ config_dict: Dict[str, Any] = {}
61
+ if self.config and self.config.params:
62
+ config_dict.update(self.config.params)
63
+ if generation_config:
64
+ config_dict.update(generation_config)
65
+ if "response_modalities" not in config_dict:
66
+ if getattr(self, "runtime_info", None) and self.runtime_info.runtime == "vertex":
67
+ config_dict["response_modalities"] = ["TEXT", "IMAGE"]
68
+ else:
69
+ config_dict["response_modalities"] = ["IMAGE"]
70
+ config = genai_types.GenerateContentConfig(**config_dict)
71
+
72
+ # FIX: Removed 'models/' prefix from model_name to support Vertex AI
73
+ runtime_adjusted_model = resolve_model_for_runtime(
74
+ self.model.value,
75
+ modality="image",
76
+ runtime=getattr(self, "runtime_info", None) and self.runtime_info.runtime,
77
+ )
78
+ if runtime_adjusted_model != self.model.value:
79
+ logger.info(
80
+ "Using runtime-adjusted Gemini image model '%s' (requested '%s').",
81
+ runtime_adjusted_model,
82
+ self.model.value,
83
+ )
84
+ response = await self.async_client.models.generate_content(
85
+ model=runtime_adjusted_model,
86
+ contents=content,
87
+ config=config,
88
+ )
68
89
 
69
90
 
70
91
  image_urls = []
71
- for part in response.parts:
72
- if part.inline_data and "image" in part.inline_data.mime_type:
92
+ for part in response.parts or []:
93
+ if part.inline_data and part.inline_data.mime_type and "image" in part.inline_data.mime_type:
73
94
  image_bytes = part.inline_data.data
74
95
  base64_image = base64.b64encode(image_bytes).decode("utf-8")
75
96
  data_uri = f"data:{part.inline_data.mime_type};base64,{base64_image}"
@@ -77,7 +98,7 @@ class GeminiImageClient(BaseImageClient):
77
98
 
78
99
  if not image_urls:
79
100
  # Check for a safety-related refusal to generate content
80
- if response.prompt_feedback.block_reason:
101
+ if response.prompt_feedback and response.prompt_feedback.block_reason:
81
102
  reason = response.prompt_feedback.block_reason.name
82
103
  logger.error(f"Image generation blocked due to safety settings. Reason: {reason}")
83
104
  raise ValueError(f"Image generation failed due to safety settings: {reason}")
@@ -1,9 +1,14 @@
1
1
  import logging
2
2
  import os
3
+ import tempfile
4
+ from pathlib import Path
3
5
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
6
+
4
7
  from openai import OpenAI
8
+
5
9
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
6
10
  from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
11
+ from autobyteus.utils.download_utils import download_file_from_url
7
12
 
8
13
  if TYPE_CHECKING:
9
14
  from autobyteus.multimedia.image.image_model import ImageModel
@@ -11,9 +16,19 @@ if TYPE_CHECKING:
11
16
 
12
17
  logger = logging.getLogger(__name__)
13
18
 
19
+
20
+ def _mime_type_from_format(output_format: str) -> str:
21
+ fmt = (output_format or "png").lower()
22
+ if fmt in {"jpg", "jpeg"}:
23
+ return "image/jpeg"
24
+ if fmt == "webp":
25
+ return "image/webp"
26
+ return "image/png"
27
+
28
+
14
29
  class OpenAIImageClient(BaseImageClient):
15
30
  """
16
- An image client that uses OpenAI's DALL-E models.
31
+ An image client that uses OpenAI's gpt-image series via the images API.
17
32
  """
18
33
 
19
34
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
@@ -34,49 +49,68 @@ class OpenAIImageClient(BaseImageClient):
34
49
  **kwargs
35
50
  ) -> ImageGenerationResponse:
36
51
  """
37
- Generates an image using an OpenAI DALL-E model via the v1/images/generations endpoint.
38
- Note: This endpoint does not support image inputs, even for multimodal models like gpt-image-1.
52
+ Generates an image using OpenAI's images generation endpoint.
53
+ Note: This endpoint does not support image inputs.
39
54
  """
40
55
  if input_image_urls:
41
56
  logger.warning(
42
- f"The OpenAI `images.generate` API used by this client does not support input images. "
43
- f"The images provided for model '{self.model.value}' will be ignored. "
44
- f"To use image inputs, a client based on the Chat Completions API is required."
57
+ "The OpenAI `images.generate` API used by this client does not support input images. "
58
+ "The images provided for model '%s' will be ignored. "
59
+ "To use image inputs, a client based on the Chat Completions API is required.",
60
+ self.model.value,
45
61
  )
46
62
 
47
63
  try:
48
64
  image_model = self.model.value
49
- logger.info(f"Generating image with OpenAI model '{image_model}' and prompt: '{prompt[:50]}...'")
65
+ logger.info("Generating image with OpenAI model '%s' and prompt: '%s...'", image_model, prompt[:50])
50
66
 
51
67
  # Combine default config with any overrides
52
68
  final_config = self.config.to_dict().copy()
53
69
  if generation_config:
54
70
  final_config.update(generation_config)
55
-
56
- response = self.client.images.generate(
57
- model=image_model,
58
- prompt=prompt,
59
- n=final_config.get("n", 1),
60
- size=final_config.get("size", "1024x1024"),
61
- quality=final_config.get("quality", "standard"),
62
- style=final_config.get("style", "vivid"),
63
- response_format="url"
71
+ # Always request a single image for simplicity
72
+ final_config["n"] = 1
73
+
74
+ request_kwargs = {
75
+ "model": image_model,
76
+ "prompt": prompt,
77
+ "n": 1,
78
+ "size": final_config.get("size", "1024x1024"),
79
+ "quality": final_config.get("quality", "standard"),
80
+ }
81
+ if "output_format" in final_config:
82
+ request_kwargs["output_format"] = final_config["output_format"]
83
+ if "output_compression" in final_config:
84
+ request_kwargs["output_compression"] = final_config["output_compression"]
85
+
86
+ response = self.client.images.generate(**request_kwargs)
87
+
88
+ output_format = final_config.get("output_format", "png")
89
+ mime_type = _mime_type_from_format(output_format)
90
+ image_urls_list: List[str] = []
91
+ for img in response.data:
92
+ if getattr(img, "url", None):
93
+ image_urls_list.append(img.url)
94
+ elif getattr(img, "b64_json", None):
95
+ image_urls_list.append(f"data:{mime_type};base64,{img.b64_json}")
96
+
97
+ revised_prompt: Optional[str] = (
98
+ response.data[0].revised_prompt
99
+ if response.data and hasattr(response.data[0], "revised_prompt")
100
+ else None
64
101
  )
65
102
 
66
- image_urls_list: List[str] = [img.url for img in response.data if img.url]
67
- revised_prompt: Optional[str] = response.data[0].revised_prompt if response.data and hasattr(response.data[0], 'revised_prompt') else None
68
-
69
103
  if not image_urls_list:
70
- raise ValueError("OpenAI API did not return any image URLs.")
104
+ raise ValueError("OpenAI API did not return any image data.")
71
105
 
72
- logger.info(f"Successfully generated {len(image_urls_list)} image(s).")
106
+ logger.info("Successfully generated %s image(s).", len(image_urls_list))
73
107
 
74
108
  return ImageGenerationResponse(
75
109
  image_urls=image_urls_list,
76
110
  revised_prompt=revised_prompt
77
111
  )
78
112
  except Exception as e:
79
- logger.error(f"Error during OpenAI image generation: {str(e)}")
113
+ logger.error("Error during OpenAI image generation: %s", str(e))
80
114
  raise ValueError(f"OpenAI image generation failed: {str(e)}")
81
115
 
82
116
  async def edit_image(
@@ -95,49 +129,97 @@ class OpenAIImageClient(BaseImageClient):
95
129
 
96
130
  source_image_url = input_image_urls[0]
97
131
  if len(input_image_urls) > 1:
98
- logger.warning(f"OpenAI edit endpoint only supports one input image. Using '{source_image_url}' and ignoring the rest.")
132
+ logger.warning(
133
+ "OpenAI edit endpoint only supports one input image. Using '%s' and ignoring the rest.",
134
+ source_image_url,
135
+ )
99
136
 
137
+ temp_image_path: Optional[Path] = None
138
+ temp_mask_path: Optional[Path] = None
100
139
  try:
101
- logger.info(f"Editing image '{source_image_url}' with prompt: '{prompt[:50]}...'")
140
+ logger.info("Editing image '%s' with prompt: '%s...'", source_image_url, prompt[:50])
102
141
 
103
142
  # Combine default config with any overrides
104
143
  final_config = self.config.to_dict().copy()
105
144
  if generation_config:
106
145
  final_config.update(generation_config)
107
-
108
- with open(source_image_url, "rb") as image_file:
109
- mask_file = open(mask_url, "rb") if mask_url else None
146
+ # Always request a single edited image
147
+ final_config["n"] = 1
148
+
149
+ source_path = Path(source_image_url)
150
+ if not source_path.exists():
151
+ temp_image_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
152
+ temp_image_file.close()
153
+ temp_image_path = Path(temp_image_file.name)
154
+ await download_file_from_url(source_image_url, temp_image_path)
155
+ source_path = temp_image_path
156
+
157
+ if mask_url:
158
+ mask_path = Path(mask_url)
159
+ if not mask_path.exists():
160
+ temp_mask_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
161
+ temp_mask_file.close()
162
+ temp_mask_path = Path(temp_mask_file.name)
163
+ await download_file_from_url(mask_url, temp_mask_path)
164
+ mask_path = temp_mask_path
165
+ else:
166
+ mask_path = None
167
+
168
+ with open(source_path, "rb") as image_file:
169
+ mask_file = open(mask_path, "rb") if mask_path else None
110
170
  try:
111
- response = self.client.images.edit(
112
- image=image_file,
113
- mask=mask_file,
114
- prompt=prompt,
115
- model=self.model.value,
116
- n=final_config.get("n", 1),
117
- size=final_config.get("size", "1024x1024"),
118
- response_format="url"
119
- )
171
+ request_kwargs = {
172
+ "image": image_file,
173
+ "prompt": prompt,
174
+ "model": self.model.value,
175
+ "n": final_config.get("n", 1),
176
+ "size": final_config.get("size", "1024x1024"),
177
+ }
178
+ if mask_file:
179
+ request_kwargs["mask"] = mask_file
180
+ if "output_format" in final_config:
181
+ request_kwargs["output_format"] = final_config["output_format"]
182
+ if "output_compression" in final_config:
183
+ request_kwargs["output_compression"] = final_config["output_compression"]
184
+ response = self.client.images.edit(**request_kwargs)
120
185
  finally:
121
186
  if mask_file:
122
187
  mask_file.close()
123
188
 
124
- image_urls_list: List[str] = [img.url for img in response.data if img.url]
189
+ output_format = final_config.get("output_format", "png")
190
+ mime_type = _mime_type_from_format(output_format)
191
+ image_urls_list: List[str] = []
192
+ for img in response.data:
193
+ if getattr(img, "url", None):
194
+ image_urls_list.append(img.url)
195
+ elif getattr(img, "b64_json", None):
196
+ image_urls_list.append(f"data:{mime_type};base64,{img.b64_json}")
197
+
125
198
  if not image_urls_list:
126
- raise ValueError("OpenAI API did not return any edited image URLs.")
199
+ raise ValueError("OpenAI API did not return any edited image data.")
127
200
 
128
- logger.info(f"Successfully edited image, generated {len(image_urls_list)} version(s).")
201
+ logger.info("Successfully edited image, generated %s version(s).", len(image_urls_list))
129
202
  return ImageGenerationResponse(image_urls=image_urls_list)
130
203
 
131
204
  except FileNotFoundError as e:
132
- logger.error(f"Image file not found for editing: {e.filename}")
205
+ logger.error("Image file not found for editing: %s", e.filename)
133
206
  raise
134
207
  except Exception as e:
135
- logger.error(f"Error during OpenAI image editing: {str(e)}")
136
- # The API might return a 400 Bad Request if the model doesn't support edits
208
+ logger.error("Error during OpenAI image editing: %s", str(e))
137
209
  if "does not support image editing" in str(e):
138
210
  raise ValueError(f"The model '{self.model.value}' does not support the image editing endpoint.")
139
211
  raise ValueError(f"OpenAI image editing failed: {str(e)}")
140
-
212
+ finally:
213
+ if temp_image_path and temp_image_path.exists():
214
+ try:
215
+ temp_image_path.unlink()
216
+ except OSError:
217
+ logger.warning("Failed to clean up temp image file: %s", temp_image_path)
218
+ if temp_mask_path and temp_mask_path.exists():
219
+ try:
220
+ temp_mask_path.unlink()
221
+ except OSError:
222
+ logger.warning("Failed to clean up temp mask file: %s", temp_mask_path)
141
223
 
142
224
  async def cleanup(self):
143
225
  # The OpenAI client does not require explicit cleanup of a session.