autobyteus 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (472)
  1. autobyteus/agent/agent.py +15 -5
  2. autobyteus/agent/bootstrap_steps/__init__.py +3 -3
  3. autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +5 -59
  4. autobyteus/agent/bootstrap_steps/base_bootstrap_step.py +1 -4
  5. autobyteus/agent/bootstrap_steps/mcp_server_prewarming_step.py +1 -3
  6. autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +16 -13
  7. autobyteus/agent/bootstrap_steps/working_context_snapshot_restore_step.py +38 -0
  8. autobyteus/agent/bootstrap_steps/workspace_context_initialization_step.py +2 -4
  9. autobyteus/agent/context/agent_config.py +47 -20
  10. autobyteus/agent/context/agent_context.py +23 -18
  11. autobyteus/agent/context/agent_runtime_state.py +21 -19
  12. autobyteus/agent/events/__init__.py +16 -1
  13. autobyteus/agent/events/agent_events.py +43 -3
  14. autobyteus/agent/events/agent_input_event_queue_manager.py +79 -26
  15. autobyteus/agent/events/event_store.py +57 -0
  16. autobyteus/agent/events/notifiers.py +69 -59
  17. autobyteus/agent/events/worker_event_dispatcher.py +21 -64
  18. autobyteus/agent/factory/agent_factory.py +83 -6
  19. autobyteus/agent/handlers/__init__.py +2 -0
  20. autobyteus/agent/handlers/approved_tool_invocation_event_handler.py +51 -34
  21. autobyteus/agent/handlers/bootstrap_event_handler.py +155 -0
  22. autobyteus/agent/handlers/inter_agent_message_event_handler.py +10 -0
  23. autobyteus/agent/handlers/lifecycle_event_logger.py +19 -11
  24. autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +10 -15
  25. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +188 -48
  26. autobyteus/agent/handlers/tool_execution_approval_event_handler.py +0 -10
  27. autobyteus/agent/handlers/tool_invocation_request_event_handler.py +53 -48
  28. autobyteus/agent/handlers/tool_result_event_handler.py +7 -8
  29. autobyteus/agent/handlers/user_input_message_event_handler.py +10 -3
  30. autobyteus/agent/input_processor/memory_ingest_input_processor.py +44 -0
  31. autobyteus/agent/lifecycle/__init__.py +12 -0
  32. autobyteus/agent/lifecycle/base_processor.py +109 -0
  33. autobyteus/agent/lifecycle/events.py +35 -0
  34. autobyteus/agent/lifecycle/processor_definition.py +36 -0
  35. autobyteus/agent/lifecycle/processor_registry.py +106 -0
  36. autobyteus/agent/llm_request_assembler.py +98 -0
  37. autobyteus/agent/llm_response_processor/__init__.py +1 -8
  38. autobyteus/agent/message/context_file_type.py +1 -1
  39. autobyteus/agent/runtime/agent_runtime.py +29 -21
  40. autobyteus/agent/runtime/agent_worker.py +98 -19
  41. autobyteus/agent/shutdown_steps/__init__.py +2 -0
  42. autobyteus/agent/shutdown_steps/agent_shutdown_orchestrator.py +2 -0
  43. autobyteus/agent/shutdown_steps/tool_cleanup_step.py +58 -0
  44. autobyteus/agent/status/__init__.py +14 -0
  45. autobyteus/agent/status/manager.py +93 -0
  46. autobyteus/agent/status/status_deriver.py +96 -0
  47. autobyteus/agent/{phases/phase_enum.py → status/status_enum.py} +16 -16
  48. autobyteus/agent/status/status_update_utils.py +73 -0
  49. autobyteus/agent/streaming/__init__.py +52 -5
  50. autobyteus/agent/streaming/adapters/__init__.py +18 -0
  51. autobyteus/agent/streaming/adapters/invocation_adapter.py +184 -0
  52. autobyteus/agent/streaming/adapters/tool_call_parsing.py +163 -0
  53. autobyteus/agent/streaming/adapters/tool_syntax_registry.py +67 -0
  54. autobyteus/agent/streaming/agent_event_stream.py +3 -183
  55. autobyteus/agent/streaming/api_tool_call/__init__.py +16 -0
  56. autobyteus/agent/streaming/api_tool_call/file_content_streamer.py +56 -0
  57. autobyteus/agent/streaming/api_tool_call/json_string_field_extractor.py +175 -0
  58. autobyteus/agent/streaming/api_tool_call_streaming_response_handler.py +4 -0
  59. autobyteus/agent/streaming/events/__init__.py +6 -0
  60. autobyteus/agent/streaming/events/stream_event_payloads.py +284 -0
  61. autobyteus/agent/streaming/events/stream_events.py +141 -0
  62. autobyteus/agent/streaming/handlers/__init__.py +15 -0
  63. autobyteus/agent/streaming/handlers/api_tool_call_streaming_response_handler.py +303 -0
  64. autobyteus/agent/streaming/handlers/parsing_streaming_response_handler.py +107 -0
  65. autobyteus/agent/streaming/handlers/pass_through_streaming_response_handler.py +107 -0
  66. autobyteus/agent/streaming/handlers/streaming_handler_factory.py +177 -0
  67. autobyteus/agent/streaming/handlers/streaming_response_handler.py +58 -0
  68. autobyteus/agent/streaming/parser/__init__.py +61 -0
  69. autobyteus/agent/streaming/parser/event_emitter.py +181 -0
  70. autobyteus/agent/streaming/parser/events.py +4 -0
  71. autobyteus/agent/streaming/parser/invocation_adapter.py +4 -0
  72. autobyteus/agent/streaming/parser/json_parsing_strategies/__init__.py +19 -0
  73. autobyteus/agent/streaming/parser/json_parsing_strategies/base.py +32 -0
  74. autobyteus/agent/streaming/parser/json_parsing_strategies/default.py +34 -0
  75. autobyteus/agent/streaming/parser/json_parsing_strategies/gemini.py +31 -0
  76. autobyteus/agent/streaming/parser/json_parsing_strategies/openai.py +64 -0
  77. autobyteus/agent/streaming/parser/json_parsing_strategies/registry.py +75 -0
  78. autobyteus/agent/streaming/parser/parser_context.py +227 -0
  79. autobyteus/agent/streaming/parser/parser_factory.py +132 -0
  80. autobyteus/agent/streaming/parser/sentinel_format.py +7 -0
  81. autobyteus/agent/streaming/parser/state_factory.py +62 -0
  82. autobyteus/agent/streaming/parser/states/__init__.py +1 -0
  83. autobyteus/agent/streaming/parser/states/base_state.py +60 -0
  84. autobyteus/agent/streaming/parser/states/custom_xml_tag_run_bash_parsing_state.py +38 -0
  85. autobyteus/agent/streaming/parser/states/custom_xml_tag_write_file_parsing_state.py +55 -0
  86. autobyteus/agent/streaming/parser/states/delimited_content_state.py +146 -0
  87. autobyteus/agent/streaming/parser/states/json_initialization_state.py +144 -0
  88. autobyteus/agent/streaming/parser/states/json_tool_parsing_state.py +137 -0
  89. autobyteus/agent/streaming/parser/states/sentinel_content_state.py +30 -0
  90. autobyteus/agent/streaming/parser/states/sentinel_initialization_state.py +117 -0
  91. autobyteus/agent/streaming/parser/states/text_state.py +78 -0
  92. autobyteus/agent/streaming/parser/states/xml_patch_file_tool_parsing_state.py +328 -0
  93. autobyteus/agent/streaming/parser/states/xml_run_bash_tool_parsing_state.py +129 -0
  94. autobyteus/agent/streaming/parser/states/xml_tag_initialization_state.py +151 -0
  95. autobyteus/agent/streaming/parser/states/xml_tool_parsing_state.py +63 -0
  96. autobyteus/agent/streaming/parser/states/xml_write_file_tool_parsing_state.py +343 -0
  97. autobyteus/agent/streaming/parser/strategies/__init__.py +17 -0
  98. autobyteus/agent/streaming/parser/strategies/base.py +24 -0
  99. autobyteus/agent/streaming/parser/strategies/json_tool_strategy.py +26 -0
  100. autobyteus/agent/streaming/parser/strategies/registry.py +28 -0
  101. autobyteus/agent/streaming/parser/strategies/sentinel_strategy.py +23 -0
  102. autobyteus/agent/streaming/parser/strategies/xml_tag_strategy.py +21 -0
  103. autobyteus/agent/streaming/parser/stream_scanner.py +167 -0
  104. autobyteus/agent/streaming/parser/streaming_parser.py +212 -0
  105. autobyteus/agent/streaming/parser/tool_call_parsing.py +4 -0
  106. autobyteus/agent/streaming/parser/tool_constants.py +7 -0
  107. autobyteus/agent/streaming/parser/tool_syntax_registry.py +4 -0
  108. autobyteus/agent/streaming/parser/xml_tool_parsing_state_registry.py +55 -0
  109. autobyteus/agent/streaming/parsing_streaming_response_handler.py +4 -0
  110. autobyteus/agent/streaming/pass_through_streaming_response_handler.py +4 -0
  111. autobyteus/agent/streaming/queue_streamer.py +3 -57
  112. autobyteus/agent/streaming/segments/__init__.py +5 -0
  113. autobyteus/agent/streaming/segments/segment_events.py +82 -0
  114. autobyteus/agent/streaming/stream_event_payloads.py +2 -223
  115. autobyteus/agent/streaming/stream_events.py +3 -140
  116. autobyteus/agent/streaming/streaming_handler_factory.py +4 -0
  117. autobyteus/agent/streaming/streaming_response_handler.py +4 -0
  118. autobyteus/agent/streaming/streams/__init__.py +5 -0
  119. autobyteus/agent/streaming/streams/agent_event_stream.py +197 -0
  120. autobyteus/agent/streaming/utils/__init__.py +5 -0
  121. autobyteus/agent/streaming/utils/queue_streamer.py +59 -0
  122. autobyteus/agent/system_prompt_processor/__init__.py +2 -0
  123. autobyteus/agent/system_prompt_processor/available_skills_processor.py +96 -0
  124. autobyteus/agent/system_prompt_processor/base_processor.py +1 -1
  125. autobyteus/agent/system_prompt_processor/processor_meta.py +15 -2
  126. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +39 -58
  127. autobyteus/agent/token_budget.py +56 -0
  128. autobyteus/agent/tool_execution_result_processor/memory_ingest_tool_result_processor.py +29 -0
  129. autobyteus/agent/tool_invocation.py +16 -40
  130. autobyteus/agent/tool_invocation_preprocessor/__init__.py +9 -0
  131. autobyteus/agent/tool_invocation_preprocessor/base_preprocessor.py +45 -0
  132. autobyteus/agent/tool_invocation_preprocessor/processor_definition.py +15 -0
  133. autobyteus/agent/tool_invocation_preprocessor/processor_meta.py +33 -0
  134. autobyteus/agent/tool_invocation_preprocessor/processor_registry.py +60 -0
  135. autobyteus/agent/utils/wait_for_idle.py +12 -14
  136. autobyteus/agent/workspace/base_workspace.py +6 -27
  137. autobyteus/agent_team/agent_team.py +3 -3
  138. autobyteus/agent_team/agent_team_builder.py +1 -41
  139. autobyteus/agent_team/bootstrap_steps/__init__.py +0 -4
  140. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +8 -18
  141. autobyteus/agent_team/bootstrap_steps/agent_team_bootstrapper.py +4 -16
  142. autobyteus/agent_team/bootstrap_steps/base_agent_team_bootstrap_step.py +1 -2
  143. autobyteus/agent_team/bootstrap_steps/coordinator_initialization_step.py +1 -2
  144. autobyteus/agent_team/bootstrap_steps/task_notifier_initialization_step.py +1 -2
  145. autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +4 -4
  146. autobyteus/agent_team/context/agent_team_config.py +6 -3
  147. autobyteus/agent_team/context/agent_team_context.py +25 -3
  148. autobyteus/agent_team/context/agent_team_runtime_state.py +9 -6
  149. autobyteus/agent_team/events/__init__.py +11 -0
  150. autobyteus/agent_team/events/agent_team_event_dispatcher.py +22 -9
  151. autobyteus/agent_team/events/agent_team_events.py +16 -0
  152. autobyteus/agent_team/events/event_store.py +57 -0
  153. autobyteus/agent_team/factory/agent_team_factory.py +8 -0
  154. autobyteus/agent_team/handlers/inter_agent_message_request_event_handler.py +18 -2
  155. autobyteus/agent_team/handlers/lifecycle_agent_team_event_handler.py +21 -5
  156. autobyteus/agent_team/handlers/process_user_message_event_handler.py +17 -8
  157. autobyteus/agent_team/handlers/tool_approval_team_event_handler.py +19 -4
  158. autobyteus/agent_team/runtime/agent_team_runtime.py +41 -10
  159. autobyteus/agent_team/runtime/agent_team_worker.py +69 -5
  160. autobyteus/agent_team/status/__init__.py +14 -0
  161. autobyteus/agent_team/status/agent_team_status.py +18 -0
  162. autobyteus/agent_team/status/agent_team_status_manager.py +33 -0
  163. autobyteus/agent_team/status/status_deriver.py +62 -0
  164. autobyteus/agent_team/status/status_update_utils.py +42 -0
  165. autobyteus/agent_team/streaming/__init__.py +2 -2
  166. autobyteus/agent_team/streaming/agent_team_event_notifier.py +6 -6
  167. autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +4 -4
  168. autobyteus/agent_team/streaming/agent_team_stream_events.py +3 -3
  169. autobyteus/agent_team/system_prompt_processor/__init__.py +6 -0
  170. autobyteus/agent_team/system_prompt_processor/team_manifest_injector_processor.py +76 -0
  171. autobyteus/agent_team/task_notification/task_notification_mode.py +19 -0
  172. autobyteus/agent_team/utils/wait_for_idle.py +4 -4
  173. autobyteus/cli/agent_cli.py +18 -10
  174. autobyteus/cli/agent_team_tui/app.py +14 -11
  175. autobyteus/cli/agent_team_tui/state.py +13 -15
  176. autobyteus/cli/agent_team_tui/widgets/agent_list_sidebar.py +15 -15
  177. autobyteus/cli/agent_team_tui/widgets/focus_pane.py +143 -36
  178. autobyteus/cli/agent_team_tui/widgets/renderables.py +1 -1
  179. autobyteus/cli/agent_team_tui/widgets/shared.py +25 -25
  180. autobyteus/cli/cli_display.py +193 -44
  181. autobyteus/cli/workflow_tui/app.py +9 -10
  182. autobyteus/cli/workflow_tui/state.py +14 -16
  183. autobyteus/cli/workflow_tui/widgets/agent_list_sidebar.py +15 -15
  184. autobyteus/cli/workflow_tui/widgets/focus_pane.py +137 -35
  185. autobyteus/cli/workflow_tui/widgets/renderables.py +1 -1
  186. autobyteus/cli/workflow_tui/widgets/shared.py +25 -25
  187. autobyteus/clients/autobyteus_client.py +94 -1
  188. autobyteus/events/event_types.py +11 -18
  189. autobyteus/llm/api/autobyteus_llm.py +33 -29
  190. autobyteus/llm/api/claude_llm.py +142 -36
  191. autobyteus/llm/api/gemini_llm.py +163 -59
  192. autobyteus/llm/api/grok_llm.py +1 -1
  193. autobyteus/llm/api/minimax_llm.py +26 -0
  194. autobyteus/llm/api/mistral_llm.py +113 -87
  195. autobyteus/llm/api/ollama_llm.py +9 -42
  196. autobyteus/llm/api/openai_compatible_llm.py +127 -91
  197. autobyteus/llm/api/openai_llm.py +3 -3
  198. autobyteus/llm/api/openai_responses_llm.py +324 -0
  199. autobyteus/llm/api/zhipu_llm.py +21 -2
  200. autobyteus/llm/autobyteus_provider.py +70 -60
  201. autobyteus/llm/base_llm.py +85 -81
  202. autobyteus/llm/converters/__init__.py +14 -0
  203. autobyteus/llm/converters/anthropic_tool_call_converter.py +37 -0
  204. autobyteus/llm/converters/gemini_tool_call_converter.py +57 -0
  205. autobyteus/llm/converters/mistral_tool_call_converter.py +37 -0
  206. autobyteus/llm/converters/openai_tool_call_converter.py +38 -0
  207. autobyteus/llm/extensions/base_extension.py +6 -12
  208. autobyteus/llm/extensions/token_usage_tracking_extension.py +45 -18
  209. autobyteus/llm/llm_factory.py +282 -204
  210. autobyteus/llm/lmstudio_provider.py +60 -49
  211. autobyteus/llm/models.py +35 -2
  212. autobyteus/llm/ollama_provider.py +60 -49
  213. autobyteus/llm/ollama_provider_resolver.py +0 -1
  214. autobyteus/llm/prompt_renderers/__init__.py +19 -0
  215. autobyteus/llm/prompt_renderers/anthropic_prompt_renderer.py +104 -0
  216. autobyteus/llm/prompt_renderers/autobyteus_prompt_renderer.py +19 -0
  217. autobyteus/llm/prompt_renderers/base_prompt_renderer.py +10 -0
  218. autobyteus/llm/prompt_renderers/gemini_prompt_renderer.py +63 -0
  219. autobyteus/llm/prompt_renderers/mistral_prompt_renderer.py +87 -0
  220. autobyteus/llm/prompt_renderers/ollama_prompt_renderer.py +51 -0
  221. autobyteus/llm/prompt_renderers/openai_chat_renderer.py +97 -0
  222. autobyteus/llm/prompt_renderers/openai_responses_renderer.py +101 -0
  223. autobyteus/llm/providers.py +1 -3
  224. autobyteus/llm/token_counter/claude_token_counter.py +56 -25
  225. autobyteus/llm/token_counter/mistral_token_counter.py +12 -8
  226. autobyteus/llm/token_counter/openai_token_counter.py +24 -5
  227. autobyteus/llm/token_counter/token_counter_factory.py +12 -5
  228. autobyteus/llm/utils/llm_config.py +6 -12
  229. autobyteus/llm/utils/media_payload_formatter.py +27 -20
  230. autobyteus/llm/utils/messages.py +55 -3
  231. autobyteus/llm/utils/response_types.py +3 -0
  232. autobyteus/llm/utils/tool_call_delta.py +31 -0
  233. autobyteus/memory/__init__.py +35 -0
  234. autobyteus/memory/compaction/__init__.py +9 -0
  235. autobyteus/memory/compaction/compaction_result.py +8 -0
  236. autobyteus/memory/compaction/compactor.py +89 -0
  237. autobyteus/memory/compaction/summarizer.py +11 -0
  238. autobyteus/memory/compaction_snapshot_builder.py +84 -0
  239. autobyteus/memory/memory_manager.py +205 -0
  240. autobyteus/memory/models/__init__.py +14 -0
  241. autobyteus/memory/models/episodic_item.py +41 -0
  242. autobyteus/memory/models/memory_types.py +7 -0
  243. autobyteus/memory/models/raw_trace_item.py +79 -0
  244. autobyteus/memory/models/semantic_item.py +41 -0
  245. autobyteus/memory/models/tool_interaction.py +20 -0
  246. autobyteus/memory/path_resolver.py +27 -0
  247. autobyteus/memory/policies/__init__.py +5 -0
  248. autobyteus/memory/policies/compaction_policy.py +16 -0
  249. autobyteus/memory/restore/__init__.py +1 -0
  250. autobyteus/memory/restore/working_context_snapshot_bootstrapper.py +61 -0
  251. autobyteus/memory/retrieval/__init__.py +7 -0
  252. autobyteus/memory/retrieval/memory_bundle.py +11 -0
  253. autobyteus/memory/retrieval/retriever.py +13 -0
  254. autobyteus/memory/store/__init__.py +9 -0
  255. autobyteus/memory/store/base_store.py +14 -0
  256. autobyteus/memory/store/file_store.py +98 -0
  257. autobyteus/memory/store/working_context_snapshot_store.py +28 -0
  258. autobyteus/memory/tool_interaction_builder.py +46 -0
  259. autobyteus/memory/turn_tracker.py +9 -0
  260. autobyteus/memory/working_context_snapshot.py +69 -0
  261. autobyteus/memory/working_context_snapshot_serializer.py +135 -0
  262. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +19 -5
  263. autobyteus/multimedia/audio/api/gemini_audio_client.py +109 -16
  264. autobyteus/multimedia/audio/audio_client_factory.py +47 -9
  265. autobyteus/multimedia/audio/audio_model.py +2 -1
  266. autobyteus/multimedia/image/api/autobyteus_image_client.py +19 -5
  267. autobyteus/multimedia/image/api/gemini_image_client.py +39 -17
  268. autobyteus/multimedia/image/api/openai_image_client.py +125 -43
  269. autobyteus/multimedia/image/autobyteus_image_provider.py +2 -1
  270. autobyteus/multimedia/image/image_client_factory.py +47 -15
  271. autobyteus/multimedia/image/image_model.py +5 -2
  272. autobyteus/multimedia/providers.py +3 -2
  273. autobyteus/skills/loader.py +71 -0
  274. autobyteus/skills/model.py +11 -0
  275. autobyteus/skills/registry.py +70 -0
  276. autobyteus/task_management/tools/todo_tools/add_todo.py +2 -2
  277. autobyteus/task_management/tools/todo_tools/create_todo_list.py +2 -2
  278. autobyteus/task_management/tools/todo_tools/update_todo_status.py +2 -2
  279. autobyteus/tools/__init__.py +34 -47
  280. autobyteus/tools/base_tool.py +7 -0
  281. autobyteus/tools/file/__init__.py +2 -6
  282. autobyteus/tools/file/patch_file.py +149 -0
  283. autobyteus/tools/file/read_file.py +36 -5
  284. autobyteus/tools/file/write_file.py +4 -1
  285. autobyteus/tools/functional_tool.py +43 -6
  286. autobyteus/tools/mcp/__init__.py +2 -0
  287. autobyteus/tools/mcp/config_service.py +5 -1
  288. autobyteus/tools/mcp/server/__init__.py +2 -0
  289. autobyteus/tools/mcp/server/http_managed_mcp_server.py +1 -1
  290. autobyteus/tools/mcp/server/websocket_managed_mcp_server.py +141 -0
  291. autobyteus/tools/mcp/server_instance_manager.py +8 -1
  292. autobyteus/tools/mcp/types.py +61 -0
  293. autobyteus/tools/multimedia/audio_tools.py +70 -17
  294. autobyteus/tools/multimedia/download_media_tool.py +18 -4
  295. autobyteus/tools/multimedia/image_tools.py +246 -62
  296. autobyteus/tools/operation_executor/journal_manager.py +107 -0
  297. autobyteus/tools/operation_executor/operation_event_buffer.py +57 -0
  298. autobyteus/tools/operation_executor/operation_event_producer.py +29 -0
  299. autobyteus/tools/operation_executor/operation_executor.py +58 -0
  300. autobyteus/tools/registry/tool_definition.py +43 -2
  301. autobyteus/tools/skill/load_skill.py +50 -0
  302. autobyteus/tools/terminal/__init__.py +45 -0
  303. autobyteus/tools/terminal/ansi_utils.py +32 -0
  304. autobyteus/tools/terminal/background_process_manager.py +233 -0
  305. autobyteus/tools/terminal/output_buffer.py +105 -0
  306. autobyteus/tools/terminal/prompt_detector.py +63 -0
  307. autobyteus/tools/terminal/pty_session.py +241 -0
  308. autobyteus/tools/terminal/session_factory.py +20 -0
  309. autobyteus/tools/terminal/terminal_session_manager.py +226 -0
  310. autobyteus/tools/terminal/tools/__init__.py +13 -0
  311. autobyteus/tools/terminal/tools/get_process_output.py +81 -0
  312. autobyteus/tools/terminal/tools/run_bash.py +109 -0
  313. autobyteus/tools/terminal/tools/start_background_process.py +104 -0
  314. autobyteus/tools/terminal/tools/stop_background_process.py +67 -0
  315. autobyteus/tools/terminal/types.py +54 -0
  316. autobyteus/tools/terminal/wsl_tmux_session.py +221 -0
  317. autobyteus/tools/terminal/wsl_utils.py +156 -0
  318. autobyteus/tools/transaction_management/backup_handler.py +48 -0
  319. autobyteus/tools/transaction_management/operation_lifecycle_manager.py +62 -0
  320. autobyteus/tools/usage/__init__.py +1 -2
  321. autobyteus/tools/usage/formatters/__init__.py +17 -1
  322. autobyteus/tools/usage/formatters/base_formatter.py +8 -0
  323. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +2 -2
  324. autobyteus/tools/usage/formatters/mistral_json_schema_formatter.py +18 -0
  325. autobyteus/tools/usage/formatters/patch_file_xml_example_formatter.py +64 -0
  326. autobyteus/tools/usage/formatters/patch_file_xml_schema_formatter.py +31 -0
  327. autobyteus/tools/usage/formatters/run_bash_xml_example_formatter.py +32 -0
  328. autobyteus/tools/usage/formatters/run_bash_xml_schema_formatter.py +36 -0
  329. autobyteus/tools/usage/formatters/write_file_xml_example_formatter.py +53 -0
  330. autobyteus/tools/usage/formatters/write_file_xml_schema_formatter.py +31 -0
  331. autobyteus/tools/usage/providers/tool_manifest_provider.py +10 -10
  332. autobyteus/tools/usage/registries/__init__.py +1 -3
  333. autobyteus/tools/usage/registries/tool_formatting_registry.py +115 -8
  334. autobyteus/tools/usage/tool_schema_provider.py +51 -0
  335. autobyteus/tools/web/__init__.py +4 -0
  336. autobyteus/tools/web/read_url_tool.py +80 -0
  337. autobyteus/utils/diff_utils.py +271 -0
  338. autobyteus/utils/download_utils.py +109 -0
  339. autobyteus/utils/file_utils.py +57 -2
  340. autobyteus/utils/gemini_helper.py +64 -0
  341. autobyteus/utils/gemini_model_mapping.py +71 -0
  342. autobyteus/utils/llm_output_formatter.py +75 -0
  343. autobyteus/utils/tool_call_format.py +36 -0
  344. autobyteus/workflow/agentic_workflow.py +3 -3
  345. autobyteus/workflow/bootstrap_steps/agent_tool_injection_step.py +2 -2
  346. autobyteus/workflow/bootstrap_steps/base_workflow_bootstrap_step.py +2 -2
  347. autobyteus/workflow/bootstrap_steps/coordinator_initialization_step.py +2 -2
  348. autobyteus/workflow/bootstrap_steps/coordinator_prompt_preparation_step.py +3 -9
  349. autobyteus/workflow/bootstrap_steps/workflow_bootstrapper.py +6 -6
  350. autobyteus/workflow/bootstrap_steps/workflow_runtime_queue_initialization_step.py +2 -2
  351. autobyteus/workflow/context/workflow_context.py +3 -3
  352. autobyteus/workflow/context/workflow_runtime_state.py +5 -5
  353. autobyteus/workflow/events/workflow_event_dispatcher.py +5 -5
  354. autobyteus/workflow/handlers/lifecycle_workflow_event_handler.py +3 -3
  355. autobyteus/workflow/handlers/process_user_message_event_handler.py +5 -5
  356. autobyteus/workflow/handlers/tool_approval_workflow_event_handler.py +2 -2
  357. autobyteus/workflow/runtime/workflow_runtime.py +8 -8
  358. autobyteus/workflow/runtime/workflow_worker.py +3 -3
  359. autobyteus/workflow/status/__init__.py +11 -0
  360. autobyteus/workflow/status/workflow_status.py +19 -0
  361. autobyteus/workflow/status/workflow_status_manager.py +48 -0
  362. autobyteus/workflow/streaming/__init__.py +2 -2
  363. autobyteus/workflow/streaming/workflow_event_notifier.py +7 -7
  364. autobyteus/workflow/streaming/workflow_stream_event_payloads.py +4 -4
  365. autobyteus/workflow/streaming/workflow_stream_events.py +3 -3
  366. autobyteus/workflow/utils/wait_for_idle.py +4 -4
  367. autobyteus-1.3.0.dist-info/METADATA +293 -0
  368. autobyteus-1.3.0.dist-info/RECORD +606 -0
  369. {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/WHEEL +1 -1
  370. {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/top_level.txt +0 -1
  371. autobyteus/agent/bootstrap_steps/agent_runtime_queue_initialization_step.py +0 -57
  372. autobyteus/agent/hooks/__init__.py +0 -16
  373. autobyteus/agent/hooks/base_phase_hook.py +0 -78
  374. autobyteus/agent/hooks/hook_definition.py +0 -36
  375. autobyteus/agent/hooks/hook_meta.py +0 -37
  376. autobyteus/agent/hooks/hook_registry.py +0 -106
  377. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +0 -103
  378. autobyteus/agent/phases/__init__.py +0 -18
  379. autobyteus/agent/phases/discover.py +0 -53
  380. autobyteus/agent/phases/manager.py +0 -265
  381. autobyteus/agent/phases/transition_decorator.py +0 -40
  382. autobyteus/agent/phases/transition_info.py +0 -33
  383. autobyteus/agent/remote_agent.py +0 -244
  384. autobyteus/agent/workspace/workspace_definition.py +0 -36
  385. autobyteus/agent/workspace/workspace_meta.py +0 -37
  386. autobyteus/agent/workspace/workspace_registry.py +0 -72
  387. autobyteus/agent_team/bootstrap_steps/agent_team_runtime_queue_initialization_step.py +0 -25
  388. autobyteus/agent_team/bootstrap_steps/coordinator_prompt_preparation_step.py +0 -85
  389. autobyteus/agent_team/phases/__init__.py +0 -11
  390. autobyteus/agent_team/phases/agent_team_operational_phase.py +0 -19
  391. autobyteus/agent_team/phases/agent_team_phase_manager.py +0 -48
  392. autobyteus/llm/api/bedrock_llm.py +0 -92
  393. autobyteus/llm/api/groq_llm.py +0 -94
  394. autobyteus/llm/api/nvidia_llm.py +0 -108
  395. autobyteus/llm/utils/token_pricing_config.py +0 -87
  396. autobyteus/rpc/__init__.py +0 -73
  397. autobyteus/rpc/client/__init__.py +0 -17
  398. autobyteus/rpc/client/abstract_client_connection.py +0 -124
  399. autobyteus/rpc/client/client_connection_manager.py +0 -153
  400. autobyteus/rpc/client/sse_client_connection.py +0 -306
  401. autobyteus/rpc/client/stdio_client_connection.py +0 -280
  402. autobyteus/rpc/config/__init__.py +0 -13
  403. autobyteus/rpc/config/agent_server_config.py +0 -153
  404. autobyteus/rpc/config/agent_server_registry.py +0 -152
  405. autobyteus/rpc/hosting.py +0 -244
  406. autobyteus/rpc/protocol.py +0 -244
  407. autobyteus/rpc/server/__init__.py +0 -20
  408. autobyteus/rpc/server/agent_server_endpoint.py +0 -181
  409. autobyteus/rpc/server/base_method_handler.py +0 -40
  410. autobyteus/rpc/server/method_handlers.py +0 -259
  411. autobyteus/rpc/server/sse_server_handler.py +0 -182
  412. autobyteus/rpc/server/stdio_server_handler.py +0 -151
  413. autobyteus/rpc/server_main.py +0 -198
  414. autobyteus/rpc/transport_type.py +0 -13
  415. autobyteus/tools/bash/__init__.py +0 -2
  416. autobyteus/tools/bash/bash_executor.py +0 -100
  417. autobyteus/tools/browser/__init__.py +0 -2
  418. autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +0 -75
  419. autobyteus/tools/browser/session_aware/browser_session_aware_tool.py +0 -30
  420. autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +0 -154
  421. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +0 -89
  422. autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +0 -107
  423. autobyteus/tools/browser/session_aware/factory/browser_session_aware_web_element_trigger_factory.py +0 -14
  424. autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_reader_factory.py +0 -26
  425. autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_screenshot_taker_factory.py +0 -14
  426. autobyteus/tools/browser/session_aware/shared_browser_session.py +0 -11
  427. autobyteus/tools/browser/session_aware/shared_browser_session_manager.py +0 -25
  428. autobyteus/tools/browser/session_aware/web_element_action.py +0 -20
  429. autobyteus/tools/browser/standalone/__init__.py +0 -6
  430. autobyteus/tools/browser/standalone/factory/__init__.py +0 -0
  431. autobyteus/tools/browser/standalone/factory/webpage_reader_factory.py +0 -25
  432. autobyteus/tools/browser/standalone/factory/webpage_screenshot_taker_factory.py +0 -14
  433. autobyteus/tools/browser/standalone/navigate_to.py +0 -84
  434. autobyteus/tools/browser/standalone/web_page_pdf_generator.py +0 -101
  435. autobyteus/tools/browser/standalone/webpage_image_downloader.py +0 -169
  436. autobyteus/tools/browser/standalone/webpage_reader.py +0 -105
  437. autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +0 -105
  438. autobyteus/tools/file/edit_file.py +0 -200
  439. autobyteus/tools/file/list_directory.py +0 -168
  440. autobyteus/tools/file/search_files.py +0 -188
  441. autobyteus/tools/timer.py +0 -175
  442. autobyteus/tools/usage/parsers/__init__.py +0 -22
  443. autobyteus/tools/usage/parsers/_json_extractor.py +0 -99
  444. autobyteus/tools/usage/parsers/_string_decoders.py +0 -18
  445. autobyteus/tools/usage/parsers/anthropic_xml_tool_usage_parser.py +0 -10
  446. autobyteus/tools/usage/parsers/base_parser.py +0 -41
  447. autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +0 -83
  448. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +0 -316
  449. autobyteus/tools/usage/parsers/exceptions.py +0 -13
  450. autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +0 -77
  451. autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +0 -149
  452. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +0 -59
  453. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +0 -62
  454. autobyteus/workflow/phases/__init__.py +0 -11
  455. autobyteus/workflow/phases/workflow_operational_phase.py +0 -19
  456. autobyteus/workflow/phases/workflow_phase_manager.py +0 -48
  457. autobyteus-1.2.1.dist-info/METADATA +0 -205
  458. autobyteus-1.2.1.dist-info/RECORD +0 -511
  459. examples/__init__.py +0 -1
  460. examples/agent_team/__init__.py +0 -1
  461. examples/discover_phase_transitions.py +0 -104
  462. examples/run_agentic_software_engineer.py +0 -239
  463. examples/run_browser_agent.py +0 -262
  464. examples/run_google_slides_agent.py +0 -287
  465. examples/run_mcp_browser_client.py +0 -174
  466. examples/run_mcp_google_slides_client.py +0 -270
  467. examples/run_mcp_list_tools.py +0 -189
  468. examples/run_poem_writer.py +0 -284
  469. examples/run_sqlite_agent.py +0 -295
  470. /autobyteus/{tools/browser/session_aware → skills}/__init__.py +0 -0
  471. /autobyteus/tools/{browser/session_aware/factory → skill}/__init__.py +0 -0
  472. {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,135 @@
1
+ import json
2
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
3
+
4
+ from autobyteus.llm.utils.messages import (
5
+ Message,
6
+ MessageRole,
7
+ ToolCallPayload,
8
+ ToolCallSpec,
9
+ ToolResultPayload,
10
+ )
11
+ from autobyteus.memory.working_context_snapshot import WorkingContextSnapshot
12
+
13
+
14
class WorkingContextSnapshotSerializer:
    """Convert a WorkingContextSnapshot to and from a plain-dict payload.

    Every method is static; the class is used purely as a namespace.
    """

    @staticmethod
    def serialize(working_context_snapshot: "WorkingContextSnapshot", metadata: Dict[str, Any]) -> Dict[str, Any]:
        """Build a payload dict from a snapshot plus caller-supplied metadata.

        Args:
            working_context_snapshot: Snapshot whose ``build_messages()`` output is serialized.
            metadata: Optional overrides. ``schema_version`` defaults to 1; ``epoch_id`` and
                ``last_compaction_ts`` default to the snapshot's own attributes;
                ``agent_id`` is ``None`` when absent.

        Returns:
            A dict with the keys ``schema_version``, ``agent_id``, ``epoch_id``,
            ``last_compaction_ts`` and ``messages``.
        """
        serialized_messages = [
            WorkingContextSnapshotSerializer._serialize_message(msg)
            for msg in working_context_snapshot.build_messages()
        ]
        return {
            "schema_version": metadata.get("schema_version", 1),
            "agent_id": metadata.get("agent_id"),
            "epoch_id": metadata.get("epoch_id", working_context_snapshot.epoch_id),
            "last_compaction_ts": metadata.get("last_compaction_ts", working_context_snapshot.last_compaction_ts),
            "messages": serialized_messages,
        }

    @staticmethod
    def deserialize(payload: Dict[str, Any]) -> Tuple["WorkingContextSnapshot", Dict[str, Any]]:
        """Rebuild a snapshot and its metadata from a payload dict.

        Entries of ``payload["messages"]`` that are not dicts are skipped.

        Returns:
            ``(snapshot, metadata)`` where ``metadata`` carries ``schema_version``,
            ``agent_id``, ``epoch_id`` and ``last_compaction_ts`` as found in the payload.
        """
        messages = [
            WorkingContextSnapshotSerializer._deserialize_message(msg)
            for msg in payload.get("messages", [])
            if isinstance(msg, dict)
        ]
        snapshot = WorkingContextSnapshot(initial_messages=messages)
        metadata = {
            "schema_version": payload.get("schema_version"),
            "agent_id": payload.get("agent_id"),
            "epoch_id": payload.get("epoch_id"),
            "last_compaction_ts": payload.get("last_compaction_ts"),
        }
        # Only overwrite the freshly constructed snapshot's values when the payload has usable ones.
        if isinstance(metadata["epoch_id"], int):
            snapshot.epoch_id = metadata["epoch_id"]
        if metadata["last_compaction_ts"] is not None:
            snapshot.last_compaction_ts = metadata["last_compaction_ts"]
        return snapshot, metadata

    @staticmethod
    def validate(payload: Dict[str, Any]) -> bool:
        """Return True when ``payload`` has the minimal expected structure.

        Checks: payload is a dict, ``schema_version`` is an int, ``agent_id`` is a str,
        ``messages`` is a list, and every message is a dict with a str ``role``.
        """
        if not isinstance(payload, dict):
            return False
        if not isinstance(payload.get("schema_version"), int):
            return False
        if not isinstance(payload.get("agent_id"), str):
            return False
        messages = payload.get("messages")
        if not isinstance(messages, list):
            return False
        return all(
            isinstance(msg, dict) and isinstance(msg.get("role"), str)
            for msg in messages
        )

    @staticmethod
    def _serialize_message(message: "Message") -> Dict[str, Any]:
        """Return ``message.to_dict()`` with any truthy ``tool_payload`` normalized."""
        base = message.to_dict()
        if base.get("tool_payload"):
            base["tool_payload"] = WorkingContextSnapshotSerializer._normalize_tool_payload(base["tool_payload"])
        return base

    @staticmethod
    def _deserialize_message(data: Dict[str, Any]) -> "Message":
        """Build a Message from its dict form; missing media URL lists become empty lists."""
        role = MessageRole(data.get("role"))
        tool_payload = WorkingContextSnapshotSerializer._deserialize_tool_payload(data.get("tool_payload"))
        return Message(
            role=role,
            content=data.get("content"),
            reasoning_content=data.get("reasoning_content"),
            image_urls=data.get("image_urls") or [],
            audio_urls=data.get("audio_urls") or [],
            video_urls=data.get("video_urls") or [],
            tool_payload=tool_payload,
        )

    @staticmethod
    def _normalize_tool_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a tool payload dict to its known keys with JSON-safe values.

        A payload containing ``tool_calls`` is treated as a tool-call payload;
        anything else is treated as a tool-result payload.
        """
        if "tool_calls" in payload:
            # ``or []`` tolerates an explicit ``None``, matching _deserialize_tool_payload.
            return {
                "tool_calls": [
                    {
                        "id": call.get("id"),
                        "name": call.get("name"),
                        "arguments": WorkingContextSnapshotSerializer._safe_json_value(call.get("arguments")),
                    }
                    for call in payload.get("tool_calls") or []
                ]
            }
        return {
            "tool_call_id": payload.get("tool_call_id"),
            "tool_name": payload.get("tool_name"),
            "tool_result": WorkingContextSnapshotSerializer._safe_json_value(payload.get("tool_result")),
            "tool_error": payload.get("tool_error"),
        }

    @staticmethod
    def _deserialize_tool_payload(payload: Optional[Dict[str, Any]]) -> Optional[Any]:
        """Turn a normalized tool payload dict back into a payload object.

        Returns:
            A ToolCallPayload when ``tool_calls`` is present, a ToolResultPayload when
            ``tool_call_id`` is present, otherwise ``None`` (also for empty input).
        """
        if not payload:
            return None
        if "tool_calls" in payload:
            calls = [
                ToolCallSpec(
                    id=str(call.get("id")),
                    name=str(call.get("name")),
                    arguments=call.get("arguments") or {},
                )
                for call in payload.get("tool_calls", []) or []
            ]
            return ToolCallPayload(tool_calls=calls)
        if "tool_call_id" in payload:
            return ToolResultPayload(
                tool_call_id=str(payload.get("tool_call_id")),
                tool_name=str(payload.get("tool_name")),
                tool_result=payload.get("tool_result"),
                tool_error=payload.get("tool_error"),
            )
        return None

    @staticmethod
    def _safe_json_value(value: Any) -> Any:
        """Return ``value`` unchanged if ``json.dumps`` accepts it, else ``str(value)``.

        ``json.dumps`` raises ``TypeError`` for unsupported types and ``ValueError``
        for circular references; both fall back to the string form.
        """
        try:
            json.dumps(value)
        except (TypeError, ValueError):
            return str(value)
        return value
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import uuid
2
3
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
3
4
  from autobyteus.clients import AutobyteusClient
4
5
  from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
13
14
  class AutobyteusAudioClient(BaseAudioClient):
14
15
  """
15
16
  An audio client that connects to an Autobyteus server instance for audio tasks.
17
+ Maintains a persistent session ID for stateful interactions.
16
18
  """
17
19
 
18
20
  def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
@@ -21,7 +23,9 @@ class AutobyteusAudioClient(BaseAudioClient):
21
23
  raise ValueError("AutobyteusAudioClient requires a host_url in its AudioModel.")
22
24
 
23
25
  self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
24
- logger.info(f"AutobyteusAudioClient initialized for model '{model.name}' on host '{model.host_url}'.")
26
+ self.session_id = str(uuid.uuid4())
27
+ logger.info(f"AutobyteusAudioClient initialized for model '{model.name}' "
28
+ f"on host '{model.host_url}' with session_id '{self.session_id}'.")
25
29
 
26
30
  async def generate_speech(
27
31
  self,
@@ -33,7 +37,7 @@ class AutobyteusAudioClient(BaseAudioClient):
33
37
  Generates speech by calling the generate_speech endpoint on the remote Autobyteus server.
34
38
  """
35
39
  try:
36
- logger.info(f"Sending speech generation request for model '{self.model.name}' to {self.model.host_url}")
40
+ logger.info(f"Sending speech generation request for model '{self.model.name}' to {self.model.host_url} (Session: {self.session_id})")
37
41
 
38
42
  model_name_for_server = self.model.name
39
43
 
@@ -42,7 +46,8 @@ class AutobyteusAudioClient(BaseAudioClient):
42
46
  response_data = await self.autobyteus_client.generate_speech(
43
47
  model_name=model_name_for_server,
44
48
  prompt=prompt,
45
- generation_config=generation_config
49
+ generation_config=generation_config,
50
+ session_id=self.session_id
46
51
  )
47
52
 
48
53
  audio_urls = response_data.get("audio_urls", [])
@@ -56,7 +61,16 @@ class AutobyteusAudioClient(BaseAudioClient):
56
61
  raise
57
62
 
58
63
  async def cleanup(self):
59
- """Closes the underlying AutobyteusClient."""
64
+ """
65
+ Notifies the server to cleanup the session, then closes the underlying HTTP client.
66
+ """
60
67
  if self.autobyteus_client:
61
- await self.autobyteus_client.close()
68
+ try:
69
+ logger.info(f"Notifying server to cleanup audio session '{self.session_id}'...")
70
+ await self.autobyteus_client.cleanup_audio_session(self.session_id)
71
+ except Exception as e:
72
+ logger.error(f"Failed to cleanup remote audio session '{self.session_id}': {e}")
73
+ finally:
74
+ await self.autobyteus_client.close()
75
+
62
76
  logger.debug("AutobyteusAudioClient cleaned up.")
@@ -9,6 +9,8 @@ from google.genai import types as genai_types
9
9
 
10
10
  from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
11
11
  from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
12
+ from autobyteus.utils.gemini_helper import initialize_gemini_client_with_runtime
13
+ from autobyteus.utils.gemini_model_mapping import resolve_model_for_runtime
12
14
 
13
15
  if TYPE_CHECKING:
14
16
  from autobyteus.multimedia.audio.audio_model import AudioModel
@@ -17,11 +19,22 @@ if TYPE_CHECKING:
17
19
  logger = logging.getLogger(__name__)
18
20
 
19
21
 
22
+ _AUDIO_TEMP_DIR = "/tmp/autobyteus_audio"
23
+
24
+ _AUDIO_MIME_EXTENSION_MAP = {
25
+ "audio/wav": "wav",
26
+ "audio/x-wav": "wav",
27
+ "audio/mpeg": "mp3",
28
+ "audio/mp3": "mp3",
29
+ "audio/ogg": "ogg",
30
+ "audio/webm": "webm",
31
+ }
32
+
33
+
20
34
  def _save_audio_bytes_to_wav(pcm_bytes: bytes, channels=1, rate=24000, sample_width=2) -> str:
21
35
  """Saves PCM audio bytes to a temporary WAV file and returns the path."""
22
- temp_dir = "/tmp/autobyteus_audio"
23
- os.makedirs(temp_dir, exist_ok=True)
24
- file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.wav")
36
+ os.makedirs(_AUDIO_TEMP_DIR, exist_ok=True)
37
+ file_path = os.path.join(_AUDIO_TEMP_DIR, f"{uuid.uuid4()}.wav")
25
38
 
26
39
  try:
27
40
  with wave.open(file_path, "wb") as wf:
@@ -36,22 +49,63 @@ def _save_audio_bytes_to_wav(pcm_bytes: bytes, channels=1, rate=24000, sample_wi
36
49
  raise
37
50
 
38
51
 
52
def _save_audio_bytes(audio_bytes: bytes, extension: Optional[str]) -> str:
    """Write audio bytes to a uniquely named file in the audio temp directory.

    Args:
        audio_bytes: Raw bytes to write.
        extension: File extension with or without leading dots; "bin" is used when falsy.

    Returns:
        The path of the written file.

    Raises:
        Exception: Any error from opening or writing the file is logged and re-raised.
    """
    os.makedirs(_AUDIO_TEMP_DIR, exist_ok=True)
    chosen_extension = extension if extension else "bin"
    suffix = chosen_extension.lstrip(".")
    file_name = f"{uuid.uuid4()}.{suffix}"
    file_path = os.path.join(_AUDIO_TEMP_DIR, file_name)
    try:
        with open(file_path, "wb") as sink:
            sink.write(audio_bytes)
        logger.info(f"Successfully saved generated audio to {file_path}")
        return file_path
    except Exception as e:
        logger.error(f"Failed to save audio to file at {file_path}: {e}")
        raise
65
+
66
+
67
def _parse_mime_type(mime_type: Optional[str]) -> tuple[str, Dict[str, str]]:
    """Split a MIME type string into its lowercased base type and its parameters.

    Args:
        mime_type: For example ``"audio/L16;codec=pcm;rate=24000"``; may be None or empty.

    Returns:
        ``(base, params)``. ``base`` is the lowercased, stripped text before the first
        ``;`` (``""`` when it is missing). ``params`` maps lowercased parameter names
        to stripped values, with one pair of surrounding double quotes removed.
        Segments without ``=`` are ignored.
    """
    if not mime_type:
        return "", {}
    # The base is always the first segment, even when empty, so a leading ';'
    # cannot promote a parameter into the base position.
    base_segment, *param_segments = mime_type.split(";")
    base = base_segment.strip().lower()
    params: Dict[str, str] = {}
    for segment in param_segments:
        key, separator, value = segment.partition("=")
        if not separator:
            continue
        value = value.strip()
        # Parameter values may be quoted strings (RFC 2045); callers want the bare value.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        params[key.strip().lower()] = value
    return base, params
78
+
79
+
80
def _coerce_audio_bytes(audio_data: Any) -> bytes:
    """Normalize an audio payload to ``bytes``.

    ``None`` becomes empty bytes, ``bytes`` is returned unchanged, ``bytearray`` and
    ``memoryview`` are copied to ``bytes``, a ``str`` is base64-decoded, and any other
    value is passed to ``bytes()``.
    """
    if audio_data is None:
        return b""
    if isinstance(audio_data, bytes):
        return audio_data
    if isinstance(audio_data, memoryview):
        return audio_data.tobytes()
    if isinstance(audio_data, str):
        # String payloads are treated as base64 text.
        return base64.b64decode(audio_data)
    # Covers bytearray as well as any other object accepted by bytes().
    return bytes(audio_data)
92
+
93
+
39
94
  class GeminiAudioClient(BaseAudioClient):
40
95
  """
41
96
  An audio client that uses Google's Gemini models for audio tasks.
42
97
 
43
98
  **Setup Requirements:**
44
- 1. **Authentication:** Set the `GEMINI_API_KEY` environment variable with your API key.
99
+ 1. **Vertex AI Express Mode:** Set `VERTEX_AI_API_KEY`.
100
+ 2. **Vertex AI Mode:** Set `VERTEX_AI_PROJECT` and `VERTEX_AI_LOCATION`.
101
+ 3. **AI Studio Mode:** Set `GEMINI_API_KEY`.
45
102
  """
46
103
 
47
104
  def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
48
105
  super().__init__(model, config)
49
- api_key = os.getenv("GEMINI_API_KEY")
50
- if not api_key:
51
- raise ValueError("Please set the GEMINI_API_KEY environment variable.")
52
-
106
+
53
107
  try:
54
- self.client = genai.Client()
108
+ self.client, self.runtime_info = initialize_gemini_client_with_runtime()
55
109
  self.async_client = self.client.aio
56
110
  logger.info(f"GeminiAudioClient initialized for model '{self.model.name}'.")
57
111
  except Exception as e:
@@ -70,8 +124,6 @@ class GeminiAudioClient(BaseAudioClient):
70
124
  multi-speaker, and style-controlled generation.
71
125
  """
72
126
  try:
73
- logger.info(f"Generating speech with Gemini TTS model '{self.model.value}'...")
74
-
75
127
  final_config = self.config.to_dict().copy()
76
128
  if generation_config:
77
129
  final_config.update(generation_config)
@@ -126,8 +178,19 @@ class GeminiAudioClient(BaseAudioClient):
126
178
  )
127
179
 
128
180
  # The google-genai library's TTS endpoint uses a synchronous call.
181
+ # FIX: Ensure no 'models/' prefix is used here.
182
+ runtime_adjusted_model = resolve_model_for_runtime(
183
+ self.model.value,
184
+ modality="tts",
185
+ runtime=getattr(self, "runtime_info", None) and self.runtime_info.runtime,
186
+ )
187
+ logger.info(
188
+ "Generating speech with Gemini TTS model '%s' (requested '%s').",
189
+ runtime_adjusted_model,
190
+ self.model.value,
191
+ )
129
192
  resp = self.client.models.generate_content(
130
- model=self.model.value,
193
+ model=runtime_adjusted_model,
131
194
  contents=final_prompt,
132
195
  config=genai_types.GenerateContentConfig(
133
196
  response_modalities=["AUDIO"],
@@ -135,10 +198,40 @@ class GeminiAudioClient(BaseAudioClient):
135
198
  ),
136
199
  )
137
200
 
138
- audio_b64 = resp.candidates[0].content.parts[0].inline_data.data
139
- audio_pcm = base64.b64decode(audio_b64)
140
-
141
- audio_path = _save_audio_bytes_to_wav(audio_pcm)
201
+ part = resp.candidates[0].content.parts[0]
202
+ inline_data = part.inline_data
203
+ if not inline_data or not inline_data.data:
204
+ raise ValueError("Gemini TTS response did not include audio data.")
205
+
206
+ mime_type, mime_params = _parse_mime_type(inline_data.mime_type)
207
+ audio_bytes = _coerce_audio_bytes(inline_data.data)
208
+ if not audio_bytes:
209
+ raise ValueError("Gemini TTS returned empty audio data.")
210
+
211
+ logger.info(
212
+ "Received Gemini TTS audio payload (mime_type='%s', bytes=%d).",
213
+ mime_type or "unknown",
214
+ len(audio_bytes),
215
+ )
216
+
217
+ if not mime_type or mime_type.startswith("audio/pcm") or mime_type == "audio/l16":
218
+ rate = 24000
219
+ channels = 1
220
+ if "rate" in mime_params:
221
+ try:
222
+ rate = int(mime_params["rate"])
223
+ except ValueError:
224
+ logger.warning("Invalid sample rate in mime_type '%s'; using default 24000.", inline_data.mime_type)
225
+ if "channels" in mime_params:
226
+ try:
227
+ channels = int(mime_params["channels"])
228
+ except ValueError:
229
+ logger.warning("Invalid channel count in mime_type '%s'; using default 1.", inline_data.mime_type)
230
+
231
+ audio_path = _save_audio_bytes_to_wav(audio_bytes, channels=channels, rate=rate, sample_width=2)
232
+ else:
233
+ extension = _AUDIO_MIME_EXTENSION_MAP.get(mime_type, "bin")
234
+ audio_path = _save_audio_bytes(audio_bytes, extension)
142
235
 
143
236
  return SpeechGenerationResponse(audio_urls=[audio_path])
144
237
 
@@ -13,13 +13,51 @@ from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefiniti
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
16
- GEMINI_TTS_VOICES = [
17
- "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
18
- "Callirrhoe", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba",
19
- "Despina", "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
20
- "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
21
- "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat"
16
+ # Enhanced metadata for Google Gemini TTS voices, including gender and description.
17
+ GEMINI_VOICE_DETAILS = {
18
+ "Zephyr": {"gender": "female", "description": "Bright, Higher pitch"},
19
+ "Puck": {"gender": "male", "description": "Upbeat, Middle pitch"},
20
+ "Charon": {"gender": "male", "description": "Informative, Lower pitch"},
21
+ "Kore": {"gender": "female", "description": "Firm, Middle pitch"},
22
+ "Fenrir": {"gender": "male", "description": "Excitable, Lower middle pitch"},
23
+ "Leda": {"gender": "female", "description": "Youthful, Higher pitch"},
24
+ "Orus": {"gender": "male", "description": "Firm, Lower middle pitch"},
25
+ "Aoede": {"gender": "female", "description": "Breezy, Middle pitch"},
26
+ "Callirrhoe": {"gender": "female", "description": "Easy-going, Middle pitch"},
27
+ "Autonoe": {"gender": "female", "description": "Bright, Middle pitch"},
28
+ "Enceladus": {"gender": "male", "description": "Breathy, Lower pitch"},
29
+ "Iapetus": {"gender": "male", "description": "Clear, Lower middle pitch"},
30
+ "Umbriel": {"gender": "male", "description": "Easy-going, Lower middle pitch"},
31
+ "Algieba": {"gender": "male", "description": "Smooth, Lower pitch"},
32
+ "Despina": {"gender": "female", "description": "Smooth, Middle pitch"},
33
+ "Erinome": {"gender": "female", "description": "Clear, Middle pitch"},
34
+ "Algenib": {"gender": "male", "description": "Gravelly, Lower pitch"},
35
+ "Rasalgethi": {"gender": "male", "description": "Informative, Middle pitch"},
36
+ "Laomedeia": {"gender": "female", "description": "Upbeat, Higher pitch"},
37
+ "Achernar": {"gender": "female", "description": "Soft, Higher pitch"},
38
+ "Alnilam": {"gender": "male", "description": "Firm, Lower middle pitch"},
39
+ "Schedar": {"gender": "male", "description": "Even, Lower middle pitch"},
40
+ "Gacrux": {"gender": "female", "description": "Mature, Middle pitch"},
41
+ "Pulcherrima": {"gender": "female", "description": "Forward, Middle pitch"},
42
+ "Achird": {"gender": "male", "description": "Friendly, Lower middle pitch"},
43
+ "Zubenelgenubi": {"gender": "male", "description": "Casual, Lower middle pitch"},
44
+ "Vindemiatrix": {"gender": "female", "description": "Gentle, Middle pitch"},
45
+ "Sadachbia": {"gender": "male", "description": "Lively, Lower pitch"},
46
+ "Sadaltager": {"gender": "male", "description": "Knowledgeable, Middle pitch"},
47
+ "Sulafat": {"gender": "female", "description": "Warm, Middle pitch"},
48
+ }
49
+
50
+ # The list of voice names, derived from the keys of the details dictionary.
51
+ # This is used for the `enum_values` to maintain compatibility.
52
+ GEMINI_TTS_VOICES = list(GEMINI_VOICE_DETAILS.keys())
53
+
54
+ # Generate a formatted string of voice metadata to be appended to parameter descriptions.
55
+ _voice_descriptions_list = [
56
+ f"- {name} ({details['gender']}): {details['description']}"
57
+ for name, details in GEMINI_VOICE_DETAILS.items()
22
58
  ]
59
+ GEMINI_VOICE_METADATA_DESC = "\n\nDetailed Voice Options:\n" + "\n".join(_voice_descriptions_list)
60
+
23
61
 
24
62
  OPENAI_TTS_VOICES = [
25
63
  "alloy", "ash", "ballad", "coral", "echo", "fable", "onyx",
@@ -64,7 +102,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
64
102
  ParameterDefinition(
65
103
  name="voice",
66
104
  param_type=ParameterType.ENUM,
67
- description="The voice to assign to this speaker.",
105
+ description="The voice to assign to this speaker." + GEMINI_VOICE_METADATA_DESC,
68
106
  enum_values=GEMINI_TTS_VOICES,
69
107
  required=True
70
108
  )
@@ -84,7 +122,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
84
122
  param_type=ParameterType.ENUM,
85
123
  default_value="Kore",
86
124
  enum_values=GEMINI_TTS_VOICES,
87
- description="The voice to use for single-speaker generation."
125
+ description="The voice to use for single-speaker generation." + GEMINI_VOICE_METADATA_DESC
88
126
  ),
89
127
  ParameterDefinition(
90
128
  name="style_instructions",
@@ -102,7 +140,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
102
140
  gemini_tts_model = AudioModel(
103
141
  name="gemini-2.5-flash-tts",
104
142
  value="gemini-2.5-flash-preview-tts",
105
- provider=MultimediaProvider.GOOGLE,
143
+ provider=MultimediaProvider.GEMINI,
106
144
  client_class=GeminiAudioClient,
107
145
  parameter_schema=gemini_tts_schema
108
146
  )
@@ -79,7 +79,8 @@ class AudioModel(metaclass=AudioModelMeta):
79
79
  """Returns the unique identifier for the model."""
80
80
  if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
81
81
  try:
82
- host = urlparse(self.host_url).hostname
82
+ parsed = urlparse(self.host_url)
83
+ host = parsed.netloc or parsed.hostname or self.host_url
83
84
  return f"{self.name}@{host}"
84
85
  except Exception:
85
86
  return f"{self.name}@{self.host_url}" # Fallback
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import uuid
2
3
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
3
4
  from autobyteus.clients import AutobyteusClient
4
5
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
13
14
  class AutobyteusImageClient(BaseImageClient):
14
15
  """
15
16
  An image client that connects to an Autobyteus LLM server instance for image tasks.
17
+ Maintains a persistent session ID for stateful interactions (e.g. conversational editing).
16
18
  """
17
19
 
18
20
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
@@ -21,7 +23,9 @@ class AutobyteusImageClient(BaseImageClient):
21
23
  raise ValueError("AutobyteusImageClient requires a host_url in its ImageModel.")
22
24
 
23
25
  self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
24
- logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' on host '{model.host_url}'.")
26
+ self.session_id = str(uuid.uuid4())
27
+ logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' "
28
+ f"on host '{model.host_url}' with session_id '{self.session_id}'.")
25
29
 
26
30
  async def generate_image(
27
31
  self,
@@ -72,7 +76,7 @@ class AutobyteusImageClient(BaseImageClient):
72
76
  ) -> ImageGenerationResponse:
73
77
  """Internal helper to call the remote server."""
74
78
  try:
75
- logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url}")
79
+ logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url} (Session: {self.session_id})")
76
80
 
77
81
  # The model name for the remote server is the `value`, not the unique `model_identifier`
78
82
  model_name_for_server = self.model.name
@@ -84,7 +88,8 @@ class AutobyteusImageClient(BaseImageClient):
84
88
  prompt=prompt,
85
89
  input_image_urls=input_image_urls,
86
90
  mask_url=mask_url,
87
- generation_config=generation_config
91
+ generation_config=generation_config,
92
+ session_id=self.session_id
88
93
  )
89
94
 
90
95
  image_urls = response_data.get("image_urls", [])
@@ -98,7 +103,16 @@ class AutobyteusImageClient(BaseImageClient):
98
103
  raise
99
104
 
100
105
  async def cleanup(self):
101
- """Closes the underlying AutobyteusClient."""
106
+ """
107
+ Notifies the server to cleanup the session, then closes the underlying HTTP client.
108
+ """
102
109
  if self.autobyteus_client:
103
- await self.autobyteus_client.close()
110
+ try:
111
+ logger.info(f"Notifying server to cleanup image session '{self.session_id}'...")
112
+ await self.autobyteus_client.cleanup_image_session(self.session_id)
113
+ except Exception as e:
114
+ logger.error(f"Failed to cleanup remote image session '{self.session_id}': {e}")
115
+ finally:
116
+ await self.autobyteus_client.close()
117
+
104
118
  logger.debug("AutobyteusImageClient cleaned up.")
@@ -1,14 +1,13 @@
1
1
  import logging
2
2
  import base64
3
- import os
4
3
  from typing import Optional, List, Dict, Any, TYPE_CHECKING
5
- from google import genai
6
- from PIL import Image
7
- import requests
4
+ from google.genai import types as genai_types
8
5
 
9
6
  from autobyteus.multimedia.image.base_image_client import BaseImageClient
10
7
  from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
11
8
  from autobyteus.multimedia.utils.api_utils import load_image_from_url
9
+ from autobyteus.utils.gemini_helper import initialize_gemini_client_with_runtime
10
+ from autobyteus.utils.gemini_model_mapping import resolve_model_for_runtime
12
11
 
13
12
  if TYPE_CHECKING:
14
13
  from autobyteus.multimedia.image.image_model import ImageModel
@@ -21,17 +20,16 @@ class GeminiImageClient(BaseImageClient):
21
20
  An image client that uses Google's Gemini models for image generation tasks.
22
21
 
23
22
  **Setup Requirements:**
24
- 1. **Authentication:** Set the `GEMINI_API_KEY` environment variable with your API key.
23
+ 1. **Vertex AI Express Mode:** Set `VERTEX_AI_API_KEY`.
24
+ 2. **Vertex AI Mode:** Set `VERTEX_AI_PROJECT` and `VERTEX_AI_LOCATION`.
25
+ 3. **AI Studio Mode:** Set `GEMINI_API_KEY`.
25
26
  """
26
27
 
27
28
  def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
28
29
  super().__init__(model, config)
29
- api_key = os.getenv("GEMINI_API_KEY")
30
- if not api_key:
31
- raise ValueError("Please set the GEMINI_API_KEY environment variable.")
32
30
 
33
31
  try:
34
- self.client = genai.Client()
32
+ self.client, self.runtime_info = initialize_gemini_client_with_runtime()
35
33
  self.async_client = self.client.aio
36
34
  logger.info(f"GeminiImageClient initialized for model '{self.model.name}'.")
37
35
  except Exception as e:
@@ -60,16 +58,40 @@ class GeminiImageClient(BaseImageClient):
60
58
  except Exception as e:
61
59
  logger.error(f"Skipping image at '{url}' due to loading error: {e}")
62
60
 
63
- # Note: The google-genai library uses the synchronous client for the `.generate_content` method on a model
64
- # even in an async context, as there isn't a direct async equivalent exposed for this specific call on the model object.
65
- # We use the top-level async client for other potential future calls if the library API changes.
66
- model_instance = self.client.get_generative_model(model_name=f"models/{self.model.value}")
67
- response = await model_instance.generate_content_async(contents=content)
61
+ config_dict: Dict[str, Any] = {}
62
+ if self.config and self.config.params:
63
+ config_dict.update(self.config.params)
64
+ if generation_config:
65
+ config_dict.update(generation_config)
66
+ if "response_modalities" not in config_dict:
67
+ if getattr(self, "runtime_info", None) and self.runtime_info.runtime == "vertex":
68
+ config_dict["response_modalities"] = ["TEXT", "IMAGE"]
69
+ else:
70
+ config_dict["response_modalities"] = ["IMAGE"]
71
+ config = genai_types.GenerateContentConfig(**config_dict)
72
+
73
+ # FIX: Removed 'models/' prefix from model_name to support Vertex AI
74
+ runtime_adjusted_model = resolve_model_for_runtime(
75
+ self.model.value,
76
+ modality="image",
77
+ runtime=getattr(self, "runtime_info", None) and self.runtime_info.runtime,
78
+ )
79
+ if runtime_adjusted_model != self.model.value:
80
+ logger.info(
81
+ "Using runtime-adjusted Gemini image model '%s' (requested '%s').",
82
+ runtime_adjusted_model,
83
+ self.model.value,
84
+ )
85
+ response = await self.async_client.models.generate_content(
86
+ model=runtime_adjusted_model,
87
+ contents=content,
88
+ config=config,
89
+ )
68
90
 
69
91
 
70
92
  image_urls = []
71
- for part in response.parts:
72
- if part.inline_data and "image" in part.inline_data.mime_type:
93
+ for part in response.parts or []:
94
+ if part.inline_data and part.inline_data.mime_type and "image" in part.inline_data.mime_type:
73
95
  image_bytes = part.inline_data.data
74
96
  base64_image = base64.b64encode(image_bytes).decode("utf-8")
75
97
  data_uri = f"data:{part.inline_data.mime_type};base64,{base64_image}"
@@ -77,7 +99,7 @@ class GeminiImageClient(BaseImageClient):
77
99
 
78
100
  if not image_urls:
79
101
  # Check for a safety-related refusal to generate content
80
- if response.prompt_feedback.block_reason:
102
+ if response.prompt_feedback and response.prompt_feedback.block_reason:
81
103
  reason = response.prompt_feedback.block_reason.name
82
104
  logger.error(f"Image generation blocked due to safety settings. Reason: {reason}")
83
105
  raise ValueError(f"Image generation failed due to safety settings: {reason}")