autobyteus 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/agent.py +15 -5
- autobyteus/agent/bootstrap_steps/__init__.py +3 -3
- autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +5 -59
- autobyteus/agent/bootstrap_steps/base_bootstrap_step.py +1 -4
- autobyteus/agent/bootstrap_steps/mcp_server_prewarming_step.py +1 -3
- autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +16 -13
- autobyteus/agent/bootstrap_steps/working_context_snapshot_restore_step.py +38 -0
- autobyteus/agent/bootstrap_steps/workspace_context_initialization_step.py +2 -4
- autobyteus/agent/context/agent_config.py +47 -20
- autobyteus/agent/context/agent_context.py +23 -18
- autobyteus/agent/context/agent_runtime_state.py +21 -19
- autobyteus/agent/events/__init__.py +16 -1
- autobyteus/agent/events/agent_events.py +43 -3
- autobyteus/agent/events/agent_input_event_queue_manager.py +79 -26
- autobyteus/agent/events/event_store.py +57 -0
- autobyteus/agent/events/notifiers.py +69 -59
- autobyteus/agent/events/worker_event_dispatcher.py +21 -64
- autobyteus/agent/factory/agent_factory.py +83 -6
- autobyteus/agent/handlers/__init__.py +2 -0
- autobyteus/agent/handlers/approved_tool_invocation_event_handler.py +51 -34
- autobyteus/agent/handlers/bootstrap_event_handler.py +155 -0
- autobyteus/agent/handlers/inter_agent_message_event_handler.py +10 -0
- autobyteus/agent/handlers/lifecycle_event_logger.py +19 -11
- autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +10 -15
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +188 -48
- autobyteus/agent/handlers/tool_execution_approval_event_handler.py +0 -10
- autobyteus/agent/handlers/tool_invocation_request_event_handler.py +53 -48
- autobyteus/agent/handlers/tool_result_event_handler.py +7 -8
- autobyteus/agent/handlers/user_input_message_event_handler.py +10 -3
- autobyteus/agent/input_processor/memory_ingest_input_processor.py +44 -0
- autobyteus/agent/lifecycle/__init__.py +12 -0
- autobyteus/agent/lifecycle/base_processor.py +109 -0
- autobyteus/agent/lifecycle/events.py +35 -0
- autobyteus/agent/lifecycle/processor_definition.py +36 -0
- autobyteus/agent/lifecycle/processor_registry.py +106 -0
- autobyteus/agent/llm_request_assembler.py +98 -0
- autobyteus/agent/llm_response_processor/__init__.py +1 -8
- autobyteus/agent/message/context_file_type.py +1 -1
- autobyteus/agent/runtime/agent_runtime.py +29 -21
- autobyteus/agent/runtime/agent_worker.py +98 -19
- autobyteus/agent/shutdown_steps/__init__.py +2 -0
- autobyteus/agent/shutdown_steps/agent_shutdown_orchestrator.py +2 -0
- autobyteus/agent/shutdown_steps/tool_cleanup_step.py +58 -0
- autobyteus/agent/status/__init__.py +14 -0
- autobyteus/agent/status/manager.py +93 -0
- autobyteus/agent/status/status_deriver.py +96 -0
- autobyteus/agent/{phases/phase_enum.py → status/status_enum.py} +16 -16
- autobyteus/agent/status/status_update_utils.py +73 -0
- autobyteus/agent/streaming/__init__.py +52 -5
- autobyteus/agent/streaming/adapters/__init__.py +18 -0
- autobyteus/agent/streaming/adapters/invocation_adapter.py +184 -0
- autobyteus/agent/streaming/adapters/tool_call_parsing.py +163 -0
- autobyteus/agent/streaming/adapters/tool_syntax_registry.py +67 -0
- autobyteus/agent/streaming/agent_event_stream.py +3 -183
- autobyteus/agent/streaming/api_tool_call/__init__.py +16 -0
- autobyteus/agent/streaming/api_tool_call/file_content_streamer.py +56 -0
- autobyteus/agent/streaming/api_tool_call/json_string_field_extractor.py +175 -0
- autobyteus/agent/streaming/api_tool_call_streaming_response_handler.py +4 -0
- autobyteus/agent/streaming/events/__init__.py +6 -0
- autobyteus/agent/streaming/events/stream_event_payloads.py +284 -0
- autobyteus/agent/streaming/events/stream_events.py +141 -0
- autobyteus/agent/streaming/handlers/__init__.py +15 -0
- autobyteus/agent/streaming/handlers/api_tool_call_streaming_response_handler.py +303 -0
- autobyteus/agent/streaming/handlers/parsing_streaming_response_handler.py +107 -0
- autobyteus/agent/streaming/handlers/pass_through_streaming_response_handler.py +107 -0
- autobyteus/agent/streaming/handlers/streaming_handler_factory.py +177 -0
- autobyteus/agent/streaming/handlers/streaming_response_handler.py +58 -0
- autobyteus/agent/streaming/parser/__init__.py +61 -0
- autobyteus/agent/streaming/parser/event_emitter.py +181 -0
- autobyteus/agent/streaming/parser/events.py +4 -0
- autobyteus/agent/streaming/parser/invocation_adapter.py +4 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/__init__.py +19 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/base.py +32 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/default.py +34 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/gemini.py +31 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/openai.py +64 -0
- autobyteus/agent/streaming/parser/json_parsing_strategies/registry.py +75 -0
- autobyteus/agent/streaming/parser/parser_context.py +227 -0
- autobyteus/agent/streaming/parser/parser_factory.py +132 -0
- autobyteus/agent/streaming/parser/sentinel_format.py +7 -0
- autobyteus/agent/streaming/parser/state_factory.py +62 -0
- autobyteus/agent/streaming/parser/states/__init__.py +1 -0
- autobyteus/agent/streaming/parser/states/base_state.py +60 -0
- autobyteus/agent/streaming/parser/states/custom_xml_tag_run_bash_parsing_state.py +38 -0
- autobyteus/agent/streaming/parser/states/custom_xml_tag_write_file_parsing_state.py +55 -0
- autobyteus/agent/streaming/parser/states/delimited_content_state.py +146 -0
- autobyteus/agent/streaming/parser/states/json_initialization_state.py +144 -0
- autobyteus/agent/streaming/parser/states/json_tool_parsing_state.py +137 -0
- autobyteus/agent/streaming/parser/states/sentinel_content_state.py +30 -0
- autobyteus/agent/streaming/parser/states/sentinel_initialization_state.py +117 -0
- autobyteus/agent/streaming/parser/states/text_state.py +78 -0
- autobyteus/agent/streaming/parser/states/xml_patch_file_tool_parsing_state.py +328 -0
- autobyteus/agent/streaming/parser/states/xml_run_bash_tool_parsing_state.py +129 -0
- autobyteus/agent/streaming/parser/states/xml_tag_initialization_state.py +151 -0
- autobyteus/agent/streaming/parser/states/xml_tool_parsing_state.py +63 -0
- autobyteus/agent/streaming/parser/states/xml_write_file_tool_parsing_state.py +343 -0
- autobyteus/agent/streaming/parser/strategies/__init__.py +17 -0
- autobyteus/agent/streaming/parser/strategies/base.py +24 -0
- autobyteus/agent/streaming/parser/strategies/json_tool_strategy.py +26 -0
- autobyteus/agent/streaming/parser/strategies/registry.py +28 -0
- autobyteus/agent/streaming/parser/strategies/sentinel_strategy.py +23 -0
- autobyteus/agent/streaming/parser/strategies/xml_tag_strategy.py +21 -0
- autobyteus/agent/streaming/parser/stream_scanner.py +167 -0
- autobyteus/agent/streaming/parser/streaming_parser.py +212 -0
- autobyteus/agent/streaming/parser/tool_call_parsing.py +4 -0
- autobyteus/agent/streaming/parser/tool_constants.py +7 -0
- autobyteus/agent/streaming/parser/tool_syntax_registry.py +4 -0
- autobyteus/agent/streaming/parser/xml_tool_parsing_state_registry.py +55 -0
- autobyteus/agent/streaming/parsing_streaming_response_handler.py +4 -0
- autobyteus/agent/streaming/pass_through_streaming_response_handler.py +4 -0
- autobyteus/agent/streaming/queue_streamer.py +3 -57
- autobyteus/agent/streaming/segments/__init__.py +5 -0
- autobyteus/agent/streaming/segments/segment_events.py +82 -0
- autobyteus/agent/streaming/stream_event_payloads.py +2 -223
- autobyteus/agent/streaming/stream_events.py +3 -140
- autobyteus/agent/streaming/streaming_handler_factory.py +4 -0
- autobyteus/agent/streaming/streaming_response_handler.py +4 -0
- autobyteus/agent/streaming/streams/__init__.py +5 -0
- autobyteus/agent/streaming/streams/agent_event_stream.py +197 -0
- autobyteus/agent/streaming/utils/__init__.py +5 -0
- autobyteus/agent/streaming/utils/queue_streamer.py +59 -0
- autobyteus/agent/system_prompt_processor/__init__.py +2 -0
- autobyteus/agent/system_prompt_processor/available_skills_processor.py +96 -0
- autobyteus/agent/system_prompt_processor/base_processor.py +1 -1
- autobyteus/agent/system_prompt_processor/processor_meta.py +15 -2
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +39 -58
- autobyteus/agent/token_budget.py +56 -0
- autobyteus/agent/tool_execution_result_processor/memory_ingest_tool_result_processor.py +29 -0
- autobyteus/agent/tool_invocation.py +16 -40
- autobyteus/agent/tool_invocation_preprocessor/__init__.py +9 -0
- autobyteus/agent/tool_invocation_preprocessor/base_preprocessor.py +45 -0
- autobyteus/agent/tool_invocation_preprocessor/processor_definition.py +15 -0
- autobyteus/agent/tool_invocation_preprocessor/processor_meta.py +33 -0
- autobyteus/agent/tool_invocation_preprocessor/processor_registry.py +60 -0
- autobyteus/agent/utils/wait_for_idle.py +12 -14
- autobyteus/agent/workspace/base_workspace.py +6 -27
- autobyteus/agent_team/agent_team.py +3 -3
- autobyteus/agent_team/agent_team_builder.py +1 -41
- autobyteus/agent_team/bootstrap_steps/__init__.py +0 -4
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +8 -18
- autobyteus/agent_team/bootstrap_steps/agent_team_bootstrapper.py +4 -16
- autobyteus/agent_team/bootstrap_steps/base_agent_team_bootstrap_step.py +1 -2
- autobyteus/agent_team/bootstrap_steps/coordinator_initialization_step.py +1 -2
- autobyteus/agent_team/bootstrap_steps/task_notifier_initialization_step.py +1 -2
- autobyteus/agent_team/bootstrap_steps/team_context_initialization_step.py +4 -4
- autobyteus/agent_team/context/agent_team_config.py +6 -3
- autobyteus/agent_team/context/agent_team_context.py +25 -3
- autobyteus/agent_team/context/agent_team_runtime_state.py +9 -6
- autobyteus/agent_team/events/__init__.py +11 -0
- autobyteus/agent_team/events/agent_team_event_dispatcher.py +22 -9
- autobyteus/agent_team/events/agent_team_events.py +16 -0
- autobyteus/agent_team/events/event_store.py +57 -0
- autobyteus/agent_team/factory/agent_team_factory.py +8 -0
- autobyteus/agent_team/handlers/inter_agent_message_request_event_handler.py +18 -2
- autobyteus/agent_team/handlers/lifecycle_agent_team_event_handler.py +21 -5
- autobyteus/agent_team/handlers/process_user_message_event_handler.py +17 -8
- autobyteus/agent_team/handlers/tool_approval_team_event_handler.py +19 -4
- autobyteus/agent_team/runtime/agent_team_runtime.py +41 -10
- autobyteus/agent_team/runtime/agent_team_worker.py +69 -5
- autobyteus/agent_team/status/__init__.py +14 -0
- autobyteus/agent_team/status/agent_team_status.py +18 -0
- autobyteus/agent_team/status/agent_team_status_manager.py +33 -0
- autobyteus/agent_team/status/status_deriver.py +62 -0
- autobyteus/agent_team/status/status_update_utils.py +42 -0
- autobyteus/agent_team/streaming/__init__.py +2 -2
- autobyteus/agent_team/streaming/agent_team_event_notifier.py +6 -6
- autobyteus/agent_team/streaming/agent_team_stream_event_payloads.py +4 -4
- autobyteus/agent_team/streaming/agent_team_stream_events.py +3 -3
- autobyteus/agent_team/system_prompt_processor/__init__.py +6 -0
- autobyteus/agent_team/system_prompt_processor/team_manifest_injector_processor.py +76 -0
- autobyteus/agent_team/task_notification/task_notification_mode.py +19 -0
- autobyteus/agent_team/utils/wait_for_idle.py +4 -4
- autobyteus/cli/agent_cli.py +18 -10
- autobyteus/cli/agent_team_tui/app.py +14 -11
- autobyteus/cli/agent_team_tui/state.py +13 -15
- autobyteus/cli/agent_team_tui/widgets/agent_list_sidebar.py +15 -15
- autobyteus/cli/agent_team_tui/widgets/focus_pane.py +143 -36
- autobyteus/cli/agent_team_tui/widgets/renderables.py +1 -1
- autobyteus/cli/agent_team_tui/widgets/shared.py +25 -25
- autobyteus/cli/cli_display.py +193 -44
- autobyteus/cli/workflow_tui/app.py +9 -10
- autobyteus/cli/workflow_tui/state.py +14 -16
- autobyteus/cli/workflow_tui/widgets/agent_list_sidebar.py +15 -15
- autobyteus/cli/workflow_tui/widgets/focus_pane.py +137 -35
- autobyteus/cli/workflow_tui/widgets/renderables.py +1 -1
- autobyteus/cli/workflow_tui/widgets/shared.py +25 -25
- autobyteus/clients/autobyteus_client.py +94 -1
- autobyteus/events/event_types.py +11 -18
- autobyteus/llm/api/autobyteus_llm.py +33 -29
- autobyteus/llm/api/claude_llm.py +142 -36
- autobyteus/llm/api/gemini_llm.py +163 -59
- autobyteus/llm/api/grok_llm.py +1 -1
- autobyteus/llm/api/minimax_llm.py +26 -0
- autobyteus/llm/api/mistral_llm.py +113 -87
- autobyteus/llm/api/ollama_llm.py +9 -42
- autobyteus/llm/api/openai_compatible_llm.py +127 -91
- autobyteus/llm/api/openai_llm.py +3 -3
- autobyteus/llm/api/openai_responses_llm.py +324 -0
- autobyteus/llm/api/zhipu_llm.py +21 -2
- autobyteus/llm/autobyteus_provider.py +70 -60
- autobyteus/llm/base_llm.py +85 -81
- autobyteus/llm/converters/__init__.py +14 -0
- autobyteus/llm/converters/anthropic_tool_call_converter.py +37 -0
- autobyteus/llm/converters/gemini_tool_call_converter.py +57 -0
- autobyteus/llm/converters/mistral_tool_call_converter.py +37 -0
- autobyteus/llm/converters/openai_tool_call_converter.py +38 -0
- autobyteus/llm/extensions/base_extension.py +6 -12
- autobyteus/llm/extensions/token_usage_tracking_extension.py +45 -18
- autobyteus/llm/llm_factory.py +282 -204
- autobyteus/llm/lmstudio_provider.py +60 -49
- autobyteus/llm/models.py +35 -2
- autobyteus/llm/ollama_provider.py +60 -49
- autobyteus/llm/ollama_provider_resolver.py +0 -1
- autobyteus/llm/prompt_renderers/__init__.py +19 -0
- autobyteus/llm/prompt_renderers/anthropic_prompt_renderer.py +104 -0
- autobyteus/llm/prompt_renderers/autobyteus_prompt_renderer.py +19 -0
- autobyteus/llm/prompt_renderers/base_prompt_renderer.py +10 -0
- autobyteus/llm/prompt_renderers/gemini_prompt_renderer.py +63 -0
- autobyteus/llm/prompt_renderers/mistral_prompt_renderer.py +87 -0
- autobyteus/llm/prompt_renderers/ollama_prompt_renderer.py +51 -0
- autobyteus/llm/prompt_renderers/openai_chat_renderer.py +97 -0
- autobyteus/llm/prompt_renderers/openai_responses_renderer.py +101 -0
- autobyteus/llm/providers.py +1 -3
- autobyteus/llm/token_counter/claude_token_counter.py +56 -25
- autobyteus/llm/token_counter/mistral_token_counter.py +12 -8
- autobyteus/llm/token_counter/openai_token_counter.py +24 -5
- autobyteus/llm/token_counter/token_counter_factory.py +12 -5
- autobyteus/llm/utils/llm_config.py +6 -12
- autobyteus/llm/utils/media_payload_formatter.py +27 -20
- autobyteus/llm/utils/messages.py +55 -3
- autobyteus/llm/utils/response_types.py +3 -0
- autobyteus/llm/utils/tool_call_delta.py +31 -0
- autobyteus/memory/__init__.py +35 -0
- autobyteus/memory/compaction/__init__.py +9 -0
- autobyteus/memory/compaction/compaction_result.py +8 -0
- autobyteus/memory/compaction/compactor.py +89 -0
- autobyteus/memory/compaction/summarizer.py +11 -0
- autobyteus/memory/compaction_snapshot_builder.py +84 -0
- autobyteus/memory/memory_manager.py +205 -0
- autobyteus/memory/models/__init__.py +14 -0
- autobyteus/memory/models/episodic_item.py +41 -0
- autobyteus/memory/models/memory_types.py +7 -0
- autobyteus/memory/models/raw_trace_item.py +79 -0
- autobyteus/memory/models/semantic_item.py +41 -0
- autobyteus/memory/models/tool_interaction.py +20 -0
- autobyteus/memory/path_resolver.py +27 -0
- autobyteus/memory/policies/__init__.py +5 -0
- autobyteus/memory/policies/compaction_policy.py +16 -0
- autobyteus/memory/restore/__init__.py +1 -0
- autobyteus/memory/restore/working_context_snapshot_bootstrapper.py +61 -0
- autobyteus/memory/retrieval/__init__.py +7 -0
- autobyteus/memory/retrieval/memory_bundle.py +11 -0
- autobyteus/memory/retrieval/retriever.py +13 -0
- autobyteus/memory/store/__init__.py +9 -0
- autobyteus/memory/store/base_store.py +14 -0
- autobyteus/memory/store/file_store.py +98 -0
- autobyteus/memory/store/working_context_snapshot_store.py +28 -0
- autobyteus/memory/tool_interaction_builder.py +46 -0
- autobyteus/memory/turn_tracker.py +9 -0
- autobyteus/memory/working_context_snapshot.py +69 -0
- autobyteus/memory/working_context_snapshot_serializer.py +135 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +19 -5
- autobyteus/multimedia/audio/api/gemini_audio_client.py +109 -16
- autobyteus/multimedia/audio/audio_client_factory.py +47 -9
- autobyteus/multimedia/audio/audio_model.py +2 -1
- autobyteus/multimedia/image/api/autobyteus_image_client.py +19 -5
- autobyteus/multimedia/image/api/gemini_image_client.py +39 -17
- autobyteus/multimedia/image/api/openai_image_client.py +125 -43
- autobyteus/multimedia/image/autobyteus_image_provider.py +2 -1
- autobyteus/multimedia/image/image_client_factory.py +47 -15
- autobyteus/multimedia/image/image_model.py +5 -2
- autobyteus/multimedia/providers.py +3 -2
- autobyteus/skills/loader.py +71 -0
- autobyteus/skills/model.py +11 -0
- autobyteus/skills/registry.py +70 -0
- autobyteus/task_management/tools/todo_tools/add_todo.py +2 -2
- autobyteus/task_management/tools/todo_tools/create_todo_list.py +2 -2
- autobyteus/task_management/tools/todo_tools/update_todo_status.py +2 -2
- autobyteus/tools/__init__.py +34 -47
- autobyteus/tools/base_tool.py +7 -0
- autobyteus/tools/file/__init__.py +2 -6
- autobyteus/tools/file/patch_file.py +149 -0
- autobyteus/tools/file/read_file.py +36 -5
- autobyteus/tools/file/write_file.py +4 -1
- autobyteus/tools/functional_tool.py +43 -6
- autobyteus/tools/mcp/__init__.py +2 -0
- autobyteus/tools/mcp/config_service.py +5 -1
- autobyteus/tools/mcp/server/__init__.py +2 -0
- autobyteus/tools/mcp/server/http_managed_mcp_server.py +1 -1
- autobyteus/tools/mcp/server/websocket_managed_mcp_server.py +141 -0
- autobyteus/tools/mcp/server_instance_manager.py +8 -1
- autobyteus/tools/mcp/types.py +61 -0
- autobyteus/tools/multimedia/audio_tools.py +70 -17
- autobyteus/tools/multimedia/download_media_tool.py +18 -4
- autobyteus/tools/multimedia/image_tools.py +246 -62
- autobyteus/tools/operation_executor/journal_manager.py +107 -0
- autobyteus/tools/operation_executor/operation_event_buffer.py +57 -0
- autobyteus/tools/operation_executor/operation_event_producer.py +29 -0
- autobyteus/tools/operation_executor/operation_executor.py +58 -0
- autobyteus/tools/registry/tool_definition.py +43 -2
- autobyteus/tools/skill/load_skill.py +50 -0
- autobyteus/tools/terminal/__init__.py +45 -0
- autobyteus/tools/terminal/ansi_utils.py +32 -0
- autobyteus/tools/terminal/background_process_manager.py +233 -0
- autobyteus/tools/terminal/output_buffer.py +105 -0
- autobyteus/tools/terminal/prompt_detector.py +63 -0
- autobyteus/tools/terminal/pty_session.py +241 -0
- autobyteus/tools/terminal/session_factory.py +20 -0
- autobyteus/tools/terminal/terminal_session_manager.py +226 -0
- autobyteus/tools/terminal/tools/__init__.py +13 -0
- autobyteus/tools/terminal/tools/get_process_output.py +81 -0
- autobyteus/tools/terminal/tools/run_bash.py +109 -0
- autobyteus/tools/terminal/tools/start_background_process.py +104 -0
- autobyteus/tools/terminal/tools/stop_background_process.py +67 -0
- autobyteus/tools/terminal/types.py +54 -0
- autobyteus/tools/terminal/wsl_tmux_session.py +221 -0
- autobyteus/tools/terminal/wsl_utils.py +156 -0
- autobyteus/tools/transaction_management/backup_handler.py +48 -0
- autobyteus/tools/transaction_management/operation_lifecycle_manager.py +62 -0
- autobyteus/tools/usage/__init__.py +1 -2
- autobyteus/tools/usage/formatters/__init__.py +17 -1
- autobyteus/tools/usage/formatters/base_formatter.py +8 -0
- autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +2 -2
- autobyteus/tools/usage/formatters/mistral_json_schema_formatter.py +18 -0
- autobyteus/tools/usage/formatters/patch_file_xml_example_formatter.py +64 -0
- autobyteus/tools/usage/formatters/patch_file_xml_schema_formatter.py +31 -0
- autobyteus/tools/usage/formatters/run_bash_xml_example_formatter.py +32 -0
- autobyteus/tools/usage/formatters/run_bash_xml_schema_formatter.py +36 -0
- autobyteus/tools/usage/formatters/write_file_xml_example_formatter.py +53 -0
- autobyteus/tools/usage/formatters/write_file_xml_schema_formatter.py +31 -0
- autobyteus/tools/usage/providers/tool_manifest_provider.py +10 -10
- autobyteus/tools/usage/registries/__init__.py +1 -3
- autobyteus/tools/usage/registries/tool_formatting_registry.py +115 -8
- autobyteus/tools/usage/tool_schema_provider.py +51 -0
- autobyteus/tools/web/__init__.py +4 -0
- autobyteus/tools/web/read_url_tool.py +80 -0
- autobyteus/utils/diff_utils.py +271 -0
- autobyteus/utils/download_utils.py +109 -0
- autobyteus/utils/file_utils.py +57 -2
- autobyteus/utils/gemini_helper.py +64 -0
- autobyteus/utils/gemini_model_mapping.py +71 -0
- autobyteus/utils/llm_output_formatter.py +75 -0
- autobyteus/utils/tool_call_format.py +36 -0
- autobyteus/workflow/agentic_workflow.py +3 -3
- autobyteus/workflow/bootstrap_steps/agent_tool_injection_step.py +2 -2
- autobyteus/workflow/bootstrap_steps/base_workflow_bootstrap_step.py +2 -2
- autobyteus/workflow/bootstrap_steps/coordinator_initialization_step.py +2 -2
- autobyteus/workflow/bootstrap_steps/coordinator_prompt_preparation_step.py +3 -9
- autobyteus/workflow/bootstrap_steps/workflow_bootstrapper.py +6 -6
- autobyteus/workflow/bootstrap_steps/workflow_runtime_queue_initialization_step.py +2 -2
- autobyteus/workflow/context/workflow_context.py +3 -3
- autobyteus/workflow/context/workflow_runtime_state.py +5 -5
- autobyteus/workflow/events/workflow_event_dispatcher.py +5 -5
- autobyteus/workflow/handlers/lifecycle_workflow_event_handler.py +3 -3
- autobyteus/workflow/handlers/process_user_message_event_handler.py +5 -5
- autobyteus/workflow/handlers/tool_approval_workflow_event_handler.py +2 -2
- autobyteus/workflow/runtime/workflow_runtime.py +8 -8
- autobyteus/workflow/runtime/workflow_worker.py +3 -3
- autobyteus/workflow/status/__init__.py +11 -0
- autobyteus/workflow/status/workflow_status.py +19 -0
- autobyteus/workflow/status/workflow_status_manager.py +48 -0
- autobyteus/workflow/streaming/__init__.py +2 -2
- autobyteus/workflow/streaming/workflow_event_notifier.py +7 -7
- autobyteus/workflow/streaming/workflow_stream_event_payloads.py +4 -4
- autobyteus/workflow/streaming/workflow_stream_events.py +3 -3
- autobyteus/workflow/utils/wait_for_idle.py +4 -4
- autobyteus-1.3.0.dist-info/METADATA +293 -0
- autobyteus-1.3.0.dist-info/RECORD +606 -0
- {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/WHEEL +1 -1
- {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/top_level.txt +0 -1
- autobyteus/agent/bootstrap_steps/agent_runtime_queue_initialization_step.py +0 -57
- autobyteus/agent/hooks/__init__.py +0 -16
- autobyteus/agent/hooks/base_phase_hook.py +0 -78
- autobyteus/agent/hooks/hook_definition.py +0 -36
- autobyteus/agent/hooks/hook_meta.py +0 -37
- autobyteus/agent/hooks/hook_registry.py +0 -106
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +0 -103
- autobyteus/agent/phases/__init__.py +0 -18
- autobyteus/agent/phases/discover.py +0 -53
- autobyteus/agent/phases/manager.py +0 -265
- autobyteus/agent/phases/transition_decorator.py +0 -40
- autobyteus/agent/phases/transition_info.py +0 -33
- autobyteus/agent/remote_agent.py +0 -244
- autobyteus/agent/workspace/workspace_definition.py +0 -36
- autobyteus/agent/workspace/workspace_meta.py +0 -37
- autobyteus/agent/workspace/workspace_registry.py +0 -72
- autobyteus/agent_team/bootstrap_steps/agent_team_runtime_queue_initialization_step.py +0 -25
- autobyteus/agent_team/bootstrap_steps/coordinator_prompt_preparation_step.py +0 -85
- autobyteus/agent_team/phases/__init__.py +0 -11
- autobyteus/agent_team/phases/agent_team_operational_phase.py +0 -19
- autobyteus/agent_team/phases/agent_team_phase_manager.py +0 -48
- autobyteus/llm/api/bedrock_llm.py +0 -92
- autobyteus/llm/api/groq_llm.py +0 -94
- autobyteus/llm/api/nvidia_llm.py +0 -108
- autobyteus/llm/utils/token_pricing_config.py +0 -87
- autobyteus/rpc/__init__.py +0 -73
- autobyteus/rpc/client/__init__.py +0 -17
- autobyteus/rpc/client/abstract_client_connection.py +0 -124
- autobyteus/rpc/client/client_connection_manager.py +0 -153
- autobyteus/rpc/client/sse_client_connection.py +0 -306
- autobyteus/rpc/client/stdio_client_connection.py +0 -280
- autobyteus/rpc/config/__init__.py +0 -13
- autobyteus/rpc/config/agent_server_config.py +0 -153
- autobyteus/rpc/config/agent_server_registry.py +0 -152
- autobyteus/rpc/hosting.py +0 -244
- autobyteus/rpc/protocol.py +0 -244
- autobyteus/rpc/server/__init__.py +0 -20
- autobyteus/rpc/server/agent_server_endpoint.py +0 -181
- autobyteus/rpc/server/base_method_handler.py +0 -40
- autobyteus/rpc/server/method_handlers.py +0 -259
- autobyteus/rpc/server/sse_server_handler.py +0 -182
- autobyteus/rpc/server/stdio_server_handler.py +0 -151
- autobyteus/rpc/server_main.py +0 -198
- autobyteus/rpc/transport_type.py +0 -13
- autobyteus/tools/bash/__init__.py +0 -2
- autobyteus/tools/bash/bash_executor.py +0 -100
- autobyteus/tools/browser/__init__.py +0 -2
- autobyteus/tools/browser/session_aware/browser_session_aware_navigate_to.py +0 -75
- autobyteus/tools/browser/session_aware/browser_session_aware_tool.py +0 -30
- autobyteus/tools/browser/session_aware/browser_session_aware_web_element_trigger.py +0 -154
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_reader.py +0 -89
- autobyteus/tools/browser/session_aware/browser_session_aware_webpage_screenshot_taker.py +0 -107
- autobyteus/tools/browser/session_aware/factory/browser_session_aware_web_element_trigger_factory.py +0 -14
- autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_reader_factory.py +0 -26
- autobyteus/tools/browser/session_aware/factory/browser_session_aware_webpage_screenshot_taker_factory.py +0 -14
- autobyteus/tools/browser/session_aware/shared_browser_session.py +0 -11
- autobyteus/tools/browser/session_aware/shared_browser_session_manager.py +0 -25
- autobyteus/tools/browser/session_aware/web_element_action.py +0 -20
- autobyteus/tools/browser/standalone/__init__.py +0 -6
- autobyteus/tools/browser/standalone/factory/__init__.py +0 -0
- autobyteus/tools/browser/standalone/factory/webpage_reader_factory.py +0 -25
- autobyteus/tools/browser/standalone/factory/webpage_screenshot_taker_factory.py +0 -14
- autobyteus/tools/browser/standalone/navigate_to.py +0 -84
- autobyteus/tools/browser/standalone/web_page_pdf_generator.py +0 -101
- autobyteus/tools/browser/standalone/webpage_image_downloader.py +0 -169
- autobyteus/tools/browser/standalone/webpage_reader.py +0 -105
- autobyteus/tools/browser/standalone/webpage_screenshot_taker.py +0 -105
- autobyteus/tools/file/edit_file.py +0 -200
- autobyteus/tools/file/list_directory.py +0 -168
- autobyteus/tools/file/search_files.py +0 -188
- autobyteus/tools/timer.py +0 -175
- autobyteus/tools/usage/parsers/__init__.py +0 -22
- autobyteus/tools/usage/parsers/_json_extractor.py +0 -99
- autobyteus/tools/usage/parsers/_string_decoders.py +0 -18
- autobyteus/tools/usage/parsers/anthropic_xml_tool_usage_parser.py +0 -10
- autobyteus/tools/usage/parsers/base_parser.py +0 -41
- autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +0 -83
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +0 -316
- autobyteus/tools/usage/parsers/exceptions.py +0 -13
- autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +0 -77
- autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +0 -149
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +0 -59
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +0 -62
- autobyteus/workflow/phases/__init__.py +0 -11
- autobyteus/workflow/phases/workflow_operational_phase.py +0 -19
- autobyteus/workflow/phases/workflow_phase_manager.py +0 -48
- autobyteus-1.2.1.dist-info/METADATA +0 -205
- autobyteus-1.2.1.dist-info/RECORD +0 -511
- examples/__init__.py +0 -1
- examples/agent_team/__init__.py +0 -1
- examples/discover_phase_transitions.py +0 -104
- examples/run_agentic_software_engineer.py +0 -239
- examples/run_browser_agent.py +0 -262
- examples/run_google_slides_agent.py +0 -287
- examples/run_mcp_browser_client.py +0 -174
- examples/run_mcp_google_slides_client.py +0 -270
- examples/run_mcp_list_tools.py +0 -189
- examples/run_poem_writer.py +0 -284
- examples/run_sqlite_agent.py +0 -295
- /autobyteus/{tools/browser/session_aware → skills}/__init__.py +0 -0
- /autobyteus/tools/{browser/session_aware/factory → skill}/__init__.py +0 -0
- {autobyteus-1.2.1.dist-info → autobyteus-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from autobyteus.llm.utils.messages import (
|
|
5
|
+
Message,
|
|
6
|
+
MessageRole,
|
|
7
|
+
ToolCallPayload,
|
|
8
|
+
ToolCallSpec,
|
|
9
|
+
ToolResultPayload,
|
|
10
|
+
)
|
|
11
|
+
from autobyteus.memory.working_context_snapshot import WorkingContextSnapshot
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class WorkingContextSnapshotSerializer:
|
|
15
|
+
@staticmethod
def serialize(working_context_snapshot: WorkingContextSnapshot, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a working-context snapshot into a plain dict payload.

    Args:
        working_context_snapshot: Snapshot whose ``build_messages()`` output,
            ``epoch_id`` and ``last_compaction_ts`` are written out.
        metadata: Optional overrides. ``schema_version`` falls back to 1,
            ``agent_id`` to None, and ``epoch_id`` / ``last_compaction_ts``
            fall back to the values stored on the snapshot.

    Returns:
        A dict with the keys ``schema_version``, ``agent_id``, ``epoch_id``,
        ``last_compaction_ts`` and ``messages``.
    """
    snapshot = working_context_snapshot

    # Resolve the scalar fields first, in the same order as the output keys.
    schema_version = metadata.get("schema_version", 1)
    agent_id = metadata.get("agent_id")
    epoch_id = metadata.get("epoch_id", snapshot.epoch_id)
    last_compaction_ts = metadata.get("last_compaction_ts", snapshot.last_compaction_ts)

    serialized_messages = []
    for message in snapshot.build_messages():
        serialized_messages.append(WorkingContextSnapshotSerializer._serialize_message(message))

    return {
        "schema_version": schema_version,
        "agent_id": agent_id,
        "epoch_id": epoch_id,
        "last_compaction_ts": last_compaction_ts,
        "messages": serialized_messages,
    }
|
|
25
|
+
|
|
26
|
+
@staticmethod
def deserialize(payload: Dict[str, Any]) -> Tuple[WorkingContextSnapshot, Dict[str, Any]]:
    """Rebuild a snapshot and its metadata from a serialized payload.

    Args:
        payload: Dict as produced by ``serialize``. Entries of ``messages``
            that are not dicts are skipped.

    Returns:
        A ``(snapshot, metadata)`` tuple. ``metadata`` carries the payload's
        ``schema_version``, ``agent_id``, ``epoch_id`` and
        ``last_compaction_ts`` (each None when absent).
    """
    # `or []` also covers an explicit `"messages": None`, which would
    # otherwise fail on iteration; this mirrors the guard used for
    # `tool_calls` in `_deserialize_tool_payload`.
    raw_messages = payload.get("messages") or []
    messages = [
        WorkingContextSnapshotSerializer._deserialize_message(msg)
        for msg in raw_messages
        if isinstance(msg, dict)
    ]
    snapshot = WorkingContextSnapshot(initial_messages=messages)
    metadata = {
        "schema_version": payload.get("schema_version"),
        "agent_id": payload.get("agent_id"),
        "epoch_id": payload.get("epoch_id"),
        "last_compaction_ts": payload.get("last_compaction_ts"),
    }
    # Only overwrite the snapshot's own values when the payload supplies usable ones.
    if isinstance(metadata["epoch_id"], int):
        snapshot.epoch_id = metadata["epoch_id"]
    if metadata["last_compaction_ts"] is not None:
        snapshot.last_compaction_ts = metadata["last_compaction_ts"]
    return snapshot, metadata
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def validate(payload: Dict[str, Any]) -> bool:
|
|
48
|
+
if not isinstance(payload, dict):
|
|
49
|
+
return False
|
|
50
|
+
if not isinstance(payload.get("schema_version"), int):
|
|
51
|
+
return False
|
|
52
|
+
if not isinstance(payload.get("agent_id"), str):
|
|
53
|
+
return False
|
|
54
|
+
messages = payload.get("messages")
|
|
55
|
+
if not isinstance(messages, list):
|
|
56
|
+
return False
|
|
57
|
+
for msg in messages:
|
|
58
|
+
if not isinstance(msg, dict):
|
|
59
|
+
return False
|
|
60
|
+
if not isinstance(msg.get("role"), str):
|
|
61
|
+
return False
|
|
62
|
+
return True
|
|
63
|
+
|
|
64
|
+
@staticmethod
def _serialize_message(message: Message) -> Dict[str, Any]:
    """Convert one message to a dict, normalizing its tool payload if present.

    Starts from ``message.to_dict()``; when that dict holds a truthy
    ``tool_payload`` it is replaced by its normalized form.
    """
    serialized = message.to_dict()
    raw_tool_payload = serialized.get("tool_payload")
    if not raw_tool_payload:
        # Nothing tool-related to normalize; hand back the dict untouched.
        return serialized
    serialized["tool_payload"] = WorkingContextSnapshotSerializer._normalize_tool_payload(raw_tool_payload)
    return serialized
|
|
70
|
+
|
|
71
|
+
@staticmethod
|
|
72
|
+
def _deserialize_message(data: Dict[str, Any]) -> Message:
    """Build a ``Message`` from its serialized dict form.

    Args:
        data: Serialized message; ``role`` is passed to ``MessageRole`` and
            the remaining fields are read with ``dict.get``.

    Returns:
        A ``Message`` whose media URL lists default to empty lists when the
        stored value is missing or falsy.
    """
    role = MessageRole(data.get("role"))
    tool_payload = WorkingContextSnapshotSerializer._deserialize_tool_payload(
        data.get("tool_payload")
    )

    # Missing or None media entries become fresh empty lists.
    media_urls = {
        field: data.get(field) or []
        for field in ("image_urls", "audio_urls", "video_urls")
    }

    return Message(
        role=role,
        content=data.get("content"),
        reasoning_content=data.get("reasoning_content"),
        tool_payload=tool_payload,
        **media_urls,
    )
|
|
84
|
+
|
|
85
|
+
@staticmethod
|
|
86
|
+
def _normalize_tool_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Reduce a tool payload dict to its known, JSON-friendly fields.

    Args:
        payload: Either a tool-call payload (has a ``"tool_calls"`` key) or a
            tool-result payload.

    Returns:
        For tool calls, ``{"tool_calls": [{"id", "name", "arguments"}, ...]}``;
        otherwise a dict with ``tool_call_id``, ``tool_name``, ``tool_result``
        and ``tool_error``. Arguments and results pass through
        ``_safe_json_value``.
    """
    to_json_safe = WorkingContextSnapshotSerializer._safe_json_value

    if "tool_calls" not in payload:
        # Anything without "tool_calls" is treated as a tool-result payload.
        return {
            "tool_call_id": payload.get("tool_call_id"),
            "tool_name": payload.get("tool_name"),
            "tool_result": to_json_safe(payload.get("tool_result")),
            "tool_error": payload.get("tool_error"),
        }

    normalized_calls = []
    for call in payload.get("tool_calls", []):
        normalized_calls.append(
            {
                "id": call.get("id"),
                "name": call.get("name"),
                "arguments": to_json_safe(call.get("arguments")),
            }
        )
    return {"tool_calls": normalized_calls}
|
|
104
|
+
|
|
105
|
+
@staticmethod
|
|
106
|
+
def _deserialize_tool_payload(payload: Optional[Dict[str, Any]]) -> Optional[Any]:
    """Turn a serialized tool payload back into its payload object.

    Args:
        payload: Serialized tool payload, or ``None``/empty when absent.

    Returns:
        A ``ToolCallPayload`` when ``"tool_calls"`` is present, a
        ``ToolResultPayload`` when ``"tool_call_id"`` is present, and ``None``
        for an empty payload or one carrying neither key.
    """
    if not payload:
        return None

    if "tool_calls" in payload:
        # A stored ``None`` for "tool_calls" is treated as no calls at all.
        raw_calls = payload.get("tool_calls", []) or []
        specs = [
            ToolCallSpec(
                id=str(raw.get("id")),
                name=str(raw.get("name")),
                arguments=raw.get("arguments") or {},
            )
            for raw in raw_calls
        ]
        return ToolCallPayload(tool_calls=specs)

    if "tool_call_id" not in payload:
        return None

    return ToolResultPayload(
        tool_call_id=str(payload.get("tool_call_id")),
        tool_name=str(payload.get("tool_name")),
        tool_result=payload.get("tool_result"),
        tool_error=payload.get("tool_error"),
    )
|
|
128
|
+
|
|
129
|
+
@staticmethod
|
|
130
|
+
def _safe_json_value(value: Any) -> Any:
|
|
131
|
+
try:
|
|
132
|
+
json.dumps(value)
|
|
133
|
+
return value
|
|
134
|
+
except TypeError:
|
|
135
|
+
return str(value)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import uuid
|
|
2
3
|
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
3
4
|
from autobyteus.clients import AutobyteusClient
|
|
4
5
|
from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
|
|
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
14
|
class AutobyteusAudioClient(BaseAudioClient):
|
|
14
15
|
"""
|
|
15
16
|
An audio client that connects to an Autobyteus server instance for audio tasks.
|
|
17
|
+
Maintains a persistent session ID for stateful interactions.
|
|
16
18
|
"""
|
|
17
19
|
|
|
18
20
|
def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
|
|
@@ -21,7 +23,9 @@ class AutobyteusAudioClient(BaseAudioClient):
|
|
|
21
23
|
raise ValueError("AutobyteusAudioClient requires a host_url in its AudioModel.")
|
|
22
24
|
|
|
23
25
|
self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
|
|
24
|
-
|
|
26
|
+
self.session_id = str(uuid.uuid4())
|
|
27
|
+
logger.info(f"AutobyteusAudioClient initialized for model '{model.name}' "
|
|
28
|
+
f"on host '{model.host_url}' with session_id '{self.session_id}'.")
|
|
25
29
|
|
|
26
30
|
async def generate_speech(
|
|
27
31
|
self,
|
|
@@ -33,7 +37,7 @@ class AutobyteusAudioClient(BaseAudioClient):
|
|
|
33
37
|
Generates speech by calling the generate_speech endpoint on the remote Autobyteus server.
|
|
34
38
|
"""
|
|
35
39
|
try:
|
|
36
|
-
logger.info(f"Sending speech generation request for model '{self.model.name}' to {self.model.host_url}")
|
|
40
|
+
logger.info(f"Sending speech generation request for model '{self.model.name}' to {self.model.host_url} (Session: {self.session_id})")
|
|
37
41
|
|
|
38
42
|
model_name_for_server = self.model.name
|
|
39
43
|
|
|
@@ -42,7 +46,8 @@ class AutobyteusAudioClient(BaseAudioClient):
|
|
|
42
46
|
response_data = await self.autobyteus_client.generate_speech(
|
|
43
47
|
model_name=model_name_for_server,
|
|
44
48
|
prompt=prompt,
|
|
45
|
-
generation_config=generation_config
|
|
49
|
+
generation_config=generation_config,
|
|
50
|
+
session_id=self.session_id
|
|
46
51
|
)
|
|
47
52
|
|
|
48
53
|
audio_urls = response_data.get("audio_urls", [])
|
|
@@ -56,7 +61,16 @@ class AutobyteusAudioClient(BaseAudioClient):
|
|
|
56
61
|
raise
|
|
57
62
|
|
|
58
63
|
async def cleanup(self):
    """
    Ask the server to drop this client's audio session, then close the HTTP client.

    A failure of the remote session cleanup is logged and not re-raised; the
    underlying client is closed either way.
    """
    client = self.autobyteus_client
    if client:
        try:
            logger.info(f"Notifying server to cleanup audio session '{self.session_id}'...")
            await client.cleanup_audio_session(self.session_id)
        except Exception as e:
            # Best effort: the remote cleanup must not block the local close.
            logger.error(f"Failed to cleanup remote audio session '{self.session_id}': {e}")
        finally:
            await client.close()

    logger.debug("AutobyteusAudioClient cleaned up.")
|
|
@@ -9,6 +9,8 @@ from google.genai import types as genai_types
|
|
|
9
9
|
|
|
10
10
|
from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
|
|
11
11
|
from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
|
|
12
|
+
from autobyteus.utils.gemini_helper import initialize_gemini_client_with_runtime
|
|
13
|
+
from autobyteus.utils.gemini_model_mapping import resolve_model_for_runtime
|
|
12
14
|
|
|
13
15
|
if TYPE_CHECKING:
|
|
14
16
|
from autobyteus.multimedia.audio.audio_model import AudioModel
|
|
@@ -17,11 +19,22 @@ if TYPE_CHECKING:
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
19
21
|
|
|
22
|
+
_AUDIO_TEMP_DIR = "/tmp/autobyteus_audio"
|
|
23
|
+
|
|
24
|
+
_AUDIO_MIME_EXTENSION_MAP = {
|
|
25
|
+
"audio/wav": "wav",
|
|
26
|
+
"audio/x-wav": "wav",
|
|
27
|
+
"audio/mpeg": "mp3",
|
|
28
|
+
"audio/mp3": "mp3",
|
|
29
|
+
"audio/ogg": "ogg",
|
|
30
|
+
"audio/webm": "webm",
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
20
34
|
def _save_audio_bytes_to_wav(pcm_bytes: bytes, channels=1, rate=24000, sample_width=2) -> str:
|
|
21
35
|
"""Saves PCM audio bytes to a temporary WAV file and returns the path."""
|
|
22
|
-
|
|
23
|
-
os.
|
|
24
|
-
file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.wav")
|
|
36
|
+
os.makedirs(_AUDIO_TEMP_DIR, exist_ok=True)
|
|
37
|
+
file_path = os.path.join(_AUDIO_TEMP_DIR, f"{uuid.uuid4()}.wav")
|
|
25
38
|
|
|
26
39
|
try:
|
|
27
40
|
with wave.open(file_path, "wb") as wf:
|
|
@@ -36,22 +49,63 @@ def _save_audio_bytes_to_wav(pcm_bytes: bytes, channels=1, rate=24000, sample_wi
|
|
|
36
49
|
raise
|
|
37
50
|
|
|
38
51
|
|
|
52
|
+
def _save_audio_bytes(audio_bytes: bytes, extension: Optional[str]) -> str:
    """Write raw audio bytes to a uniquely named file and return its path.

    Args:
        audio_bytes: Encoded audio content, written as-is.
        extension: File extension with or without leading dots; ``None`` or
            an empty string falls back to ``"bin"``.

    Returns:
        Path of the written file inside ``_AUDIO_TEMP_DIR``.

    Raises:
        Exception: Any error raised while writing is logged and re-raised.
    """
    os.makedirs(_AUDIO_TEMP_DIR, exist_ok=True)

    raw_extension = extension if extension else "bin"
    file_name = f"{uuid.uuid4()}.{raw_extension.lstrip('.')}"
    file_path = os.path.join(_AUDIO_TEMP_DIR, file_name)

    try:
        with open(file_path, "wb") as sink:
            sink.write(audio_bytes)
        logger.info(f"Successfully saved generated audio to {file_path}")
        return file_path
    except Exception as e:
        logger.error(f"Failed to save audio to file at {file_path}: {e}")
        raise
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _parse_mime_type(mime_type: Optional[str]) -> tuple[str, Dict[str, str]]:
|
|
68
|
+
if not mime_type:
|
|
69
|
+
return "", {}
|
|
70
|
+
parts = [part.strip() for part in mime_type.split(";") if part.strip()]
|
|
71
|
+
base = parts[0].lower() if parts else ""
|
|
72
|
+
params: Dict[str, str] = {}
|
|
73
|
+
for part in parts[1:]:
|
|
74
|
+
if "=" in part:
|
|
75
|
+
key, value = part.split("=", 1)
|
|
76
|
+
params[key.strip().lower()] = value.strip()
|
|
77
|
+
return base, params
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _coerce_audio_bytes(audio_data: Any) -> bytes:
|
|
81
|
+
if audio_data is None:
|
|
82
|
+
return b""
|
|
83
|
+
if isinstance(audio_data, bytes):
|
|
84
|
+
return audio_data
|
|
85
|
+
if isinstance(audio_data, bytearray):
|
|
86
|
+
return bytes(audio_data)
|
|
87
|
+
if isinstance(audio_data, memoryview):
|
|
88
|
+
return audio_data.tobytes()
|
|
89
|
+
if isinstance(audio_data, str):
|
|
90
|
+
return base64.b64decode(audio_data)
|
|
91
|
+
return bytes(audio_data)
|
|
92
|
+
|
|
93
|
+
|
|
39
94
|
class GeminiAudioClient(BaseAudioClient):
|
|
40
95
|
"""
|
|
41
96
|
An audio client that uses Google's Gemini models for audio tasks.
|
|
42
97
|
|
|
43
98
|
**Setup Requirements:**
|
|
44
|
-
1. **
|
|
99
|
+
1. **Vertex AI Express Mode:** Set `VERTEX_AI_API_KEY`.
|
|
100
|
+
2. **Vertex AI Mode:** Set `VERTEX_AI_PROJECT` and `VERTEX_AI_LOCATION`.
|
|
101
|
+
3. **AI Studio Mode:** Set `GEMINI_API_KEY`.
|
|
45
102
|
"""
|
|
46
103
|
|
|
47
104
|
def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
|
|
48
105
|
super().__init__(model, config)
|
|
49
|
-
|
|
50
|
-
if not api_key:
|
|
51
|
-
raise ValueError("Please set the GEMINI_API_KEY environment variable.")
|
|
52
|
-
|
|
106
|
+
|
|
53
107
|
try:
|
|
54
|
-
self.client =
|
|
108
|
+
self.client, self.runtime_info = initialize_gemini_client_with_runtime()
|
|
55
109
|
self.async_client = self.client.aio
|
|
56
110
|
logger.info(f"GeminiAudioClient initialized for model '{self.model.name}'.")
|
|
57
111
|
except Exception as e:
|
|
@@ -70,8 +124,6 @@ class GeminiAudioClient(BaseAudioClient):
|
|
|
70
124
|
multi-speaker, and style-controlled generation.
|
|
71
125
|
"""
|
|
72
126
|
try:
|
|
73
|
-
logger.info(f"Generating speech with Gemini TTS model '{self.model.value}'...")
|
|
74
|
-
|
|
75
127
|
final_config = self.config.to_dict().copy()
|
|
76
128
|
if generation_config:
|
|
77
129
|
final_config.update(generation_config)
|
|
@@ -126,8 +178,19 @@ class GeminiAudioClient(BaseAudioClient):
|
|
|
126
178
|
)
|
|
127
179
|
|
|
128
180
|
# The google-genai library's TTS endpoint uses a synchronous call.
|
|
181
|
+
# FIX: Ensure no 'models/' prefix is used here.
|
|
182
|
+
runtime_adjusted_model = resolve_model_for_runtime(
|
|
183
|
+
self.model.value,
|
|
184
|
+
modality="tts",
|
|
185
|
+
runtime=getattr(self, "runtime_info", None) and self.runtime_info.runtime,
|
|
186
|
+
)
|
|
187
|
+
logger.info(
|
|
188
|
+
"Generating speech with Gemini TTS model '%s' (requested '%s').",
|
|
189
|
+
runtime_adjusted_model,
|
|
190
|
+
self.model.value,
|
|
191
|
+
)
|
|
129
192
|
resp = self.client.models.generate_content(
|
|
130
|
-
model=
|
|
193
|
+
model=runtime_adjusted_model,
|
|
131
194
|
contents=final_prompt,
|
|
132
195
|
config=genai_types.GenerateContentConfig(
|
|
133
196
|
response_modalities=["AUDIO"],
|
|
@@ -135,10 +198,40 @@ class GeminiAudioClient(BaseAudioClient):
|
|
|
135
198
|
),
|
|
136
199
|
)
|
|
137
200
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
201
|
+
part = resp.candidates[0].content.parts[0]
|
|
202
|
+
inline_data = part.inline_data
|
|
203
|
+
if not inline_data or not inline_data.data:
|
|
204
|
+
raise ValueError("Gemini TTS response did not include audio data.")
|
|
205
|
+
|
|
206
|
+
mime_type, mime_params = _parse_mime_type(inline_data.mime_type)
|
|
207
|
+
audio_bytes = _coerce_audio_bytes(inline_data.data)
|
|
208
|
+
if not audio_bytes:
|
|
209
|
+
raise ValueError("Gemini TTS returned empty audio data.")
|
|
210
|
+
|
|
211
|
+
logger.info(
|
|
212
|
+
"Received Gemini TTS audio payload (mime_type='%s', bytes=%d).",
|
|
213
|
+
mime_type or "unknown",
|
|
214
|
+
len(audio_bytes),
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
if not mime_type or mime_type.startswith("audio/pcm") or mime_type == "audio/l16":
|
|
218
|
+
rate = 24000
|
|
219
|
+
channels = 1
|
|
220
|
+
if "rate" in mime_params:
|
|
221
|
+
try:
|
|
222
|
+
rate = int(mime_params["rate"])
|
|
223
|
+
except ValueError:
|
|
224
|
+
logger.warning("Invalid sample rate in mime_type '%s'; using default 24000.", inline_data.mime_type)
|
|
225
|
+
if "channels" in mime_params:
|
|
226
|
+
try:
|
|
227
|
+
channels = int(mime_params["channels"])
|
|
228
|
+
except ValueError:
|
|
229
|
+
logger.warning("Invalid channel count in mime_type '%s'; using default 1.", inline_data.mime_type)
|
|
230
|
+
|
|
231
|
+
audio_path = _save_audio_bytes_to_wav(audio_bytes, channels=channels, rate=rate, sample_width=2)
|
|
232
|
+
else:
|
|
233
|
+
extension = _AUDIO_MIME_EXTENSION_MAP.get(mime_type, "bin")
|
|
234
|
+
audio_path = _save_audio_bytes(audio_bytes, extension)
|
|
142
235
|
|
|
143
236
|
return SpeechGenerationResponse(audio_urls=[audio_path])
|
|
144
237
|
|
|
@@ -13,13 +13,51 @@ from autobyteus.utils.parameter_schema import ParameterSchema, ParameterDefiniti
|
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
16
|
+
# Per-voice metadata for the Gemini TTS voices: a gender label and a short
# description of each voice's character. Insertion order is significant; the
# derived list and description text below follow it.
GEMINI_VOICE_DETAILS = {
    "Zephyr": {"gender": "female", "description": "Bright, Higher pitch"},
    "Puck": {"gender": "male", "description": "Upbeat, Middle pitch"},
    "Charon": {"gender": "male", "description": "Informative, Lower pitch"},
    "Kore": {"gender": "female", "description": "Firm, Middle pitch"},
    "Fenrir": {"gender": "male", "description": "Excitable, Lower middle pitch"},
    "Leda": {"gender": "female", "description": "Youthful, Higher pitch"},
    "Orus": {"gender": "male", "description": "Firm, Lower middle pitch"},
    "Aoede": {"gender": "female", "description": "Breezy, Middle pitch"},
    "Callirrhoe": {"gender": "female", "description": "Easy-going, Middle pitch"},
    "Autonoe": {"gender": "female", "description": "Bright, Middle pitch"},
    "Enceladus": {"gender": "male", "description": "Breathy, Lower pitch"},
    "Iapetus": {"gender": "male", "description": "Clear, Lower middle pitch"},
    "Umbriel": {"gender": "male", "description": "Easy-going, Lower middle pitch"},
    "Algieba": {"gender": "male", "description": "Smooth, Lower pitch"},
    "Despina": {"gender": "female", "description": "Smooth, Middle pitch"},
    "Erinome": {"gender": "female", "description": "Clear, Middle pitch"},
    "Algenib": {"gender": "male", "description": "Gravelly, Lower pitch"},
    "Rasalgethi": {"gender": "male", "description": "Informative, Middle pitch"},
    "Laomedeia": {"gender": "female", "description": "Upbeat, Higher pitch"},
    "Achernar": {"gender": "female", "description": "Soft, Higher pitch"},
    "Alnilam": {"gender": "male", "description": "Firm, Lower middle pitch"},
    "Schedar": {"gender": "male", "description": "Even, Lower middle pitch"},
    "Gacrux": {"gender": "female", "description": "Mature, Middle pitch"},
    "Pulcherrima": {"gender": "female", "description": "Forward, Middle pitch"},
    "Achird": {"gender": "male", "description": "Friendly, Lower middle pitch"},
    "Zubenelgenubi": {"gender": "male", "description": "Casual, Lower middle pitch"},
    "Vindemiatrix": {"gender": "female", "description": "Gentle, Middle pitch"},
    "Sadachbia": {"gender": "male", "description": "Lively, Lower pitch"},
    "Sadaltager": {"gender": "male", "description": "Knowledgeable, Middle pitch"},
    "Sulafat": {"gender": "female", "description": "Warm, Middle pitch"},
}

# Plain list of voice names (the dict's keys, in order), kept as its own
# constant because the parameter schemas pass it as `enum_values`.
GEMINI_TTS_VOICES = list(GEMINI_VOICE_DETAILS)

# One "- Name (gender): description" bullet per voice; the joined text is
# appended to the voice parameter descriptions.
_voice_descriptions_list = [
    f"- {voice} ({info['gender']}): {info['description']}"
    for voice, info in GEMINI_VOICE_DETAILS.items()
]
GEMINI_VOICE_METADATA_DESC = "\n\nDetailed Voice Options:\n" + "\n".join(_voice_descriptions_list)
|
|
60
|
+
|
|
23
61
|
|
|
24
62
|
OPENAI_TTS_VOICES = [
|
|
25
63
|
"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx",
|
|
@@ -64,7 +102,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
|
|
|
64
102
|
ParameterDefinition(
|
|
65
103
|
name="voice",
|
|
66
104
|
param_type=ParameterType.ENUM,
|
|
67
|
-
description="The voice to assign to this speaker.",
|
|
105
|
+
description="The voice to assign to this speaker." + GEMINI_VOICE_METADATA_DESC,
|
|
68
106
|
enum_values=GEMINI_TTS_VOICES,
|
|
69
107
|
required=True
|
|
70
108
|
)
|
|
@@ -84,7 +122,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
|
|
|
84
122
|
param_type=ParameterType.ENUM,
|
|
85
123
|
default_value="Kore",
|
|
86
124
|
enum_values=GEMINI_TTS_VOICES,
|
|
87
|
-
description="The voice to use for single-speaker generation."
|
|
125
|
+
description="The voice to use for single-speaker generation." + GEMINI_VOICE_METADATA_DESC
|
|
88
126
|
),
|
|
89
127
|
ParameterDefinition(
|
|
90
128
|
name="style_instructions",
|
|
@@ -102,7 +140,7 @@ class AudioClientFactory(metaclass=SingletonMeta):
|
|
|
102
140
|
gemini_tts_model = AudioModel(
|
|
103
141
|
name="gemini-2.5-flash-tts",
|
|
104
142
|
value="gemini-2.5-flash-preview-tts",
|
|
105
|
-
provider=MultimediaProvider.
|
|
143
|
+
provider=MultimediaProvider.GEMINI,
|
|
106
144
|
client_class=GeminiAudioClient,
|
|
107
145
|
parameter_schema=gemini_tts_schema
|
|
108
146
|
)
|
|
@@ -79,7 +79,8 @@ class AudioModel(metaclass=AudioModelMeta):
|
|
|
79
79
|
"""Returns the unique identifier for the model."""
|
|
80
80
|
if self.runtime == MultimediaRuntime.AUTOBYTEUS and self.host_url:
|
|
81
81
|
try:
|
|
82
|
-
|
|
82
|
+
parsed = urlparse(self.host_url)
|
|
83
|
+
host = parsed.netloc or parsed.hostname or self.host_url
|
|
83
84
|
return f"{self.name}@{host}"
|
|
84
85
|
except Exception:
|
|
85
86
|
return f"{self.name}@{self.host_url}" # Fallback
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import uuid
|
|
2
3
|
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
3
4
|
from autobyteus.clients import AutobyteusClient
|
|
4
5
|
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
14
|
class AutobyteusImageClient(BaseImageClient):
|
|
14
15
|
"""
|
|
15
16
|
An image client that connects to an Autobyteus LLM server instance for image tasks.
|
|
17
|
+
Maintains a persistent session ID for stateful interactions (e.g. conversational editing).
|
|
16
18
|
"""
|
|
17
19
|
|
|
18
20
|
def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
|
|
@@ -21,7 +23,9 @@ class AutobyteusImageClient(BaseImageClient):
|
|
|
21
23
|
raise ValueError("AutobyteusImageClient requires a host_url in its ImageModel.")
|
|
22
24
|
|
|
23
25
|
self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
|
|
24
|
-
|
|
26
|
+
self.session_id = str(uuid.uuid4())
|
|
27
|
+
logger.info(f"AutobyteusImageClient initialized for model '{self.model.name}' "
|
|
28
|
+
f"on host '{model.host_url}' with session_id '{self.session_id}'.")
|
|
25
29
|
|
|
26
30
|
async def generate_image(
|
|
27
31
|
self,
|
|
@@ -72,7 +76,7 @@ class AutobyteusImageClient(BaseImageClient):
|
|
|
72
76
|
) -> ImageGenerationResponse:
|
|
73
77
|
"""Internal helper to call the remote server."""
|
|
74
78
|
try:
|
|
75
|
-
logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url}")
|
|
79
|
+
logger.info(f"Sending image generation request for model '{self.model.name}' to {self.model.host_url} (Session: {self.session_id})")
|
|
76
80
|
|
|
77
81
|
# The model name for the remote server is the `value`, not the unique `model_identifier`
|
|
78
82
|
model_name_for_server = self.model.name
|
|
@@ -84,7 +88,8 @@ class AutobyteusImageClient(BaseImageClient):
|
|
|
84
88
|
prompt=prompt,
|
|
85
89
|
input_image_urls=input_image_urls,
|
|
86
90
|
mask_url=mask_url,
|
|
87
|
-
generation_config=generation_config
|
|
91
|
+
generation_config=generation_config,
|
|
92
|
+
session_id=self.session_id
|
|
88
93
|
)
|
|
89
94
|
|
|
90
95
|
image_urls = response_data.get("image_urls", [])
|
|
@@ -98,7 +103,16 @@ class AutobyteusImageClient(BaseImageClient):
|
|
|
98
103
|
raise
|
|
99
104
|
|
|
100
105
|
async def cleanup(self):
    """
    Ask the server to drop this client's image session, then close the HTTP client.

    A failure of the remote session cleanup is logged and not re-raised; the
    underlying client is closed either way.
    """
    client = self.autobyteus_client
    if client:
        try:
            logger.info(f"Notifying server to cleanup image session '{self.session_id}'...")
            await client.cleanup_image_session(self.session_id)
        except Exception as e:
            # Best effort: the remote cleanup must not block the local close.
            logger.error(f"Failed to cleanup remote image session '{self.session_id}': {e}")
        finally:
            await client.close()

    logger.debug("AutobyteusImageClient cleaned up.")
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import base64
|
|
3
|
-
import os
|
|
4
3
|
from typing import Optional, List, Dict, Any, TYPE_CHECKING
|
|
5
|
-
from google import
|
|
6
|
-
from PIL import Image
|
|
7
|
-
import requests
|
|
4
|
+
from google.genai import types as genai_types
|
|
8
5
|
|
|
9
6
|
from autobyteus.multimedia.image.base_image_client import BaseImageClient
|
|
10
7
|
from autobyteus.multimedia.utils.response_types import ImageGenerationResponse
|
|
11
8
|
from autobyteus.multimedia.utils.api_utils import load_image_from_url
|
|
9
|
+
from autobyteus.utils.gemini_helper import initialize_gemini_client_with_runtime
|
|
10
|
+
from autobyteus.utils.gemini_model_mapping import resolve_model_for_runtime
|
|
12
11
|
|
|
13
12
|
if TYPE_CHECKING:
|
|
14
13
|
from autobyteus.multimedia.image.image_model import ImageModel
|
|
@@ -21,17 +20,16 @@ class GeminiImageClient(BaseImageClient):
|
|
|
21
20
|
An image client that uses Google's Gemini models for image generation tasks.
|
|
22
21
|
|
|
23
22
|
**Setup Requirements:**
|
|
24
|
-
1. **
|
|
23
|
+
1. **Vertex AI Express Mode:** Set `VERTEX_AI_API_KEY`.
|
|
24
|
+
2. **Vertex AI Mode:** Set `VERTEX_AI_PROJECT` and `VERTEX_AI_LOCATION`.
|
|
25
|
+
3. **AI Studio Mode:** Set `GEMINI_API_KEY`.
|
|
25
26
|
"""
|
|
26
27
|
|
|
27
28
|
def __init__(self, model: "ImageModel", config: "MultimediaConfig"):
|
|
28
29
|
super().__init__(model, config)
|
|
29
|
-
api_key = os.getenv("GEMINI_API_KEY")
|
|
30
|
-
if not api_key:
|
|
31
|
-
raise ValueError("Please set the GEMINI_API_KEY environment variable.")
|
|
32
30
|
|
|
33
31
|
try:
|
|
34
|
-
self.client =
|
|
32
|
+
self.client, self.runtime_info = initialize_gemini_client_with_runtime()
|
|
35
33
|
self.async_client = self.client.aio
|
|
36
34
|
logger.info(f"GeminiImageClient initialized for model '{self.model.name}'.")
|
|
37
35
|
except Exception as e:
|
|
@@ -60,16 +58,40 @@ class GeminiImageClient(BaseImageClient):
|
|
|
60
58
|
except Exception as e:
|
|
61
59
|
logger.error(f"Skipping image at '{url}' due to loading error: {e}")
|
|
62
60
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
61
|
+
config_dict: Dict[str, Any] = {}
|
|
62
|
+
if self.config and self.config.params:
|
|
63
|
+
config_dict.update(self.config.params)
|
|
64
|
+
if generation_config:
|
|
65
|
+
config_dict.update(generation_config)
|
|
66
|
+
if "response_modalities" not in config_dict:
|
|
67
|
+
if getattr(self, "runtime_info", None) and self.runtime_info.runtime == "vertex":
|
|
68
|
+
config_dict["response_modalities"] = ["TEXT", "IMAGE"]
|
|
69
|
+
else:
|
|
70
|
+
config_dict["response_modalities"] = ["IMAGE"]
|
|
71
|
+
config = genai_types.GenerateContentConfig(**config_dict)
|
|
72
|
+
|
|
73
|
+
# FIX: Removed 'models/' prefix from model_name to support Vertex AI
|
|
74
|
+
runtime_adjusted_model = resolve_model_for_runtime(
|
|
75
|
+
self.model.value,
|
|
76
|
+
modality="image",
|
|
77
|
+
runtime=getattr(self, "runtime_info", None) and self.runtime_info.runtime,
|
|
78
|
+
)
|
|
79
|
+
if runtime_adjusted_model != self.model.value:
|
|
80
|
+
logger.info(
|
|
81
|
+
"Using runtime-adjusted Gemini image model '%s' (requested '%s').",
|
|
82
|
+
runtime_adjusted_model,
|
|
83
|
+
self.model.value,
|
|
84
|
+
)
|
|
85
|
+
response = await self.async_client.models.generate_content(
|
|
86
|
+
model=runtime_adjusted_model,
|
|
87
|
+
contents=content,
|
|
88
|
+
config=config,
|
|
89
|
+
)
|
|
68
90
|
|
|
69
91
|
|
|
70
92
|
image_urls = []
|
|
71
|
-
for part in response.parts:
|
|
72
|
-
if part.inline_data and "image" in part.inline_data.mime_type:
|
|
93
|
+
for part in response.parts or []:
|
|
94
|
+
if part.inline_data and part.inline_data.mime_type and "image" in part.inline_data.mime_type:
|
|
73
95
|
image_bytes = part.inline_data.data
|
|
74
96
|
base64_image = base64.b64encode(image_bytes).decode("utf-8")
|
|
75
97
|
data_uri = f"data:{part.inline_data.mime_type};base64,{base64_image}"
|
|
@@ -77,7 +99,7 @@ class GeminiImageClient(BaseImageClient):
|
|
|
77
99
|
|
|
78
100
|
if not image_urls:
|
|
79
101
|
# Check for a safety-related refusal to generate content
|
|
80
|
-
if response.prompt_feedback.block_reason:
|
|
102
|
+
if response.prompt_feedback and response.prompt_feedback.block_reason:
|
|
81
103
|
reason = response.prompt_feedback.block_reason.name
|
|
82
104
|
logger.error(f"Image generation blocked due to safety settings. Reason: {reason}")
|
|
83
105
|
raise ValueError(f"Image generation failed due to safety settings: {reason}")
|