ag2 0.9.1a1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic. Click here for more details.
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info}/METADATA +264 -73
- ag2-0.9.1.post0.dist-info/RECORD +392 -0
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info}/WHEEL +1 -2
- autogen/__init__.py +89 -0
- autogen/_website/__init__.py +3 -0
- autogen/_website/generate_api_references.py +427 -0
- autogen/_website/generate_mkdocs.py +1174 -0
- autogen/_website/notebook_processor.py +476 -0
- autogen/_website/process_notebooks.py +656 -0
- autogen/_website/utils.py +412 -0
- autogen/agentchat/__init__.py +44 -0
- autogen/agentchat/agent.py +182 -0
- autogen/agentchat/assistant_agent.py +85 -0
- autogen/agentchat/chat.py +309 -0
- autogen/agentchat/contrib/__init__.py +5 -0
- autogen/agentchat/contrib/agent_eval/README.md +7 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
- autogen/agentchat/contrib/agent_eval/task.py +42 -0
- autogen/agentchat/contrib/agent_optimizer.py +429 -0
- autogen/agentchat/contrib/capabilities/__init__.py +5 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
- autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
- autogen/agentchat/contrib/capabilities/teachability.py +393 -0
- autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
- autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
- autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
- autogen/agentchat/contrib/capabilities/transforms.py +566 -0
- autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
- autogen/agentchat/contrib/capabilities/vision_capability.py +214 -0
- autogen/agentchat/contrib/captainagent/__init__.py +9 -0
- autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
- autogen/agentchat/contrib/captainagent/captainagent.py +512 -0
- autogen/agentchat/contrib/captainagent/tool_retriever.py +335 -0
- autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
- autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
- autogen/agentchat/contrib/graph_rag/document.py +29 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +170 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +268 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
- autogen/agentchat/contrib/img_utils.py +397 -0
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
- autogen/agentchat/contrib/llava_agent.py +187 -0
- autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
- autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +324 -0
- autogen/agentchat/contrib/rag/__init__.py +10 -0
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +272 -0
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +198 -0
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +329 -0
- autogen/agentchat/contrib/rag/query_engine.py +74 -0
- autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +703 -0
- autogen/agentchat/contrib/society_of_mind_agent.py +199 -0
- autogen/agentchat/contrib/swarm_agent.py +1425 -0
- autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
- autogen/agentchat/contrib/vectordb/__init__.py +5 -0
- autogen/agentchat/contrib/vectordb/base.py +232 -0
- autogen/agentchat/contrib/vectordb/chromadb.py +315 -0
- autogen/agentchat/contrib/vectordb/couchbase.py +407 -0
- autogen/agentchat/contrib/vectordb/mongodb.py +550 -0
- autogen/agentchat/contrib/vectordb/pgvectordb.py +928 -0
- autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
- autogen/agentchat/contrib/vectordb/utils.py +126 -0
- autogen/agentchat/contrib/web_surfer.py +303 -0
- autogen/agentchat/conversable_agent.py +4020 -0
- autogen/agentchat/group/__init__.py +64 -0
- autogen/agentchat/group/available_condition.py +91 -0
- autogen/agentchat/group/context_condition.py +77 -0
- autogen/agentchat/group/context_expression.py +238 -0
- autogen/agentchat/group/context_str.py +41 -0
- autogen/agentchat/group/context_variables.py +192 -0
- autogen/agentchat/group/group_tool_executor.py +202 -0
- autogen/agentchat/group/group_utils.py +591 -0
- autogen/agentchat/group/handoffs.py +244 -0
- autogen/agentchat/group/llm_condition.py +93 -0
- autogen/agentchat/group/multi_agent_chat.py +237 -0
- autogen/agentchat/group/on_condition.py +58 -0
- autogen/agentchat/group/on_context_condition.py +54 -0
- autogen/agentchat/group/patterns/__init__.py +18 -0
- autogen/agentchat/group/patterns/auto.py +159 -0
- autogen/agentchat/group/patterns/manual.py +176 -0
- autogen/agentchat/group/patterns/pattern.py +288 -0
- autogen/agentchat/group/patterns/random.py +106 -0
- autogen/agentchat/group/patterns/round_robin.py +117 -0
- autogen/agentchat/group/reply_result.py +26 -0
- autogen/agentchat/group/speaker_selection_result.py +41 -0
- autogen/agentchat/group/targets/__init__.py +4 -0
- autogen/agentchat/group/targets/group_chat_target.py +132 -0
- autogen/agentchat/group/targets/group_manager_target.py +151 -0
- autogen/agentchat/group/targets/transition_target.py +413 -0
- autogen/agentchat/group/targets/transition_utils.py +6 -0
- autogen/agentchat/groupchat.py +1694 -0
- autogen/agentchat/realtime/__init__.py +3 -0
- autogen/agentchat/realtime/experimental/__init__.py +20 -0
- autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
- autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
- autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
- autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
- autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
- autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +190 -0
- autogen/agentchat/realtime/experimental/function_observer.py +85 -0
- autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
- autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
- autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
- autogen/agentchat/realtime/experimental/realtime_swarm.py +475 -0
- autogen/agentchat/realtime/experimental/websockets.py +21 -0
- autogen/agentchat/realtime_agent/__init__.py +21 -0
- autogen/agentchat/user_proxy_agent.py +111 -0
- autogen/agentchat/utils.py +206 -0
- autogen/agents/__init__.py +3 -0
- autogen/agents/contrib/__init__.py +10 -0
- autogen/agents/contrib/time/__init__.py +8 -0
- autogen/agents/contrib/time/time_reply_agent.py +73 -0
- autogen/agents/contrib/time/time_tool_agent.py +51 -0
- autogen/agents/experimental/__init__.py +27 -0
- autogen/agents/experimental/deep_research/__init__.py +7 -0
- autogen/agents/experimental/deep_research/deep_research.py +52 -0
- autogen/agents/experimental/discord/__init__.py +7 -0
- autogen/agents/experimental/discord/discord.py +66 -0
- autogen/agents/experimental/document_agent/__init__.py +19 -0
- autogen/agents/experimental/document_agent/chroma_query_engine.py +316 -0
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +118 -0
- autogen/agents/experimental/document_agent/document_agent.py +461 -0
- autogen/agents/experimental/document_agent/document_conditions.py +50 -0
- autogen/agents/experimental/document_agent/document_utils.py +380 -0
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +220 -0
- autogen/agents/experimental/document_agent/parser_utils.py +130 -0
- autogen/agents/experimental/document_agent/url_utils.py +426 -0
- autogen/agents/experimental/reasoning/__init__.py +7 -0
- autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
- autogen/agents/experimental/slack/__init__.py +7 -0
- autogen/agents/experimental/slack/slack.py +73 -0
- autogen/agents/experimental/telegram/__init__.py +7 -0
- autogen/agents/experimental/telegram/telegram.py +77 -0
- autogen/agents/experimental/websurfer/__init__.py +7 -0
- autogen/agents/experimental/websurfer/websurfer.py +62 -0
- autogen/agents/experimental/wikipedia/__init__.py +7 -0
- autogen/agents/experimental/wikipedia/wikipedia.py +90 -0
- autogen/browser_utils.py +309 -0
- autogen/cache/__init__.py +10 -0
- autogen/cache/abstract_cache_base.py +75 -0
- autogen/cache/cache.py +203 -0
- autogen/cache/cache_factory.py +88 -0
- autogen/cache/cosmos_db_cache.py +144 -0
- autogen/cache/disk_cache.py +102 -0
- autogen/cache/in_memory_cache.py +58 -0
- autogen/cache/redis_cache.py +123 -0
- autogen/code_utils.py +596 -0
- autogen/coding/__init__.py +22 -0
- autogen/coding/base.py +119 -0
- autogen/coding/docker_commandline_code_executor.py +268 -0
- autogen/coding/factory.py +47 -0
- autogen/coding/func_with_reqs.py +202 -0
- autogen/coding/jupyter/__init__.py +23 -0
- autogen/coding/jupyter/base.py +36 -0
- autogen/coding/jupyter/docker_jupyter_server.py +167 -0
- autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
- autogen/coding/jupyter/import_utils.py +82 -0
- autogen/coding/jupyter/jupyter_client.py +231 -0
- autogen/coding/jupyter/jupyter_code_executor.py +160 -0
- autogen/coding/jupyter/local_jupyter_server.py +172 -0
- autogen/coding/local_commandline_code_executor.py +405 -0
- autogen/coding/markdown_code_extractor.py +45 -0
- autogen/coding/utils.py +56 -0
- autogen/doc_utils.py +34 -0
- autogen/events/__init__.py +7 -0
- autogen/events/agent_events.py +1010 -0
- autogen/events/base_event.py +99 -0
- autogen/events/client_events.py +167 -0
- autogen/events/helpers.py +36 -0
- autogen/events/print_event.py +46 -0
- autogen/exception_utils.py +73 -0
- autogen/extensions/__init__.py +5 -0
- autogen/fast_depends/__init__.py +16 -0
- autogen/fast_depends/_compat.py +80 -0
- autogen/fast_depends/core/__init__.py +14 -0
- autogen/fast_depends/core/build.py +225 -0
- autogen/fast_depends/core/model.py +576 -0
- autogen/fast_depends/dependencies/__init__.py +15 -0
- autogen/fast_depends/dependencies/model.py +29 -0
- autogen/fast_depends/dependencies/provider.py +39 -0
- autogen/fast_depends/library/__init__.py +10 -0
- autogen/fast_depends/library/model.py +46 -0
- autogen/fast_depends/py.typed +6 -0
- autogen/fast_depends/schema.py +66 -0
- autogen/fast_depends/use.py +280 -0
- autogen/fast_depends/utils.py +187 -0
- autogen/formatting_utils.py +83 -0
- autogen/function_utils.py +13 -0
- autogen/graph_utils.py +178 -0
- autogen/import_utils.py +526 -0
- autogen/interop/__init__.py +22 -0
- autogen/interop/crewai/__init__.py +7 -0
- autogen/interop/crewai/crewai.py +88 -0
- autogen/interop/interoperability.py +71 -0
- autogen/interop/interoperable.py +46 -0
- autogen/interop/langchain/__init__.py +8 -0
- autogen/interop/langchain/langchain_chat_model_factory.py +155 -0
- autogen/interop/langchain/langchain_tool.py +82 -0
- autogen/interop/litellm/__init__.py +7 -0
- autogen/interop/litellm/litellm_config_factory.py +113 -0
- autogen/interop/pydantic_ai/__init__.py +7 -0
- autogen/interop/pydantic_ai/pydantic_ai.py +168 -0
- autogen/interop/registry.py +69 -0
- autogen/io/__init__.py +15 -0
- autogen/io/base.py +151 -0
- autogen/io/console.py +56 -0
- autogen/io/processors/__init__.py +12 -0
- autogen/io/processors/base.py +21 -0
- autogen/io/processors/console_event_processor.py +56 -0
- autogen/io/run_response.py +293 -0
- autogen/io/thread_io_stream.py +63 -0
- autogen/io/websockets.py +213 -0
- autogen/json_utils.py +43 -0
- autogen/llm_config.py +379 -0
- autogen/logger/__init__.py +11 -0
- autogen/logger/base_logger.py +128 -0
- autogen/logger/file_logger.py +261 -0
- autogen/logger/logger_factory.py +42 -0
- autogen/logger/logger_utils.py +57 -0
- autogen/logger/sqlite_logger.py +523 -0
- autogen/math_utils.py +339 -0
- autogen/mcp/__init__.py +7 -0
- autogen/mcp/mcp_client.py +208 -0
- autogen/messages/__init__.py +7 -0
- autogen/messages/agent_messages.py +948 -0
- autogen/messages/base_message.py +107 -0
- autogen/messages/client_messages.py +171 -0
- autogen/messages/print_message.py +49 -0
- autogen/oai/__init__.py +53 -0
- autogen/oai/anthropic.py +714 -0
- autogen/oai/bedrock.py +628 -0
- autogen/oai/cerebras.py +299 -0
- autogen/oai/client.py +1435 -0
- autogen/oai/client_utils.py +169 -0
- autogen/oai/cohere.py +479 -0
- autogen/oai/gemini.py +990 -0
- autogen/oai/gemini_types.py +129 -0
- autogen/oai/groq.py +305 -0
- autogen/oai/mistral.py +303 -0
- autogen/oai/oai_models/__init__.py +11 -0
- autogen/oai/oai_models/_models.py +16 -0
- autogen/oai/oai_models/chat_completion.py +87 -0
- autogen/oai/oai_models/chat_completion_audio.py +32 -0
- autogen/oai/oai_models/chat_completion_message.py +86 -0
- autogen/oai/oai_models/chat_completion_message_tool_call.py +37 -0
- autogen/oai/oai_models/chat_completion_token_logprob.py +63 -0
- autogen/oai/oai_models/completion_usage.py +60 -0
- autogen/oai/ollama.py +643 -0
- autogen/oai/openai_utils.py +881 -0
- autogen/oai/together.py +370 -0
- autogen/retrieve_utils.py +491 -0
- autogen/runtime_logging.py +160 -0
- autogen/token_count_utils.py +267 -0
- autogen/tools/__init__.py +20 -0
- autogen/tools/contrib/__init__.py +9 -0
- autogen/tools/contrib/time/__init__.py +7 -0
- autogen/tools/contrib/time/time.py +41 -0
- autogen/tools/dependency_injection.py +254 -0
- autogen/tools/experimental/__init__.py +43 -0
- autogen/tools/experimental/browser_use/__init__.py +7 -0
- autogen/tools/experimental/browser_use/browser_use.py +161 -0
- autogen/tools/experimental/crawl4ai/__init__.py +7 -0
- autogen/tools/experimental/crawl4ai/crawl4ai.py +153 -0
- autogen/tools/experimental/deep_research/__init__.py +7 -0
- autogen/tools/experimental/deep_research/deep_research.py +328 -0
- autogen/tools/experimental/duckduckgo/__init__.py +7 -0
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +109 -0
- autogen/tools/experimental/google/__init__.py +14 -0
- autogen/tools/experimental/google/authentication/__init__.py +11 -0
- autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
- autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
- autogen/tools/experimental/google/drive/__init__.py +9 -0
- autogen/tools/experimental/google/drive/drive_functions.py +124 -0
- autogen/tools/experimental/google/drive/toolkit.py +88 -0
- autogen/tools/experimental/google/model.py +17 -0
- autogen/tools/experimental/google/toolkit_protocol.py +19 -0
- autogen/tools/experimental/google_search/__init__.py +8 -0
- autogen/tools/experimental/google_search/google_search.py +93 -0
- autogen/tools/experimental/google_search/youtube_search.py +181 -0
- autogen/tools/experimental/messageplatform/__init__.py +17 -0
- autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/discord/discord.py +288 -0
- autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/slack/slack.py +391 -0
- autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/telegram/telegram.py +275 -0
- autogen/tools/experimental/perplexity/__init__.py +7 -0
- autogen/tools/experimental/perplexity/perplexity_search.py +260 -0
- autogen/tools/experimental/tavily/__init__.py +7 -0
- autogen/tools/experimental/tavily/tavily_search.py +183 -0
- autogen/tools/experimental/web_search_preview/__init__.py +7 -0
- autogen/tools/experimental/web_search_preview/web_search_preview.py +114 -0
- autogen/tools/experimental/wikipedia/__init__.py +7 -0
- autogen/tools/experimental/wikipedia/wikipedia.py +287 -0
- autogen/tools/function_utils.py +411 -0
- autogen/tools/tool.py +187 -0
- autogen/tools/toolkit.py +86 -0
- autogen/types.py +29 -0
- autogen/version.py +7 -0
- ag2-0.9.1a1.dist-info/RECORD +0 -6
- ag2-0.9.1a1.dist-info/top_level.txt +0 -1
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/LICENSE +0 -0
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/NOTICE.md +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .audio_adapters import TwilioAudioAdapter, WebSocketAudioAdapter
|
|
6
|
+
from .audio_observer import AudioObserver
|
|
7
|
+
from .function_observer import FunctionObserver
|
|
8
|
+
from .realtime_agent import RealtimeAgent
|
|
9
|
+
from .realtime_observer import RealtimeObserver
|
|
10
|
+
from .realtime_swarm import register_swarm
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AudioObserver",
|
|
14
|
+
"FunctionObserver",
|
|
15
|
+
"RealtimeAgent",
|
|
16
|
+
"RealtimeObserver",
|
|
17
|
+
"TwilioAudioAdapter",
|
|
18
|
+
"WebSocketAudioAdapter",
|
|
19
|
+
"register_swarm",
|
|
20
|
+
]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .twilio_audio_adapter import TwilioAudioAdapter
|
|
6
|
+
from .websocket_audio_adapter import WebSocketAudioAdapter
|
|
7
|
+
|
|
8
|
+
__all__ = ["TwilioAudioAdapter", "WebSocketAudioAdapter"]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
from logging import Logger
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
|
+
|
|
10
|
+
from .....doc_utils import export_module
|
|
11
|
+
from ..realtime_events import AudioDelta, RealtimeEvent, SpeechStarted
|
|
12
|
+
from ..realtime_observer import RealtimeObserver
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from ..websockets import WebSocketProtocol as WebSocket
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Realtime event type names.
# NOTE(review): not referenced by any code visible in this module — presumably
# meant for selective event logging; confirm it is still needed.
LOG_EVENT_TYPES = [
    "error",
    "response.content.done",
    "rate_limits.updated",
    "response.done",
    "input_audio_buffer.committed",
    "input_audio_buffer.speech_stopped",
    "input_audio_buffer.speech_started",
    "session.created",
]
# When True, the adapter additionally logs the timestamp arithmetic it uses
# when recording a response start and when truncating an interrupted response.
SHOW_TIMING_MATH = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@export_module("autogen.agentchat.realtime.experimental")
class TwilioAudioAdapter(RealtimeObserver):
    """Adapter for streaming audio from Twilio to OpenAI Realtime API and vice versa.

    ``run_loop`` reads Twilio messages from the websocket and forwards caller
    audio to the realtime client. ``on_event`` relays audio deltas back to
    Twilio and interrupts the current response when caller speech is detected.

    ``self.logger`` and ``self.realtime_client`` are not defined in this class;
    they are expected to be provided by ``RealtimeObserver``.
    """

    def __init__(self, websocket: "WebSocket", *, logger: Optional[Logger] = None) -> None:
        """Adapter for streaming audio from Twilio to OpenAI Realtime API and vice versa.

        Args:
            websocket: the websocket connection to the Twilio service
            logger: the logger to use for logging events
        """
        super().__init__(logger=logger)
        self.websocket = websocket

        # Connection specific state
        # Stream id taken from Twilio's "start" message; None until one arrives.
        self.stream_sid = None
        # Timestamp carried by the most recent Twilio "media" message.
        self.latest_media_timestamp = 0
        # Item id of the most recent audio delta that carried one.
        self.last_assistant_item: Optional[str] = None
        # One entry per mark sent; one entry is removed per incoming "mark" message.
        self.mark_queue: list[str] = []
        # latest_media_timestamp at the moment the current response's first
        # audio delta was relayed; None when no response is in progress.
        self.response_start_timestamp_twilio: Optional[int] = None

    async def on_event(self, event: RealtimeEvent) -> None:
        """Receive events from the OpenAI Realtime API, send audio back to Twilio.

        Args:
            event: the realtime event to process. ``AudioDelta`` events are
                relayed to Twilio as "media" messages; ``SpeechStarted`` events
                interrupt the response in progress, if any.
        """
        logger = self.logger

        if isinstance(event, AudioDelta):
            # Decode and re-encode the delta; malformed base64 raises here,
            # before anything is sent to Twilio.
            audio_payload = base64.b64encode(base64.b64decode(event.delta)).decode("utf-8")
            audio_delta = {"event": "media", "streamSid": self.stream_sid, "media": {"payload": audio_payload}}
            await self.websocket.send_json(audio_delta)

            # The first delta of a response fixes the reference point used
            # later to compute how much audio had elapsed at interruption.
            if self.response_start_timestamp_twilio is None:
                self.response_start_timestamp_twilio = self.latest_media_timestamp
                if SHOW_TIMING_MATH:
                    logger.info(f"Setting start timestamp for new response: {self.response_start_timestamp_twilio}ms")

            # Update last_assistant_item safely
            if event.item_id:
                self.last_assistant_item = event.item_id

            await self.send_mark()

        # Trigger an interruption. Your use case might work better using `input_audio_buffer.speech_stopped`, or combining the two.
        if isinstance(event, SpeechStarted):
            logger.info("Speech start detected.")
            if self.last_assistant_item:
                logger.info(f"Interrupting response with id: {self.last_assistant_item}")
                await self.handle_speech_started_event()

    async def handle_speech_started_event(self) -> None:
        """Handle interruption when the caller's speech starts.

        Acts only while marks are outstanding and a response start timestamp
        is known. In that case it asks the realtime client to truncate the
        last assistant item at the elapsed time, sends a "clear" message to
        Twilio, and resets the per-response state.
        """
        logger = self.logger

        logger.info("Handling speech started event.")
        if self.mark_queue and self.response_start_timestamp_twilio is not None:
            elapsed_time = self.latest_media_timestamp - self.response_start_timestamp_twilio
            if SHOW_TIMING_MATH:
                logger.info(
                    f"Calculating elapsed time for truncation: {self.latest_media_timestamp} - {self.response_start_timestamp_twilio} = {elapsed_time}ms"
                )

            if self.last_assistant_item:
                if SHOW_TIMING_MATH:
                    logger.info(f"Truncating item with ID: {self.last_assistant_item}, Truncated at: {elapsed_time}ms")

                await self.realtime_client.truncate_audio(
                    audio_end_ms=elapsed_time,
                    content_index=0,
                    item_id=self.last_assistant_item,
                )

            await self.websocket.send_json({"event": "clear", "streamSid": self.stream_sid})

            self.mark_queue.clear()
            self.last_assistant_item = None
            self.response_start_timestamp_twilio = None

    async def send_mark(self) -> None:
        """Send a mark of audio interruption to the Twilio websocket.

        Does nothing until a stream id is known. Each mark sent is also
        recorded in ``mark_queue``.
        """
        if self.stream_sid:
            mark_event = {"event": "mark", "streamSid": self.stream_sid, "mark": {"name": "responsePart"}}
            await self.websocket.send_json(mark_event)
            self.mark_queue.append("responsePart")

    async def run_loop(self) -> None:
        """Run the adapter loop.

        Reads text messages from the Twilio websocket until the iterator ends.
        "media" messages update the latest timestamp and forward the audio
        payload to the realtime client, "start" messages record the stream id
        and reset per-stream state, and "mark" messages remove one entry from
        ``mark_queue``. A failure while handling one message is logged and the
        loop continues with the next message.
        """
        logger = self.logger

        async for message in self.websocket.iter_text():
            try:
                data = json.loads(message)
                if data["event"] == "media":
                    self.latest_media_timestamp = int(data["media"]["timestamp"])
                    await self.realtime_client.send_audio(audio=data["media"]["payload"])
                elif data["event"] == "start":
                    self.stream_sid = data["start"]["streamSid"]
                    logger.info(f"Incoming stream has started {self.stream_sid}")
                    self.response_start_timestamp_twilio = None
                    self.latest_media_timestamp = 0
                    self.last_assistant_item = None
                elif data["event"] == "mark":
                    if self.mark_queue:
                        self.mark_queue.pop(0)
            except Exception as e:
                # exc_info attaches the traceback of the exception being
                # handled. stack_info would only record the call stack of this
                # logging call, which does not show where the failure happened.
                logger.warning(f"Error processing Twilio message: {e}", exc_info=True)

    async def initialize_session(self) -> None:
        """Control initial session with OpenAI.

        Sets both the input and output audio formats of the session to
        ``g711_ulaw``.
        """
        session_update = {
            "input_audio_format": "g711_ulaw",
            "output_audio_format": "g711_ulaw",
        }
        await self.realtime_client.session_update(session_update)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Defined only while type checking (typing.TYPE_CHECKING is False at runtime),
# so this function never exists in the running program.
# NOTE(review): presumably here so a static type checker verifies that
# TwilioAudioAdapter(websocket) is accepted where a RealtimeObserver is
# expected — confirm.
if TYPE_CHECKING:

    def twilio_audio_adapter(websocket: "WebSocket") -> RealtimeObserver:
        return TwilioAudioAdapter(websocket)
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
from logging import Logger
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
|
+
|
|
10
|
+
from .....doc_utils import export_module
|
|
11
|
+
from ..realtime_events import AudioDelta, RealtimeEvent, SpeechStarted
|
|
12
|
+
from ..realtime_observer import RealtimeObserver
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from ..websockets import WebSocketProtocol as WebSocket
|
|
16
|
+
|
|
17
|
+
# Realtime event type names.
# NOTE(review): not referenced by any code visible in this module — presumably
# meant for selective event logging; confirm it is still needed.
LOG_EVENT_TYPES = [
    "error",
    "response.content.done",
    "rate_limits.updated",
    "response.done",
    "input_audio_buffer.committed",
    "input_audio_buffer.speech_stopped",
    "input_audio_buffer.speech_started",
    "session.created",
]
# When True, the adapter additionally logs the timestamp arithmetic it uses
# when recording a response start and when truncating an interrupted response.
SHOW_TIMING_MATH = False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@export_module("autogen.agentchat.realtime.experimental")
|
|
31
|
+
class WebSocketAudioAdapter(RealtimeObserver):
|
|
32
|
+
def __init__(self, websocket: "WebSocket", *, logger: Optional[Logger] = None) -> None:
    """Adapter that sends audio from the OpenAI Realtime API back over a websocket.

    Args:
        websocket (WebSocket): The websocket connection.
        logger (Logger): The logger for the observer.
    """
    super().__init__(logger=logger)
    self.websocket = websocket

    # Connection specific state
    # Sent as "streamSid" in every outgoing message; send_mark is a no-op while unset.
    self.stream_sid = None
    # Reference clock for the truncation arithmetic; it is not updated in the
    # methods shown here — presumably run_loop sets it, TODO confirm.
    self.latest_media_timestamp = 0
    # Item id of the most recent audio delta that carried one.
    self.last_assistant_item: Optional[str] = None
    # One entry per mark sent; cleared when a response is interrupted.
    self.mark_queue: list[str] = []
    # latest_media_timestamp at the moment the current response's first audio
    # delta was relayed; None when no response is in progress.
    self.response_start_timestamp_socket: Optional[int] = None
|
|
48
|
+
|
|
49
|
+
async def on_event(self, event: RealtimeEvent) -> None:
    """Relay realtime audio to the websocket and react to detected caller speech.

    Args:
        event: The realtime event to process. ``AudioDelta`` events are sent
            to the websocket as "media" messages; ``SpeechStarted`` events
            interrupt the response in progress, if any.
    """
    log = self.logger

    if isinstance(event, AudioDelta):
        # Decode and re-encode the delta; malformed base64 raises here,
        # before anything is sent over the websocket.
        decoded_audio = base64.b64decode(event.delta)
        payload = base64.b64encode(decoded_audio).decode("utf-8")
        await self.websocket.send_json(
            {"event": "media", "streamSid": self.stream_sid, "media": {"payload": payload}}
        )

        # The first delta of a response fixes the reference point used later
        # to compute how much audio had elapsed at interruption.
        if self.response_start_timestamp_socket is None:
            self.response_start_timestamp_socket = self.latest_media_timestamp
            if SHOW_TIMING_MATH:
                log.info(f"Setting start timestamp for new response: {self.response_start_timestamp_socket}ms")

        # Only overwrite the remembered item when this delta actually names one.
        if event.item_id:
            self.last_assistant_item = event.item_id

        await self.send_mark()

    # Anything other than caller speech needs no interruption handling.
    if not isinstance(event, SpeechStarted):
        return

    log.info("Speech start detected.")
    if self.last_assistant_item:
        log.info(f"Interrupting response with id: {self.last_assistant_item}")
        await self.handle_speech_started_event()
|
|
75
|
+
|
|
76
|
+
async def handle_speech_started_event(self) -> None:
    """Interrupt the response in progress when the caller starts speaking.

    Does nothing beyond logging unless marks are outstanding and a response
    start timestamp has been recorded. Otherwise it asks the realtime client
    to truncate the last assistant item (if any), sends a "clear" event to
    the websocket and resets the per-response state.
    """
    log = self.logger
    log.info("Handling speech started event.")

    started_at = self.response_start_timestamp_socket
    if not self.mark_queue or started_at is None:
        return

    elapsed_time = self.latest_media_timestamp - started_at
    if SHOW_TIMING_MATH:
        log.info(
            f"Calculating elapsed time for truncation: {self.latest_media_timestamp} - {started_at} = {elapsed_time}ms"
        )

    if self.last_assistant_item:
        if SHOW_TIMING_MATH:
            log.info(f"Truncating item with ID: {self.last_assistant_item}, Truncated at: {elapsed_time}ms")

        await self.realtime_client.truncate_audio(
            audio_end_ms=elapsed_time,
            content_index=0,
            item_id=self.last_assistant_item,
        )

    await self.websocket.send_json({"event": "clear", "streamSid": self.stream_sid})

    # Reset the per-response bookkeeping.
    self.mark_queue.clear()
    self.last_assistant_item = None
    self.response_start_timestamp_socket = None
|
|
102
|
+
|
|
103
|
+
async def send_mark(self) -> None:
    """Send a "responsePart" mark event to the websocket and record it in the mark queue.

    Nothing is sent or queued while no stream sid is set.
    """
    sid = self.stream_sid
    if not sid:
        return

    await self.websocket.send_json({"event": "mark", "streamSid": sid, "mark": {"name": "responsePart"}})
    self.mark_queue.append("responsePart")
|
|
108
|
+
|
|
109
|
+
async def initialize_session(self) -> None:
    """Ask the realtime client to use pcm16 for both input and output audio."""
    await self.realtime_client.session_update({"input_audio_format": "pcm16", "output_audio_format": "pcm16"})
|
|
113
|
+
|
|
114
|
+
async def run_loop(self) -> None:
    """Read messages from the websocket and forward audio to the realtime client.

    Each text frame is parsed as JSON and dispatched on its "event" field:

    * "media": records the media timestamp and sends the payload to the realtime client.
    * "start": stores the stream sid and resets the per-stream timing state.
    * "mark":  drops the oldest outstanding mark, if any.

    A message that fails to process is logged with its traceback and skipped,
    so a single bad frame does not end the loop.
    """
    logger = self.logger
    async for message in self.websocket.iter_text():
        try:
            data = json.loads(message)
            event_type = data["event"]
            if event_type == "media":
                media = data["media"]
                self.latest_media_timestamp = int(media["timestamp"])
                await self.realtime_client.send_audio(audio=media["payload"])
            elif event_type == "start":
                self.stream_sid = data["start"]["streamSid"]
                logger.info(f"Incoming stream has started {self.stream_sid}")
                self.response_start_timestamp_socket = None
                self.latest_media_timestamp = 0
                self.last_assistant_item = None
            elif event_type == "mark":
                if self.mark_queue:
                    self.mark_queue.pop(0)
        except Exception as e:
            # exc_info attaches the traceback of the failure itself; stack_info
            # would only show where this logging call was made.
            logger.warning(f"Failed to process message: {e}", exc_info=True)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Static-only check: this function is never defined at runtime. It lets the type
# checker confirm that a WebSocketAudioAdapter is accepted as a RealtimeObserver.
if TYPE_CHECKING:

    def websocket_audio_adapter(websocket: "WebSocket") -> RealtimeObserver:
        adapter = WebSocketAudioAdapter(websocket)
        return adapter
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Optional
|
|
6
|
+
|
|
7
|
+
from ....doc_utils import export_module
|
|
8
|
+
from .realtime_events import InputAudioBufferDelta, RealtimeEvent
|
|
9
|
+
from .realtime_observer import RealtimeObserver
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from logging import Logger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@export_module("autogen.agentchat.realtime.experimental")
class AudioObserver(RealtimeObserver):
    """Observer that logs incoming user audio buffer events."""

    def __init__(self, *, logger: Optional["Logger"] = None) -> None:
        """Create the observer.

        Args:
            logger (Optional[Logger]): Logger handed to the base observer.
        """
        super().__init__(logger=logger)

    async def on_event(self, event: RealtimeEvent) -> None:
        """Log every input audio buffer delta; all other events are ignored.

        Args:
            event (RealtimeEvent): The event delivered to this observer.
        """
        if not isinstance(event, InputAudioBufferDelta):
            return
        self.logger.info("Received audio buffer delta")

    async def initialize_session(self) -> None:
        """Nothing to configure: this observer needs no session setup."""
        return None

    async def run_loop(self) -> None:
        """This observer has no loop of its own; returns immediately."""
        return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Static-only check: never executed at runtime. The assignment lets the type
# checker confirm that AudioObserver is accepted as a RealtimeObserver.
if TYPE_CHECKING:
    function_observer: RealtimeObserver = AudioObserver()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .gemini.client import GeminiRealtimeClient
|
|
6
|
+
from .oai.base_client import OpenAIRealtimeClient
|
|
7
|
+
from .realtime_client import RealtimeClientProtocol, Role, get_client
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"GeminiRealtimeClient",
|
|
11
|
+
"OpenAIRealtimeClient",
|
|
12
|
+
"RealtimeClientProtocol",
|
|
13
|
+
"Role",
|
|
14
|
+
"get_client",
|
|
15
|
+
]
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import AsyncGenerator
|
|
7
|
+
from contextlib import asynccontextmanager
|
|
8
|
+
from logging import Logger, getLogger
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
10
|
+
|
|
11
|
+
from ......doc_utils import export_module
|
|
12
|
+
from ......import_utils import optional_import_block, require_optional_import
|
|
13
|
+
from ......llm_config import LLMConfig
|
|
14
|
+
from ...realtime_events import AudioDelta, FunctionCall, RealtimeEvent, SessionCreated
|
|
15
|
+
from ..realtime_client import RealtimeClientBase, Role, register_realtime_client
|
|
16
|
+
|
|
17
|
+
with optional_import_block():
|
|
18
|
+
from websockets.asyncio.client import connect
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from websockets.asyncio.client import ClientConnection
|
|
23
|
+
|
|
24
|
+
from ..realtime_client import RealtimeClientProtocol
|
|
25
|
+
|
|
26
|
+
__all__ = ["GeminiRealtimeClient"]
|
|
27
|
+
|
|
28
|
+
# Module-level logger, used when a client is created without its own logger.
global_logger = getLogger(__name__)

# Pieces of the default Gemini websocket endpoint URL.
HOST = "generativelanguage.googleapis.com"
API_VERSION = "v1alpha"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@register_realtime_client()
@require_optional_import("websockets", "gemini", except_for=["get_factory", "__init__"])
@export_module("autogen.agentchat.realtime.experimental.clients")
class GeminiRealtimeClient(RealtimeClientBase):
    """(Experimental) Client for Gemini Realtime API."""

    def __init__(
        self,
        *,
        llm_config: Union[LLMConfig, dict[str, Any]],
        logger: Optional[Logger] = None,
    ) -> None:
        """(Experimental) Client for Gemini Realtime API.

        Only the first entry of ``llm_config["config_list"]`` is used.

        Args:
            llm_config: The config for the client.
            logger: The logger for the client.
        """
        super().__init__()
        self._llm_config = llm_config
        self._logger = logger

        self._connection: Optional["ClientConnection"] = None
        config = llm_config["config_list"][0]

        self._model: str = config["model"]
        self._voice = config.get("voice", "charon")
        self._temperature: float = config.get("temperature", 0.8)  # type: ignore[union-attr]

        self._response_modality = "AUDIO"

        self._api_key = config.get("api_key", None)
        # todo: add test with base_url just to make sure it works
        self._base_url: str = config.get(
            "base_url",
            f"wss://{HOST}/ws/google.ai.generativelanguage.{API_VERSION}.GenerativeService.BidiGenerateContent?key={self._api_key}",
        )
        self._final_config: dict[str, Any] = {}
        # Session options collected before the connection starts streaming.
        self._pending_session_updates: dict[str, Any] = {}
        # True only while read_events() is streaming; gates every outgoing message.
        self._is_reading_events = False

    @property
    def logger(self) -> Logger:
        """Get the logger for the Gemini Realtime API."""
        return self._logger or global_logger

    @property
    def connection(self) -> "ClientConnection":
        """Get the Gemini WebSocket connection.

        Raises:
            RuntimeError: If no connection is currently open.
        """
        if self._connection is None:
            raise RuntimeError("Gemini WebSocket is not initialized")
        return self._connection

    async def send_function_result(self, call_id: str, result: str) -> None:
        """Send the result of a function call to the Gemini Realtime API.

        The message is dropped if events are not currently being read.

        Args:
            call_id (str): The ID of the function call.
            result (str): The result of the function call.
        """
        msg = {
            "tool_response": {"function_responses": [{"id": call_id, "response": {"result": {"string_value": result}}}]}
        }
        if self._is_reading_events:
            await self.connection.send(json.dumps(msg))

    async def send_text(self, *, role: Role, text: str, turn_complete: bool = True) -> None:
        """Send a text message to the Gemini Realtime API.

        The message is dropped if events are not currently being read.

        Args:
            role: The role of the message.
            text: The text of the message.
            turn_complete: A flag indicating if the turn is complete.
        """
        msg = {
            "client_content": {
                "turn_complete": turn_complete,
                "turns": [{"role": role, "parts": [{"text": text}]}],
            }
        }
        if self._is_reading_events:
            await self.connection.send(json.dumps(msg))

    async def send_audio(self, audio: str) -> None:
        """Send audio to the Gemini Realtime API.

        The audio is always queued as an input audio buffer delta; it is only
        sent over the connection while events are being read.

        Args:
            audio (str): The audio to send.
        """
        msg = {
            "realtime_input": {
                "media_chunks": [
                    {
                        "data": audio,
                        "mime_type": "audio/pcm",
                    }
                ]
            }
        }
        await self.queue_input_audio_buffer_delta(audio)
        if self._is_reading_events:
            await self.connection.send(json.dumps(msg))

    async def truncate_audio(self, audio_end_ms: int, content_index: int, item_id: str) -> None:
        """Log that truncation is unsupported; no request is sent.

        Args:
            audio_end_ms (int): Ignored.
            content_index (int): Ignored.
            item_id (str): Ignored.
        """
        self.logger.info("This is not natively supported by Gemini Realtime API.")

    async def _initialize_session(self) -> None:
        """Initialize the session with the Gemini Realtime API.

        Builds the "setup" message from the pending session updates
        (instructions and tools) plus the model, voice and temperature
        taken from the config, and sends it over the connection.
        """
        pending = self._pending_session_updates
        function_declarations = [
            {
                "name": tool_schema["name"],
                "description": tool_schema["description"],
                "parameters": tool_schema["parameters"],
            }
            for tool_schema in pending.get("tools", [])
        ]
        session_config = {
            "setup": {
                "system_instruction": {
                    "role": "system",
                    "parts": [{"text": pending.get("instructions", "")}],
                },
                "model": f"models/{self._model}",
                "tools": [
                    {"function_declarations": function_declarations},
                ],
                "generation_config": {
                    "response_modalities": [self._response_modality],
                    "speech_config": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": self._voice}}},
                    "temperature": self._temperature,
                },
            }
        }

        self.logger.info(f"Sending session update: {session_config}")
        await self.connection.send(json.dumps(session_config))

    async def session_update(self, session_options: dict[str, Any]) -> None:
        """Record session updates to be applied when the connection is established.

        Updates arriving while events are being read are ignored with a warning.

        Args:
            session_options (dict[str, Any]): The session options to update.
        """
        if self._is_reading_events:
            self.logger.warning("Is reading events. Session update will be ignored.")
        else:
            self._pending_session_updates.update(session_options)

    @asynccontextmanager
    async def connect(self) -> AsyncGenerator[None, None]:
        """Connect to the Gemini Realtime API.

        The connection is available through ``self.connection`` inside the
        context and is cleared again when the context exits.
        """
        try:
            async with connect(
                self._base_url, additional_headers={"Content-Type": "application/json"}
            ) as self._connection:
                yield
        finally:
            self._connection = None

    async def read_events(self) -> AsyncGenerator[RealtimeEvent, None]:
        """Read Events from the Gemini Realtime Client

        Sends the session setup first, then yields events until the stream ends.

        Raises:
            RuntimeError: If called before connect().
        """
        if self._connection is None:
            raise RuntimeError("Client is not connected, call connect() first.")
        await self._initialize_session()

        self._is_reading_events = True
        try:
            async for event in self._read_events():
                yield event
        finally:
            # Clear the flag once streaming stops so it does not stay set
            # after the stream (and possibly the connection) is gone.
            self._is_reading_events = False

    async def _read_from_connection(self) -> AsyncGenerator[RealtimeEvent, None]:
        """Read messages from the Gemini Realtime connection."""
        async for raw_message in self.connection:
            # JSON text is UTF-8; decoding as ASCII would fail on any non-ASCII character.
            message = raw_message.decode("utf-8") if isinstance(raw_message, bytes) else raw_message
            events = self._parse_message(json.loads(message))
            for event in events:
                yield event

    def _parse_message(self, response: dict[str, Any]) -> list[RealtimeEvent]:
        """Parse a message from the Gemini Realtime API.

        Args:
            response (dict[str, Any]): The response to parse.

        Returns:
            list[RealtimeEvent]: The parsed events. Empty when a model turn
            carries no inline audio data in its first part.
        """
        if "serverContent" in response and "modelTurn" in response["serverContent"]:
            try:
                # pop() also removes the payload from `response`, which is
                # attached to the event as raw_message below.
                b64data = response["serverContent"]["modelTurn"]["parts"][0]["inlineData"].pop("data")
            except (KeyError, IndexError):
                # No parts, or the first part has no inline audio data.
                return []
            return [
                AudioDelta(
                    delta=b64data,
                    item_id=None,
                    raw_message=response,
                )
            ]
        elif "toolCall" in response:
            return [
                FunctionCall(
                    raw_message=response,
                    call_id=call["id"],
                    name=call["name"],
                    arguments=call["args"],
                )
                for call in response["toolCall"]["functionCalls"]
            ]
        elif "setupComplete" in response:
            return [
                SessionCreated(raw_message=response),
            ]
        else:
            return [RealtimeEvent(raw_message=response)]

    @classmethod
    def get_factory(
        cls, llm_config: Union[LLMConfig, dict[str, Any]], logger: Logger, **kwargs: Any
    ) -> Optional[Callable[[], "RealtimeClientProtocol"]]:
        """Create a Realtime API client.

        Args:
            llm_config: The LLM config for the client.
            logger: The logger for the client.
            **kwargs: Additional arguments.

        Returns:
            A zero-argument factory for the client when the first config entry
            has ``api_type == "google"`` and no extra keyword arguments were
            given; otherwise None.
        """
        if llm_config["config_list"][0].get("api_type") == "google" and not kwargs:
            return lambda: GeminiRealtimeClient(llm_config=llm_config, logger=logger, **kwargs)
        return None
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# needed for mypy to check if GeminiRealtimeClient implements RealtimeClientProtocol
|
|
273
|
+
# needed for mypy to check if GeminiRealtimeClient implements RealtimeClientProtocol
# (static-only: this assignment is never executed at runtime)
if TYPE_CHECKING:
    _client: RealtimeClientProtocol = GeminiRealtimeClient(llm_config={})
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .base_client import OpenAIRealtimeClient
|
|
6
|
+
from .rtc_client import OpenAIRealtimeWebRTCClient
|
|
7
|
+
|
|
8
|
+
__all__ = ["OpenAIRealtimeClient", "OpenAIRealtimeWebRTCClient"]
|