ag2-0.10.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ag2-0.10.2.dist-info/METADATA +819 -0
- ag2-0.10.2.dist-info/RECORD +423 -0
- ag2-0.10.2.dist-info/WHEEL +4 -0
- ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
- ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
- autogen/__init__.py +88 -0
- autogen/_website/__init__.py +3 -0
- autogen/_website/generate_api_references.py +426 -0
- autogen/_website/generate_mkdocs.py +1216 -0
- autogen/_website/notebook_processor.py +475 -0
- autogen/_website/process_notebooks.py +656 -0
- autogen/_website/utils.py +413 -0
- autogen/a2a/__init__.py +36 -0
- autogen/a2a/agent_executor.py +86 -0
- autogen/a2a/client.py +357 -0
- autogen/a2a/errors.py +18 -0
- autogen/a2a/httpx_client_factory.py +79 -0
- autogen/a2a/server.py +221 -0
- autogen/a2a/utils.py +207 -0
- autogen/agentchat/__init__.py +47 -0
- autogen/agentchat/agent.py +180 -0
- autogen/agentchat/assistant_agent.py +86 -0
- autogen/agentchat/chat.py +325 -0
- autogen/agentchat/contrib/__init__.py +5 -0
- autogen/agentchat/contrib/agent_eval/README.md +7 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
- autogen/agentchat/contrib/agent_eval/task.py +42 -0
- autogen/agentchat/contrib/agent_optimizer.py +432 -0
- autogen/agentchat/contrib/capabilities/__init__.py +5 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
- autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
- autogen/agentchat/contrib/capabilities/teachability.py +393 -0
- autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
- autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
- autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
- autogen/agentchat/contrib/capabilities/transforms.py +578 -0
- autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
- autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
- autogen/agentchat/contrib/captainagent/__init__.py +9 -0
- autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
- autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
- autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
- autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
- autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
- autogen/agentchat/contrib/graph_rag/document.py +29 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
- autogen/agentchat/contrib/img_utils.py +397 -0
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
- autogen/agentchat/contrib/llava_agent.py +189 -0
- autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
- autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
- autogen/agentchat/contrib/rag/__init__.py +10 -0
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
- autogen/agentchat/contrib/rag/query_engine.py +76 -0
- autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
- autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
- autogen/agentchat/contrib/swarm_agent.py +1404 -0
- autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
- autogen/agentchat/contrib/vectordb/__init__.py +5 -0
- autogen/agentchat/contrib/vectordb/base.py +224 -0
- autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
- autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
- autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
- autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
- autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
- autogen/agentchat/contrib/vectordb/utils.py +126 -0
- autogen/agentchat/contrib/web_surfer.py +304 -0
- autogen/agentchat/conversable_agent.py +4307 -0
- autogen/agentchat/group/__init__.py +67 -0
- autogen/agentchat/group/available_condition.py +91 -0
- autogen/agentchat/group/context_condition.py +77 -0
- autogen/agentchat/group/context_expression.py +238 -0
- autogen/agentchat/group/context_str.py +39 -0
- autogen/agentchat/group/context_variables.py +182 -0
- autogen/agentchat/group/events/transition_events.py +111 -0
- autogen/agentchat/group/group_tool_executor.py +324 -0
- autogen/agentchat/group/group_utils.py +659 -0
- autogen/agentchat/group/guardrails.py +179 -0
- autogen/agentchat/group/handoffs.py +303 -0
- autogen/agentchat/group/llm_condition.py +93 -0
- autogen/agentchat/group/multi_agent_chat.py +291 -0
- autogen/agentchat/group/on_condition.py +55 -0
- autogen/agentchat/group/on_context_condition.py +51 -0
- autogen/agentchat/group/patterns/__init__.py +18 -0
- autogen/agentchat/group/patterns/auto.py +160 -0
- autogen/agentchat/group/patterns/manual.py +177 -0
- autogen/agentchat/group/patterns/pattern.py +295 -0
- autogen/agentchat/group/patterns/random.py +106 -0
- autogen/agentchat/group/patterns/round_robin.py +117 -0
- autogen/agentchat/group/reply_result.py +24 -0
- autogen/agentchat/group/safeguards/__init__.py +21 -0
- autogen/agentchat/group/safeguards/api.py +241 -0
- autogen/agentchat/group/safeguards/enforcer.py +1158 -0
- autogen/agentchat/group/safeguards/events.py +140 -0
- autogen/agentchat/group/safeguards/validator.py +435 -0
- autogen/agentchat/group/speaker_selection_result.py +41 -0
- autogen/agentchat/group/targets/__init__.py +4 -0
- autogen/agentchat/group/targets/function_target.py +245 -0
- autogen/agentchat/group/targets/group_chat_target.py +133 -0
- autogen/agentchat/group/targets/group_manager_target.py +151 -0
- autogen/agentchat/group/targets/transition_target.py +424 -0
- autogen/agentchat/group/targets/transition_utils.py +6 -0
- autogen/agentchat/groupchat.py +1832 -0
- autogen/agentchat/realtime/__init__.py +3 -0
- autogen/agentchat/realtime/experimental/__init__.py +20 -0
- autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
- autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
- autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
- autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
- autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
- autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
- autogen/agentchat/realtime/experimental/function_observer.py +84 -0
- autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
- autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
- autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
- autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
- autogen/agentchat/realtime/experimental/websockets.py +21 -0
- autogen/agentchat/realtime_agent/__init__.py +21 -0
- autogen/agentchat/user_proxy_agent.py +114 -0
- autogen/agentchat/utils.py +206 -0
- autogen/agents/__init__.py +3 -0
- autogen/agents/contrib/__init__.py +10 -0
- autogen/agents/contrib/time/__init__.py +8 -0
- autogen/agents/contrib/time/time_reply_agent.py +74 -0
- autogen/agents/contrib/time/time_tool_agent.py +52 -0
- autogen/agents/experimental/__init__.py +27 -0
- autogen/agents/experimental/deep_research/__init__.py +7 -0
- autogen/agents/experimental/deep_research/deep_research.py +52 -0
- autogen/agents/experimental/discord/__init__.py +7 -0
- autogen/agents/experimental/discord/discord.py +66 -0
- autogen/agents/experimental/document_agent/__init__.py +19 -0
- autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
- autogen/agents/experimental/document_agent/document_agent.py +643 -0
- autogen/agents/experimental/document_agent/document_conditions.py +50 -0
- autogen/agents/experimental/document_agent/document_utils.py +376 -0
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
- autogen/agents/experimental/document_agent/parser_utils.py +134 -0
- autogen/agents/experimental/document_agent/url_utils.py +417 -0
- autogen/agents/experimental/reasoning/__init__.py +7 -0
- autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
- autogen/agents/experimental/slack/__init__.py +7 -0
- autogen/agents/experimental/slack/slack.py +73 -0
- autogen/agents/experimental/telegram/__init__.py +7 -0
- autogen/agents/experimental/telegram/telegram.py +76 -0
- autogen/agents/experimental/websurfer/__init__.py +7 -0
- autogen/agents/experimental/websurfer/websurfer.py +70 -0
- autogen/agents/experimental/wikipedia/__init__.py +7 -0
- autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
- autogen/browser_utils.py +309 -0
- autogen/cache/__init__.py +10 -0
- autogen/cache/abstract_cache_base.py +71 -0
- autogen/cache/cache.py +203 -0
- autogen/cache/cache_factory.py +88 -0
- autogen/cache/cosmos_db_cache.py +144 -0
- autogen/cache/disk_cache.py +97 -0
- autogen/cache/in_memory_cache.py +54 -0
- autogen/cache/redis_cache.py +119 -0
- autogen/code_utils.py +598 -0
- autogen/coding/__init__.py +30 -0
- autogen/coding/base.py +120 -0
- autogen/coding/docker_commandline_code_executor.py +283 -0
- autogen/coding/factory.py +56 -0
- autogen/coding/func_with_reqs.py +203 -0
- autogen/coding/jupyter/__init__.py +23 -0
- autogen/coding/jupyter/base.py +36 -0
- autogen/coding/jupyter/docker_jupyter_server.py +160 -0
- autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
- autogen/coding/jupyter/import_utils.py +82 -0
- autogen/coding/jupyter/jupyter_client.py +224 -0
- autogen/coding/jupyter/jupyter_code_executor.py +154 -0
- autogen/coding/jupyter/local_jupyter_server.py +164 -0
- autogen/coding/local_commandline_code_executor.py +341 -0
- autogen/coding/markdown_code_extractor.py +44 -0
- autogen/coding/utils.py +55 -0
- autogen/coding/yepcode_code_executor.py +197 -0
- autogen/doc_utils.py +35 -0
- autogen/environments/__init__.py +10 -0
- autogen/environments/docker_python_environment.py +365 -0
- autogen/environments/python_environment.py +125 -0
- autogen/environments/system_python_environment.py +85 -0
- autogen/environments/venv_python_environment.py +220 -0
- autogen/environments/working_directory.py +74 -0
- autogen/events/__init__.py +7 -0
- autogen/events/agent_events.py +1016 -0
- autogen/events/base_event.py +100 -0
- autogen/events/client_events.py +168 -0
- autogen/events/helpers.py +44 -0
- autogen/events/print_event.py +45 -0
- autogen/exception_utils.py +73 -0
- autogen/extensions/__init__.py +5 -0
- autogen/fast_depends/__init__.py +16 -0
- autogen/fast_depends/_compat.py +75 -0
- autogen/fast_depends/core/__init__.py +14 -0
- autogen/fast_depends/core/build.py +206 -0
- autogen/fast_depends/core/model.py +527 -0
- autogen/fast_depends/dependencies/__init__.py +15 -0
- autogen/fast_depends/dependencies/model.py +30 -0
- autogen/fast_depends/dependencies/provider.py +40 -0
- autogen/fast_depends/library/__init__.py +10 -0
- autogen/fast_depends/library/model.py +46 -0
- autogen/fast_depends/py.typed +6 -0
- autogen/fast_depends/schema.py +66 -0
- autogen/fast_depends/use.py +272 -0
- autogen/fast_depends/utils.py +177 -0
- autogen/formatting_utils.py +83 -0
- autogen/function_utils.py +13 -0
- autogen/graph_utils.py +173 -0
- autogen/import_utils.py +539 -0
- autogen/interop/__init__.py +22 -0
- autogen/interop/crewai/__init__.py +7 -0
- autogen/interop/crewai/crewai.py +88 -0
- autogen/interop/interoperability.py +71 -0
- autogen/interop/interoperable.py +46 -0
- autogen/interop/langchain/__init__.py +8 -0
- autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
- autogen/interop/langchain/langchain_tool.py +78 -0
- autogen/interop/litellm/__init__.py +7 -0
- autogen/interop/litellm/litellm_config_factory.py +178 -0
- autogen/interop/pydantic_ai/__init__.py +7 -0
- autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
- autogen/interop/registry.py +70 -0
- autogen/io/__init__.py +15 -0
- autogen/io/base.py +151 -0
- autogen/io/console.py +56 -0
- autogen/io/processors/__init__.py +12 -0
- autogen/io/processors/base.py +21 -0
- autogen/io/processors/console_event_processor.py +61 -0
- autogen/io/run_response.py +294 -0
- autogen/io/thread_io_stream.py +63 -0
- autogen/io/websockets.py +214 -0
- autogen/json_utils.py +42 -0
- autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
- autogen/llm_clients/__init__.py +77 -0
- autogen/llm_clients/client_v2.py +122 -0
- autogen/llm_clients/models/__init__.py +55 -0
- autogen/llm_clients/models/content_blocks.py +389 -0
- autogen/llm_clients/models/unified_message.py +145 -0
- autogen/llm_clients/models/unified_response.py +83 -0
- autogen/llm_clients/openai_completions_client.py +444 -0
- autogen/llm_config/__init__.py +11 -0
- autogen/llm_config/client.py +59 -0
- autogen/llm_config/config.py +461 -0
- autogen/llm_config/entry.py +169 -0
- autogen/llm_config/types.py +37 -0
- autogen/llm_config/utils.py +223 -0
- autogen/logger/__init__.py +11 -0
- autogen/logger/base_logger.py +129 -0
- autogen/logger/file_logger.py +262 -0
- autogen/logger/logger_factory.py +42 -0
- autogen/logger/logger_utils.py +57 -0
- autogen/logger/sqlite_logger.py +524 -0
- autogen/math_utils.py +338 -0
- autogen/mcp/__init__.py +7 -0
- autogen/mcp/__main__.py +78 -0
- autogen/mcp/helpers.py +45 -0
- autogen/mcp/mcp_client.py +349 -0
- autogen/mcp/mcp_proxy/__init__.py +19 -0
- autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
- autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
- autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
- autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
- autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
- autogen/mcp/mcp_proxy/security.py +399 -0
- autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
- autogen/messages/__init__.py +7 -0
- autogen/messages/agent_messages.py +946 -0
- autogen/messages/base_message.py +108 -0
- autogen/messages/client_messages.py +172 -0
- autogen/messages/print_message.py +48 -0
- autogen/oai/__init__.py +61 -0
- autogen/oai/anthropic.py +1516 -0
- autogen/oai/bedrock.py +800 -0
- autogen/oai/cerebras.py +302 -0
- autogen/oai/client.py +1658 -0
- autogen/oai/client_utils.py +196 -0
- autogen/oai/cohere.py +494 -0
- autogen/oai/gemini.py +1045 -0
- autogen/oai/gemini_types.py +156 -0
- autogen/oai/groq.py +319 -0
- autogen/oai/mistral.py +311 -0
- autogen/oai/oai_models/__init__.py +23 -0
- autogen/oai/oai_models/_models.py +16 -0
- autogen/oai/oai_models/chat_completion.py +86 -0
- autogen/oai/oai_models/chat_completion_audio.py +32 -0
- autogen/oai/oai_models/chat_completion_message.py +97 -0
- autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
- autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
- autogen/oai/oai_models/completion_usage.py +59 -0
- autogen/oai/ollama.py +657 -0
- autogen/oai/openai_responses.py +451 -0
- autogen/oai/openai_utils.py +897 -0
- autogen/oai/together.py +387 -0
- autogen/remote/__init__.py +18 -0
- autogen/remote/agent.py +199 -0
- autogen/remote/agent_service.py +197 -0
- autogen/remote/errors.py +17 -0
- autogen/remote/httpx_client_factory.py +131 -0
- autogen/remote/protocol.py +37 -0
- autogen/remote/retry.py +102 -0
- autogen/remote/runtime.py +96 -0
- autogen/retrieve_utils.py +490 -0
- autogen/runtime_logging.py +161 -0
- autogen/testing/__init__.py +12 -0
- autogen/testing/messages.py +45 -0
- autogen/testing/test_agent.py +111 -0
- autogen/token_count_utils.py +280 -0
- autogen/tools/__init__.py +20 -0
- autogen/tools/contrib/__init__.py +9 -0
- autogen/tools/contrib/time/__init__.py +7 -0
- autogen/tools/contrib/time/time.py +40 -0
- autogen/tools/dependency_injection.py +249 -0
- autogen/tools/experimental/__init__.py +54 -0
- autogen/tools/experimental/browser_use/__init__.py +7 -0
- autogen/tools/experimental/browser_use/browser_use.py +154 -0
- autogen/tools/experimental/code_execution/__init__.py +7 -0
- autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
- autogen/tools/experimental/crawl4ai/__init__.py +7 -0
- autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
- autogen/tools/experimental/deep_research/__init__.py +7 -0
- autogen/tools/experimental/deep_research/deep_research.py +329 -0
- autogen/tools/experimental/duckduckgo/__init__.py +7 -0
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
- autogen/tools/experimental/firecrawl/__init__.py +7 -0
- autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
- autogen/tools/experimental/google/__init__.py +14 -0
- autogen/tools/experimental/google/authentication/__init__.py +11 -0
- autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
- autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
- autogen/tools/experimental/google/drive/__init__.py +9 -0
- autogen/tools/experimental/google/drive/drive_functions.py +124 -0
- autogen/tools/experimental/google/drive/toolkit.py +88 -0
- autogen/tools/experimental/google/model.py +17 -0
- autogen/tools/experimental/google/toolkit_protocol.py +19 -0
- autogen/tools/experimental/google_search/__init__.py +8 -0
- autogen/tools/experimental/google_search/google_search.py +93 -0
- autogen/tools/experimental/google_search/youtube_search.py +181 -0
- autogen/tools/experimental/messageplatform/__init__.py +17 -0
- autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
- autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
- autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
- autogen/tools/experimental/perplexity/__init__.py +7 -0
- autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
- autogen/tools/experimental/reliable/__init__.py +10 -0
- autogen/tools/experimental/reliable/reliable.py +1311 -0
- autogen/tools/experimental/searxng/__init__.py +7 -0
- autogen/tools/experimental/searxng/searxng_search.py +142 -0
- autogen/tools/experimental/tavily/__init__.py +7 -0
- autogen/tools/experimental/tavily/tavily_search.py +176 -0
- autogen/tools/experimental/web_search_preview/__init__.py +7 -0
- autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
- autogen/tools/experimental/wikipedia/__init__.py +7 -0
- autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
- autogen/tools/function_utils.py +412 -0
- autogen/tools/tool.py +188 -0
- autogen/tools/toolkit.py +86 -0
- autogen/types.py +29 -0
- autogen/version.py +7 -0
- templates/client_template/main.jinja2 +72 -0
- templates/config_template/config.jinja2 +7 -0
- templates/main.jinja2 +61 -0
autogen/oai/client.py
ADDED
@@ -0,0 +1,1658 @@
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0
#
# SPDX-License-Identifier: MIT
from __future__ import annotations

import inspect
import json
import logging
import re
import sys
import uuid
import warnings
from collections import deque
from collections.abc import Callable
from functools import lru_cache
from typing import Any, Literal

from pydantic import BaseModel, Field, HttpUrl
from pydantic.type_adapter import TypeAdapter

from autogen.oai.oai_models.chat_completion import ChatCompletionExtended

from ..cache import Cache
from ..code_utils import content_str
from ..doc_utils import export_module
from ..events.client_events import StreamEvent, UsageSummaryEvent
from ..exception_utils import ModelToolNotSupportedError
from ..import_utils import optional_import_block, require_optional_import
from ..io.base import IOStream
from ..llm_config import ModelClient
from ..llm_config.entry import LLMConfigEntry, LLMConfigEntryDict
from ..logger.logger_utils import get_current_ts
from ..runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled
from .client_utils import FormatterProtocol, logging_formatter, merge_config_with_tools
from .openai_utils import OAI_PRICE1K, get_key, is_valid_api_key

TOOL_ENABLED = False
with optional_import_block() as openai_result:
    import openai

if openai_result.is_successful:
    # raises exception if openai>=1 is installed and something is wrong with imports
    from openai import APIError, APITimeoutError, AzureOpenAI, OpenAI
    from openai import __version__ as openai_version
    from openai.lib._parsing._completions import type_to_response_format_param
    from openai.types.chat import ChatCompletion, ChatCompletionChunk
    from openai.types.chat.chat_completion import ChatCompletionMessage, Choice  # type: ignore [attr-defined]
    from openai.types.chat.chat_completion_chunk import (
        ChoiceDeltaFunctionCall,
        ChoiceDeltaToolCall,
        ChoiceDeltaToolCallFunction,
    )
    from openai.types.completion import Completion
    from openai.types.completion_usage import CompletionUsage

    from autogen.oai.openai_responses import OpenAIResponsesClient

    if openai.__version__ >= "1.1.0":
        TOOL_ENABLED = True
    ERROR: ImportError | None = None
    from openai.lib._pydantic import _ensure_strict_json_schema
else:
    ERROR = ImportError("Please install openai>=1 and diskcache to use autogen.OpenAIWrapper.")  # type: ignore[assignment]

    # OpenAI = object
    # AzureOpenAI = object

with optional_import_block() as cerebras_result:
    from cerebras.cloud.sdk import (  # noqa
        AuthenticationError as cerebras_AuthenticationError,
        InternalServerError as cerebras_InternalServerError,
        RateLimitError as cerebras_RateLimitError,
    )

    from .cerebras import CerebrasClient

if cerebras_result.is_successful:
    cerebras_import_exception: ImportError | None = None
else:
    cerebras_AuthenticationError = cerebras_InternalServerError = cerebras_RateLimitError = Exception  # type: ignore[assignment,misc] # noqa: N816
    cerebras_import_exception = ImportError("cerebras_cloud_sdk not found")

with optional_import_block() as gemini_result:
    from google.api_core.exceptions import (  # noqa
        InternalServerError as gemini_InternalServerError,
        ResourceExhausted as gemini_ResourceExhausted,
    )

    from .gemini import GeminiClient

if gemini_result.is_successful:
    gemini_import_exception: ImportError | None = None
else:
    gemini_InternalServerError = gemini_ResourceExhausted = Exception  # type: ignore[assignment,misc] # noqa: N816
    gemini_import_exception = ImportError("google-genai not found")

with optional_import_block() as anthropic_result:
    from anthropic import (  # noqa
        InternalServerError as anthorpic_InternalServerError,
        RateLimitError as anthorpic_RateLimitError,
    )

    from .anthropic import AnthropicClient

if anthropic_result.is_successful:
    anthropic_import_exception: ImportError | None = None
else:
    anthorpic_InternalServerError = anthorpic_RateLimitError = Exception  # type: ignore[assignment,misc] # noqa: N816
    anthropic_import_exception = ImportError("anthropic not found")

with optional_import_block() as mistral_result:
    from mistralai.models import (  # noqa
        HTTPValidationError as mistral_HTTPValidationError,
        SDKError as mistral_SDKError,
    )

    from .mistral import MistralAIClient

if mistral_result.is_successful:
    mistral_import_exception: ImportError | None = None
else:
    mistral_SDKError = mistral_HTTPValidationError = Exception  # noqa: N816
    mistral_import_exception = ImportError("mistralai not found")

with optional_import_block() as together_result:
    from together.error import TogetherException as together_TogetherException

    from .together import TogetherClient

if together_result.is_successful:
    together_import_exception: ImportError | None = None
else:
    together_TogetherException = Exception  # noqa: N816
    together_import_exception = ImportError("together not found")

with optional_import_block() as groq_result:
    from groq import (  # noqa
        APIConnectionError as groq_APIConnectionError,
        InternalServerError as groq_InternalServerError,
        RateLimitError as groq_RateLimitError,
    )

    from .groq import GroqClient

if groq_result.is_successful:
    groq_import_exception: ImportError | None = None
else:
    groq_InternalServerError = groq_RateLimitError = groq_APIConnectionError = Exception  # noqa: N816
    groq_import_exception = ImportError("groq not found")

with optional_import_block() as cohere_result:
    from cohere.errors import (  # noqa
        InternalServerError as cohere_InternalServerError,
        ServiceUnavailableError as cohere_ServiceUnavailableError,
        TooManyRequestsError as cohere_TooManyRequestsError,
    )

    from .cohere import CohereClient

if cohere_result.is_successful:
    cohere_import_exception: ImportError | None = None
else:
    cohere_InternalServerError = cohere_TooManyRequestsError = cohere_ServiceUnavailableError = Exception  # noqa: N816
    cohere_import_exception = ImportError("cohere not found")

with optional_import_block() as ollama_result:
    from ollama import (  # noqa
        RequestError as ollama_RequestError,
        ResponseError as ollama_ResponseError,
    )

    from .ollama import OllamaClient

if ollama_result.is_successful:
    ollama_import_exception: ImportError | None = None
else:
    ollama_RequestError = ollama_ResponseError = Exception  # type: ignore[assignment,misc] # noqa: N816
    ollama_import_exception = ImportError("ollama not found")

with optional_import_block() as bedrock_result:
    from botocore.exceptions import (  # noqa
        BotoCoreError as bedrock_BotoCoreError,
        ClientError as bedrock_ClientError,
    )

    from .bedrock import BedrockClient

if bedrock_result.is_successful:
    bedrock_import_exception: ImportError | None = None
else:
    bedrock_BotoCoreError = bedrock_ClientError = Exception  # noqa: N816
    bedrock_import_exception = ImportError("botocore not found")
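The block above repeats one graceful-degradation pattern for every optional provider SDK: the import runs inside `optional_import_block()`, and when it fails the provider's exception classes are aliased to plain `Exception` while an `ImportError` is kept for later reporting. A minimal sketch of the same pattern, using a hypothetical `example_sdk` (the SDK name and its `RateLimitError` symbol are assumptions for illustration, not part of this package):

```python
from __future__ import annotations

from autogen.import_utils import optional_import_block

with optional_import_block() as example_result:
    # Hypothetical provider SDK; the import is allowed to fail silently here.
    from example_sdk import RateLimitError as example_RateLimitError

if example_result.is_successful:
    example_import_exception: ImportError | None = None
else:
    # Alias to a generic Exception so except-clauses naming example_RateLimitError still work,
    # and keep an ImportError to surface a clear message when the client is actually used.
    example_RateLimitError = Exception
    example_import_exception = ImportError("example_sdk not found")
```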
logger = logging.getLogger(__name__)
if not logger.handlers:
    # Add the console handler.
    _ch = logging.StreamHandler(stream=sys.stdout)
    _ch.setFormatter(logging_formatter)
    logger.addHandler(_ch)

LEGACY_DEFAULT_CACHE_SEED = 41
LEGACY_CACHE_DIR = ".cache"
OPEN_API_BASE_URL_PREFIX = "https://api.openai.com"

OPENAI_FALLBACK_KWARGS = {
    "api_key",
    "organization",
    "project",
    "base_url",
    "websocket_base_url",
    "timeout",
    "max_retries",
    "default_headers",
    "default_query",
    "http_client",
    "_strict_response_validation",
    "webhook_secret",
}

AOPENAI_FALLBACK_KWARGS = {
    "azure_endpoint",
    "azure_deployment",
    "api_version",
    "api_key",
    "azure_ad_token",
    "azure_ad_token_provider",
    "organization",
    "websocket_base_url",
    "timeout",
    "max_retries",
    "default_headers",
    "default_query",
    "http_client",
    "_strict_response_validation",
    "base_url",
    "project",
    "webhook_secret",
}


@lru_cache(maxsize=128)
def log_cache_seed_value(cache_seed_value: str | int, client: ModelClient) -> None:
    logger.debug(f"Using cache with seed value {cache_seed_value} for client {client.__class__.__name__}")


class OpenAIEntryDict(LLMConfigEntryDict, total=False):
    api_type: Literal["openai"]

    price: list[float] | None
    tool_choice: Literal["none", "auto", "required"] | None
    user: str | None
    stream: bool
    verbosity: Literal["low", "medium", "high"] | None
    extra_body: dict[str, Any] | None
    reasoning_effort: Literal["low", "minimal", "medium", "high"] | None
    max_completion_tokens: int | None


class OpenAILLMConfigEntry(LLMConfigEntry):
    api_type: Literal["openai"] = "openai"

    price: list[float] | None = Field(default=None, min_length=2, max_length=2)
    tool_choice: Literal["none", "auto", "required"] | None = None
    user: str | None = None
    stream: bool = False
    verbosity: Literal["low", "medium", "high"] | None = None
    # The extra_body parameter flows from OpenAILLMConfigEntry to the LLM request through this path:
    # 1. Config Definition: extra_body is defined in OpenAILLMConfigEntry (autogen/oai/client.py:248)
    # 2. Parameter Classification: It's classified as an OpenAI client parameter (not AG2-specific) via the openai_kwargs property (autogen/oai/client.py:752-758)
    # 3. Request Separation: In _separate_create_config() (autogen/oai/client.py:842), extra_body goes into create_config since it's not in the extra_kwargs set.
    # 4. API Call: The create_config becomes params and gets passed directly to OpenAI's create() method via **params (autogen/oai/client.py:551,658)
    extra_body: dict[str, Any] | None = (
        None  # For VLLM - See here: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters
    )
    # reasoning models - see: https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort
    reasoning_effort: Literal["low", "minimal", "medium", "high"] | None = None
    max_completion_tokens: int | None = None

    def create_client(self) -> ModelClient:
        raise NotImplementedError("create_client method must be implemented in the derived class.")
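The comments above trace how `extra_body` travels unchanged from the config entry into the eventual `chat.completions.create(**params)` call, which is what makes vLLM-specific sampling options reachable through an AG2 config. A minimal sketch of a config dict that relies on that path (the endpoint, model name, and `extra_body` contents are illustrative assumptions):

```python
import os

# Hypothetical entry for an OpenAI-compatible vLLM server; field names follow
# OpenAIEntryDict above, the concrete values are made up for the example.
vllm_config = {
    "api_type": "openai",
    "model": "meta-llama/Llama-3-8B-Instruct",        # whatever the vLLM server is serving
    "base_url": "http://localhost:8000/v1",            # OpenAI-compatible endpoint
    "api_key": os.environ.get("OPENAI_API_KEY", "EMPTY"),
    # Forwarded as-is to create(..., extra_body=...), so vLLM-only knobs can ride along.
    "extra_body": {"top_k": 20, "repetition_penalty": 1.05},
}
```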
class AzureOpenAIEntryDict(LLMConfigEntryDict, total=False):
    api_type: Literal["azure"]

    azure_ad_token_provider: str | Callable[[], str] | None
    stream: bool
    tool_choice: Literal["none", "auto", "required"] | None
    user: str | None
    reasoning_effort: Literal["low", "minimal", "medium", "high"] | None
    max_completion_tokens: int | None


class AzureOpenAILLMConfigEntry(LLMConfigEntry):
    api_type: Literal["azure"] = "azure"

    azure_ad_token_provider: str | Callable[[], str] | None = None
    stream: bool = False
    tool_choice: Literal["none", "auto", "required"] | None = None
    user: str | None = None
    # reasoning models - see:
    # - https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning
    # - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference-preview
    reasoning_effort: Literal["low", "minimal", "medium", "high"] | None = None
    max_completion_tokens: int | None = None

    def create_client(self) -> ModelClient:
        raise NotImplementedError


class DeepSeekEntyDict(LLMConfigEntryDict, total=False):
    api_type: Literal["deepseek"]

    base_url: HttpUrl
    stream: bool
    tool_choice: Literal["none", "auto", "required"] | None


class DeepSeekLLMConfigEntry(LLMConfigEntry):
    api_type: Literal["deepseek"] = "deepseek"

    temperature: float | None = Field(default=None, ge=0.0, le=1.0)
    top_p: float | None = Field(None, ge=0.0, le=1.0)
    max_tokens: int = Field(8192, ge=1, le=8192)

    base_url: HttpUrl = HttpUrl("https://api.deepseek.com/v1")
    stream: bool = False
    tool_choice: Literal["none", "auto", "required"] | None = None

    def create_client(self) -> None:  # type: ignore [override]
        raise NotImplementedError("DeepSeekLLMConfigEntry.create_client is not implemented.")


class PlaceHolderClient:
    def __init__(self, config):
        self.config = config


@require_optional_import("openai>=1.66.2", "openai")
class OpenAIClient:
    """Follows the Client protocol and wraps the OpenAI client."""

    RESPONSE_USAGE_KEYS: list[str] = ["prompt_tokens", "completion_tokens", "total_tokens", "cost", "model"]

    def __init__(self, client: OpenAI | AzureOpenAI, response_format: BaseModel | dict[str, Any] | None = None):
        self._oai_client = client
        self.response_format = response_format
        if (
            not isinstance(client, openai.AzureOpenAI)
            and str(client.base_url).startswith(OPEN_API_BASE_URL_PREFIX)
            and not is_valid_api_key(self._oai_client.api_key)
        ):
            logger.warning(
                "The API key specified is not a valid OpenAI format; it won't work with the OpenAI-hosted model."
            )

    def message_retrieval(self, response: ChatCompletion | Completion) -> list[str] | list[ChatCompletionMessage]:
        """Retrieve the messages from the response.

        Args:
            response (ChatCompletion | Completion): The response from openai.

        Returns:
            The message from the response.
        """
        choices = response.choices
        if isinstance(response, Completion):
            return [choice.text for choice in choices]  # type: ignore [union-attr]

        def _format_content(content: str | list[dict[str, Any]] | None) -> str:
            normalized_content = content_str(content)
            return (
                self.response_format.model_validate_json(normalized_content).format()
                if isinstance(self.response_format, FormatterProtocol)
                else normalized_content
            )

        if TOOL_ENABLED:
            return [  # type: ignore [return-value]
                (
                    choice.message  # type: ignore [union-attr]
                    if choice.message.function_call is not None or choice.message.tool_calls is not None  # type: ignore [union-attr]
                    else _format_content(choice.message.content)
                )  # type: ignore [union-attr]
                for choice in choices
            ]
        else:
            return [  # type: ignore [return-value]
                choice.message if choice.message.function_call is not None else _format_content(choice.message.content)  # type: ignore [union-attr]
                for choice in choices
            ]

    @staticmethod
    def _is_agent_name_error_message(message: str) -> bool:
        pattern = re.compile(r"Invalid 'messages\[\d+\]\.name': string does not match pattern.")
        return bool(pattern.match(message))

    @staticmethod
    def _move_system_message_to_beginning(messages: list[dict[str, Any]]) -> None:
        for msg in messages:
            if msg["role"] == "system":
                messages.insert(0, messages.pop(messages.index(msg)))
                break

    @staticmethod
    def _patch_messages_for_deepseek_reasoner(**kwargs: Any) -> Any:
        if (
            "model" not in kwargs
            or kwargs["model"] != "deepseek-reasoner"
            or "messages" not in kwargs
            or len(kwargs["messages"]) == 0
        ):
            return kwargs

        # The system message of deepseek-reasoner must be put on the beginning of the message sequence.
        OpenAIClient._move_system_message_to_beginning(kwargs["messages"])

        new_messages = []
        previous_role = None
        for message in kwargs["messages"]:
            if "role" in message:
                current_role = message["role"]

                # This model requires alternating roles
                if current_role == previous_role:
                    # Swap the role
                    if current_role == "user":
                        message["role"] = "assistant"
                    elif current_role == "assistant":
                        message["role"] = "user"

                previous_role = message["role"]

            new_messages.append(message)

        # The last message of deepseek-reasoner must be a user message
        # , or an assistant message with prefix mode on (but this is supported only for beta api)
        if new_messages[-1]["role"] != "user":
            new_messages.append({"role": "user", "content": "continue"})

        kwargs["messages"] = new_messages

        return kwargs
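The method above enforces deepseek-reasoner's message constraints: the system message must come first, roles must alternate, and the final message must come from the user. A small worked example of that transformation (message contents are made up; this assumes the `openai` extra is installed so the class is usable):

```python
from autogen.oai.client import OpenAIClient

before = {
    "model": "deepseek-reasoner",
    "messages": [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "system", "content": "You are a math tutor."},   # not at the start
        {"role": "user", "content": "Please show your working."},  # second user turn in a row
    ],
}

after = OpenAIClient._patch_messages_for_deepseek_reasoner(**before)
# after["messages"] is now:
#   [{"role": "system", ...},                       # system message moved to the front
#    {"role": "user", "content": "What is 2 + 2?"},
#    {"role": "assistant", "content": "Please show your working."},  # re-labelled to alternate
#    {"role": "user", "content": "continue"}]       # appended so the last turn is a user turn
```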
    @staticmethod
    def _handle_openai_bad_request_error(func: Callable[..., Any]) -> Callable[..., Any]:
        def wrapper(*args: Any, **kwargs: Any):
            try:
                kwargs = OpenAIClient._patch_messages_for_deepseek_reasoner(**kwargs)
                return func(*args, **kwargs)
            except openai.BadRequestError as e:
                response_json = e.response.json()
                # Check if the error message is related to the agent name. If so, raise a ValueError with a more informative message.
                if (
                    "error" in response_json
                    and "message" in response_json["error"]
                    and OpenAIClient._is_agent_name_error_message(response_json["error"]["message"])
                ):
                    error_message = (
                        f"This error typically occurs when the agent name contains invalid characters, such as spaces or special symbols.\n"
                        "Please ensure that your agent name follows the correct format and doesn't include any unsupported characters.\n"
                        "Check the agent name and try again.\n"
                        f"Here is the full BadRequestError from openai:\n{e.message}."
                    )
                    raise ValueError(error_message)

                raise e

        return wrapper

    @staticmethod
    def _convert_system_role_to_user(messages: list[dict[str, Any]]) -> None:
        for msg in messages:
            if msg.get("role", "") == "system":
                msg["role"] = "user"

    @staticmethod
    def _add_streaming_usage_to_params(params: dict[str, Any]) -> None:
        if params.get("stream", False):
            params.setdefault("stream_options", {}).setdefault("include_usage", True)
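Setting `include_usage` on `stream_options` is what later lets the streaming loop in `create()` read token counts from the final chunk. A minimal sketch of the transformation (model and message values are made up):

```python
from autogen.oai.client import OpenAIClient

# A streaming request without stream_options gains {"include_usage": True}.
params = {"model": "gpt-4o-mini", "stream": True, "messages": [{"role": "user", "content": "hi"}]}
OpenAIClient._add_streaming_usage_to_params(params)
assert params["stream_options"] == {"include_usage": True}
# With include_usage set, the API emits one final chunk whose `usage` field carries the
# prompt/completion token counts that the loop below accumulates into the response.
```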
    def create(self, params: dict[str, Any]) -> ChatCompletion:
        """Create a completion for a given config using openai's client.

        Args:
            params: The params for the completion.

        Returns:
            The completion.
        """
        iostream = IOStream.get_default()

        is_structured_output = self.response_format is not None or "response_format" in params

        if is_structured_output:

            def _create_or_parse(*args, **kwargs):
                if "stream" in kwargs:
                    kwargs.pop("stream")
                    kwargs.pop("stream_options", None)

                if (
                    isinstance(kwargs["response_format"], dict)
                    and kwargs["response_format"].get("type") != "json_object"
                ):
                    kwargs["response_format"] = {
                        "type": "json_schema",
                        "json_schema": {
                            "schema": _ensure_strict_json_schema(
                                kwargs["response_format"], path=(), root=kwargs["response_format"]
                            ),
                            "name": "response_format",
                            "strict": True,
                        },
                    }
                else:
                    kwargs["response_format"] = type_to_response_format_param(
                        self.response_format or params["response_format"]
                    )

                return self._oai_client.chat.completions.create(*args, **kwargs)

            create_or_parse = _create_or_parse
        else:
            completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
            create_or_parse = completions.create
        # Wrap _create_or_parse with exception handling
        create_or_parse = OpenAIClient._handle_openai_bad_request_error(create_or_parse)

        # needs to be updated when the o3 is released to generalize
        is_o1 = "model" in params and params["model"].startswith("o1")

        is_mistral = "model" in params and "mistral" in params["model"]
        if is_mistral:
            OpenAIClient._convert_system_role_to_user(params["messages"])

        # If streaming is enabled and has messages, then iterate over the chunks of the response and is not using structured outputs.
        if params.get("stream", False) and "messages" in params and not is_o1 and not is_structured_output:
            # Usage will be returned as the last chunk
            OpenAIClient._add_streaming_usage_to_params(params)

            response_contents = [""] * params.get("n", 1)
            finish_reasons = [""] * params.get("n", 1)
            completion_tokens = 0

            # Prepare for potential function call
            full_function_call: dict[str, Any] | None = None
            full_tool_calls: list[dict[str, Any] | None] | None = None

            # Send the chat completion request to OpenAI's API and process the response in chunks
            chunks_id: str = ""
            chunks_model: str = ""
            chunks_created: int = 0
            chunks_usage_prompt_tokens: int = 0
            chunks_usage_completion_tokens: int = 0
            for chunk in create_or_parse(**params):
                if not isinstance(chunk, ChatCompletionChunk):
                    logger.debug(f"Skipping unexpected chunk type: {type(chunk)}")
                    continue

                chunk_cc: ChatCompletionChunk = chunk
                if chunk_cc.choices:
                    for choice in chunk_cc.choices:
                        content = choice.delta.content
                        tool_calls_chunks = choice.delta.tool_calls
                        finish_reasons[choice.index] = choice.finish_reason

                        # todo: remove this after function calls are removed from the API
                        # the code should work regardless of whether function calls are removed or not, but test_chat_functions_stream should fail
                        # begin block
                        function_call_chunk = (
                            choice.delta.function_call if hasattr(choice.delta, "function_call") else None
                        )
                        # Handle function call
                        if function_call_chunk:
                            # Handle function call
                            if function_call_chunk:
                                full_function_call, completion_tokens = OpenAIWrapper._update_function_call_from_chunk(
                                    function_call_chunk, full_function_call, completion_tokens
                                )
                            if not content:
                                continue
                        # end block

                        # Handle tool calls
                        if tool_calls_chunks:
                            for tool_calls_chunk in tool_calls_chunks:
                                # the current tool call to be reconstructed
                                ix = tool_calls_chunk.index
                                if full_tool_calls is None:
                                    full_tool_calls = []
                                if ix >= len(full_tool_calls):
                                    # in case ix is not sequential
                                    full_tool_calls = full_tool_calls + [None] * (ix - len(full_tool_calls) + 1)

                                full_tool_calls[ix], completion_tokens = OpenAIWrapper._update_tool_calls_from_chunk(
                                    tool_calls_chunk, full_tool_calls[ix], completion_tokens
                                )
                                if not content:
                                    continue

                        # End handle tool calls

                        # If content is present, print it to the terminal and update response variables
                        if content is not None:
                            iostream.send(StreamEvent(content=content))
                            response_contents[choice.index] += content
                            completion_tokens += 1
                        else:
                            pass
                else:
                    if chunk_cc.usage:
                        # Usage will be in the last chunk as we have set include_usage=True on stream_options
                        chunks_usage_prompt_tokens = getattr(chunk_cc.usage, "prompt_tokens", 0)
                        chunks_usage_completion_tokens = getattr(chunk_cc.usage, "completion_tokens", 0)

                if not chunks_id:
                    chunks_id = chunk_cc.id
                    chunks_model = chunk_cc.model
                    chunks_created = chunk_cc.created

            # Prepare the final ChatCompletion object based on the accumulated data
            response = ChatCompletion(
                id=chunks_id,
                model=chunks_model,
                created=chunks_created,
                object="chat.completion",
                choices=[],
                usage=CompletionUsage(
                    prompt_tokens=chunks_usage_prompt_tokens,
                    completion_tokens=chunks_usage_completion_tokens,
                    total_tokens=chunks_usage_prompt_tokens + chunks_usage_completion_tokens,
                ),
            )
            for i in range(len(response_contents)):
                if openai_version >= "1.5":  # pragma: no cover
                    # OpenAI versions 1.5.0 and above
                    choice = Choice(
                        index=i,
                        finish_reason=finish_reasons[i],
                        message=ChatCompletionMessage(
                            role="assistant",
                            content=response_contents[i],
                            function_call=full_function_call,
                            tool_calls=full_tool_calls,
                        ),
                        logprobs=None,
                    )
                else:
                    # OpenAI versions below 1.5.0
                    choice = Choice(  # type: ignore [call-arg]
                        index=i,
                        finish_reason=finish_reasons[i],
                        message=ChatCompletionMessage(
                            role="assistant",
                            content=response_contents[i],
                            function_call=full_function_call,
                            tool_calls=full_tool_calls,
                        ),
                    )

                response.choices.append(choice)
        else:
            # If streaming is not enabled, send a regular chat completion request
            params = params.copy()
            if is_o1:
                # add a warning that model does not support stream
                if params.get("stream", False):
                    warnings.warn(
                        f"The {params.get('model')} model does not support streaming. The stream will be set to False."
                    )
                if "tools" in params:
                    if params["tools"]:  # If tools exist, raise as unsupported
                        raise ModelToolNotSupportedError(params.get("model"))
                    else:
                        params.pop("tools")  # Remove empty tools list
                self._process_reasoning_model_params(params)
            params["stream"] = False
            response = create_or_parse(**params)
            # remove the system_message from the response and add it in the prompt at the start.
            if is_o1:
                for msg in params["messages"]:
                    if msg["role"] == "user" and msg["content"].startswith("System message: "):
                        msg["role"] = "system"
                        msg["content"] = msg["content"][len("System message: ") :]

        return response

    def _process_reasoning_model_params(self, params: dict[str, Any]) -> None:
        """Cater for the reasoning model (o1, o3..) parameters
        please refer: https://platform.openai.com/docs/guides/reasoning#limitations
        """
        # Unsupported parameters
        unsupported_params = [
            "temperature",
            "top_p",
            "frequency_penalty",
            "presence_penalty",
            "logprobs",
            "top_logprobs",
            "logit_bias",
        ]
        model_name = params.get("model")
        for param in unsupported_params:
            if param in params:
                warnings.warn(f"`{param}` is not supported with {model_name} model and will be ignored.")
                params.pop(param)
        # Replace max_tokens with max_completion_tokens as reasoning tokens are now factored in
        # and max_tokens isn't valid
        if "max_tokens" in params:
            params["max_completion_tokens"] = params.pop("max_tokens")

        # TODO - When o1-mini and o1-preview point to newer models (e.g. 2024-12-...), remove them from this list but leave the 2024-09-12 dated versions
        system_not_allowed = model_name in ("o1-mini", "o1-preview", "o1-mini-2024-09-12", "o1-preview-2024-09-12")

        if "messages" in params and system_not_allowed:
            # o1-mini (2024-09-12) and o1-preview (2024-09-12) don't support role='system' messages, only 'user' and 'assistant'
            # replace the system messages with user messages preappended with "System message: "
            for msg in params["messages"]:
                if msg["role"] == "system":
                    msg["role"] = "user"
                    msg["content"] = f"System message: {msg['content']}"
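`_process_reasoning_model_params` strips sampling parameters that o1-style models reject, renames `max_tokens`, and rewrites system messages for the dated o1-mini/o1-preview variants. A small worked example under those rules (the model choice, placeholder key, and values are illustrative assumptions; no API request is made):

```python
from openai import OpenAI
from autogen.oai.client import OpenAIClient

client = OpenAIClient(OpenAI(api_key="sk-placeholder"))  # placeholder key; only used locally here

params = {
    "model": "o1-mini",
    "temperature": 0.7,   # unsupported for reasoning models -> dropped with a warning
    "max_tokens": 1024,   # renamed to max_completion_tokens
    "messages": [{"role": "system", "content": "Be terse."}],
}
client._process_reasoning_model_params(params)
# params is now approximately:
# {
#     "model": "o1-mini",
#     "max_completion_tokens": 1024,
#     "messages": [{"role": "user", "content": "System message: Be terse."}],
# }
```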
    def cost(self, response: ChatCompletion | Completion) -> float:
        """Calculate the cost of the response."""
        model = response.model
        if model not in OAI_PRICE1K:
            # log warning that the model is not found
            logger.warning(
                f'Model {model} is not found. The cost will be 0. In your config_list, add field {{"price" : [prompt_price_per_1k, completion_token_price_per_1k]}} for customized pricing.'
            )
            return 0

        n_input_tokens = response.usage.prompt_tokens if response.usage is not None else 0  # type: ignore [union-attr]
        n_output_tokens = response.usage.completion_tokens if response.usage is not None else 0  # type: ignore [union-attr]
        if n_output_tokens is None:
            n_output_tokens = 0
        tmp_price1K = OAI_PRICE1K[model]  # noqa: N806
        # First value is input token rate, second value is output token rate
        if isinstance(tmp_price1K, tuple):
            return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000  # type: ignore [no-any-return]
        return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000  # type: ignore [operator]
746
|
+
|
|
747
|
+
@staticmethod
|
|
748
|
+
def get_usage(response: ChatCompletion | Completion) -> dict[str, Any]:
|
|
749
|
+
return {
|
|
750
|
+
"prompt_tokens": response.usage.prompt_tokens if response.usage is not None else 0,
|
|
751
|
+
"completion_tokens": response.usage.completion_tokens if response.usage is not None else 0,
|
|
752
|
+
"total_tokens": response.usage.total_tokens if response.usage is not None else 0,
|
|
753
|
+
"cost": response.cost if hasattr(response, "cost") else 0,
|
|
754
|
+
"model": response.model,
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
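For orientation, the per-1K pricing arithmetic in `cost()` reduces to a two-rate dot product over the usage counts; the prices below are made up and are not the package's `OAI_PRICE1K` table:

```python
# Hypothetical (prompt, completion) USD prices per 1K tokens.
price_per_1k = (0.0005, 0.0015)
usage = {"prompt_tokens": 1200, "completion_tokens": 300}

cost = (price_per_1k[0] * usage["prompt_tokens"] + price_per_1k[1] * usage["completion_tokens"]) / 1000
print(round(cost, 6))  # 0.00105 = (0.0005 * 1200 + 0.0015 * 300) / 1000
```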
+@export_module("autogen")
+class OpenAIWrapper:
+    """A wrapper class for openai client."""
+
+    extra_kwargs = {
+        "agent",
+        "cache",
+        "cache_seed",
+        "filter_func",
+        "allow_format_str_template",
+        "context",
+        "api_version",
+        "api_type",
+        "tags",
+        "price",
+    }
+
+    @property
+    def openai_kwargs(self) -> set[str]:
+        if openai_result.is_successful:
+            return set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs) | set(
+                inspect.getfullargspec(AzureOpenAI.__init__).kwonlyargs
+            )
+        else:
+            return OPENAI_FALLBACK_KWARGS | AOPENAI_FALLBACK_KWARGS
+
+    total_usage_summary: dict[str, Any] | None = None
+    actual_usage_summary: dict[str, Any] | None = None
+
+    def __init__(
+        self,
+        *,
+        config_list: list[dict[str, Any]] | None = None,
+        **base_config: Any,
+    ):
+        """Initialize the OpenAIWrapper.
+
+        Args:
+            config_list: a list of config dicts to override the base_config.
+                They can contain additional kwargs as allowed in the [create](https://docs.ag2.ai/latest/docs/api-reference/autogen/OpenAIWrapper/#autogen.OpenAIWrapper.create) method. E.g.,
+
+                ```python
+                config_list = [
+                    {
+                        "model": "gpt-4",
+                        "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
+                        "api_type": "azure",
+                        "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
+                        "api_version": "2024-02-01",
+                    },
+                    {
+                        "model": "gpt-3.5-turbo",
+                        "api_key": os.environ.get("OPENAI_API_KEY"),
+                        "base_url": "https://api.openai.com/v1",
+                    },
+                    {
+                        "model": "llama-7B",
+                        "base_url": "http://127.0.0.1:8080",
+                    },
+                ]
+                ```
+
+            base_config: base config. It can contain both keyword arguments for openai client
+                and additional kwargs.
+                When using OpenAI or Azure OpenAI endpoints, please specify a non-empty 'model' either in `base_config` or in each config of `config_list`.
+        """
+        if logging_enabled():
+            log_new_wrapper(self, locals())
+        openai_config, extra_kwargs = self._separate_openai_config(base_config)
+        # It's OK if "model" is not provided in base_config or config_list
+        # Because one can provide "model" at `create` time.
+
+        self._clients: list[ModelClient] = []
+        self._config_list: list[dict[str, Any]] = []
+
+        # Determine routing_method from base_config only.
+        self.routing_method = base_config.get("routing_method") or "fixed_order"
+        self._round_robin_index = 0
+
+        # Response metadata storage (for serializable responses)
+        # Store metadata separately instead of mutating response objects
+        self._response_metadata: dict[str, dict[str, Any]] = {}  # response_id → metadata
+        self._response_buffer: deque[str] = deque(maxlen=100)  # Circular buffer of response IDs
+        self._response_buffer_size = base_config.get("response_buffer_size", 100)
+        if self._response_buffer_size != 100:
+            self._response_buffer = deque(maxlen=self._response_buffer_size)
+
+        # Remove routing_method from extra_kwargs after it has been used to set self.routing_method
+        # This ensures it's not part of the individual client configurations that are based on extra_kwargs.
+        extra_kwargs.pop("routing_method", None)
+
+        if config_list:
+            config_list = [config.copy() for config in config_list]  # make a copy before modifying
+            for config_item in config_list:
+                self._register_default_client(config_item, openai_config)
+                # Construct current_config_extra_kwargs using the cleaned extra_kwargs
+                # (which doesn't have routing_method from base_config)
+                # and specific non-openai kwargs from config_item.
+                config_item_specific_extras = {k: v for k, v in config_item.items() if k not in self.openai_kwargs}
+                self._config_list.append({**extra_kwargs, **config_item_specific_extras})
+        else:
+            # For a single config passed via base_config (already in extra_kwargs)
+            self._register_default_client(extra_kwargs, openai_config)
+            # extra_kwargs has already had routing_method popped.
+            self._config_list = [extra_kwargs]
+
+        self.wrapper_id = id(self)
+
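A usage sketch for the constructor documented above; the keys and endpoints are placeholders, and a real API key (or a reachable local server) is needed for `create` to succeed:

```python
import os
from autogen import OpenAIWrapper

wrapper = OpenAIWrapper(
    config_list=[
        # tried first ("fixed_order" routing is the default)
        {"model": "gpt-4o-mini", "api_key": os.environ.get("OPENAI_API_KEY")},
        # fallback entry, e.g. a local OpenAI-compatible server
        {"model": "llama-7B", "base_url": "http://127.0.0.1:8080"},
    ],
    cache_seed=42,  # extra kwarg: responses are cached on disk under this seed
)

response = wrapper.create(messages=[{"role": "user", "content": "What is 2 + 2?"}])
print(wrapper.extract_text_or_completion_object(response))
```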
+    def _separate_openai_config(self, config: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Separate the config into openai_config and extra_kwargs."""
+        openai_config = {k: v for k, v in config.items() if k in self.openai_kwargs}
+        extra_kwargs = {k: v for k, v in config.items() if k not in self.openai_kwargs}
+        return openai_config, extra_kwargs
+
+    def _separate_create_config(self, config: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Separate the config into create_config and extra_kwargs."""
+        create_config = {k: v for k, v in config.items() if k not in self.extra_kwargs}
+        extra_kwargs = {k: v for k, v in config.items() if k in self.extra_kwargs}
+        return create_config, extra_kwargs
+
+    def _store_response_metadata(
+        self, response_id: str, client: ModelClient, config_id: int, pass_filter: bool
+    ) -> None:
+        """Store response metadata with circular buffer to prevent memory overflow.
+
+        Args:
+            response_id: Unique ID of the response (response.id)
+            client: ModelClient that generated the response
+            config_id: Index of the client in config_list
+            pass_filter: Whether the response passed the filter function
+        """
+        # If buffer is full, remove oldest entry
+        if len(self._response_buffer) >= self._response_buffer_size:
+            oldest_id = self._response_buffer[0]  # Will be auto-removed by deque
+            self._response_metadata.pop(oldest_id, None)
+
+        # Add new metadata
+        self._response_metadata[response_id] = {
+            "client": client,
+            "config_id": config_id,
+            "pass_filter": pass_filter,
+        }
+        self._response_buffer.append(response_id)
+
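The metadata store above is a bounded FIFO keyed by response id: the deque caps how many entries are kept and the dict is trimmed to match. A standalone sketch of that eviction behaviour (tiny buffer, hypothetical names, no ag2 imports):

```python
from collections import deque

metadata: dict[str, dict] = {}
buffer: deque[str] = deque(maxlen=3)  # deliberately tiny to show eviction

def store(response_id: str, info: dict) -> None:
    if len(buffer) >= 3:
        # the deque drops its oldest id automatically; drop that id's metadata too
        metadata.pop(buffer[0], None)
    metadata[response_id] = info
    buffer.append(response_id)

for i in range(5):
    store(f"resp-{i}", {"config_id": i})

print(sorted(metadata))  # ['resp-2', 'resp-3', 'resp-4']
```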
+    def _configure_azure_openai(self, config: dict[str, Any], openai_config: dict[str, Any]) -> None:
+        openai_config["azure_deployment"] = openai_config.get("azure_deployment", config.get("model"))
+        openai_config["azure_endpoint"] = openai_config.get("azure_endpoint", openai_config.pop("base_url", None))
+
+        # Create a default Azure token provider if requested
+        if openai_config.get("azure_ad_token_provider") == "DEFAULT":
+            import azure.identity
+
+            openai_config["azure_ad_token_provider"] = azure.identity.get_bearer_token_provider(
+                azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
+            )
+
+    def _configure_openai_config_for_bedrock(self, config: dict[str, Any], openai_config: dict[str, Any]) -> None:
+        """Update openai_config with AWS credentials from config."""
+        required_keys = ["aws_access_key", "aws_secret_key", "aws_region"]
+        optional_keys = ["aws_session_token", "aws_profile_name"]
+        for key in required_keys:
+            if key in config:
+                openai_config[key] = config[key]
+        for key in optional_keys:
+            if key in config:
+                openai_config[key] = config[key]
+
+    def _configure_openai_config_for_vertextai(self, config: dict[str, Any], openai_config: dict[str, Any]) -> None:
+        """Update openai_config with Google credentials from config."""
+        required_keys = ["gcp_project_id", "gcp_region", "gcp_auth_token"]
+        for key in required_keys:
+            if key in config:
+                openai_config[key] = config[key]
+
+    def _configure_openai_config_for_gemini(self, config: dict[str, Any], openai_config: dict[str, Any]) -> None:
+        """Update openai_config with additional gemini genai configs."""
+        optional_keys = ["proxy"]
+        for key in optional_keys:
+            if key in config:
+                openai_config[key] = config[key]
+
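As `_configure_azure_openai` above shows, an Azure entry can request a token provider instead of a static key by setting `azure_ad_token_provider` to `"DEFAULT"`. A sketch of such a config entry (endpoint, deployment name and API version are placeholders, and `azure-identity` must be installed):

```python
import os

azure_entry = {
    "api_type": "azure",
    "model": "gpt-4o",  # reused as the azure_deployment name unless one is given explicitly
    "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
    "api_version": "2024-02-01",
    # "DEFAULT" asks the wrapper to build a token provider from
    # azure.identity.DefaultAzureCredential, as in the method above.
    "azure_ad_token_provider": "DEFAULT",
}
```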
+    def _register_default_client(self, config: dict[str, Any], openai_config: dict[str, Any]) -> None:
+        """Create a client with the given config to override openai_config,
+        after removing extra kwargs.
+
+        For Azure models/deployment names there's a convenience modification of model removing dots in
+        the it's value (Azure deployment names can't have dots). I.e. if you have Azure deployment name
+        "gpt-35-turbo" and define model "gpt-3.5-turbo" in the config the function will remove the dot
+        from the name and create a client that connects to "gpt-35-turbo" Azure deployment.
+        """
+        openai_config = {**openai_config, **{k: v for k, v in config.items() if k in self.openai_kwargs}}
+        api_type = config.get("api_type")
+        model_client_cls_name = config.get("model_client_cls")
+        response_format = config.get("response_format")
+        if model_client_cls_name is not None:
+            # a config for a custom client is set
+            # adding placeholder until the register_model_client is called with the appropriate class
+            self._clients.append(PlaceHolderClient(config))
+            # codeql[py/clear-text-logging-sensitive-data]
+            logger.info(
+                f"Detected custom model client in config: {model_client_cls_name}, model client can not be used until register_model_client is called."
+            )
+            # TODO: logging for custom client
+        else:
+            if api_type is not None and api_type.startswith("azure"):
+
+                @require_optional_import("openai>=1.66.2", "openai")
+                def create_azure_openai_client() -> AzureOpenAI:
+                    self._configure_azure_openai(config, openai_config)
+                    client = AzureOpenAI(**openai_config)
+                    self._clients.append(OpenAIClient(client, response_format=response_format))  # type: ignore[arg-type]
+                    return client
+
+                client = create_azure_openai_client()
+            elif api_type is not None and api_type.startswith("cerebras"):
+                if cerebras_import_exception:
+                    raise ImportError("Please install `cerebras_cloud_sdk` to use Cerebras OpenAI API.")
+                client = CerebrasClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("google"):
+                if gemini_import_exception:
+                    raise ImportError("Please install `google-genai` and 'vertexai' to use Google's API.")
+                self._configure_openai_config_for_gemini(config, openai_config)
+                client = GeminiClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("anthropic"):
+                if "api_key" not in config and "aws_region" in config:
+                    self._configure_openai_config_for_bedrock(config, openai_config)
+                elif "api_key" not in config and "gcp_region" in config:
+                    self._configure_openai_config_for_vertextai(config, openai_config)
+                if anthropic_import_exception:
+                    raise ImportError("Please install `anthropic` to use Anthropic API.")
+                client = AnthropicClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("mistral"):
+                if mistral_import_exception:
+                    raise ImportError("Please install `mistralai` to use the Mistral.AI API.")
+                client = MistralAIClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("together"):
+                if together_import_exception:
+                    raise ImportError("Please install `together` to use the Together.AI API.")
+                client = TogetherClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("groq"):
+                if groq_import_exception:
+                    raise ImportError("Please install `groq` to use the Groq API.")
+                client = GroqClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("cohere"):
+                if cohere_import_exception:
+                    raise ImportError("Please install `cohere` to use the Cohere API.")
+                client = CohereClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("ollama"):
+                if ollama_import_exception:
+                    raise ImportError("Please install `ollama` and `fix-busted-json` to use the Ollama API.")
+                client = OllamaClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("bedrock"):
+                self._configure_openai_config_for_bedrock(config, openai_config)
+                if bedrock_import_exception:
+                    raise ImportError("Please install `boto3` to use the Amazon Bedrock API.")
+                client = BedrockClient(response_format=response_format, **openai_config)
+                self._clients.append(client)  # type: ignore[arg-type]
+            elif api_type is not None and api_type.startswith("openai_v2"):
+                # OpenAI V2 Client with ModelClientV2 architecture (rich UnifiedResponse)
+                from autogen.llm_clients import OpenAICompletionsClient as V2Client
+
+                v2_client = V2Client(
+                    api_key=openai_config.get("api_key"),
+                    base_url=openai_config.get("base_url"),
+                    timeout=openai_config.get("timeout", 60.0),
+                    response_format=response_format,
+                )
+                self._clients.append(v2_client)  # type: ignore[arg-type]
+                client = v2_client
+            elif api_type is not None and api_type.startswith("responses"):
+                # OpenAI Responses API (stateful). Reuse the same OpenAI SDK but call the `/responses` endpoint via the new client.
+                @require_optional_import("openai>=1.66.2", "openai")
+                def create_responses_client() -> OpenAI:
+                    client = OpenAI(**openai_config)
+                    self._clients.append(OpenAIResponsesClient(client, response_format=response_format))  # type: ignore[arg-type]
+                    return client
+
+                client = create_responses_client()
+            else:
+
+                @require_optional_import("openai>=1.66.2", "openai")
+                def create_openai_client() -> OpenAI:
+                    client = OpenAI(**openai_config)
+                    self._clients.append(OpenAIClient(client, response_format))  # type: ignore[arg-type]
+                    return client
+
+                client = create_openai_client()
+
+            if logging_enabled():
+                log_new_client(client, self, openai_config)
+
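The `api_type` prefix in each config entry is what selects the client class registered above. A sketch of a mixed `config_list` (keys and model names are placeholders; the non-OpenAI entries additionally require their optional dependencies such as `anthropic` or `ollama`):

```python
from autogen import OpenAIWrapper

wrapper = OpenAIWrapper(
    config_list=[
        {"api_type": "openai", "model": "gpt-4o-mini", "api_key": "sk-..."},  # default OpenAI client
        {"api_type": "anthropic", "model": "claude-3-5-sonnet-20240620", "api_key": "..."},  # AnthropicClient
        {"api_type": "ollama", "model": "llama3.1"},  # OllamaClient against a local Ollama server
    ]
)
```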
+    def register_model_client(self, model_client_cls: ModelClient, **kwargs: Any):
+        """Register a model client.
+
+        Args:
+            model_client_cls: A custom client class that follows the ModelClient interface
+            kwargs: The kwargs for the custom client class to be initialized with
+        """
+        existing_client_class = False
+        for i, client in enumerate(self._clients):
+            if isinstance(client, PlaceHolderClient):
+                placeholder_config = client.config
+
+                if placeholder_config.get("model_client_cls") == model_client_cls.__name__:
+                    self._clients[i] = model_client_cls(placeholder_config, **kwargs)
+                    return
+            elif isinstance(client, model_client_cls):
+                existing_client_class = True
+
+        if existing_client_class:
+            logger.warning(
+                f"Model client {model_client_cls.__name__} is already registered. Add more entries in the config_list to use multiple model clients."
+            )
+        else:
+            raise ValueError(
+                f'Model client "{model_client_cls.__name__}" is being registered but was not found in the config_list. '
+                f'Please make sure to include an entry in the config_list with "model_client_cls": "{model_client_cls.__name__}"'
+            )
+
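A sketch of the two-step custom-client flow the method above expects: name the class via `model_client_cls` in the config, then register the class itself. The `EchoClient` here is hypothetical and only loosely follows the `ModelClient` protocol, but it is enough to illustrate the placeholder replacement:

```python
from types import SimpleNamespace
from autogen import OpenAIWrapper

class EchoClient:
    """Toy client that echoes the last user message back (illustrative only)."""

    def __init__(self, config, **kwargs):
        self.config = config

    def create(self, params):
        text = params["messages"][-1]["content"]
        response = SimpleNamespace(
            model=self.config.get("model", "echo"),
            choices=[SimpleNamespace(message=SimpleNamespace(content=text, tool_calls=None))],
            usage=None,
        )
        response.message_retrieval_function = self.message_retrieval
        return response

    def message_retrieval(self, response):
        return [choice.message.content for choice in response.choices]

    def cost(self, response):
        return 0.0

    @staticmethod
    def get_usage(response):
        return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "cost": 0.0, "model": "echo"}

wrapper = OpenAIWrapper(config_list=[{"model": "echo", "model_client_cls": "EchoClient"}])
wrapper.register_model_client(EchoClient)  # replaces the PlaceHolderClient created at registration time
reply = wrapper.create(messages=[{"role": "user", "content": "ping"}])
print(wrapper.extract_text_or_completion_object(reply))  # expected: ['ping']
```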
+    @classmethod
+    def instantiate(
+        cls,
+        template: str | Callable[[dict[str, Any]], str] | None,
+        context: dict[str, Any] | None = None,
+        allow_format_str_template: bool | None = False,
+    ) -> str | None:
+        if not context or template is None:
+            return template  # type: ignore [return-value]
+        if isinstance(template, str):
+            return template.format(**context) if allow_format_str_template else template
+        return template(context)
+
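`instantiate` is a small templating helper used by `_construct_create_params` below; it supports plain strings, `str.format`-style strings (only when `allow_format_str_template` is set), and callables. A quick sketch:

```python
from autogen import OpenAIWrapper

context = {"city": "Paris", "n": 3}

# format-string template, only filled in when allow_format_str_template=True
print(OpenAIWrapper.instantiate("List {n} facts about {city}.", context, allow_format_str_template=True))
# -> "List 3 facts about Paris."

# callable template: receives the context dict and returns the final string
print(OpenAIWrapper.instantiate(lambda ctx: f"Weather in {ctx['city']}?", context))
# -> "Weather in Paris?"
```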
+    def _construct_create_params(self, create_config: dict[str, Any], extra_kwargs: dict[str, Any]) -> dict[str, Any]:
+        """Prime the create_config with additional_kwargs."""
+        # Validate the config
+        prompt: str | None = create_config.get("prompt")
+        messages: list[dict[str, Any]] | None = create_config.get("messages")
+        if (prompt is None) == (messages is None):
+            raise ValueError("Either prompt or messages should be in create config but not both.")
+        context = extra_kwargs.get("context")
+        if context is None:
+            # No need to instantiate if no context is provided.
+            return create_config
+        # Instantiate the prompt or messages
+        allow_format_str_template = extra_kwargs.get("allow_format_str_template", False)
+        # Make a copy of the config
+        params = create_config.copy()
+        if prompt is not None:
+            # Instantiate the prompt
+            params["prompt"] = self.instantiate(prompt, context, allow_format_str_template)
+        elif context:
+            # Instantiate the messages
+            params["messages"] = [
+                (
+                    {
+                        **m,
+                        "content": self.instantiate(m["content"], context, allow_format_str_template),
+                    }
+                    if m.get("content")
+                    else m
+                )
+                for m in messages  # type: ignore [union-attr]
+            ]
+        return params
+
+    def create(self, **config: Any) -> ModelClient.ModelClientResponseProtocol:
+        """Make a completion for a given config using available clients.
+        Besides the kwargs allowed in openai's [or other] client, we allow the following additional kwargs.
+        The config in each client will be overridden by the config.
+
+        Args:
+            **config: The config for the completion.
+
+        Raises:
+            RuntimeError: If all declared custom model clients are not registered
+            APIError: If any model client create call raises an APIError
+        """
+        # if ERROR:
+        #     raise ERROR
+        invocation_id = str(uuid.uuid4())
+        last = len(self._clients) - 1
+        # Check if all configs in config list are activated
+        non_activated = [
+            client.config["model_client_cls"] for client in self._clients if isinstance(client, PlaceHolderClient)
+        ]
+        if non_activated:
+            raise RuntimeError(
+                f"Model client(s) {non_activated} are not activated. Please register the custom model clients using `register_model_client` or filter them out form the config list."
+            )
+
+        ordered_clients_indices = list(range(len(self._clients)))
+        if self.routing_method == "round_robin" and len(self._clients) > 0:
+            ordered_clients_indices = (
+                ordered_clients_indices[self._round_robin_index :] + ordered_clients_indices[: self._round_robin_index]
+            )
+            self._round_robin_index = (self._round_robin_index + 1) % len(self._clients)
+
+        for i in ordered_clients_indices:
+            # merge the input config with the i-th config in the config list
+            client_config = self._config_list[i]
+            full_config = merge_config_with_tools(config, client_config)
+
+            # separate the config into create_config and extra_kwargs
+            create_config, extra_kwargs = self._separate_create_config(full_config)
+            # construct the create params
+            params = self._construct_create_params(create_config, extra_kwargs)
+            # get the cache_seed, filter_func and context
+            cache_seed = extra_kwargs.get("cache_seed")
+            cache = extra_kwargs.get("cache")
+            filter_func = extra_kwargs.get("filter_func")
+            context = extra_kwargs.get("context")
+            agent = extra_kwargs.get("agent")
+            price = extra_kwargs.get("price", None)
+            if isinstance(price, list):
+                price = tuple(price)
+            elif isinstance(price, (float, int)):
+                logger.warning(
+                    "Input price is a float/int. Using the same price for prompt and completion tokens. Use a list/tuple if prompt and completion token prices are different."
+                )
+                price = (price, price)
+
+            total_usage = None
+            actual_usage = None
+
+            cache_client = None
+            if cache is not None:
+                # Use the cache object if provided.
+                cache_client = cache
+            elif cache_seed is not None:
+                # Legacy cache behavior, if cache_seed is given, use DiskCache.
+                cache_client = Cache.disk(cache_seed, LEGACY_CACHE_DIR)
+
+            client = self._clients[i]
+            log_cache_seed_value(cache if cache is not None else cache_seed, client=client)
+
+            if cache_client is not None:
+                with cache_client as cache:
+                    # Try to get the response from cache
+                    key = get_key(
+                        {
+                            **params,
+                            **{"response_format": json.dumps(TypeAdapter(params["response_format"]).json_schema())},
+                        }
+                        if "response_format" in params and not isinstance(params["response_format"], dict)
+                        else params
+                    )
+                    request_ts = get_current_ts()
+
+                    response: ChatCompletionExtended | None = cache.get(key, None)
+
+                    if response is not None:
+                        # Backward compatibility: set message_retrieval_function for ChatCompletionExtended
+                        if hasattr(response, "message_retrieval_function"):
+                            response.message_retrieval_function = client.message_retrieval
+
+                        try:
+                            response.cost
+                        except AttributeError:
+                            # update attribute if cost is not calculated
+                            response.cost = client.cost(response)
+                            cache.set(key, response)
+                        total_usage = client.get_usage(response)
+
+                        if logging_enabled():
+                            # Log the cache hit
+                            # TODO: log the config_id and pass_filter etc.
+                            log_chat_completion(
+                                invocation_id=invocation_id,
+                                client_id=id(client),
+                                wrapper_id=id(self),
+                                agent=agent,
+                                request=params,
+                                response=response,
+                                is_cached=1,
+                                cost=response.cost if response.cost is not None else 0.0,
+                                start_time=request_ts,
+                            )
+
+                        # check the filter
+                        pass_filter = filter_func is None or filter_func(context=context, response=response)
+                        if pass_filter or i == last:
+                            # Store metadata for serializable responses
+                            if hasattr(response, "id"):
+                                self._store_response_metadata(response.id, client, i, pass_filter)
+
+                            # Backward compatibility: set attributes on ChatCompletionExtended
+                            if hasattr(response, "config_id"):
+                                response.config_id = i
+                            if hasattr(response, "pass_filter"):
+                                response.pass_filter = pass_filter
+                            self._update_usage(actual_usage=actual_usage, total_usage=total_usage)
+                            return response
+                        continue  # filter is not passed; try the next config
+            try:
+                request_ts = get_current_ts()
+                response = client.create(params)
+            except Exception as e:
+                if openai_result.is_successful:
+                    if APITimeoutError is not None and isinstance(e, APITimeoutError):
+                        # logger.debug(f"config {i} timed out", exc_info=True)
+                        if i == last:
+                            raise TimeoutError(
+                                "OpenAI API call timed out. This could be due to congestion or too small a timeout value. The timeout can be specified by setting the 'timeout' value (in seconds) in the llm_config (if you are using agents) or the OpenAIWrapper constructor (if you are using the OpenAIWrapper directly)."
+                            ) from e
+                    elif APIError is not None and isinstance(e, APIError):
+                        error_code = getattr(e, "code", None)
+                        if logging_enabled():
+                            log_chat_completion(
+                                invocation_id=invocation_id,
+                                client_id=id(client),
+                                wrapper_id=id(self),
+                                agent=agent,
+                                request=params,
+                                response=f"error_code:{error_code}, config {i} failed",
+                                is_cached=0,
+                                cost=0,
+                                start_time=request_ts,
+                            )
+
+                        if error_code == "content_filter":
+                            # raise the error for content_filter
+                            raise
+                        # logger.debug(f"config {i} failed", exc_info=True)
+                        if i == last:
+                            raise
+                    else:
+                        raise
+                else:
+                    raise
+            except (
+                gemini_InternalServerError,
+                gemini_ResourceExhausted,
+                anthorpic_InternalServerError,
+                anthorpic_RateLimitError,
+                mistral_SDKError,
+                mistral_HTTPValidationError,
+                together_TogetherException,
+                groq_InternalServerError,
+                groq_RateLimitError,
+                groq_APIConnectionError,
+                cohere_InternalServerError,
+                cohere_TooManyRequestsError,
+                cohere_ServiceUnavailableError,
+                ollama_RequestError,
+                ollama_ResponseError,
+                bedrock_BotoCoreError,
+                bedrock_ClientError,
+                cerebras_AuthenticationError,
+                cerebras_InternalServerError,
+                cerebras_RateLimitError,
+            ):
+                # logger.debug(f"config {i} failed", exc_info=True)
+                if i == last:
+                    raise
+            else:
+                # add cost calculation before caching no matter filter is passed or not
+                if price is not None:
+                    response.cost = self._cost_with_customized_price(response, price)
+                else:
+                    response.cost = client.cost(response)
+                actual_usage = client.get_usage(response)
+                total_usage = actual_usage.copy() if actual_usage is not None else total_usage
+                self._update_usage(actual_usage=actual_usage, total_usage=total_usage)
+
+                if cache_client is not None:
+                    # Cache the response
+                    with cache_client as cache:
+                        cache.set(key, response)
+
+                if logging_enabled():
+                    # TODO: log the config_id and pass_filter etc.
+                    log_chat_completion(
+                        invocation_id=invocation_id,
+                        client_id=id(client),
+                        wrapper_id=id(self),
+                        agent=agent,
+                        request=params,
+                        response=response,
+                        is_cached=0,
+                        cost=response.cost,
+                        start_time=request_ts,
+                    )
+
+                # Store metadata instead of mutating response
+                # Keep backward compatibility by setting message_retrieval_function for now
+                if hasattr(response, "message_retrieval_function"):
+                    response.message_retrieval_function = client.message_retrieval
+
+                # check the filter
+                pass_filter = filter_func is None or filter_func(context=context, response=response)
+                if pass_filter or i == last:
+                    # Store metadata for serializable responses
+                    if hasattr(response, "id"):
+                        self._store_response_metadata(response.id, client, i, pass_filter)
+
+                    # Backward compatibility: set attributes on ChatCompletionExtended
+                    if hasattr(response, "config_id"):
+                        response.config_id = i
+                    if hasattr(response, "pass_filter"):
+                        response.pass_filter = pass_filter
+
+                    # Return the response if it passes the filter or it is the last client
+                    return response
+                continue  # filter is not passed; try the next config
+        raise RuntimeError("Should not reach here.")
+
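A hedged sketch of `create()` with the extra kwargs it recognises on top of the raw completion parameters (`context` with `allow_format_str_template`, an explicit `cache`, a `filter_func`, and a custom `price`). The import path for `Cache` and the API key are assumptions, and the call only succeeds against a working endpoint:

```python
from autogen import OpenAIWrapper
from autogen.cache import Cache  # assumed import path for the cache helper

wrapper = OpenAIWrapper(config_list=[{"model": "gpt-4o-mini", "api_key": "sk-..."}])

def non_empty(context, response):
    # if this returns False, create() moves on to the next config (here there is only one)
    return any(wrapper.extract_text_or_completion_object(response))

with Cache.disk(cache_seed=7) as cache:
    response = wrapper.create(
        messages=[{"role": "user", "content": "Summarise {topic} in one line."}],
        context={"topic": "vector clocks"},
        allow_format_str_template=True,
        cache=cache,              # an explicit cache object takes precedence over cache_seed
        filter_func=non_empty,
        price=[0.00015, 0.0006],  # custom [prompt, completion] USD per 1K tokens
    )

print(response.cost)
```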
+    @staticmethod
+    def _cost_with_customized_price(response: ChatCompletion | Completion, price_1k: tuple[float, float]) -> float:
+        """If a customized cost is passed, overwrite the cost in the response."""
+        n_input_tokens = response.usage.prompt_tokens if response.usage is not None else 0
+        n_output_tokens = response.usage.completion_tokens if response.usage is not None else 0
+        if n_output_tokens is None:
+            n_output_tokens = 0
+        return (n_input_tokens * price_1k[0] + n_output_tokens * price_1k[1]) / 1000
+
+    @staticmethod
+    def _update_dict_from_chunk(chunk: BaseModel, d: dict[str, Any], field: str) -> int:
+        """Update the dict from the chunk.
+
+        Reads `chunk.field` and if present updates `d[field]` accordingly.
+
+        Args:
+            chunk: The chunk.
+            d: The dict to be updated in place.
+            field: The field.
+
+        Returns:
+            The updated dict.
+
+        """
+        completion_tokens = 0
+        assert isinstance(d, dict), d
+        if hasattr(chunk, field) and getattr(chunk, field) is not None:
+            new_value = getattr(chunk, field)
+            if isinstance(new_value, (list, dict)):
+                raise NotImplementedError(
+                    f"Field {field} is a list or dict, which is currently not supported. "
+                    "Only string and numbers are supported."
+                )
+            if field not in d:
+                d[field] = ""
+            if isinstance(new_value, str):
+                d[field] += getattr(chunk, field)
+            else:
+                d[field] = new_value
+            completion_tokens = 1
+
+        return completion_tokens
+
+    @staticmethod
+    def _update_function_call_from_chunk(
+        function_call_chunk: ChoiceDeltaToolCallFunction | ChoiceDeltaFunctionCall,
+        full_function_call: dict[str, Any] | None,
+        completion_tokens: int,
+    ) -> tuple[dict[str, Any], int]:
+        """Update the function call from the chunk.
+
+        Args:
+            function_call_chunk: The function call chunk.
+            full_function_call: The full function call.
+            completion_tokens: The number of completion tokens.
+
+        Returns:
+            The updated full function call and the updated number of completion tokens.
+
+        """
+        # Handle function call
+        if function_call_chunk:
+            if full_function_call is None:
+                full_function_call = {}
+            for field in ["name", "arguments"]:
+                completion_tokens += OpenAIWrapper._update_dict_from_chunk(
+                    function_call_chunk, full_function_call, field
+                )
+
+        if full_function_call:
+            return full_function_call, completion_tokens
+        else:
+            raise RuntimeError("Function call is not found, this should not happen.")
+
+    @staticmethod
+    def _update_tool_calls_from_chunk(
+        tool_calls_chunk: ChoiceDeltaToolCall,
+        full_tool_call: dict[str, Any] | None,
+        completion_tokens: int,
+    ) -> tuple[dict[str, Any], int]:
+        """Update the tool call from the chunk.
+
+        Args:
+            tool_calls_chunk: The tool call chunk.
+            full_tool_call: The full tool call.
+            completion_tokens: The number of completion tokens.
+
+        Returns:
+            The updated full tool call and the updated number of completion tokens.
+
+        """
+        # future proofing for when tool calls other than function calls are supported
+        if tool_calls_chunk.type and tool_calls_chunk.type != "function":
+            raise NotImplementedError(
+                f"Tool call type {tool_calls_chunk.type} is currently not supported. Only function calls are supported."
+            )
+
+        # Handle tool call
+        assert full_tool_call is None or isinstance(full_tool_call, dict), full_tool_call
+        if tool_calls_chunk:
+            if full_tool_call is None:
+                full_tool_call = {}
+            for field in ["index", "id", "type"]:
+                completion_tokens += OpenAIWrapper._update_dict_from_chunk(tool_calls_chunk, full_tool_call, field)
+
+            if hasattr(tool_calls_chunk, "function") and tool_calls_chunk.function:
+                if "function" not in full_tool_call:
+                    full_tool_call["function"] = None
+
+                full_tool_call["function"], completion_tokens = OpenAIWrapper._update_function_call_from_chunk(
+                    tool_calls_chunk.function, full_tool_call["function"], completion_tokens
+                )
+
+        if full_tool_call:
+            return full_tool_call, completion_tokens
+        else:
+            raise RuntimeError("Tool call is not found, this should not happen.")
+
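The chunk helpers above accumulate a streamed tool call by concatenating string fields and overwriting scalar ones. A simplified standalone stand-in for that accumulation, using plain dicts instead of the OpenAI delta types:

```python
# Simulated streaming deltas for one tool call; in the real client these are
# ChoiceDeltaToolCall objects rather than dicts.
chunks = [
    {"index": 0, "id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": ""}},
    {"function": {"arguments": '{"city": "Par'}},
    {"function": {"arguments": 'is"}'}},
]

full: dict = {}
for chunk in chunks:
    for field, value in chunk.items():
        if field == "function":
            fn = full.setdefault("function", {})
            for f_field, f_value in value.items():
                if isinstance(f_value, str):
                    fn[f_field] = fn.get(f_field, "") + f_value  # strings accumulate across chunks
                else:
                    fn[f_field] = f_value
        elif isinstance(value, str):
            full[field] = full.get(field, "") + value
        else:
            full[field] = value  # ints etc. are overwritten

print(full["function"]["name"], full["function"]["arguments"])  # get_weather {"city": "Paris"}
```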
+    def _update_usage(self, actual_usage, total_usage):
+        def update_usage(usage_summary, response_usage):
+            # go through RESPONSE_USAGE_KEYS and check that they are in response_usage and if not just return usage_summary
+            for key in ModelClient.RESPONSE_USAGE_KEYS:
+                if key not in response_usage:
+                    return usage_summary
+
+            model = response_usage["model"]
+            cost = response_usage["cost"]
+            prompt_tokens = response_usage["prompt_tokens"]
+            completion_tokens = response_usage["completion_tokens"]
+            if completion_tokens is None:
+                completion_tokens = 0
+            total_tokens = response_usage["total_tokens"]
+
+            if usage_summary is None:
+                usage_summary = {"total_cost": cost}
+            else:
+                usage_summary["total_cost"] += cost
+
+            usage_summary[model] = {
+                "cost": usage_summary.get(model, {}).get("cost", 0) + cost,
+                "prompt_tokens": usage_summary.get(model, {}).get("prompt_tokens", 0) + prompt_tokens,
+                "completion_tokens": usage_summary.get(model, {}).get("completion_tokens", 0) + completion_tokens,
+                "total_tokens": usage_summary.get(model, {}).get("total_tokens", 0) + total_tokens,
+            }
+            return usage_summary
+
+        if total_usage is not None:
+            self.total_usage_summary = update_usage(self.total_usage_summary, total_usage)
+        if actual_usage is not None:
+            self.actual_usage_summary = update_usage(self.actual_usage_summary, actual_usage)
+
+    def print_usage_summary(self, mode: str | list[str] = ["actual", "total"]) -> None:
+        """Print the usage summary."""
+        iostream = IOStream.get_default()
+
+        if isinstance(mode, list):
+            if len(mode) == 0 or len(mode) > 2:
+                raise ValueError(f'Invalid mode: {mode}, choose from "actual", "total", ["actual", "total"]')
+            if "actual" in mode and "total" in mode:
+                mode = "both"
+            elif "actual" in mode:
+                mode = "actual"
+            elif "total" in mode:
+                mode = "total"
+
+        iostream.send(
+            UsageSummaryEvent(
+                actual_usage_summary=self.actual_usage_summary, total_usage_summary=self.total_usage_summary, mode=mode
+            )
+        )
+
+    def clear_usage_summary(self) -> None:
+        """Clear the usage summary."""
+        self.total_usage_summary = None
+        self.actual_usage_summary = None
+
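After a few `create()` calls, the two summaries built by `_update_usage` can be inspected or printed. A sketch, assuming `wrapper` is an `OpenAIWrapper` that has already served some requests (the dict in the comment shows only the shape, not real numbers):

```python
wrapper.print_usage_summary(mode=["actual", "total"])  # emits a UsageSummaryEvent

# Shape of the accumulated dicts: a running total plus one entry per model, e.g.
# {"total_cost": 0.0021, "gpt-4o-mini": {"cost": 0.0021, "prompt_tokens": 1200,
#  "completion_tokens": 300, "total_tokens": 1500}}
print(wrapper.total_usage_summary)   # includes completions served from the cache
print(wrapper.actual_usage_summary)  # only completions actually sent to the API

wrapper.clear_usage_summary()
```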
+    def extract_text_or_completion_object(self, response: Any) -> list[str] | list[dict[str, Any]]:
+        """Extract the text or ChatCompletion objects from a completion or chat response.
+
+        Supports both legacy responses (with message_retrieval_function) and new serializable responses.
+
+        Args:
+            response: The response from any client (ChatCompletion, UnifiedResponse, etc.)
+
+        Returns:
+            A list of text, or a list of message dicts if function_call/tool_calls are present.
+        """
+        # Option 1: Legacy path - response has message_retrieval_function attached
+        if hasattr(response, "message_retrieval_function") and callable(response.message_retrieval_function):
+            return response.message_retrieval_function(response)  # type: ignore [misc]
+
+        # Option 2: Use stored metadata to find client
+        if hasattr(response, "id") and response.id in self._response_metadata:
+            metadata = self._response_metadata[response.id]
+            client = metadata["client"]
+            return client.message_retrieval(response)
+
+        # Option 3: Fallback - try to extract from response structure directly
+        # This handles cases where response is not in buffer
+        if hasattr(response, "choices"):
+            # OpenAI-style response
+            return [
+                choice.message
+                if hasattr(choice.message, "tool_calls") and choice.message.tool_calls
+                else getattr(choice.message, "content", "")
+                for choice in response.choices
+            ]
+
+        # Last resort: return empty list
+        warnings.warn(
+            f"Could not extract messages from response type {type(response).__name__}. "
+            "Response may not be in metadata buffer or may not support extraction.",
+            UserWarning,
+        )
+        return []
+
+
+# -----------------------------------------------------------------------------
+# New: Responses API config entry (OpenAI-hosted preview endpoint)
+# -----------------------------------------------------------------------------
+
+
+class OpenAIResponsesEntryDict(LLMConfigEntryDict, total=False):
+    api_type: Literal["responses"]
+
+    tool_choice: Literal["none", "auto", "required"] | None
+    built_in_tools: list[str] | None
+
+
+class OpenAIResponsesLLMConfigEntry(OpenAILLMConfigEntry):
+    """LLMConfig entry for the OpenAI Responses API (stateful, tool-enabled).
+
+    This reuses all the OpenAI fields but changes *api_type* so the wrapper can
+    route traffic to the `client.responses` endpoint instead of
+    `chat.completions`. It inherits everything else – including reasoning
+    fields – from *OpenAILLMConfigEntry* so users can simply set
+
+    ```python
+    {
+        "api_type": "responses",  # <-- key differentiator
+        "model": "o3",  # reasoning model
+        "reasoning_effort": "medium",  # low / medium / high
+        "stream": True,
+    }
+    ```
+    """
+
+    api_type: Literal["responses"] = "responses"
+    tool_choice: Literal["none", "auto", "required"] | None = "auto"
+    built_in_tools: list[str] | None = None
+
+    def create_client(self) -> ModelClient:  # pragma: no cover
+        raise NotImplementedError("Handled via OpenAIWrapper._register_default_client")
+
+
+class OpenAIV2EntryDict(LLMConfigEntryDict, total=False):
+    api_type: Literal["openai_v2"]
+
+
+class OpenAIV2LLMConfigEntry(OpenAILLMConfigEntry):
+    """LLMConfig entry for OpenAI V2 Client with ModelClientV2 architecture.
+
+    This uses the new OpenAIResponsesClient from autogen.llm_clients which returns
+    rich UnifiedResponse objects with typed content blocks (ReasoningContent,
+    CitationContent, ToolCallContent, etc.).
+
+    Example:
+        ```python
+        {
+            "api_type": "openai_v2",  # <-- uses ModelClientV2 architecture
+            "model": "gpt-4o-mini",  # vision-capable model
+            "api_key": "...",
+        }
+        ```
+
+    Benefits over standard OpenAI client:
+    - Returns UnifiedResponse with typed content blocks
+    - Access to reasoning blocks from o1/o3 models via response.reasoning
+    - Forward-compatible with unknown content types via GenericContent
+    - Rich metadata and citations support
+    - Type-safe with Pydantic validation
+    """
+
+    api_type: Literal["openai_v2"] = "openai_v2"
+
+    def create_client(self) -> ModelClient:  # pragma: no cover
+        raise NotImplementedError("Handled via OpenAIWrapper._register_default_client")
|