ag2 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ag2-0.10.2.dist-info/METADATA +819 -0
- ag2-0.10.2.dist-info/RECORD +423 -0
- ag2-0.10.2.dist-info/WHEEL +4 -0
- ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
- ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
- autogen/__init__.py +88 -0
- autogen/_website/__init__.py +3 -0
- autogen/_website/generate_api_references.py +426 -0
- autogen/_website/generate_mkdocs.py +1216 -0
- autogen/_website/notebook_processor.py +475 -0
- autogen/_website/process_notebooks.py +656 -0
- autogen/_website/utils.py +413 -0
- autogen/a2a/__init__.py +36 -0
- autogen/a2a/agent_executor.py +86 -0
- autogen/a2a/client.py +357 -0
- autogen/a2a/errors.py +18 -0
- autogen/a2a/httpx_client_factory.py +79 -0
- autogen/a2a/server.py +221 -0
- autogen/a2a/utils.py +207 -0
- autogen/agentchat/__init__.py +47 -0
- autogen/agentchat/agent.py +180 -0
- autogen/agentchat/assistant_agent.py +86 -0
- autogen/agentchat/chat.py +325 -0
- autogen/agentchat/contrib/__init__.py +5 -0
- autogen/agentchat/contrib/agent_eval/README.md +7 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
- autogen/agentchat/contrib/agent_eval/task.py +42 -0
- autogen/agentchat/contrib/agent_optimizer.py +432 -0
- autogen/agentchat/contrib/capabilities/__init__.py +5 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
- autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
- autogen/agentchat/contrib/capabilities/teachability.py +393 -0
- autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
- autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
- autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
- autogen/agentchat/contrib/capabilities/transforms.py +578 -0
- autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
- autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
- autogen/agentchat/contrib/captainagent/__init__.py +9 -0
- autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
- autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
- autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
- autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
- autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
- autogen/agentchat/contrib/graph_rag/document.py +29 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
- autogen/agentchat/contrib/img_utils.py +397 -0
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
- autogen/agentchat/contrib/llava_agent.py +189 -0
- autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
- autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
- autogen/agentchat/contrib/rag/__init__.py +10 -0
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
- autogen/agentchat/contrib/rag/query_engine.py +76 -0
- autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
- autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
- autogen/agentchat/contrib/swarm_agent.py +1404 -0
- autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
- autogen/agentchat/contrib/vectordb/__init__.py +5 -0
- autogen/agentchat/contrib/vectordb/base.py +224 -0
- autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
- autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
- autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
- autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
- autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
- autogen/agentchat/contrib/vectordb/utils.py +126 -0
- autogen/agentchat/contrib/web_surfer.py +304 -0
- autogen/agentchat/conversable_agent.py +4307 -0
- autogen/agentchat/group/__init__.py +67 -0
- autogen/agentchat/group/available_condition.py +91 -0
- autogen/agentchat/group/context_condition.py +77 -0
- autogen/agentchat/group/context_expression.py +238 -0
- autogen/agentchat/group/context_str.py +39 -0
- autogen/agentchat/group/context_variables.py +182 -0
- autogen/agentchat/group/events/transition_events.py +111 -0
- autogen/agentchat/group/group_tool_executor.py +324 -0
- autogen/agentchat/group/group_utils.py +659 -0
- autogen/agentchat/group/guardrails.py +179 -0
- autogen/agentchat/group/handoffs.py +303 -0
- autogen/agentchat/group/llm_condition.py +93 -0
- autogen/agentchat/group/multi_agent_chat.py +291 -0
- autogen/agentchat/group/on_condition.py +55 -0
- autogen/agentchat/group/on_context_condition.py +51 -0
- autogen/agentchat/group/patterns/__init__.py +18 -0
- autogen/agentchat/group/patterns/auto.py +160 -0
- autogen/agentchat/group/patterns/manual.py +177 -0
- autogen/agentchat/group/patterns/pattern.py +295 -0
- autogen/agentchat/group/patterns/random.py +106 -0
- autogen/agentchat/group/patterns/round_robin.py +117 -0
- autogen/agentchat/group/reply_result.py +24 -0
- autogen/agentchat/group/safeguards/__init__.py +21 -0
- autogen/agentchat/group/safeguards/api.py +241 -0
- autogen/agentchat/group/safeguards/enforcer.py +1158 -0
- autogen/agentchat/group/safeguards/events.py +140 -0
- autogen/agentchat/group/safeguards/validator.py +435 -0
- autogen/agentchat/group/speaker_selection_result.py +41 -0
- autogen/agentchat/group/targets/__init__.py +4 -0
- autogen/agentchat/group/targets/function_target.py +245 -0
- autogen/agentchat/group/targets/group_chat_target.py +133 -0
- autogen/agentchat/group/targets/group_manager_target.py +151 -0
- autogen/agentchat/group/targets/transition_target.py +424 -0
- autogen/agentchat/group/targets/transition_utils.py +6 -0
- autogen/agentchat/groupchat.py +1832 -0
- autogen/agentchat/realtime/__init__.py +3 -0
- autogen/agentchat/realtime/experimental/__init__.py +20 -0
- autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
- autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
- autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
- autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
- autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
- autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
- autogen/agentchat/realtime/experimental/function_observer.py +84 -0
- autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
- autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
- autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
- autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
- autogen/agentchat/realtime/experimental/websockets.py +21 -0
- autogen/agentchat/realtime_agent/__init__.py +21 -0
- autogen/agentchat/user_proxy_agent.py +114 -0
- autogen/agentchat/utils.py +206 -0
- autogen/agents/__init__.py +3 -0
- autogen/agents/contrib/__init__.py +10 -0
- autogen/agents/contrib/time/__init__.py +8 -0
- autogen/agents/contrib/time/time_reply_agent.py +74 -0
- autogen/agents/contrib/time/time_tool_agent.py +52 -0
- autogen/agents/experimental/__init__.py +27 -0
- autogen/agents/experimental/deep_research/__init__.py +7 -0
- autogen/agents/experimental/deep_research/deep_research.py +52 -0
- autogen/agents/experimental/discord/__init__.py +7 -0
- autogen/agents/experimental/discord/discord.py +66 -0
- autogen/agents/experimental/document_agent/__init__.py +19 -0
- autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
- autogen/agents/experimental/document_agent/document_agent.py +643 -0
- autogen/agents/experimental/document_agent/document_conditions.py +50 -0
- autogen/agents/experimental/document_agent/document_utils.py +376 -0
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
- autogen/agents/experimental/document_agent/parser_utils.py +134 -0
- autogen/agents/experimental/document_agent/url_utils.py +417 -0
- autogen/agents/experimental/reasoning/__init__.py +7 -0
- autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
- autogen/agents/experimental/slack/__init__.py +7 -0
- autogen/agents/experimental/slack/slack.py +73 -0
- autogen/agents/experimental/telegram/__init__.py +7 -0
- autogen/agents/experimental/telegram/telegram.py +76 -0
- autogen/agents/experimental/websurfer/__init__.py +7 -0
- autogen/agents/experimental/websurfer/websurfer.py +70 -0
- autogen/agents/experimental/wikipedia/__init__.py +7 -0
- autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
- autogen/browser_utils.py +309 -0
- autogen/cache/__init__.py +10 -0
- autogen/cache/abstract_cache_base.py +71 -0
- autogen/cache/cache.py +203 -0
- autogen/cache/cache_factory.py +88 -0
- autogen/cache/cosmos_db_cache.py +144 -0
- autogen/cache/disk_cache.py +97 -0
- autogen/cache/in_memory_cache.py +54 -0
- autogen/cache/redis_cache.py +119 -0
- autogen/code_utils.py +598 -0
- autogen/coding/__init__.py +30 -0
- autogen/coding/base.py +120 -0
- autogen/coding/docker_commandline_code_executor.py +283 -0
- autogen/coding/factory.py +56 -0
- autogen/coding/func_with_reqs.py +203 -0
- autogen/coding/jupyter/__init__.py +23 -0
- autogen/coding/jupyter/base.py +36 -0
- autogen/coding/jupyter/docker_jupyter_server.py +160 -0
- autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
- autogen/coding/jupyter/import_utils.py +82 -0
- autogen/coding/jupyter/jupyter_client.py +224 -0
- autogen/coding/jupyter/jupyter_code_executor.py +154 -0
- autogen/coding/jupyter/local_jupyter_server.py +164 -0
- autogen/coding/local_commandline_code_executor.py +341 -0
- autogen/coding/markdown_code_extractor.py +44 -0
- autogen/coding/utils.py +55 -0
- autogen/coding/yepcode_code_executor.py +197 -0
- autogen/doc_utils.py +35 -0
- autogen/environments/__init__.py +10 -0
- autogen/environments/docker_python_environment.py +365 -0
- autogen/environments/python_environment.py +125 -0
- autogen/environments/system_python_environment.py +85 -0
- autogen/environments/venv_python_environment.py +220 -0
- autogen/environments/working_directory.py +74 -0
- autogen/events/__init__.py +7 -0
- autogen/events/agent_events.py +1016 -0
- autogen/events/base_event.py +100 -0
- autogen/events/client_events.py +168 -0
- autogen/events/helpers.py +44 -0
- autogen/events/print_event.py +45 -0
- autogen/exception_utils.py +73 -0
- autogen/extensions/__init__.py +5 -0
- autogen/fast_depends/__init__.py +16 -0
- autogen/fast_depends/_compat.py +75 -0
- autogen/fast_depends/core/__init__.py +14 -0
- autogen/fast_depends/core/build.py +206 -0
- autogen/fast_depends/core/model.py +527 -0
- autogen/fast_depends/dependencies/__init__.py +15 -0
- autogen/fast_depends/dependencies/model.py +30 -0
- autogen/fast_depends/dependencies/provider.py +40 -0
- autogen/fast_depends/library/__init__.py +10 -0
- autogen/fast_depends/library/model.py +46 -0
- autogen/fast_depends/py.typed +6 -0
- autogen/fast_depends/schema.py +66 -0
- autogen/fast_depends/use.py +272 -0
- autogen/fast_depends/utils.py +177 -0
- autogen/formatting_utils.py +83 -0
- autogen/function_utils.py +13 -0
- autogen/graph_utils.py +173 -0
- autogen/import_utils.py +539 -0
- autogen/interop/__init__.py +22 -0
- autogen/interop/crewai/__init__.py +7 -0
- autogen/interop/crewai/crewai.py +88 -0
- autogen/interop/interoperability.py +71 -0
- autogen/interop/interoperable.py +46 -0
- autogen/interop/langchain/__init__.py +8 -0
- autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
- autogen/interop/langchain/langchain_tool.py +78 -0
- autogen/interop/litellm/__init__.py +7 -0
- autogen/interop/litellm/litellm_config_factory.py +178 -0
- autogen/interop/pydantic_ai/__init__.py +7 -0
- autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
- autogen/interop/registry.py +70 -0
- autogen/io/__init__.py +15 -0
- autogen/io/base.py +151 -0
- autogen/io/console.py +56 -0
- autogen/io/processors/__init__.py +12 -0
- autogen/io/processors/base.py +21 -0
- autogen/io/processors/console_event_processor.py +61 -0
- autogen/io/run_response.py +294 -0
- autogen/io/thread_io_stream.py +63 -0
- autogen/io/websockets.py +214 -0
- autogen/json_utils.py +42 -0
- autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
- autogen/llm_clients/__init__.py +77 -0
- autogen/llm_clients/client_v2.py +122 -0
- autogen/llm_clients/models/__init__.py +55 -0
- autogen/llm_clients/models/content_blocks.py +389 -0
- autogen/llm_clients/models/unified_message.py +145 -0
- autogen/llm_clients/models/unified_response.py +83 -0
- autogen/llm_clients/openai_completions_client.py +444 -0
- autogen/llm_config/__init__.py +11 -0
- autogen/llm_config/client.py +59 -0
- autogen/llm_config/config.py +461 -0
- autogen/llm_config/entry.py +169 -0
- autogen/llm_config/types.py +37 -0
- autogen/llm_config/utils.py +223 -0
- autogen/logger/__init__.py +11 -0
- autogen/logger/base_logger.py +129 -0
- autogen/logger/file_logger.py +262 -0
- autogen/logger/logger_factory.py +42 -0
- autogen/logger/logger_utils.py +57 -0
- autogen/logger/sqlite_logger.py +524 -0
- autogen/math_utils.py +338 -0
- autogen/mcp/__init__.py +7 -0
- autogen/mcp/__main__.py +78 -0
- autogen/mcp/helpers.py +45 -0
- autogen/mcp/mcp_client.py +349 -0
- autogen/mcp/mcp_proxy/__init__.py +19 -0
- autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
- autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
- autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
- autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
- autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
- autogen/mcp/mcp_proxy/security.py +399 -0
- autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
- autogen/messages/__init__.py +7 -0
- autogen/messages/agent_messages.py +946 -0
- autogen/messages/base_message.py +108 -0
- autogen/messages/client_messages.py +172 -0
- autogen/messages/print_message.py +48 -0
- autogen/oai/__init__.py +61 -0
- autogen/oai/anthropic.py +1516 -0
- autogen/oai/bedrock.py +800 -0
- autogen/oai/cerebras.py +302 -0
- autogen/oai/client.py +1658 -0
- autogen/oai/client_utils.py +196 -0
- autogen/oai/cohere.py +494 -0
- autogen/oai/gemini.py +1045 -0
- autogen/oai/gemini_types.py +156 -0
- autogen/oai/groq.py +319 -0
- autogen/oai/mistral.py +311 -0
- autogen/oai/oai_models/__init__.py +23 -0
- autogen/oai/oai_models/_models.py +16 -0
- autogen/oai/oai_models/chat_completion.py +86 -0
- autogen/oai/oai_models/chat_completion_audio.py +32 -0
- autogen/oai/oai_models/chat_completion_message.py +97 -0
- autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
- autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
- autogen/oai/oai_models/completion_usage.py +59 -0
- autogen/oai/ollama.py +657 -0
- autogen/oai/openai_responses.py +451 -0
- autogen/oai/openai_utils.py +897 -0
- autogen/oai/together.py +387 -0
- autogen/remote/__init__.py +18 -0
- autogen/remote/agent.py +199 -0
- autogen/remote/agent_service.py +197 -0
- autogen/remote/errors.py +17 -0
- autogen/remote/httpx_client_factory.py +131 -0
- autogen/remote/protocol.py +37 -0
- autogen/remote/retry.py +102 -0
- autogen/remote/runtime.py +96 -0
- autogen/retrieve_utils.py +490 -0
- autogen/runtime_logging.py +161 -0
- autogen/testing/__init__.py +12 -0
- autogen/testing/messages.py +45 -0
- autogen/testing/test_agent.py +111 -0
- autogen/token_count_utils.py +280 -0
- autogen/tools/__init__.py +20 -0
- autogen/tools/contrib/__init__.py +9 -0
- autogen/tools/contrib/time/__init__.py +7 -0
- autogen/tools/contrib/time/time.py +40 -0
- autogen/tools/dependency_injection.py +249 -0
- autogen/tools/experimental/__init__.py +54 -0
- autogen/tools/experimental/browser_use/__init__.py +7 -0
- autogen/tools/experimental/browser_use/browser_use.py +154 -0
- autogen/tools/experimental/code_execution/__init__.py +7 -0
- autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
- autogen/tools/experimental/crawl4ai/__init__.py +7 -0
- autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
- autogen/tools/experimental/deep_research/__init__.py +7 -0
- autogen/tools/experimental/deep_research/deep_research.py +329 -0
- autogen/tools/experimental/duckduckgo/__init__.py +7 -0
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
- autogen/tools/experimental/firecrawl/__init__.py +7 -0
- autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
- autogen/tools/experimental/google/__init__.py +14 -0
- autogen/tools/experimental/google/authentication/__init__.py +11 -0
- autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
- autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
- autogen/tools/experimental/google/drive/__init__.py +9 -0
- autogen/tools/experimental/google/drive/drive_functions.py +124 -0
- autogen/tools/experimental/google/drive/toolkit.py +88 -0
- autogen/tools/experimental/google/model.py +17 -0
- autogen/tools/experimental/google/toolkit_protocol.py +19 -0
- autogen/tools/experimental/google_search/__init__.py +8 -0
- autogen/tools/experimental/google_search/google_search.py +93 -0
- autogen/tools/experimental/google_search/youtube_search.py +181 -0
- autogen/tools/experimental/messageplatform/__init__.py +17 -0
- autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
- autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
- autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
- autogen/tools/experimental/perplexity/__init__.py +7 -0
- autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
- autogen/tools/experimental/reliable/__init__.py +10 -0
- autogen/tools/experimental/reliable/reliable.py +1311 -0
- autogen/tools/experimental/searxng/__init__.py +7 -0
- autogen/tools/experimental/searxng/searxng_search.py +142 -0
- autogen/tools/experimental/tavily/__init__.py +7 -0
- autogen/tools/experimental/tavily/tavily_search.py +176 -0
- autogen/tools/experimental/web_search_preview/__init__.py +7 -0
- autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
- autogen/tools/experimental/wikipedia/__init__.py +7 -0
- autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
- autogen/tools/function_utils.py +412 -0
- autogen/tools/tool.py +188 -0
- autogen/tools/toolkit.py +86 -0
- autogen/types.py +29 -0
- autogen/version.py +7 -0
- templates/client_template/main.jinja2 +72 -0
- templates/config_template/config.jinja2 +7 -0
- templates/main.jinja2 +61 -0
|
@@ -0,0 +1,551 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
#
|
|
5
|
+
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
+
# SPDX-License-Identifier: MIT
|
|
7
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
8
|
+
from copy import deepcopy
|
|
9
|
+
from time import monotonic, sleep
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
from ....import_utils import optional_import_block, require_optional_import
|
|
13
|
+
from .base import Document, ItemID, QueryResults, VectorDB
|
|
14
|
+
from .utils import get_logger
|
|
15
|
+
|
|
16
|
+
with optional_import_block():
|
|
17
|
+
import numpy as np
|
|
18
|
+
from pymongo import MongoClient, UpdateOne, errors
|
|
19
|
+
from pymongo.collection import Collection
|
|
20
|
+
from pymongo.driver_info import DriverInfo
|
|
21
|
+
from pymongo.operations import SearchIndexModel
|
|
22
|
+
from sentence_transformers import SentenceTransformer
|
|
23
|
+
|
|
24
|
+
# Module-level logger obtained from the package's logging helper.
logger = get_logger(__name__)

# NOTE(review): presumably the default number of documents per insert batch;
# it is not referenced in the visible part of this file - confirm usage.
DEFAULT_INSERT_BATCH_SIZE = 100_000
# Probe input passed to the embedding function to measure the embedding length.
_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."]
# Seconds slept between polls while waiting for an index or a document.
_DELAY = 0.5
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def with_id_rename(docs: Iterable) -> list[dict[str, Any]]:
    """Return copies of the given records with the `_id` key renamed to `id`.

    Args:
        docs: Iterable | Mapping-like records, each of which must contain an `_id` key.

    Returns:
        list[dict[str, Any]] | One new dict per record, holding every original
            key except `_id`, plus `id` set to the original `_id` value.
    """
    renamed: list[dict[str, Any]] = []
    for record in docs:
        converted = {key: value for key, value in record.items() if key != "_id"}
        converted["id"] = record["_id"]
        renamed.append(converted)
    return renamed
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@require_optional_import(["pymongo", "sentence_transformers", "numpy"], "retrievechat-mongodb")
|
|
37
|
+
class MongoDBAtlasVectorDB(VectorDB):
|
|
38
|
+
"""A Collection object for MongoDB."""
|
|
39
|
+
|
|
40
|
+
def __init__(
    self,
    connection_string: str = "",
    database_name: str = "vector_db",
    embedding_function: Callable[..., Any] | None = None,
    collection_name: str = None,
    index_name: str = "vector_index",
    overwrite: bool = False,
    wait_until_index_ready: float | None = None,
    wait_until_document_ready: float | None = None,
):
    """Set up the embedding function, connect to MongoDB and select the database.

    Args:
        connection_string: str | The MongoDB connection string to connect to. Default is ''.
        database_name: str | The name of the database. Default is 'vector_db'.
        embedding_function: Callable | The embedding function used to generate the vector
            representation. When falsy, the `encode` method of the
            "all-MiniLM-L6-v2" SentenceTransformer model is used.
        collection_name: str | The name of the collection to create (or fetch) and make
            the active collection. Defaults to None, which leaves no active collection.
        index_name: str | Index name for the vector database, defaults to 'vector_index'.
        overwrite: bool | Forwarded to `create_collection` when `collection_name` is given.
            Default is False.
        wait_until_index_ready: Optional[float] | Timeout in seconds used when polling
            for search index changes. None, the default, means no wait.
        wait_until_document_ready: Optional[float] | Timeout in seconds used when polling
            until a document is returned by vector search. None, the default, means no wait.

    Raises:
        ConnectionError: If the server cannot be selected while pinging it.
    """
    if not embedding_function:
        embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode
    self.embedding_function = embedding_function
    self.index_name = index_name
    self._wait_until_index_ready = wait_until_index_ready
    self._wait_until_document_ready = wait_until_document_ready

    # The embedding length is measured by embedding a sample sentence.
    self.dimensions = self._get_embedding_size()

    try:
        self.client = MongoClient(connection_string, driver=DriverInfo(name="autogen"))
        # Ping so that an unreachable server is reported here rather than later.
        self.client.admin.command("ping")
        logger.debug("Successfully created MongoClient")
    except errors.ServerSelectionTimeoutError as err:
        raise ConnectionError("Could not connect to MongoDB server") from err

    self.db = self.client[database_name]
    logger.debug(f"Atlas Database name: {self.db.name}")
    self.active_collection = self.create_collection(collection_name, overwrite) if collection_name else None
|
|
87
|
+
|
|
88
|
+
def _is_index_ready(self, collection: "Collection", index_name: str):
|
|
89
|
+
"""Check for the index name in the list of available search indexes to see if the
|
|
90
|
+
specified index is of status READY
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
collection (Collection): MongoDB Collection to for the search indexes
|
|
94
|
+
index_name (str): Vector Search Index name
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
bool : True if the index is present and READY false otherwise
|
|
98
|
+
"""
|
|
99
|
+
for index in collection.list_search_indexes(index_name):
|
|
100
|
+
if index["type"] == "vectorSearch" and index["status"] == "READY":
|
|
101
|
+
return True
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
def _wait_for_index(self, collection: "Collection", index_name: str, action: str = "create"):
|
|
105
|
+
"""Waits for the index action to be completed. Otherwise throws a TimeoutError.
|
|
106
|
+
|
|
107
|
+
Timeout set on instantiation.
|
|
108
|
+
action: "create" or "delete"
|
|
109
|
+
"""
|
|
110
|
+
assert action in ["create", "delete"], f"{action=} must be create or delete."
|
|
111
|
+
start = monotonic()
|
|
112
|
+
while monotonic() - start < self._wait_until_index_ready:
|
|
113
|
+
if (action == "create" and self._is_index_ready(collection, index_name)) or (
|
|
114
|
+
action == "delete" and len(list(collection.list_search_indexes())) == 0
|
|
115
|
+
):
|
|
116
|
+
return
|
|
117
|
+
sleep(_DELAY)
|
|
118
|
+
|
|
119
|
+
raise TimeoutError(f"Index {self.index_name} is not ready!")
|
|
120
|
+
|
|
121
|
+
def _wait_for_document(self, collection: "Collection", index_name: str, doc: Document):
    """Poll vector search until `doc` is the top hit for its own content, or raise on timeout.

    The timeout is `self._wait_until_document_ready` (seconds, set on
    instantiation). It must be a number here; callers are expected to skip
    this method when it is None.

    Args:
        collection (Collection): MongoDB Collection to search
        index_name (str): Vector Search Index name to query
        doc (Document): Document to wait for; its "content" is embedded and its
            "id" is compared with the `_id` of the first search result

    Raises:
        TimeoutError: If the document is not the top result within the timeout.
    """
    # The query vector does not change between polls, so embed the content once.
    embedding_vector = np.array(self.embedding_function(doc["content"])).tolist()

    start = monotonic()
    while monotonic() - start < self._wait_until_document_ready:
        query_result = _vector_search(
            embedding_vector=embedding_vector,
            n_results=1,
            collection=collection,
            index_name=index_name,
        )
        if query_result and query_result[0][0]["_id"] == doc["id"]:
            return
        sleep(_DELAY)

    # Name the document that was waited on, not the index.
    raise TimeoutError(f"Document {doc['id']} is not ready!")
|
|
135
|
+
|
|
136
|
+
def _get_embedding_size(self):
    """Return the length of the embedding produced for the module's sample sentence."""
    sample_embeddings = self.embedding_function(_SAMPLE_SENTENCE)
    first_embedding = sample_embeddings[0]
    return len(first_embedding)
|
|
138
|
+
|
|
139
|
+
def list_collections(self):
|
|
140
|
+
"""List the collections in the vector database.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
List[str] | The list of collections.
|
|
144
|
+
"""
|
|
145
|
+
return self.db.list_collection_names()
|
|
146
|
+
|
|
147
|
+
def create_collection(
|
|
148
|
+
self,
|
|
149
|
+
collection_name: str,
|
|
150
|
+
overwrite: bool = False,
|
|
151
|
+
get_or_create: bool = True,
|
|
152
|
+
) -> "Collection":
|
|
153
|
+
"""Create a collection in the vector database and create a vector search index in the collection.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
collection_name: str | The name of the collection.
|
|
157
|
+
overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
|
|
158
|
+
get_or_create: bool | Whether to get or create the collection. Default is True
|
|
159
|
+
"""
|
|
160
|
+
if overwrite:
|
|
161
|
+
self.delete_collection(collection_name)
|
|
162
|
+
|
|
163
|
+
if collection_name not in self.db.list_collection_names():
|
|
164
|
+
# Create a new collection
|
|
165
|
+
coll = self.db.create_collection(collection_name)
|
|
166
|
+
self.create_index_if_not_exists(index_name=self.index_name, collection=coll)
|
|
167
|
+
return coll
|
|
168
|
+
|
|
169
|
+
if get_or_create:
|
|
170
|
+
# The collection already exists, return it.
|
|
171
|
+
coll = self.db[collection_name]
|
|
172
|
+
self.create_index_if_not_exists(index_name=self.index_name, collection=coll)
|
|
173
|
+
return coll
|
|
174
|
+
else:
|
|
175
|
+
# get_or_create is False and the collection already exists, raise an error.
|
|
176
|
+
raise ValueError(f"Collection {collection_name} already exists.")
|
|
177
|
+
|
|
178
|
+
def create_index_if_not_exists(self, index_name: str = "vector_index", collection: "Collection" = None) -> None:
    """Create a vector search index on the collection unless it is already ready.

    The index is created only when `_is_index_ready` reports that it is not ready.

    Args:
        index_name (str, optional): The name of the vector search index to create. Defaults to "vector_index".
        collection (Collection, optional): The MongoDB collection to create the index on. Defaults to None.
    """
    if self._is_index_ready(collection, index_name):
        # Nothing to do: the index is already usable.
        return
    self.create_vector_search_index(collection, index_name)
|
|
187
|
+
|
|
188
|
+
def get_collection(self, collection_name: str = None) -> "Collection":
    """Return a collection and remember it as the active collection.

    Args:
        collection_name: str | The name of the collection. Default is None. If None, return the
            current active collection.

    Returns:
        Collection | The collection object.

    Raises:
        ValueError: If no name is given and there is no active collection.
    """
    if collection_name is not None:
        # An explicit name always replaces the active collection.
        self.active_collection = self.db[collection_name]
        return self.active_collection

    if self.active_collection is None:
        raise ValueError("No collection is specified.")

    logger.debug(f"No collection is specified. Using current active collection {self.active_collection.name}.")
    return self.active_collection
|
|
209
|
+
|
|
210
|
+
def delete_collection(self, collection_name: str) -> None:
    """Drop every search index of a collection and then the collection itself.

    Args:
        collection_name: str | The name of the collection.

    Returns:
        Whatever the collection's `drop()` call returns.
    """
    target = self.db[collection_name]
    for search_index in target.list_search_indexes():
        index_name = search_index["name"]
        target.drop_search_index(index_name)
        if self._wait_until_index_ready:
            # Hand over to `_wait_for_index` with the "delete" action before moving on.
            self._wait_for_index(target, index_name, "delete")
    return target.drop()
|
|
221
|
+
|
|
222
|
+
def create_vector_search_index(
    self,
    collection: "Collection",
    index_name: str | None = "vector_index",
    similarity: Literal["euclidean", "cosine", "dotProduct"] = "cosine",
) -> None:
    """Create a vector search index on the "embedding" field of the collection.

    Args:
        collection: An existing Collection in the Atlas Database.
        index_name: Vector Search Index name.
        similarity: Algorithm used for measuring vector similarity.

    Returns:
        None

    Raises:
        Exception: Any error raised while creating the index (or while waiting
            for it) is logged and then re-raised.
    """
    # Single vector field; its dimensionality comes from self.dimensions.
    vector_field = {
        "type": "vector",
        "numDimensions": self.dimensions,
        "path": "embedding",
        "similarity": similarity,
    }
    index_model = SearchIndexModel(
        definition={"fields": [vector_field]},
        name=index_name,
        type="vectorSearch",
    )
    try:
        collection.create_search_index(model=index_model)
        if self._wait_until_index_ready:
            self._wait_for_index(collection, index_name, "create")
        logger.debug(f"Search index {index_name} created successfully.")
    except Exception as error:
        logger.error(
            f"Error creating search index: {error}. \n"
            "Your client must be connected to an Atlas cluster. "
            "You may have to manually create a Collection and Search Index "
            "if you are on a free/shared cluster."
        )
        raise error
|
|
267
|
+
|
|
268
|
+
def insert_docs(
    self,
    docs: list[Document],
    collection_name: str = None,
    upsert: bool = False,
    batch_size: int = DEFAULT_INSERT_BATCH_SIZE,
    **kwargs: Any,
) -> None:
    """Insert Documents and Vector Embeddings into the collection of the vector database.

    For large numbers of Documents, insertion is performed in batches. A batch
    is flushed every `batch_size` documents, or earlier when the running size
    estimate of the pending batch reaches the internal threshold.

    Args:
        docs: A list of documents. Each document is a TypedDict `Document`.
        collection_name: The name of the collection. Default is None.
        upsert: Whether to update the document if it exists. When True the work
            is delegated to `update_docs`. Default is False.
        batch_size: Number of documents to be inserted in each batch
        **kwargs: Additional keyword arguments. Not used by this method.

    Raises:
        ValueError: If the first document has no content or no id.
    """
    if not docs:
        logger.info("No documents to insert.")
        return

    collection = self.get_collection(collection_name)
    if upsert:
        # update_docs embeds, writes and performs its own readiness wait.
        self.update_docs(docs, collection.name, upsert=True)
        return

    # Sanity checking the first document
    first_doc = docs[0]
    if first_doc.get("content") is None:
        raise ValueError("The document content is required.")
    if first_doc.get("id") is None:
        raise ValueError("The document id is required.")

    input_ids = set()
    result_ids = set()
    id_batch = []
    text_batch = []
    metadata_batch = []
    size = 0
    for position, doc in enumerate(docs, start=1):
        doc_id = doc["id"]
        text = doc["content"]
        metadata = doc.get("metadata", {})
        id_batch.append(doc_id)
        text_batch.append(text)
        metadata_batch.append(metadata)
        # Rough size estimate of the pending batch. A document may carry an
        # explicit `"metadata": None`, which has no len(), so count it as 0.
        id_size = 1 if isinstance(doc_id, int) else len(doc_id)
        metadata_size = len(metadata) if metadata is not None else 0
        size += len(text) + metadata_size + id_size
        # NOTE(review): 47_000_000 is presumably chosen to stay below a MongoDB
        # request size limit -- confirm.
        if position % batch_size == 0 or size >= 47_000_000:
            result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch))
            input_ids.update(id_batch)
            id_batch = []
            text_batch = []
            metadata_batch = []
            size = 0
    if text_batch:
        # Flush whatever is left after the last full batch.
        result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch))
        input_ids.update(id_batch)

    if result_ids != input_ids:
        logger.warning(
            "Possible data corruption. "
            f"input_ids not in result_ids: {input_ids.difference(result_ids)}.\n"
            f"result_ids not in input_ids: {result_ids.difference(input_ids)}"
        )
    if self._wait_until_document_ready:
        self._wait_for_document(collection, self.index_name, docs[-1])
|
|
337
|
+
|
|
338
|
+
def _insert_batch(
|
|
339
|
+
self, collection: "Collection", texts: list[str], metadatas: list[Mapping[str, Any]], ids: list[ItemID]
|
|
340
|
+
) -> set[ItemID]:
|
|
341
|
+
"""Compute embeddings for and insert a batch of Documents into the Collection.
|
|
342
|
+
|
|
343
|
+
For performance reasons, we chose to call self.embedding_function just once,
|
|
344
|
+
with the hopefully small tradeoff of having recreating Document dicts.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
collection: MongoDB Collection
|
|
348
|
+
texts: List of the main contents of each document
|
|
349
|
+
metadatas: List of metadata mappings
|
|
350
|
+
ids: List of ids. Note that these are stored as _id in Collection.
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
List of ids inserted.
|
|
354
|
+
"""
|
|
355
|
+
n_texts = len(texts)
|
|
356
|
+
if n_texts == 0:
|
|
357
|
+
return []
|
|
358
|
+
# Embed and create the documents
|
|
359
|
+
embeddings = self.embedding_function(texts).tolist()
|
|
360
|
+
assert len(embeddings) == n_texts, (
|
|
361
|
+
f"The number of embeddings produced by self.embedding_function ({len(embeddings)} does not match the number of texts provided to it ({n_texts})."
|
|
362
|
+
)
|
|
363
|
+
to_insert = [
|
|
364
|
+
{"_id": i, "content": t, "metadata": m, "embedding": e}
|
|
365
|
+
for i, t, m, e in zip(ids, texts, metadatas, embeddings)
|
|
366
|
+
]
|
|
367
|
+
# insert the documents in MongoDB Atlas
|
|
368
|
+
insert_result = collection.insert_many(to_insert) # type: ignore[union-attr]
|
|
369
|
+
return insert_result.inserted_ids # TODO Remove this. Replace by log like update_docs
|
|
370
|
+
|
|
371
|
+
def update_docs(self, docs: list[Document], collection_name: str = None, **kwargs: Any) -> None:
    """Update documents, including their embeddings, in the Collection.

    Optionally allow upsert as kwarg.

    Uses deepcopy to avoid changing docs.

    Args:
        docs: List[Document] | A list of documents.
        collection_name: str | The name of the collection. Default is None.
        kwargs: Any | Use `upsert=True` to insert documents whose ids are not present in collection.
    """
    if not docs:
        # An empty request list cannot be sent through bulk_write, so there is
        # nothing to embed or write.
        logger.info("No documents to update.")
        return

    n_docs = len(docs)
    logger.info(f"Preparing to embed and update {n_docs=}")
    # Compute the embeddings
    embeddings: list[list[float]] = self.embedding_function([doc["content"] for doc in docs]).tolist()
    # Prepare the updates
    upsert = kwargs.get("upsert", False)
    all_updates = []
    for position, original in enumerate(docs):
        doc = deepcopy(original)
        doc["embedding"] = embeddings[position]
        # The Document "id" is stored under "_id" in the collection.
        doc["_id"] = doc.pop("id")

        all_updates.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=upsert))
    # Perform update in bulk
    collection = self.get_collection(collection_name)
    result = collection.bulk_write(all_updates)

    if self._wait_until_document_ready:
        self._wait_for_document(collection, self.index_name, docs[-1])

    # Log a result summary
    logger.info(
        "Matched: %s, Modified: %s, Upserted: %s",
        result.matched_count,
        result.modified_count,
        result.upserted_count,
    )
|
|
409
|
+
|
|
410
|
+
def delete_docs(self, ids: list[ItemID], collection_name: str = None, **kwargs):
    """Remove the documents with the given ids from a collection.

    Args:
        ids: A list of document ids. Each id is a typed `ItemID`.
        collection_name: The name of the collection. Default is None.
        **kwargs: Additional keyword arguments. Not used by this method.

    Returns:
        The result of the collection's `delete_many` call.
    """
    id_filter = {"_id": {"$in": ids}}
    target = self.get_collection(collection_name)
    return target.delete_many(id_filter)
|
|
420
|
+
|
|
421
|
+
def get_docs_by_ids(
    self, ids: list[ItemID] = None, collection_name: str = None, include: list[str] = None, **kwargs
) -> list[Document]:
    """Fetch documents from a collection, optionally restricted to the given ids.

    Args:
        ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
        collection_name: str | The name of the collection. Default is None.
        include: List[str] | The fields to include.
            If None, will include ["metadata", "content"], ids will always be included.
            Basically, use include to choose whether to include embedding and metadata
        kwargs: dict | Additional keyword arguments. Not used by this method.

    Returns:
        List[Document] | The results, passed through `with_id_rename`.
    """
    if include is None:
        projection = {"_id": 1, "content": 1, "metadata": 1}
    else:
        # "_id" is always part of the projection.
        projection = dict.fromkeys({"_id", *include}, 1)

    # An empty filter selects every document in the collection.
    id_filter = {} if ids is None else {"_id": {"$in": ids}}
    cursor = self.get_collection(collection_name).find(id_filter, projection)
    # Return with _id field from Collection into id for Document
    return with_id_rename(cursor)
|
|
450
|
+
|
|
451
|
+
def retrieve_docs(
    self,
    queries: list[str],
    collection_name: str = None,
    n_results: int = 10,
    distance_threshold: float = -1,
    **kwargs: Any,
) -> QueryResults:
    """Retrieve documents from the collection of the vector database based on the queries.

    Args:
        queries: List[str] | A list of queries. Each query is a string.
        collection_name: str | The name of the collection. Default is None.
        n_results: int | The number of relevant documents to return. Default is 10.
        distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
            returned. Don't filter with it if < 0. Default is -1.
        kwargs: Dict | Additional keyword arguments, forwarded to `_vector_search`.
            Ones of importance follow:
            oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm.
            It determines the number of nearest neighbor candidates to consider during the search phase.
            A higher value leads to more accuracy, but is slower. Default is 10

    Returns:
        QueryResults | For each query string, a list of nearest documents and their scores.
        An empty list is returned when the collection holds no documents.
    """
    collection = self.get_collection(collection_name)
    # Trivial case of an empty collection
    if collection.count_documents({}) == 0:
        return []

    logger.debug(f"Using index: {self.index_name}")
    # Merge the default into the forwarded options once. Passing **kwargs AND an
    # explicit oversampling_factor= keyword raises "got multiple values for
    # keyword argument" whenever the caller supplies oversampling_factor.
    search_options = {"oversampling_factor": 10, **kwargs}
    results = []
    for query_text in queries:
        # Compute embedding vector from semantic query
        logger.debug(f"Query: {query_text}")
        query_vector = np.array(self.embedding_function([query_text])).tolist()[0]
        # Find documents with similar vectors using the specified index
        query_result = _vector_search(
            query_vector,
            n_results,
            collection,
            self.index_name,
            distance_threshold,
            **search_options,
        )
        # Change each _id key to id. with_id_rename, but with (doc, score) tuples
        results.append([
            ({**{k: v for k, v in doc.items() if k != "_id"}, "id": doc["_id"]}, score)
            for doc, score in query_result
        ])
    return results
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _vector_search(
    embedding_vector: list[float],
    n_results: int,
    collection: "Collection",
    index_name: str,
    distance_threshold: float = -1.0,
    oversampling_factor=10,
    include_embedding=False,
) -> list[tuple[dict[str, Any], float]]:
    """Run the core $vectorSearch aggregation pipeline.

    Args:
        embedding_vector: Embedding vector of semantic query
        n_results: Number of documents to return (used as the search limit).
        collection: MongoDB Collection with vector index
        index_name: Name of the vector index
        distance_threshold: When >= 0, only documents whose score is at least
            `1 - distance_threshold` are kept. Negative values disable the
            filter. Default is -1.
        oversampling_factor: This times n_results is 'ef' in the HNSW algorithm.
            It determines the number of nearest neighbor candidates to consider during the search phase.
            A higher value leads to more accuracy, but is slower. Default = 10
        include_embedding: Whether to include the embedding in the results. Default is False.

    Returns:
        List of up to n_results tuples from Collection.
        Each tuple contains a document dict and a score.
    """
    search_stage = {
        "$vectorSearch": {
            "index": index_name,
            "limit": n_results,
            "numCandidates": n_results * oversampling_factor,
            "queryVector": embedding_vector,
            "path": "embedding",
        }
    }
    # Expose the search score as a regular "score" field on each document.
    score_stage = {"$set": {"score": {"$meta": "vectorSearchScore"}}}
    pipeline = [search_stage, score_stage]

    if distance_threshold >= 0.0:
        # The distance threshold is converted into a minimum score.
        pipeline.append({"$match": {"score": {"$gte": 1.0 - distance_threshold}}})

    if not include_embedding:
        pipeline.append({"$project": {"embedding": 0}})

    logger.debug("pipeline: %s", pipeline)
    scored_docs = []
    for doc in collection.aggregate(pipeline):
        # Move the score out of the document and return it alongside.
        score = doc.pop("score")
        scored_docs.append((doc, score))
    return scored_docs
|