ag2 0.9.1a1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic. Click here for more details.
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info}/METADATA +264 -73
- ag2-0.9.1.post0.dist-info/RECORD +392 -0
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info}/WHEEL +1 -2
- autogen/__init__.py +89 -0
- autogen/_website/__init__.py +3 -0
- autogen/_website/generate_api_references.py +427 -0
- autogen/_website/generate_mkdocs.py +1174 -0
- autogen/_website/notebook_processor.py +476 -0
- autogen/_website/process_notebooks.py +656 -0
- autogen/_website/utils.py +412 -0
- autogen/agentchat/__init__.py +44 -0
- autogen/agentchat/agent.py +182 -0
- autogen/agentchat/assistant_agent.py +85 -0
- autogen/agentchat/chat.py +309 -0
- autogen/agentchat/contrib/__init__.py +5 -0
- autogen/agentchat/contrib/agent_eval/README.md +7 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
- autogen/agentchat/contrib/agent_eval/task.py +42 -0
- autogen/agentchat/contrib/agent_optimizer.py +429 -0
- autogen/agentchat/contrib/capabilities/__init__.py +5 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
- autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
- autogen/agentchat/contrib/capabilities/teachability.py +393 -0
- autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
- autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
- autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
- autogen/agentchat/contrib/capabilities/transforms.py +566 -0
- autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
- autogen/agentchat/contrib/capabilities/vision_capability.py +214 -0
- autogen/agentchat/contrib/captainagent/__init__.py +9 -0
- autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
- autogen/agentchat/contrib/captainagent/captainagent.py +512 -0
- autogen/agentchat/contrib/captainagent/tool_retriever.py +335 -0
- autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
- autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
- autogen/agentchat/contrib/graph_rag/document.py +29 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +170 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +268 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
- autogen/agentchat/contrib/img_utils.py +397 -0
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
- autogen/agentchat/contrib/llava_agent.py +187 -0
- autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
- autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +324 -0
- autogen/agentchat/contrib/rag/__init__.py +10 -0
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +272 -0
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +198 -0
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +329 -0
- autogen/agentchat/contrib/rag/query_engine.py +74 -0
- autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +703 -0
- autogen/agentchat/contrib/society_of_mind_agent.py +199 -0
- autogen/agentchat/contrib/swarm_agent.py +1425 -0
- autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
- autogen/agentchat/contrib/vectordb/__init__.py +5 -0
- autogen/agentchat/contrib/vectordb/base.py +232 -0
- autogen/agentchat/contrib/vectordb/chromadb.py +315 -0
- autogen/agentchat/contrib/vectordb/couchbase.py +407 -0
- autogen/agentchat/contrib/vectordb/mongodb.py +550 -0
- autogen/agentchat/contrib/vectordb/pgvectordb.py +928 -0
- autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
- autogen/agentchat/contrib/vectordb/utils.py +126 -0
- autogen/agentchat/contrib/web_surfer.py +303 -0
- autogen/agentchat/conversable_agent.py +4020 -0
- autogen/agentchat/group/__init__.py +64 -0
- autogen/agentchat/group/available_condition.py +91 -0
- autogen/agentchat/group/context_condition.py +77 -0
- autogen/agentchat/group/context_expression.py +238 -0
- autogen/agentchat/group/context_str.py +41 -0
- autogen/agentchat/group/context_variables.py +192 -0
- autogen/agentchat/group/group_tool_executor.py +202 -0
- autogen/agentchat/group/group_utils.py +591 -0
- autogen/agentchat/group/handoffs.py +244 -0
- autogen/agentchat/group/llm_condition.py +93 -0
- autogen/agentchat/group/multi_agent_chat.py +237 -0
- autogen/agentchat/group/on_condition.py +58 -0
- autogen/agentchat/group/on_context_condition.py +54 -0
- autogen/agentchat/group/patterns/__init__.py +18 -0
- autogen/agentchat/group/patterns/auto.py +159 -0
- autogen/agentchat/group/patterns/manual.py +176 -0
- autogen/agentchat/group/patterns/pattern.py +288 -0
- autogen/agentchat/group/patterns/random.py +106 -0
- autogen/agentchat/group/patterns/round_robin.py +117 -0
- autogen/agentchat/group/reply_result.py +26 -0
- autogen/agentchat/group/speaker_selection_result.py +41 -0
- autogen/agentchat/group/targets/__init__.py +4 -0
- autogen/agentchat/group/targets/group_chat_target.py +132 -0
- autogen/agentchat/group/targets/group_manager_target.py +151 -0
- autogen/agentchat/group/targets/transition_target.py +413 -0
- autogen/agentchat/group/targets/transition_utils.py +6 -0
- autogen/agentchat/groupchat.py +1694 -0
- autogen/agentchat/realtime/__init__.py +3 -0
- autogen/agentchat/realtime/experimental/__init__.py +20 -0
- autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
- autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
- autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
- autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
- autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
- autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +190 -0
- autogen/agentchat/realtime/experimental/function_observer.py +85 -0
- autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
- autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
- autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
- autogen/agentchat/realtime/experimental/realtime_swarm.py +475 -0
- autogen/agentchat/realtime/experimental/websockets.py +21 -0
- autogen/agentchat/realtime_agent/__init__.py +21 -0
- autogen/agentchat/user_proxy_agent.py +111 -0
- autogen/agentchat/utils.py +206 -0
- autogen/agents/__init__.py +3 -0
- autogen/agents/contrib/__init__.py +10 -0
- autogen/agents/contrib/time/__init__.py +8 -0
- autogen/agents/contrib/time/time_reply_agent.py +73 -0
- autogen/agents/contrib/time/time_tool_agent.py +51 -0
- autogen/agents/experimental/__init__.py +27 -0
- autogen/agents/experimental/deep_research/__init__.py +7 -0
- autogen/agents/experimental/deep_research/deep_research.py +52 -0
- autogen/agents/experimental/discord/__init__.py +7 -0
- autogen/agents/experimental/discord/discord.py +66 -0
- autogen/agents/experimental/document_agent/__init__.py +19 -0
- autogen/agents/experimental/document_agent/chroma_query_engine.py +316 -0
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +118 -0
- autogen/agents/experimental/document_agent/document_agent.py +461 -0
- autogen/agents/experimental/document_agent/document_conditions.py +50 -0
- autogen/agents/experimental/document_agent/document_utils.py +380 -0
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +220 -0
- autogen/agents/experimental/document_agent/parser_utils.py +130 -0
- autogen/agents/experimental/document_agent/url_utils.py +426 -0
- autogen/agents/experimental/reasoning/__init__.py +7 -0
- autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
- autogen/agents/experimental/slack/__init__.py +7 -0
- autogen/agents/experimental/slack/slack.py +73 -0
- autogen/agents/experimental/telegram/__init__.py +7 -0
- autogen/agents/experimental/telegram/telegram.py +77 -0
- autogen/agents/experimental/websurfer/__init__.py +7 -0
- autogen/agents/experimental/websurfer/websurfer.py +62 -0
- autogen/agents/experimental/wikipedia/__init__.py +7 -0
- autogen/agents/experimental/wikipedia/wikipedia.py +90 -0
- autogen/browser_utils.py +309 -0
- autogen/cache/__init__.py +10 -0
- autogen/cache/abstract_cache_base.py +75 -0
- autogen/cache/cache.py +203 -0
- autogen/cache/cache_factory.py +88 -0
- autogen/cache/cosmos_db_cache.py +144 -0
- autogen/cache/disk_cache.py +102 -0
- autogen/cache/in_memory_cache.py +58 -0
- autogen/cache/redis_cache.py +123 -0
- autogen/code_utils.py +596 -0
- autogen/coding/__init__.py +22 -0
- autogen/coding/base.py +119 -0
- autogen/coding/docker_commandline_code_executor.py +268 -0
- autogen/coding/factory.py +47 -0
- autogen/coding/func_with_reqs.py +202 -0
- autogen/coding/jupyter/__init__.py +23 -0
- autogen/coding/jupyter/base.py +36 -0
- autogen/coding/jupyter/docker_jupyter_server.py +167 -0
- autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
- autogen/coding/jupyter/import_utils.py +82 -0
- autogen/coding/jupyter/jupyter_client.py +231 -0
- autogen/coding/jupyter/jupyter_code_executor.py +160 -0
- autogen/coding/jupyter/local_jupyter_server.py +172 -0
- autogen/coding/local_commandline_code_executor.py +405 -0
- autogen/coding/markdown_code_extractor.py +45 -0
- autogen/coding/utils.py +56 -0
- autogen/doc_utils.py +34 -0
- autogen/events/__init__.py +7 -0
- autogen/events/agent_events.py +1010 -0
- autogen/events/base_event.py +99 -0
- autogen/events/client_events.py +167 -0
- autogen/events/helpers.py +36 -0
- autogen/events/print_event.py +46 -0
- autogen/exception_utils.py +73 -0
- autogen/extensions/__init__.py +5 -0
- autogen/fast_depends/__init__.py +16 -0
- autogen/fast_depends/_compat.py +80 -0
- autogen/fast_depends/core/__init__.py +14 -0
- autogen/fast_depends/core/build.py +225 -0
- autogen/fast_depends/core/model.py +576 -0
- autogen/fast_depends/dependencies/__init__.py +15 -0
- autogen/fast_depends/dependencies/model.py +29 -0
- autogen/fast_depends/dependencies/provider.py +39 -0
- autogen/fast_depends/library/__init__.py +10 -0
- autogen/fast_depends/library/model.py +46 -0
- autogen/fast_depends/py.typed +6 -0
- autogen/fast_depends/schema.py +66 -0
- autogen/fast_depends/use.py +280 -0
- autogen/fast_depends/utils.py +187 -0
- autogen/formatting_utils.py +83 -0
- autogen/function_utils.py +13 -0
- autogen/graph_utils.py +178 -0
- autogen/import_utils.py +526 -0
- autogen/interop/__init__.py +22 -0
- autogen/interop/crewai/__init__.py +7 -0
- autogen/interop/crewai/crewai.py +88 -0
- autogen/interop/interoperability.py +71 -0
- autogen/interop/interoperable.py +46 -0
- autogen/interop/langchain/__init__.py +8 -0
- autogen/interop/langchain/langchain_chat_model_factory.py +155 -0
- autogen/interop/langchain/langchain_tool.py +82 -0
- autogen/interop/litellm/__init__.py +7 -0
- autogen/interop/litellm/litellm_config_factory.py +113 -0
- autogen/interop/pydantic_ai/__init__.py +7 -0
- autogen/interop/pydantic_ai/pydantic_ai.py +168 -0
- autogen/interop/registry.py +69 -0
- autogen/io/__init__.py +15 -0
- autogen/io/base.py +151 -0
- autogen/io/console.py +56 -0
- autogen/io/processors/__init__.py +12 -0
- autogen/io/processors/base.py +21 -0
- autogen/io/processors/console_event_processor.py +56 -0
- autogen/io/run_response.py +293 -0
- autogen/io/thread_io_stream.py +63 -0
- autogen/io/websockets.py +213 -0
- autogen/json_utils.py +43 -0
- autogen/llm_config.py +379 -0
- autogen/logger/__init__.py +11 -0
- autogen/logger/base_logger.py +128 -0
- autogen/logger/file_logger.py +261 -0
- autogen/logger/logger_factory.py +42 -0
- autogen/logger/logger_utils.py +57 -0
- autogen/logger/sqlite_logger.py +523 -0
- autogen/math_utils.py +339 -0
- autogen/mcp/__init__.py +7 -0
- autogen/mcp/mcp_client.py +208 -0
- autogen/messages/__init__.py +7 -0
- autogen/messages/agent_messages.py +948 -0
- autogen/messages/base_message.py +107 -0
- autogen/messages/client_messages.py +171 -0
- autogen/messages/print_message.py +49 -0
- autogen/oai/__init__.py +53 -0
- autogen/oai/anthropic.py +714 -0
- autogen/oai/bedrock.py +628 -0
- autogen/oai/cerebras.py +299 -0
- autogen/oai/client.py +1435 -0
- autogen/oai/client_utils.py +169 -0
- autogen/oai/cohere.py +479 -0
- autogen/oai/gemini.py +990 -0
- autogen/oai/gemini_types.py +129 -0
- autogen/oai/groq.py +305 -0
- autogen/oai/mistral.py +303 -0
- autogen/oai/oai_models/__init__.py +11 -0
- autogen/oai/oai_models/_models.py +16 -0
- autogen/oai/oai_models/chat_completion.py +87 -0
- autogen/oai/oai_models/chat_completion_audio.py +32 -0
- autogen/oai/oai_models/chat_completion_message.py +86 -0
- autogen/oai/oai_models/chat_completion_message_tool_call.py +37 -0
- autogen/oai/oai_models/chat_completion_token_logprob.py +63 -0
- autogen/oai/oai_models/completion_usage.py +60 -0
- autogen/oai/ollama.py +643 -0
- autogen/oai/openai_utils.py +881 -0
- autogen/oai/together.py +370 -0
- autogen/retrieve_utils.py +491 -0
- autogen/runtime_logging.py +160 -0
- autogen/token_count_utils.py +267 -0
- autogen/tools/__init__.py +20 -0
- autogen/tools/contrib/__init__.py +9 -0
- autogen/tools/contrib/time/__init__.py +7 -0
- autogen/tools/contrib/time/time.py +41 -0
- autogen/tools/dependency_injection.py +254 -0
- autogen/tools/experimental/__init__.py +43 -0
- autogen/tools/experimental/browser_use/__init__.py +7 -0
- autogen/tools/experimental/browser_use/browser_use.py +161 -0
- autogen/tools/experimental/crawl4ai/__init__.py +7 -0
- autogen/tools/experimental/crawl4ai/crawl4ai.py +153 -0
- autogen/tools/experimental/deep_research/__init__.py +7 -0
- autogen/tools/experimental/deep_research/deep_research.py +328 -0
- autogen/tools/experimental/duckduckgo/__init__.py +7 -0
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +109 -0
- autogen/tools/experimental/google/__init__.py +14 -0
- autogen/tools/experimental/google/authentication/__init__.py +11 -0
- autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
- autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
- autogen/tools/experimental/google/drive/__init__.py +9 -0
- autogen/tools/experimental/google/drive/drive_functions.py +124 -0
- autogen/tools/experimental/google/drive/toolkit.py +88 -0
- autogen/tools/experimental/google/model.py +17 -0
- autogen/tools/experimental/google/toolkit_protocol.py +19 -0
- autogen/tools/experimental/google_search/__init__.py +8 -0
- autogen/tools/experimental/google_search/google_search.py +93 -0
- autogen/tools/experimental/google_search/youtube_search.py +181 -0
- autogen/tools/experimental/messageplatform/__init__.py +17 -0
- autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/discord/discord.py +288 -0
- autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/slack/slack.py +391 -0
- autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/telegram/telegram.py +275 -0
- autogen/tools/experimental/perplexity/__init__.py +7 -0
- autogen/tools/experimental/perplexity/perplexity_search.py +260 -0
- autogen/tools/experimental/tavily/__init__.py +7 -0
- autogen/tools/experimental/tavily/tavily_search.py +183 -0
- autogen/tools/experimental/web_search_preview/__init__.py +7 -0
- autogen/tools/experimental/web_search_preview/web_search_preview.py +114 -0
- autogen/tools/experimental/wikipedia/__init__.py +7 -0
- autogen/tools/experimental/wikipedia/wikipedia.py +287 -0
- autogen/tools/function_utils.py +411 -0
- autogen/tools/tool.py +187 -0
- autogen/tools/toolkit.py +86 -0
- autogen/types.py +29 -0
- autogen/version.py +7 -0
- ag2-0.9.1a1.dist-info/RECORD +0 -6
- ag2-0.9.1a1.dist-info/top_level.txt +0 -1
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/LICENSE +0 -0
- {ag2-0.9.1a1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/NOTICE.md +0 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
#
|
|
5
|
+
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
+
# SPDX-License-Identifier: MIT
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import time
|
|
10
|
+
from datetime import timedelta
|
|
11
|
+
from typing import Any, Callable, Literal, Optional
|
|
12
|
+
|
|
13
|
+
from ....import_utils import optional_import_block, require_optional_import
|
|
14
|
+
from .base import Document, ItemID, QueryResults, VectorDB
|
|
15
|
+
from .utils import get_logger
|
|
16
|
+
|
|
17
|
+
with optional_import_block():
|
|
18
|
+
import numpy as np
|
|
19
|
+
from couchbase import search
|
|
20
|
+
from couchbase.auth import PasswordAuthenticator
|
|
21
|
+
from couchbase.cluster import Cluster, ClusterOptions
|
|
22
|
+
from couchbase.collection import Collection
|
|
23
|
+
from couchbase.management.search import SearchIndex
|
|
24
|
+
from couchbase.options import SearchOptions
|
|
25
|
+
from couchbase.vector_search import VectorQuery, VectorSearch
|
|
26
|
+
from sentence_transformers import SentenceTransformer
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Module-level logger obtained from the package's get_logger helper.
logger = get_logger(__name__)

# Default number of documents written per batch by the upsert/insert/update methods.
DEFAULT_BATCH_SIZE = 1000
# Probe input that is embedded to discover the embedding function's output dimension.
_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."]
# Keys under which a document's text and its embedding are stored in Couchbase.
TEXT_KEY = "content"
EMBEDDING_KEY = "embedding"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@require_optional_import(["couchbase", "sentence_transformers"], "retrievechat-couchbase")
|
|
38
|
+
class CouchbaseVectorDB(VectorDB):
|
|
39
|
+
"""
|
|
40
|
+
A vector database implementation that uses Couchbase as the backend.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(
    self,
    connection_string: str = "couchbase://localhost",
    username: str = "Administrator",
    password: str = "password",
    bucket_name: str = "vector_db",
    embedding_function: Callable = None,
    scope_name: str = "_default",
    collection_name: str = "_default",
    index_name: str = None,
):
    """Set up the embedding function and open the Couchbase bucket/scope/collection.

    The embedding dimension is probed before any connection is attempted, so a
    broken embedding function fails without touching the cluster.

    Args:
        connection_string (str): The Couchbase connection string to connect to. Default is 'couchbase://localhost'.
        username (str): The username for Couchbase authentication. Default is 'Administrator'.
        password (str): The password for Couchbase authentication. Default is 'password'.
        bucket_name (str): The name of the bucket. Default is 'vector_db'.
        embedding_function (Callable): The embedding function used to generate the vector representation.
            When None, SentenceTransformer("all-MiniLM-L6-v2").encode is used.
        scope_name (str): The name of the scope. Default is '_default'.
        collection_name (str): The name of the collection used as the initial active collection.
            Default is '_default'.
        index_name (str): Index name for the vector database. Default is None.

    Raises:
        ConnectionError: If authenticating, waiting for the cluster, or opening the
            bucket/scope/collection raises; the original exception is chained.
    """
    self.embedding_function = (
        embedding_function
        if embedding_function is not None
        else SentenceTransformer("all-MiniLM-L6-v2").encode
    )
    self.index_name = index_name

    # Embed a sample sentence to learn the vector size of the embedding function.
    self.dimensions = self._get_embedding_size()

    try:
        authenticator = PasswordAuthenticator(username, password)
        couchbase_cluster = Cluster(connection_string, ClusterOptions(authenticator))
        couchbase_cluster.wait_until_ready(timedelta(seconds=5))
        self.cluster = couchbase_cluster

        self.bucket = couchbase_cluster.bucket(bucket_name)
        self.scope = self.bucket.scope(scope_name)
        # The configured collection doubles as the initial active collection.
        self.collection = self.active_collection = self.scope.collection(collection_name)

        logger.debug("Successfully connected to Couchbase")
    except Exception as exc:
        raise ConnectionError("Could not connect to Couchbase server") from exc
|
|
91
|
+
|
|
92
|
+
def search_index_exists(self, index_name: str):
    """Report whether the named search index can be fetched from the scope and is valid.

    Args:
        index_name (str): Name of the search index to look up.

    Returns:
        The result of the index's ``is_valid()`` call, or False when the lookup
        (or the validity check) raises any exception.
    """
    try:
        return self.scope.search_indexes().get_index(index_name).is_valid()
    except Exception:
        # Any failure while fetching the index is reported as "does not exist".
        return False
|
|
100
|
+
|
|
101
|
+
def _get_embedding_size(self):
    """Return the length of the first embedding produced for the sample sentence."""
    sample_embeddings = self.embedding_function(_SAMPLE_SENTENCE)
    first_embedding = sample_embeddings[0]
    return len(first_embedding)
|
|
103
|
+
|
|
104
|
+
def create_collection(
    self,
    collection_name: str,
    overwrite: bool = False,
    get_or_create: bool = True,
) -> "Collection":
    """Create a collection in the vector database and make sure it has a vector search index.

    Args:
        collection_name (str): The name of the collection.
        overwrite (bool): Whether to drop the collection first if it exists. Default is False.
        get_or_create (bool): Whether to fall back to the existing collection when
            creation fails. Default is True.

    Returns:
        The collection handle obtained from the current scope.

    Raises:
        ValueError: If creation fails and ``get_or_create`` is False. The underlying
            exception is chained as ``__cause__``.
    """
    if overwrite:
        self.delete_collection(collection_name)

    try:
        self.bucket.collections().create_collection(self.scope.name, collection_name)
        # Backtick-quote each identifier so names containing characters such as '-'
        # still form a valid keyspace path.
        keyspace = ".".join(f"`{part}`" for part in (self.bucket.name, self.scope.name, collection_name))
        self.cluster.query(f"CREATE PRIMARY INDEX ON {keyspace}")
    except Exception as err:
        # NOTE(review): every failure here is treated as "collection already exists";
        # the original error is chained / logged so other causes remain visible.
        if not get_or_create:
            raise ValueError(f"Collection {collection_name} already exists.") from err
        logger.debug(f"Collection {collection_name} already exists. Getting the collection. ({err})")

    collection = self.scope.collection(collection_name)
    self.create_index_if_not_exists(index_name=self.index_name, collection=collection)
    return collection
|
|
134
|
+
|
|
135
|
+
def create_index_if_not_exists(
    self, index_name: str = "vector_index", collection: Optional["Collection"] = None
) -> None:
    """Create a vector search index on the collection unless a valid one already exists.

    Args:
        index_name (str, optional): The name of the vector search index to create.
            Defaults to "vector_index"; an explicit None is treated as the default.
        collection (Collection, optional): The Couchbase collection to create the index on.
            Defaults to None, in which case the current active collection is used.
    """
    # Callers may forward an unset (None) index name; fall back to the documented
    # default instead of trying to create an index without a name.
    if index_name is None:
        index_name = "vector_index"
    # Index creation reads the collection's name, so None cannot be passed through.
    if collection is None:
        collection = self.active_collection

    if not self.search_index_exists(index_name):
        self.create_vector_search_index(collection, index_name)
|
|
146
|
+
|
|
147
|
+
def get_collection(self, collection_name: Optional[str] = None) -> "Collection":
    """Return a collection handle, switching the active collection when a name is given.

    Args:
        collection_name (str): The name of the collection. Default is None, which
            means the current active collection is returned unchanged.

    Returns:
        The collection object (Collection) that is now the active collection.

    Raises:
        ValueError: If no name is given and there is no active collection.
    """
    if collection_name is not None:
        # A named lookup also becomes the new active collection.
        self.active_collection = self.scope.collection(collection_name)
        return self.active_collection

    if self.active_collection is None:
        raise ValueError("No collection is specified.")

    logger.debug(f"No collection is specified. Using current active collection {self.active_collection.name}.")
    return self.active_collection
|
|
166
|
+
|
|
167
|
+
def delete_collection(self, collection_name: str) -> None:
    """Drop the named collection from the current scope.

    Failures are logged at error level and not re-raised.

    Args:
        collection_name (str): The name of the collection.
    """
    try:
        self.bucket.collections().drop_collection(self.scope.name, collection_name)
    except Exception as exc:
        logger.error(f"Error deleting collection: {exc}")
|
|
178
|
+
|
|
179
|
+
def create_vector_search_index(
    self,
    collection,
    index_name: Optional[str] = "vector_index",
    similarity: Literal["l2_norm", "dot_product"] = "dot_product",
) -> None:
    """Create (upsert) a vector search index for the collection in the current scope.

    The index maps the "embedding" field as a vector whose dimension is probed from
    the embedding function, the "content" field as stored text, and "metadata" as a
    dynamic mapping.

    Args:
        collection: Collection to index; its ``name`` is combined with the scope name
            to key the type mapping.
        index_name (str, optional): Name of the search index. Defaults to "vector_index".
        similarity: Vector similarity metric written into the index definition.
            Defaults to "dot_product".

    Raises:
        RuntimeError: If the index could not be upserted after 10 attempts. The last
            underlying exception is chained as ``__cause__``.
    """
    search_index_mgr = self.scope.search_indexes()
    dims = self._get_embedding_size()
    index_definition = {
        "type": "fulltext-index",
        "name": index_name,
        "sourceType": "couchbase",
        "sourceName": self.bucket.name,
        "planParams": {"maxPartitionsPerPIndex": 1024, "indexPartitions": 1},
        "params": {
            "doc_config": {
                "docid_prefix_delim": "",
                "docid_regexp": "",
                "mode": "scope.collection.type_field",
                "type_field": "type",
            },
            "mapping": {
                "analysis": {},
                "default_analyzer": "standard",
                "default_datetime_parser": "dateTimeOptional",
                "default_field": "_all",
                "default_mapping": {"dynamic": True, "enabled": False},
                "default_type": "_default",
                "docvalues_dynamic": False,
                "index_dynamic": True,
                "store_dynamic": True,
                "type_field": "_type",
                "types": {
                    f"{self.scope.name}.{collection.name}": {
                        "dynamic": False,
                        "enabled": True,
                        "properties": {
                            "embedding": {
                                "dynamic": False,
                                "enabled": True,
                                "fields": [
                                    {
                                        "dims": dims,
                                        "index": True,
                                        "name": "embedding",
                                        "similarity": similarity,
                                        "type": "vector",
                                        "vector_index_optimized_for": "recall",
                                    }
                                ],
                            },
                            "metadata": {"dynamic": True, "enabled": True},
                            "content": {
                                "dynamic": False,
                                "enabled": True,
                                "fields": [
                                    {
                                        "include_in_all": True,
                                        "index": True,
                                        "name": "content",
                                        "store": True,
                                        "type": "text",
                                    }
                                ],
                            },
                        },
                    }
                },
            },
            "store": {"indexType": "scorch", "segmentVersion": 16},
        },
        "sourceParams": {},
    }

    search_index_def = SearchIndex.from_json(json.dumps(index_definition))
    max_attempts = 10
    retry_delay_seconds = 3
    last_error = None
    # NOTE(review): the upsert is retried; presumably the search service may not be
    # ready for a freshly created collection yet — confirm the intended failure mode.
    for attempt in range(1, max_attempts + 1):
        try:
            search_index_mgr.upsert_index(search_index_def)
            break
        except Exception as e:
            last_error = e
            logger.debug(f"Attempt {attempt}/{max_attempts}: Error creating search index: {e}")
            # No point in waiting after the final attempt; fail straight away.
            if attempt < max_attempts:
                time.sleep(retry_delay_seconds)
    else:
        logger.error(f"Error creating search index after {max_attempts} attempts.")
        raise RuntimeError(f"Error creating search index after {max_attempts} attempts.") from last_error

    logger.info(f"Search index {index_name} created successfully.")
|
|
271
|
+
|
|
272
|
+
def upsert_docs(
    self, docs: list[Document], collection: "Collection", batch_size: int = DEFAULT_BATCH_SIZE, **kwargs: Any
) -> None:
    """Embed documents and upsert them into ``collection`` in batches.

    Args:
        docs: Documents to write. Each one must carry a non-None "content"
            and "id"; "metadata" is optional and defaults to an empty dict.
        collection: Collection object that receives the documents through
            ``upsert_multi``.
        batch_size: Maximum number of documents sent per ``upsert_multi`` call.
        **kwargs: Accepted for interface compatibility; not read here.

    Raises:
        ValueError: If any document has no "content" or no "id".
    """
    if not docs:
        # Nothing to write; also avoids indexing into an empty list.
        return

    # Validate every document before the first write so that a bad entry
    # late in the list cannot leave the collection partially updated.
    for doc in docs:
        if doc.get("content") is None:
            raise ValueError("The document content is required.")
        if doc.get("id") is None:
            raise ValueError("The document id is required.")

    for start in range(0, len(docs), batch_size):
        docs_to_upsert = {}
        for doc in docs[start : start + batch_size]:
            doc_id = doc["id"]
            # A fresh embedding is computed on every call, so updated content
            # never keeps a stale vector.
            # NOTE(review): the embedding function receives a one-element list
            # and its `.tolist()` result is stored as-is -- confirm the stored
            # shape matches what the vector index field expects.
            embedding = self.embedding_function([doc["content"]]).tolist()
            docs_to_upsert[doc_id] = {
                TEXT_KEY: doc["content"],
                "metadata": doc.get("metadata", {}),
                EMBEDDING_KEY: embedding,
                "id": doc_id,
            }
        collection.upsert_multi(docs_to_upsert)
|
|
296
|
+
|
|
297
|
+
def insert_docs(
    self,
    docs: list[Document],
    collection_name: str = None,
    upsert: bool = False,
    batch_size: int = DEFAULT_BATCH_SIZE,
    **kwargs: Any,
) -> None:
    """Store documents and their vector embeddings in the named collection.

    Every call is routed through ``upsert_docs``; the ``upsert`` flag is not
    consulted. An empty ``docs`` is logged and otherwise ignored.
    """
    if docs:
        target = self.get_collection(collection_name)
        self.upsert_docs(docs, target, batch_size=batch_size)
        return

    logger.info("No documents to insert.")
|
|
312
|
+
|
|
313
|
+
def update_docs(
    self, docs: list[Document], collection_name: str = None, batch_size: int = DEFAULT_BATCH_SIZE, **kwargs: Any
) -> None:
    """Update documents, including their embeddings, in the Collection.

    Args:
        docs: Documents to write; each is re-embedded and upserted by
            ``upsert_docs``. An empty list is a no-op.
        collection_name: Name passed to ``get_collection`` to pick the target.
        batch_size: Number of documents per upsert batch.
        **kwargs: Accepted for interface compatibility; not read here.
    """
    if not docs:
        # Mirror insert_docs: an empty update is logged and skipped instead of
        # reaching upsert_docs, which inspects the first document.
        logger.info("No documents to update.")
        return

    collection = self.get_collection(collection_name)
    self.upsert_docs(docs, collection, batch_size=batch_size)
|
|
319
|
+
|
|
320
|
+
def delete_docs(
    self, ids: list[ItemID], collection_name: str = None, batch_size: int = DEFAULT_BATCH_SIZE, **kwargs
):
    """Remove the documents with the given ids from the named collection.

    The ids are removed in consecutive slices of at most ``batch_size``
    entries, with one ``remove_multi`` call per slice.
    """
    target = self.get_collection(collection_name)
    for offset in range(0, len(ids), batch_size):
        target.remove_multi(ids[offset : offset + batch_size])
|
|
329
|
+
|
|
330
|
+
def get_docs_by_ids(
    self,
    ids: Optional[list[ItemID]] = None,
    collection_name: str = None,
    include: Optional[list[str]] = None,
    **kwargs: Any,
) -> list[Document]:
    """Retrieve documents from the collection of the vector database based on the ids.

    Args:
        ids: Document ids to fetch. When None, every document in the
            collection is read with a query instead.
        collection_name: Name passed to ``get_collection`` to pick the source.
        include: Field names to keep in each returned document. Defaults to
            the text field, "metadata" and "id"; "id" is always kept. The
            caller's list is never modified.
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        One dict per document, restricted to the requested fields.
    """
    if include is None:
        include = [TEXT_KEY, "metadata", "id"]
    elif "id" not in include:
        # Build a new list instead of appending so the caller's argument is
        # left untouched.
        include = [*include, "id"]

    collection = self.get_collection(collection_name)
    if ids is not None:
        docs = []
        for doc_id in ids:
            fetched = collection.get(doc_id)
            # Couchbase's key-value `get` returns a GetResult wrapper rather
            # than a mapping, so unwrap its JSON body. Anything already
            # mapping-like is passed through unchanged.
            docs.append(fetched if hasattr(fetched, "items") else fetched.content_as[dict])
    else:
        # Get all documents using couchbase query
        include_str = ", ".join(include)
        query = f"SELECT {include_str} FROM {self.bucket.name}.{self.scope.name}.{collection.name}"
        docs = list(self.cluster.query(query))

    wanted = set(include)  # always contains "id" at this point
    return [{key: value for key, value in doc.items() if key in wanted} for doc in docs]
|
|
356
|
+
|
|
357
|
+
def retrieve_docs(
    self,
    queries: list[str],
    collection_name: str = None,
    n_results: int = 10,
    distance_threshold: float = -1,
    **kwargs: Any,
) -> QueryResults:
    """Run one vector search per query string and return the results in order.

    Each query is embedded with ``self.embedding_function`` and passed to
    ``_vector_search`` together with ``n_results`` and any extra keyword
    arguments. ``collection_name`` and ``distance_threshold`` are not read.

    Note: Distance threshold is not supported in Couchbase FTS.
    """

    def _embed(text: str) -> list:
        # The embedding function takes a batch; send one text, keep row 0.
        return np.array(self.embedding_function([text])).tolist()[0]

    hits: QueryResults = [self._vector_search(_embed(text), n_results, **kwargs) for text in queries]
    return hits
|
|
379
|
+
|
|
380
|
+
def _vector_search(
    self, embedding_vector: list[float], n_results: int = 10, **kwargs
) -> list[tuple[dict[str, Any], float]]:
    """Search the index for the nearest stored vectors via Couchbase FTS.

    Builds a vector query on the embedding field, runs it against
    ``self.index_name`` through ``self.scope`` and returns one
    ``(fields, score)`` pair per row, with the row id stored under "id" in
    the fields dict. Extra keyword arguments are accepted but not used.
    """
    vector_query = VectorQuery(EMBEDDING_KEY, embedding_vector, n_results)
    request = search.SearchRequest.create(VectorSearch.from_vector_query(vector_query))
    options = SearchOptions(limit=n_results, fields=["*"])

    scored_docs = []
    for hit in self.scope.search(self.index_name, request, options).rows():
        fields = hit.fields
        fields["id"] = hit.id  # written into the row's own fields dict
        scored_docs.append((fields, hit.score))
    return scored_docs
|