ag2 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423)
  1. ag2-0.10.2.dist-info/METADATA +819 -0
  2. ag2-0.10.2.dist-info/RECORD +423 -0
  3. ag2-0.10.2.dist-info/WHEEL +4 -0
  4. ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
  5. ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
  6. autogen/__init__.py +88 -0
  7. autogen/_website/__init__.py +3 -0
  8. autogen/_website/generate_api_references.py +426 -0
  9. autogen/_website/generate_mkdocs.py +1216 -0
  10. autogen/_website/notebook_processor.py +475 -0
  11. autogen/_website/process_notebooks.py +656 -0
  12. autogen/_website/utils.py +413 -0
  13. autogen/a2a/__init__.py +36 -0
  14. autogen/a2a/agent_executor.py +86 -0
  15. autogen/a2a/client.py +357 -0
  16. autogen/a2a/errors.py +18 -0
  17. autogen/a2a/httpx_client_factory.py +79 -0
  18. autogen/a2a/server.py +221 -0
  19. autogen/a2a/utils.py +207 -0
  20. autogen/agentchat/__init__.py +47 -0
  21. autogen/agentchat/agent.py +180 -0
  22. autogen/agentchat/assistant_agent.py +86 -0
  23. autogen/agentchat/chat.py +325 -0
  24. autogen/agentchat/contrib/__init__.py +5 -0
  25. autogen/agentchat/contrib/agent_eval/README.md +7 -0
  26. autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
  27. autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
  28. autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
  29. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
  30. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
  31. autogen/agentchat/contrib/agent_eval/task.py +42 -0
  32. autogen/agentchat/contrib/agent_optimizer.py +432 -0
  33. autogen/agentchat/contrib/capabilities/__init__.py +5 -0
  34. autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
  35. autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
  36. autogen/agentchat/contrib/capabilities/teachability.py +393 -0
  37. autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
  38. autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
  39. autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
  40. autogen/agentchat/contrib/capabilities/transforms.py +578 -0
  41. autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
  42. autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
  43. autogen/agentchat/contrib/captainagent/__init__.py +9 -0
  44. autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
  45. autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
  46. autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
  47. autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
  48. autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
  49. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
  50. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
  51. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
  52. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
  53. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
  54. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
  55. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
  56. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
  57. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
  58. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
  59. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
  60. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
  61. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
  62. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
  63. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
  64. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
  65. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
  66. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
  67. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
  68. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
  69. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
  70. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
  71. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
  72. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
  73. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
  74. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
  75. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
  76. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
  77. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
  78. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
  79. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
  80. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
  81. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
  82. autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
  83. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
  84. autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
  85. autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
  86. autogen/agentchat/contrib/graph_rag/document.py +29 -0
  87. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
  88. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
  89. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
  90. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
  91. autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
  92. autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
  93. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
  94. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
  95. autogen/agentchat/contrib/img_utils.py +397 -0
  96. autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
  97. autogen/agentchat/contrib/llava_agent.py +189 -0
  98. autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
  99. autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
  100. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  101. autogen/agentchat/contrib/rag/__init__.py +10 -0
  102. autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
  103. autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
  104. autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
  105. autogen/agentchat/contrib/rag/query_engine.py +76 -0
  106. autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
  107. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
  108. autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
  109. autogen/agentchat/contrib/swarm_agent.py +1404 -0
  110. autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
  111. autogen/agentchat/contrib/vectordb/__init__.py +5 -0
  112. autogen/agentchat/contrib/vectordb/base.py +224 -0
  113. autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
  114. autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
  115. autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
  116. autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
  117. autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
  118. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  119. autogen/agentchat/contrib/web_surfer.py +304 -0
  120. autogen/agentchat/conversable_agent.py +4307 -0
  121. autogen/agentchat/group/__init__.py +67 -0
  122. autogen/agentchat/group/available_condition.py +91 -0
  123. autogen/agentchat/group/context_condition.py +77 -0
  124. autogen/agentchat/group/context_expression.py +238 -0
  125. autogen/agentchat/group/context_str.py +39 -0
  126. autogen/agentchat/group/context_variables.py +182 -0
  127. autogen/agentchat/group/events/transition_events.py +111 -0
  128. autogen/agentchat/group/group_tool_executor.py +324 -0
  129. autogen/agentchat/group/group_utils.py +659 -0
  130. autogen/agentchat/group/guardrails.py +179 -0
  131. autogen/agentchat/group/handoffs.py +303 -0
  132. autogen/agentchat/group/llm_condition.py +93 -0
  133. autogen/agentchat/group/multi_agent_chat.py +291 -0
  134. autogen/agentchat/group/on_condition.py +55 -0
  135. autogen/agentchat/group/on_context_condition.py +51 -0
  136. autogen/agentchat/group/patterns/__init__.py +18 -0
  137. autogen/agentchat/group/patterns/auto.py +160 -0
  138. autogen/agentchat/group/patterns/manual.py +177 -0
  139. autogen/agentchat/group/patterns/pattern.py +295 -0
  140. autogen/agentchat/group/patterns/random.py +106 -0
  141. autogen/agentchat/group/patterns/round_robin.py +117 -0
  142. autogen/agentchat/group/reply_result.py +24 -0
  143. autogen/agentchat/group/safeguards/__init__.py +21 -0
  144. autogen/agentchat/group/safeguards/api.py +241 -0
  145. autogen/agentchat/group/safeguards/enforcer.py +1158 -0
  146. autogen/agentchat/group/safeguards/events.py +140 -0
  147. autogen/agentchat/group/safeguards/validator.py +435 -0
  148. autogen/agentchat/group/speaker_selection_result.py +41 -0
  149. autogen/agentchat/group/targets/__init__.py +4 -0
  150. autogen/agentchat/group/targets/function_target.py +245 -0
  151. autogen/agentchat/group/targets/group_chat_target.py +133 -0
  152. autogen/agentchat/group/targets/group_manager_target.py +151 -0
  153. autogen/agentchat/group/targets/transition_target.py +424 -0
  154. autogen/agentchat/group/targets/transition_utils.py +6 -0
  155. autogen/agentchat/groupchat.py +1832 -0
  156. autogen/agentchat/realtime/__init__.py +3 -0
  157. autogen/agentchat/realtime/experimental/__init__.py +20 -0
  158. autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
  159. autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
  160. autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
  161. autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
  162. autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
  163. autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
  164. autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
  165. autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
  166. autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
  167. autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
  168. autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
  169. autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
  170. autogen/agentchat/realtime/experimental/function_observer.py +84 -0
  171. autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
  172. autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
  173. autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
  174. autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
  175. autogen/agentchat/realtime/experimental/websockets.py +21 -0
  176. autogen/agentchat/realtime_agent/__init__.py +21 -0
  177. autogen/agentchat/user_proxy_agent.py +114 -0
  178. autogen/agentchat/utils.py +206 -0
  179. autogen/agents/__init__.py +3 -0
  180. autogen/agents/contrib/__init__.py +10 -0
  181. autogen/agents/contrib/time/__init__.py +8 -0
  182. autogen/agents/contrib/time/time_reply_agent.py +74 -0
  183. autogen/agents/contrib/time/time_tool_agent.py +52 -0
  184. autogen/agents/experimental/__init__.py +27 -0
  185. autogen/agents/experimental/deep_research/__init__.py +7 -0
  186. autogen/agents/experimental/deep_research/deep_research.py +52 -0
  187. autogen/agents/experimental/discord/__init__.py +7 -0
  188. autogen/agents/experimental/discord/discord.py +66 -0
  189. autogen/agents/experimental/document_agent/__init__.py +19 -0
  190. autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
  191. autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
  192. autogen/agents/experimental/document_agent/document_agent.py +643 -0
  193. autogen/agents/experimental/document_agent/document_conditions.py +50 -0
  194. autogen/agents/experimental/document_agent/document_utils.py +376 -0
  195. autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
  196. autogen/agents/experimental/document_agent/parser_utils.py +134 -0
  197. autogen/agents/experimental/document_agent/url_utils.py +417 -0
  198. autogen/agents/experimental/reasoning/__init__.py +7 -0
  199. autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
  200. autogen/agents/experimental/slack/__init__.py +7 -0
  201. autogen/agents/experimental/slack/slack.py +73 -0
  202. autogen/agents/experimental/telegram/__init__.py +7 -0
  203. autogen/agents/experimental/telegram/telegram.py +76 -0
  204. autogen/agents/experimental/websurfer/__init__.py +7 -0
  205. autogen/agents/experimental/websurfer/websurfer.py +70 -0
  206. autogen/agents/experimental/wikipedia/__init__.py +7 -0
  207. autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
  208. autogen/browser_utils.py +309 -0
  209. autogen/cache/__init__.py +10 -0
  210. autogen/cache/abstract_cache_base.py +71 -0
  211. autogen/cache/cache.py +203 -0
  212. autogen/cache/cache_factory.py +88 -0
  213. autogen/cache/cosmos_db_cache.py +144 -0
  214. autogen/cache/disk_cache.py +97 -0
  215. autogen/cache/in_memory_cache.py +54 -0
  216. autogen/cache/redis_cache.py +119 -0
  217. autogen/code_utils.py +598 -0
  218. autogen/coding/__init__.py +30 -0
  219. autogen/coding/base.py +120 -0
  220. autogen/coding/docker_commandline_code_executor.py +283 -0
  221. autogen/coding/factory.py +56 -0
  222. autogen/coding/func_with_reqs.py +203 -0
  223. autogen/coding/jupyter/__init__.py +23 -0
  224. autogen/coding/jupyter/base.py +36 -0
  225. autogen/coding/jupyter/docker_jupyter_server.py +160 -0
  226. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  227. autogen/coding/jupyter/import_utils.py +82 -0
  228. autogen/coding/jupyter/jupyter_client.py +224 -0
  229. autogen/coding/jupyter/jupyter_code_executor.py +154 -0
  230. autogen/coding/jupyter/local_jupyter_server.py +164 -0
  231. autogen/coding/local_commandline_code_executor.py +341 -0
  232. autogen/coding/markdown_code_extractor.py +44 -0
  233. autogen/coding/utils.py +55 -0
  234. autogen/coding/yepcode_code_executor.py +197 -0
  235. autogen/doc_utils.py +35 -0
  236. autogen/environments/__init__.py +10 -0
  237. autogen/environments/docker_python_environment.py +365 -0
  238. autogen/environments/python_environment.py +125 -0
  239. autogen/environments/system_python_environment.py +85 -0
  240. autogen/environments/venv_python_environment.py +220 -0
  241. autogen/environments/working_directory.py +74 -0
  242. autogen/events/__init__.py +7 -0
  243. autogen/events/agent_events.py +1016 -0
  244. autogen/events/base_event.py +100 -0
  245. autogen/events/client_events.py +168 -0
  246. autogen/events/helpers.py +44 -0
  247. autogen/events/print_event.py +45 -0
  248. autogen/exception_utils.py +73 -0
  249. autogen/extensions/__init__.py +5 -0
  250. autogen/fast_depends/__init__.py +16 -0
  251. autogen/fast_depends/_compat.py +75 -0
  252. autogen/fast_depends/core/__init__.py +14 -0
  253. autogen/fast_depends/core/build.py +206 -0
  254. autogen/fast_depends/core/model.py +527 -0
  255. autogen/fast_depends/dependencies/__init__.py +15 -0
  256. autogen/fast_depends/dependencies/model.py +30 -0
  257. autogen/fast_depends/dependencies/provider.py +40 -0
  258. autogen/fast_depends/library/__init__.py +10 -0
  259. autogen/fast_depends/library/model.py +46 -0
  260. autogen/fast_depends/py.typed +6 -0
  261. autogen/fast_depends/schema.py +66 -0
  262. autogen/fast_depends/use.py +272 -0
  263. autogen/fast_depends/utils.py +177 -0
  264. autogen/formatting_utils.py +83 -0
  265. autogen/function_utils.py +13 -0
  266. autogen/graph_utils.py +173 -0
  267. autogen/import_utils.py +539 -0
  268. autogen/interop/__init__.py +22 -0
  269. autogen/interop/crewai/__init__.py +7 -0
  270. autogen/interop/crewai/crewai.py +88 -0
  271. autogen/interop/interoperability.py +71 -0
  272. autogen/interop/interoperable.py +46 -0
  273. autogen/interop/langchain/__init__.py +8 -0
  274. autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
  275. autogen/interop/langchain/langchain_tool.py +78 -0
  276. autogen/interop/litellm/__init__.py +7 -0
  277. autogen/interop/litellm/litellm_config_factory.py +178 -0
  278. autogen/interop/pydantic_ai/__init__.py +7 -0
  279. autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
  280. autogen/interop/registry.py +70 -0
  281. autogen/io/__init__.py +15 -0
  282. autogen/io/base.py +151 -0
  283. autogen/io/console.py +56 -0
  284. autogen/io/processors/__init__.py +12 -0
  285. autogen/io/processors/base.py +21 -0
  286. autogen/io/processors/console_event_processor.py +61 -0
  287. autogen/io/run_response.py +294 -0
  288. autogen/io/thread_io_stream.py +63 -0
  289. autogen/io/websockets.py +214 -0
  290. autogen/json_utils.py +42 -0
  291. autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
  292. autogen/llm_clients/__init__.py +77 -0
  293. autogen/llm_clients/client_v2.py +122 -0
  294. autogen/llm_clients/models/__init__.py +55 -0
  295. autogen/llm_clients/models/content_blocks.py +389 -0
  296. autogen/llm_clients/models/unified_message.py +145 -0
  297. autogen/llm_clients/models/unified_response.py +83 -0
  298. autogen/llm_clients/openai_completions_client.py +444 -0
  299. autogen/llm_config/__init__.py +11 -0
  300. autogen/llm_config/client.py +59 -0
  301. autogen/llm_config/config.py +461 -0
  302. autogen/llm_config/entry.py +169 -0
  303. autogen/llm_config/types.py +37 -0
  304. autogen/llm_config/utils.py +223 -0
  305. autogen/logger/__init__.py +11 -0
  306. autogen/logger/base_logger.py +129 -0
  307. autogen/logger/file_logger.py +262 -0
  308. autogen/logger/logger_factory.py +42 -0
  309. autogen/logger/logger_utils.py +57 -0
  310. autogen/logger/sqlite_logger.py +524 -0
  311. autogen/math_utils.py +338 -0
  312. autogen/mcp/__init__.py +7 -0
  313. autogen/mcp/__main__.py +78 -0
  314. autogen/mcp/helpers.py +45 -0
  315. autogen/mcp/mcp_client.py +349 -0
  316. autogen/mcp/mcp_proxy/__init__.py +19 -0
  317. autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
  318. autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
  319. autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
  320. autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
  321. autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
  322. autogen/mcp/mcp_proxy/security.py +399 -0
  323. autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
  324. autogen/messages/__init__.py +7 -0
  325. autogen/messages/agent_messages.py +946 -0
  326. autogen/messages/base_message.py +108 -0
  327. autogen/messages/client_messages.py +172 -0
  328. autogen/messages/print_message.py +48 -0
  329. autogen/oai/__init__.py +61 -0
  330. autogen/oai/anthropic.py +1516 -0
  331. autogen/oai/bedrock.py +800 -0
  332. autogen/oai/cerebras.py +302 -0
  333. autogen/oai/client.py +1658 -0
  334. autogen/oai/client_utils.py +196 -0
  335. autogen/oai/cohere.py +494 -0
  336. autogen/oai/gemini.py +1045 -0
  337. autogen/oai/gemini_types.py +156 -0
  338. autogen/oai/groq.py +319 -0
  339. autogen/oai/mistral.py +311 -0
  340. autogen/oai/oai_models/__init__.py +23 -0
  341. autogen/oai/oai_models/_models.py +16 -0
  342. autogen/oai/oai_models/chat_completion.py +86 -0
  343. autogen/oai/oai_models/chat_completion_audio.py +32 -0
  344. autogen/oai/oai_models/chat_completion_message.py +97 -0
  345. autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
  346. autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
  347. autogen/oai/oai_models/completion_usage.py +59 -0
  348. autogen/oai/ollama.py +657 -0
  349. autogen/oai/openai_responses.py +451 -0
  350. autogen/oai/openai_utils.py +897 -0
  351. autogen/oai/together.py +387 -0
  352. autogen/remote/__init__.py +18 -0
  353. autogen/remote/agent.py +199 -0
  354. autogen/remote/agent_service.py +197 -0
  355. autogen/remote/errors.py +17 -0
  356. autogen/remote/httpx_client_factory.py +131 -0
  357. autogen/remote/protocol.py +37 -0
  358. autogen/remote/retry.py +102 -0
  359. autogen/remote/runtime.py +96 -0
  360. autogen/retrieve_utils.py +490 -0
  361. autogen/runtime_logging.py +161 -0
  362. autogen/testing/__init__.py +12 -0
  363. autogen/testing/messages.py +45 -0
  364. autogen/testing/test_agent.py +111 -0
  365. autogen/token_count_utils.py +280 -0
  366. autogen/tools/__init__.py +20 -0
  367. autogen/tools/contrib/__init__.py +9 -0
  368. autogen/tools/contrib/time/__init__.py +7 -0
  369. autogen/tools/contrib/time/time.py +40 -0
  370. autogen/tools/dependency_injection.py +249 -0
  371. autogen/tools/experimental/__init__.py +54 -0
  372. autogen/tools/experimental/browser_use/__init__.py +7 -0
  373. autogen/tools/experimental/browser_use/browser_use.py +154 -0
  374. autogen/tools/experimental/code_execution/__init__.py +7 -0
  375. autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
  376. autogen/tools/experimental/crawl4ai/__init__.py +7 -0
  377. autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
  378. autogen/tools/experimental/deep_research/__init__.py +7 -0
  379. autogen/tools/experimental/deep_research/deep_research.py +329 -0
  380. autogen/tools/experimental/duckduckgo/__init__.py +7 -0
  381. autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
  382. autogen/tools/experimental/firecrawl/__init__.py +7 -0
  383. autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
  384. autogen/tools/experimental/google/__init__.py +14 -0
  385. autogen/tools/experimental/google/authentication/__init__.py +11 -0
  386. autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
  387. autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
  388. autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
  389. autogen/tools/experimental/google/drive/__init__.py +9 -0
  390. autogen/tools/experimental/google/drive/drive_functions.py +124 -0
  391. autogen/tools/experimental/google/drive/toolkit.py +88 -0
  392. autogen/tools/experimental/google/model.py +17 -0
  393. autogen/tools/experimental/google/toolkit_protocol.py +19 -0
  394. autogen/tools/experimental/google_search/__init__.py +8 -0
  395. autogen/tools/experimental/google_search/google_search.py +93 -0
  396. autogen/tools/experimental/google_search/youtube_search.py +181 -0
  397. autogen/tools/experimental/messageplatform/__init__.py +17 -0
  398. autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
  399. autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
  400. autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
  401. autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
  402. autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
  403. autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
  404. autogen/tools/experimental/perplexity/__init__.py +7 -0
  405. autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
  406. autogen/tools/experimental/reliable/__init__.py +10 -0
  407. autogen/tools/experimental/reliable/reliable.py +1311 -0
  408. autogen/tools/experimental/searxng/__init__.py +7 -0
  409. autogen/tools/experimental/searxng/searxng_search.py +142 -0
  410. autogen/tools/experimental/tavily/__init__.py +7 -0
  411. autogen/tools/experimental/tavily/tavily_search.py +176 -0
  412. autogen/tools/experimental/web_search_preview/__init__.py +7 -0
  413. autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
  414. autogen/tools/experimental/wikipedia/__init__.py +7 -0
  415. autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
  416. autogen/tools/function_utils.py +412 -0
  417. autogen/tools/tool.py +188 -0
  418. autogen/tools/toolkit.py +86 -0
  419. autogen/types.py +29 -0
  420. autogen/version.py +7 -0
  421. templates/client_template/main.jinja2 +72 -0
  422. templates/config_template/config.jinja2 +7 -0
  423. templates/main.jinja2 +61 -0
@@ -0,0 +1,551 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from collections.abc import Callable, Iterable, Mapping
8
+ from copy import deepcopy
9
+ from time import monotonic, sleep
10
+ from typing import Any, Literal
11
+
12
+ from ....import_utils import optional_import_block, require_optional_import
13
+ from .base import Document, ItemID, QueryResults, VectorDB
14
+ from .utils import get_logger
15
+
16
+ with optional_import_block():
17
+ import numpy as np
18
+ from pymongo import MongoClient, UpdateOne, errors
19
+ from pymongo.collection import Collection
20
+ from pymongo.driver_info import DriverInfo
21
+ from pymongo.operations import SearchIndexModel
22
+ from sentence_transformers import SentenceTransformer
23
+
24
# Module-level logger, named after this module.
logger = get_logger(__name__)

# NOTE(review): not referenced in the visible part of this file; presumably the
# maximum number of documents sent per insert batch — confirm against its users.
DEFAULT_INSERT_BATCH_SIZE = 100_000
# Probe input passed to the embedding function so the length of the resulting
# vector can be measured (see _get_embedding_size).
_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."]
# Seconds to sleep between polls while waiting for an index or a document.
_DELAY = 0.5
29
+
30
+
31
def with_id_rename(docs: Iterable) -> list[dict[str, Any]]:
    """Convert MongoDB records into Document-style dicts.

    Every returned dict holds all fields of the corresponding input record
    except ``_id``, plus an ``id`` key carrying the record's ``_id`` value.
    The input records themselves are left untouched.
    """
    renamed = []
    for record in docs:
        # Copy everything but the Mongo primary key ...
        converted = {key: value for key, value in record.items() if key != "_id"}
        # ... and expose that key under the name Document expects.
        converted["id"] = record["_id"]
        renamed.append(converted)
    return renamed
34
+
35
+
36
+ @require_optional_import(["pymongo", "sentence_transformers", "numpy"], "retrievechat-mongodb")
37
+ class MongoDBAtlasVectorDB(VectorDB):
38
+ """A Collection object for MongoDB."""
39
+
40
def __init__(
    self,
    connection_string: str = "",
    database_name: str = "vector_db",
    embedding_function: Callable[..., Any] | None = None,
    collection_name: str = None,
    index_name: str = "vector_index",
    overwrite: bool = False,
    wait_until_index_ready: float | None = None,
    wait_until_document_ready: float | None = None,
):
    """Connect to MongoDB and set up the embedding function and active collection.

    Args:
        connection_string: MongoDB connection string. Default is ''.
        database_name: Name of the database to use. Default is 'vector_db'.
        embedding_function: Callable producing the vector representation of
            text. When falsy, the ``encode`` method of the "all-MiniLM-L6-v2"
            SentenceTransformer is used.
        collection_name: Collection to create (or fetch) and make active.
            When falsy, no collection is made active.
        index_name: Vector search index name. Default is 'vector_index'.
        overwrite: Forwarded to ``create_collection`` for ``collection_name``.
        wait_until_index_ready: Seconds to block while waiting for a search
            index to become ready. None, the default, means no wait.
        wait_until_document_ready: Seconds to block while waiting for a
            document to be returned by vector search. None, the default,
            means no wait.

    Raises:
        ConnectionError: If pinging the server fails with
            ``ServerSelectionTimeoutError``.
    """
    if not embedding_function:
        embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode
    self.embedding_function = embedding_function
    self.index_name = index_name
    self._wait_until_index_ready = wait_until_index_ready
    self._wait_until_document_ready = wait_until_document_ready

    # The vector dimensionality is discovered by embedding a sample sentence.
    self.dimensions = self._get_embedding_size()

    try:
        mongo_client = MongoClient(connection_string, driver=DriverInfo(name="autogen"))
        self.client = mongo_client
        # A ping forces server selection so connection problems surface here.
        mongo_client.admin.command("ping")
        logger.debug("Successfully created MongoClient")
    except errors.ServerSelectionTimeoutError as err:
        raise ConnectionError("Could not connect to MongoDB server") from err

    self.db = self.client[database_name]
    logger.debug(f"Atlas Database name: {self.db.name}")
    self.active_collection = self.create_collection(collection_name, overwrite) if collection_name else None
87
+
88
+ def _is_index_ready(self, collection: "Collection", index_name: str):
89
+ """Check for the index name in the list of available search indexes to see if the
90
+ specified index is of status READY
91
+
92
+ Args:
93
+ collection (Collection): MongoDB Collection to for the search indexes
94
+ index_name (str): Vector Search Index name
95
+
96
+ Returns:
97
+ bool : True if the index is present and READY false otherwise
98
+ """
99
+ for index in collection.list_search_indexes(index_name):
100
+ if index["type"] == "vectorSearch" and index["status"] == "READY":
101
+ return True
102
+ return False
103
+
104
+ def _wait_for_index(self, collection: "Collection", index_name: str, action: str = "create"):
105
+ """Waits for the index action to be completed. Otherwise throws a TimeoutError.
106
+
107
+ Timeout set on instantiation.
108
+ action: "create" or "delete"
109
+ """
110
+ assert action in ["create", "delete"], f"{action=} must be create or delete."
111
+ start = monotonic()
112
+ while monotonic() - start < self._wait_until_index_ready:
113
+ if (action == "create" and self._is_index_ready(collection, index_name)) or (
114
+ action == "delete" and len(list(collection.list_search_indexes())) == 0
115
+ ):
116
+ return
117
+ sleep(_DELAY)
118
+
119
+ raise TimeoutError(f"Index {self.index_name} is not ready!")
120
+
121
+ def _wait_for_document(self, collection: "Collection", index_name: str, doc: Document):
122
+ start = monotonic()
123
+ while monotonic() - start < self._wait_until_document_ready:
124
+ query_result = _vector_search(
125
+ embedding_vector=np.array(self.embedding_function(doc["content"])).tolist(),
126
+ n_results=1,
127
+ collection=collection,
128
+ index_name=index_name,
129
+ )
130
+ if query_result and query_result[0][0]["_id"] == doc["id"]:
131
+ return
132
+ sleep(_DELAY)
133
+
134
+ raise TimeoutError(f"Document {self.index_name} is not ready!")
135
+
136
def _get_embedding_size(self):
    """Return the length of the vector the embedding function yields for the sample sentence."""
    sample_embeddings = self.embedding_function(_SAMPLE_SENTENCE)
    # _SAMPLE_SENTENCE is a one-element list, so measure its first embedding.
    first_vector = sample_embeddings[0]
    return len(first_vector)
138
+
139
def list_collections(self):
    """Return the names of the collections in the connected database.

    Returns:
        List[str] | The collection names reported by ``self.db``.
    """
    collection_names = self.db.list_collection_names()
    return collection_names
146
+
147
def create_collection(
    self,
    collection_name: str,
    overwrite: bool = False,
    get_or_create: bool = True,
) -> "Collection":
    """Create (or fetch) a collection and make sure it has a vector search index.

    Args:
        collection_name: str | The name of the collection.
        overwrite: bool | When True, the collection is deleted first so it is
            created afresh. Default is False.
        get_or_create: bool | When True, an already existing collection is
            returned instead of raising. Default is True

    Returns:
        Collection | The new or existing collection.

    Raises:
        ValueError: If the collection exists and ``get_or_create`` is False.
    """
    if overwrite:
        self.delete_collection(collection_name)

    already_exists = collection_name in self.db.list_collection_names()
    if already_exists and not get_or_create:
        raise ValueError(f"Collection {collection_name} already exists.")

    # Either reuse the existing collection or create a brand-new one; both
    # paths then ensure the vector search index is present.
    target = self.db[collection_name] if already_exists else self.db.create_collection(collection_name)
    self.create_index_if_not_exists(index_name=self.index_name, collection=target)
    return target
177
+
178
def create_index_if_not_exists(self, index_name: str = "vector_index", collection: "Collection" = None) -> None:
    """Create the vector search index on ``collection`` unless it is already ready.

    Args:
        index_name (str, optional): The name of the vector search index. Defaults to "vector_index".
        collection (Collection, optional): The MongoDB collection to create the index on. Defaults to None.
    """
    if self._is_index_ready(collection, index_name):
        return
    self.create_vector_search_index(collection, index_name)
187
+
188
def get_collection(self, collection_name: str = None) -> "Collection":
    """Return a collection and remember it as the active one.

    Args:
        collection_name: str | The name of the collection. Default is None, in
            which case the current active collection is returned.

    Returns:
        Collection | The collection object.

    Raises:
        ValueError: If no name is given and there is no active collection.
    """
    if collection_name is not None:
        # An explicit name always replaces the active collection.
        self.active_collection = self.db[collection_name]
        return self.active_collection

    if self.active_collection is None:
        raise ValueError("No collection is specified.")

    logger.debug(
        f"No collection is specified. Using current active collection {self.active_collection.name}."
    )
    return self.active_collection
209
+
210
def delete_collection(self, collection_name: str) -> None:
    """Drop every search index of the collection, then the collection itself.

    After each index drop, and only when ``self._wait_until_index_ready`` is
    truthy, the method waits for that deletion via ``_wait_for_index``.

    Args:
        collection_name: str | The name of the collection.

    Returns:
        Whatever the collection's ``drop()`` call returns.
    """
    database = self.db
    for index in database[collection_name].list_search_indexes():
        search_index_name = index["name"]
        database[collection_name].drop_search_index(search_index_name)
        if not self._wait_until_index_ready:
            continue
        self._wait_for_index(database[collection_name], search_index_name, "delete")
    return database[collection_name].drop()
221
+
222
def create_vector_search_index(
    self,
    collection: "Collection",
    index_name: str | None = "vector_index",
    similarity: Literal["euclidean", "cosine", "dotProduct"] = "cosine",
) -> None:
    """Create a vector search index over the "embedding" field of a collection.

    The index is sized with ``self.dimensions``. When
    ``self._wait_until_index_ready`` is truthy, the call also waits for the
    index via ``_wait_for_index``.

    Args:
        collection: An existing Collection in the Atlas Database.
        index_name: Vector Search Index name.
        similarity: Algorithm used for measuring vector similarity.

    Returns:
        None

    Raises:
        Exception: Any error raised while creating or waiting for the index is
            logged and then re-raised.
    """
    vector_field = {
        "type": "vector",
        "numDimensions": self.dimensions,
        "path": "embedding",
        "similarity": similarity,
    }
    search_index_model = SearchIndexModel(
        definition={"fields": [vector_field]},
        name=index_name,
        type="vectorSearch",
    )
    try:
        collection.create_search_index(model=search_index_model)
        if self._wait_until_index_ready:
            self._wait_for_index(collection, index_name, "create")
        logger.debug(f"Search index {index_name} created successfully.")
    except Exception as e:
        # Log a hint about the likely cause, then let the caller see the error.
        logger.error(
            f"Error creating search index: {e}. \n"
            f"Your client must be connected to an Atlas cluster. "
            f"You may have to manually create a Collection and Search Index "
            f"if you are on a free/shared cluster."
        )
        raise e
267
+
268
def insert_docs(
    self,
    docs: list[Document],
    collection_name: str = None,
    upsert: bool = False,
    batch_size: int = DEFAULT_INSERT_BATCH_SIZE,
    **kwargs: Any,
) -> None:
    """Insert Documents and Vector Embeddings into the collection of the vector database.

    For large numbers of Documents, insertion is performed in batches. A batch
    is flushed after every ``batch_size`` documents or as soon as its estimated
    size reaches the size cap, whichever comes first.

    Args:
        docs: A list of documents. Each document is a TypedDict `Document`.
        collection_name: The name of the collection. Default is None.
        upsert: Whether to update the document if it exists. Default is False.
            When True the work is delegated to ``update_docs``.
        batch_size: Number of documents to be inserted in each batch
        **kwargs: Additional keyword arguments (unused).

    Raises:
        ValueError: If the first document has no content or no id.
    """
    if not docs:
        logger.info("No documents to insert.")
        return

    collection = self.get_collection(collection_name)
    if upsert:
        # update_docs already waits for the last document to become
        # searchable, so there is nothing left to do here.
        self.update_docs(docs, collection.name, upsert=True)
        return

    # Sanity checking the first document
    if docs[0].get("content") is None:
        raise ValueError("The document content is required.")
    if docs[0].get("id") is None:
        raise ValueError("The document id is required.")

    # NOTE(review): presumably chosen to stay below a MongoDB message-size
    # limit -- confirm. The "size" compared against it is only a rough
    # estimate (characters of text and id plus the number of metadata keys).
    max_batch_size_estimate = 47_000_000

    input_ids = set()
    result_ids = set()
    id_batch = []
    text_batch = []
    metadata_batch = []
    size = 0
    for position, doc in enumerate(docs, start=1):
        doc_id = doc["id"]
        text = doc["content"]
        metadata = doc.get("metadata", {})
        id_batch.append(doc_id)
        text_batch.append(text)
        metadata_batch.append(metadata)
        id_size = 1 if isinstance(doc_id, int) else len(doc_id)
        # A document may carry "metadata": None; count it as empty instead of
        # failing on len(None).
        size += len(text) + (len(metadata) if metadata else 0) + id_size
        if position % batch_size == 0 or size >= max_batch_size_estimate:
            result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch))
            input_ids.update(id_batch)
            id_batch = []
            text_batch = []
            metadata_batch = []
            size = 0
    if text_batch:
        # Flush the final, partially filled batch.
        result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch))
        input_ids.update(id_batch)

    if result_ids != input_ids:
        logger.warning(
            "Possible data corruption. "
            f"input_ids not in result_ids: {input_ids.difference(result_ids)}.\n"
            f"result_ids not in input_ids: {result_ids.difference(input_ids)}"
        )
    if self._wait_until_document_ready:
        self._wait_for_document(collection, self.index_name, docs[-1])
337
+
338
def _insert_batch(
    self, collection: "Collection", texts: list[str], metadatas: list[Mapping[str, Any]], ids: list[ItemID]
) -> set[ItemID]:
    """Compute embeddings for and insert a batch of Documents into the Collection.

    For performance reasons, self.embedding_function is called just once for
    the whole batch, with the hopefully small tradeoff of having to recreate
    the Document dicts.

    Args:
        collection: MongoDB Collection
        texts: List of the main contents of each document
        metadatas: List of metadata mappings
        ids: List of ids. Note that these are stored as _id in Collection.

    Returns:
        The ids reported as inserted by ``insert_many`` (its ``inserted_ids``),
        or an empty list when ``texts`` is empty.

    Raises:
        AssertionError: If the embedding function does not return exactly one
            embedding per text.
    """
    n_texts = len(texts)
    if n_texts == 0:
        return []
    # Embed and create the documents
    embeddings = self.embedding_function(texts).tolist()
    assert len(embeddings) == n_texts, (
        f"The number of embeddings produced by self.embedding_function ({len(embeddings)}) "
        f"does not match the number of texts provided to it ({n_texts})."
    )
    to_insert = [
        {"_id": doc_id, "content": text, "metadata": metadata, "embedding": embedding}
        for doc_id, text, metadata, embedding in zip(ids, texts, metadatas, embeddings)
    ]
    # insert the documents in MongoDB Atlas
    insert_result = collection.insert_many(to_insert)  # type: ignore[union-attr]
    return insert_result.inserted_ids  # TODO Remove this. Replace by log like update_docs
370
+
371
def update_docs(self, docs: list[Document], collection_name: str = None, **kwargs: Any) -> None:
    """Update documents, including their embeddings, in the Collection.

    All contents are embedded in a single call and written with one bulk
    operation. Uses deepcopy to avoid changing docs.

    Args:
        docs: List[Document] | A list of documents.
        collection_name: str | The name of the collection. Default is None.
        kwargs: Any | Use `upsert=True` to insert documents whose ids are not present in collection.

    Raises:
        ValueError: If the embedding function does not return exactly one
            embedding per document.
    """
    if not docs:
        # A bulk write without operations is rejected by the driver, so an
        # empty update is simply a no-op.
        logger.info("No documents to update.")
        return

    n_docs = len(docs)
    logger.info(f"Preparing to embed and update {n_docs=}")
    # Compute the embeddings
    embeddings: list[list[float]] = self.embedding_function([doc["content"] for doc in docs]).tolist()
    upsert = kwargs.get("upsert", False)
    # Prepare the updates
    all_updates = []
    for original, embedding in zip(docs, embeddings, strict=True):
        doc = deepcopy(original)
        doc["embedding"] = embedding
        # Documents are stored under "_id" rather than "id".
        doc["_id"] = doc.pop("id")
        all_updates.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=upsert))
    # Perform update in bulk
    collection = self.get_collection(collection_name)
    result = collection.bulk_write(all_updates)

    if self._wait_until_document_ready:
        self._wait_for_document(collection, self.index_name, docs[-1])

    # Log a result summary
    logger.info(
        "Matched: %s, Modified: %s, Upserted: %s",
        result.matched_count,
        result.modified_count,
        result.upserted_count,
    )
409
+
410
def delete_docs(self, ids: list[ItemID], collection_name: str = None, **kwargs):
    """Delete the documents with the given ids from a collection.

    Args:
        ids: A list of document ids. Each id is a typed `ItemID`.
        collection_name: The name of the collection. Default is None.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        The result of the collection's ``delete_many`` call.
    """
    id_filter = {"_id": {"$in": ids}}
    target = self.get_collection(collection_name)
    return target.delete_many(id_filter)
420
+
421
def get_docs_by_ids(
    self, ids: list[ItemID] = None, collection_name: str = None, include: list[str] = None, **kwargs
) -> list[Document]:
    """Fetch documents by id, or every document when no ids are given.

    Args:
        ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
        collection_name: str | The name of the collection. Default is None.
        include: List[str] | The fields to include.
            If None, will include ["metadata", "content"], ids will always be included.
            Basically, use include to choose whether to include embedding and metadata
        kwargs: dict | Additional keyword arguments (unused).

    Returns:
        List[Document] | The results, passed through ``with_id_rename`` so the
        stored ``_id`` field becomes ``id``.
    """
    if include is None:
        projection = {"_id": 1, "content": 1, "metadata": 1}
    else:
        # _id is always part of the projection, whatever the caller asked for.
        projection = {field: 1 for field in set(include) | {"_id"}}

    # An empty filter matches every document in the collection.
    selector = {} if ids is None else {"_id": {"$in": ids}}
    cursor = self.get_collection(collection_name).find(selector, projection)
    # Return with _id field from Collection into id for Document
    return with_id_rename(cursor)
450
+
451
def retrieve_docs(
    self,
    queries: list[str],
    collection_name: str = None,
    n_results: int = 10,
    distance_threshold: float = -1,
    **kwargs: Any,
) -> QueryResults:
    """Retrieve documents from the collection of the vector database based on the queries.

    Args:
        queries: List[str] | A list of queries. Each query is a string.
        collection_name: str | The name of the collection. Default is None.
        n_results: int | The number of relevant documents to return. Default is 10.
        distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
            returned. Don't filter with it if < 0. Default is -1.
        kwargs: Dict | Additional keyword arguments. Only the following are used; others are ignored:
            oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm.
                It determines the number of nearest neighbor candidates to consider during the search phase.
                A higher value leads to more accuracy, but is slower. Default is 10
            include_embedding: bool | Whether to keep the embedding in the returned documents.

    Returns:
        QueryResults | For each query string, a list of nearest documents and their scores.
        An empty list is returned when the collection holds no documents.
    """
    collection = self.get_collection(collection_name)
    # Trivial case of an empty collection
    if collection.count_documents({}) == 0:
        return []

    # Forward only the options _vector_search understands. Passing **kwargs
    # through together with an explicit oversampling_factor= would raise
    # "got multiple values for keyword argument" whenever the caller supplies
    # that documented option.
    search_options = {"oversampling_factor": kwargs.get("oversampling_factor", 10)}
    if "include_embedding" in kwargs:
        search_options["include_embedding"] = kwargs["include_embedding"]

    logger.debug(f"Using index: {self.index_name}")
    results = []
    for query_text in queries:
        # Compute embedding vector from semantic query
        logger.debug(f"Query: {query_text}")
        query_vector = np.array(self.embedding_function([query_text])).tolist()[0]
        # Find documents with similar vectors using the specified index
        query_result = _vector_search(
            query_vector,
            n_results,
            collection,
            self.index_name,
            distance_threshold,
            **search_options,
        )
        # Change each _id key to id. with_id_rename, but with (doc, score) tuples
        results.append([
            ({**{k: v for k, v in found.items() if k != "_id"}, "id": found["_id"]}, score)
            for found, score in query_result
        ])
    return results
501
+
502
+
503
+ def _vector_search(
504
+ embedding_vector: list[float],
505
+ n_results: int,
506
+ collection: "Collection",
507
+ index_name: str,
508
+ distance_threshold: float = -1.0,
509
+ oversampling_factor=10,
510
+ include_embedding=False,
511
+ ) -> list[tuple[dict[str, Any], float]]:
512
+ """Core $vectorSearch Aggregation pipeline.
513
+
514
+ Args:
515
+ embedding_vector: Embedding vector of semantic query
516
+ n_results: Number of documents to return. Defaults to 4.
517
+ collection: MongoDB Collection with vector index
518
+ index_name: Name of the vector index
519
+ distance_threshold: Only distance measures smaller than this will be returned.
520
+ Don't filter with it if 1 < x < 0. Default is -1.
521
+ oversampling_factor: This times n_results is 'ef' in the HNSW algorithm.
522
+ It determines the number of nearest neighbor candidates to consider during the search phase.
523
+ A higher value leads to more accuracy, but is slower. Default = 10
524
+ include_embedding: Whether to include the embedding in the results. Default is False.
525
+
526
+ Returns:
527
+ List of tuples of length n_results from Collection.
528
+ Each tuple contains a document dict and a score.
529
+ """
530
+ pipeline = [
531
+ {
532
+ "$vectorSearch": {
533
+ "index": index_name,
534
+ "limit": n_results,
535
+ "numCandidates": n_results * oversampling_factor,
536
+ "queryVector": embedding_vector,
537
+ "path": "embedding",
538
+ }
539
+ },
540
+ {"$set": {"score": {"$meta": "vectorSearchScore"}}},
541
+ ]
542
+ if distance_threshold >= 0.0:
543
+ similarity_threshold = 1.0 - distance_threshold
544
+ pipeline.append({"$match": {"score": {"$gte": similarity_threshold}}})
545
+
546
+ if not include_embedding:
547
+ pipeline.append({"$project": {"embedding": 0}})
548
+
549
+ logger.debug("pipeline: %s", pipeline)
550
+ agg = collection.aggregate(pipeline)
551
+ return [(doc, doc.pop("score")) for doc in agg]