ag2 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423) hide show
  1. ag2-0.10.2.dist-info/METADATA +819 -0
  2. ag2-0.10.2.dist-info/RECORD +423 -0
  3. ag2-0.10.2.dist-info/WHEEL +4 -0
  4. ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
  5. ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
  6. autogen/__init__.py +88 -0
  7. autogen/_website/__init__.py +3 -0
  8. autogen/_website/generate_api_references.py +426 -0
  9. autogen/_website/generate_mkdocs.py +1216 -0
  10. autogen/_website/notebook_processor.py +475 -0
  11. autogen/_website/process_notebooks.py +656 -0
  12. autogen/_website/utils.py +413 -0
  13. autogen/a2a/__init__.py +36 -0
  14. autogen/a2a/agent_executor.py +86 -0
  15. autogen/a2a/client.py +357 -0
  16. autogen/a2a/errors.py +18 -0
  17. autogen/a2a/httpx_client_factory.py +79 -0
  18. autogen/a2a/server.py +221 -0
  19. autogen/a2a/utils.py +207 -0
  20. autogen/agentchat/__init__.py +47 -0
  21. autogen/agentchat/agent.py +180 -0
  22. autogen/agentchat/assistant_agent.py +86 -0
  23. autogen/agentchat/chat.py +325 -0
  24. autogen/agentchat/contrib/__init__.py +5 -0
  25. autogen/agentchat/contrib/agent_eval/README.md +7 -0
  26. autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
  27. autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
  28. autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
  29. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
  30. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
  31. autogen/agentchat/contrib/agent_eval/task.py +42 -0
  32. autogen/agentchat/contrib/agent_optimizer.py +432 -0
  33. autogen/agentchat/contrib/capabilities/__init__.py +5 -0
  34. autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
  35. autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
  36. autogen/agentchat/contrib/capabilities/teachability.py +393 -0
  37. autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
  38. autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
  39. autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
  40. autogen/agentchat/contrib/capabilities/transforms.py +578 -0
  41. autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
  42. autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
  43. autogen/agentchat/contrib/captainagent/__init__.py +9 -0
  44. autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
  45. autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
  46. autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
  47. autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
  48. autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
  49. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
  50. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
  51. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
  52. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
  53. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
  54. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
  55. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
  56. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
  57. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
  58. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
  59. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
  60. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
  61. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
  62. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
  63. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
  64. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
  65. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
  66. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
  67. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
  68. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
  69. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
  70. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
  71. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
  72. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
  73. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
  74. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
  75. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
  76. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
  77. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
  78. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
  79. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
  80. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
  81. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
  82. autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
  83. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
  84. autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
  85. autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
  86. autogen/agentchat/contrib/graph_rag/document.py +29 -0
  87. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
  88. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
  89. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
  90. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
  91. autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
  92. autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
  93. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
  94. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
  95. autogen/agentchat/contrib/img_utils.py +397 -0
  96. autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
  97. autogen/agentchat/contrib/llava_agent.py +189 -0
  98. autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
  99. autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
  100. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  101. autogen/agentchat/contrib/rag/__init__.py +10 -0
  102. autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
  103. autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
  104. autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
  105. autogen/agentchat/contrib/rag/query_engine.py +76 -0
  106. autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
  107. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
  108. autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
  109. autogen/agentchat/contrib/swarm_agent.py +1404 -0
  110. autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
  111. autogen/agentchat/contrib/vectordb/__init__.py +5 -0
  112. autogen/agentchat/contrib/vectordb/base.py +224 -0
  113. autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
  114. autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
  115. autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
  116. autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
  117. autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
  118. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  119. autogen/agentchat/contrib/web_surfer.py +304 -0
  120. autogen/agentchat/conversable_agent.py +4307 -0
  121. autogen/agentchat/group/__init__.py +67 -0
  122. autogen/agentchat/group/available_condition.py +91 -0
  123. autogen/agentchat/group/context_condition.py +77 -0
  124. autogen/agentchat/group/context_expression.py +238 -0
  125. autogen/agentchat/group/context_str.py +39 -0
  126. autogen/agentchat/group/context_variables.py +182 -0
  127. autogen/agentchat/group/events/transition_events.py +111 -0
  128. autogen/agentchat/group/group_tool_executor.py +324 -0
  129. autogen/agentchat/group/group_utils.py +659 -0
  130. autogen/agentchat/group/guardrails.py +179 -0
  131. autogen/agentchat/group/handoffs.py +303 -0
  132. autogen/agentchat/group/llm_condition.py +93 -0
  133. autogen/agentchat/group/multi_agent_chat.py +291 -0
  134. autogen/agentchat/group/on_condition.py +55 -0
  135. autogen/agentchat/group/on_context_condition.py +51 -0
  136. autogen/agentchat/group/patterns/__init__.py +18 -0
  137. autogen/agentchat/group/patterns/auto.py +160 -0
  138. autogen/agentchat/group/patterns/manual.py +177 -0
  139. autogen/agentchat/group/patterns/pattern.py +295 -0
  140. autogen/agentchat/group/patterns/random.py +106 -0
  141. autogen/agentchat/group/patterns/round_robin.py +117 -0
  142. autogen/agentchat/group/reply_result.py +24 -0
  143. autogen/agentchat/group/safeguards/__init__.py +21 -0
  144. autogen/agentchat/group/safeguards/api.py +241 -0
  145. autogen/agentchat/group/safeguards/enforcer.py +1158 -0
  146. autogen/agentchat/group/safeguards/events.py +140 -0
  147. autogen/agentchat/group/safeguards/validator.py +435 -0
  148. autogen/agentchat/group/speaker_selection_result.py +41 -0
  149. autogen/agentchat/group/targets/__init__.py +4 -0
  150. autogen/agentchat/group/targets/function_target.py +245 -0
  151. autogen/agentchat/group/targets/group_chat_target.py +133 -0
  152. autogen/agentchat/group/targets/group_manager_target.py +151 -0
  153. autogen/agentchat/group/targets/transition_target.py +424 -0
  154. autogen/agentchat/group/targets/transition_utils.py +6 -0
  155. autogen/agentchat/groupchat.py +1832 -0
  156. autogen/agentchat/realtime/__init__.py +3 -0
  157. autogen/agentchat/realtime/experimental/__init__.py +20 -0
  158. autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
  159. autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
  160. autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
  161. autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
  162. autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
  163. autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
  164. autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
  165. autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
  166. autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
  167. autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
  168. autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
  169. autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
  170. autogen/agentchat/realtime/experimental/function_observer.py +84 -0
  171. autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
  172. autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
  173. autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
  174. autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
  175. autogen/agentchat/realtime/experimental/websockets.py +21 -0
  176. autogen/agentchat/realtime_agent/__init__.py +21 -0
  177. autogen/agentchat/user_proxy_agent.py +114 -0
  178. autogen/agentchat/utils.py +206 -0
  179. autogen/agents/__init__.py +3 -0
  180. autogen/agents/contrib/__init__.py +10 -0
  181. autogen/agents/contrib/time/__init__.py +8 -0
  182. autogen/agents/contrib/time/time_reply_agent.py +74 -0
  183. autogen/agents/contrib/time/time_tool_agent.py +52 -0
  184. autogen/agents/experimental/__init__.py +27 -0
  185. autogen/agents/experimental/deep_research/__init__.py +7 -0
  186. autogen/agents/experimental/deep_research/deep_research.py +52 -0
  187. autogen/agents/experimental/discord/__init__.py +7 -0
  188. autogen/agents/experimental/discord/discord.py +66 -0
  189. autogen/agents/experimental/document_agent/__init__.py +19 -0
  190. autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
  191. autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
  192. autogen/agents/experimental/document_agent/document_agent.py +643 -0
  193. autogen/agents/experimental/document_agent/document_conditions.py +50 -0
  194. autogen/agents/experimental/document_agent/document_utils.py +376 -0
  195. autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
  196. autogen/agents/experimental/document_agent/parser_utils.py +134 -0
  197. autogen/agents/experimental/document_agent/url_utils.py +417 -0
  198. autogen/agents/experimental/reasoning/__init__.py +7 -0
  199. autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
  200. autogen/agents/experimental/slack/__init__.py +7 -0
  201. autogen/agents/experimental/slack/slack.py +73 -0
  202. autogen/agents/experimental/telegram/__init__.py +7 -0
  203. autogen/agents/experimental/telegram/telegram.py +76 -0
  204. autogen/agents/experimental/websurfer/__init__.py +7 -0
  205. autogen/agents/experimental/websurfer/websurfer.py +70 -0
  206. autogen/agents/experimental/wikipedia/__init__.py +7 -0
  207. autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
  208. autogen/browser_utils.py +309 -0
  209. autogen/cache/__init__.py +10 -0
  210. autogen/cache/abstract_cache_base.py +71 -0
  211. autogen/cache/cache.py +203 -0
  212. autogen/cache/cache_factory.py +88 -0
  213. autogen/cache/cosmos_db_cache.py +144 -0
  214. autogen/cache/disk_cache.py +97 -0
  215. autogen/cache/in_memory_cache.py +54 -0
  216. autogen/cache/redis_cache.py +119 -0
  217. autogen/code_utils.py +598 -0
  218. autogen/coding/__init__.py +30 -0
  219. autogen/coding/base.py +120 -0
  220. autogen/coding/docker_commandline_code_executor.py +283 -0
  221. autogen/coding/factory.py +56 -0
  222. autogen/coding/func_with_reqs.py +203 -0
  223. autogen/coding/jupyter/__init__.py +23 -0
  224. autogen/coding/jupyter/base.py +36 -0
  225. autogen/coding/jupyter/docker_jupyter_server.py +160 -0
  226. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  227. autogen/coding/jupyter/import_utils.py +82 -0
  228. autogen/coding/jupyter/jupyter_client.py +224 -0
  229. autogen/coding/jupyter/jupyter_code_executor.py +154 -0
  230. autogen/coding/jupyter/local_jupyter_server.py +164 -0
  231. autogen/coding/local_commandline_code_executor.py +341 -0
  232. autogen/coding/markdown_code_extractor.py +44 -0
  233. autogen/coding/utils.py +55 -0
  234. autogen/coding/yepcode_code_executor.py +197 -0
  235. autogen/doc_utils.py +35 -0
  236. autogen/environments/__init__.py +10 -0
  237. autogen/environments/docker_python_environment.py +365 -0
  238. autogen/environments/python_environment.py +125 -0
  239. autogen/environments/system_python_environment.py +85 -0
  240. autogen/environments/venv_python_environment.py +220 -0
  241. autogen/environments/working_directory.py +74 -0
  242. autogen/events/__init__.py +7 -0
  243. autogen/events/agent_events.py +1016 -0
  244. autogen/events/base_event.py +100 -0
  245. autogen/events/client_events.py +168 -0
  246. autogen/events/helpers.py +44 -0
  247. autogen/events/print_event.py +45 -0
  248. autogen/exception_utils.py +73 -0
  249. autogen/extensions/__init__.py +5 -0
  250. autogen/fast_depends/__init__.py +16 -0
  251. autogen/fast_depends/_compat.py +75 -0
  252. autogen/fast_depends/core/__init__.py +14 -0
  253. autogen/fast_depends/core/build.py +206 -0
  254. autogen/fast_depends/core/model.py +527 -0
  255. autogen/fast_depends/dependencies/__init__.py +15 -0
  256. autogen/fast_depends/dependencies/model.py +30 -0
  257. autogen/fast_depends/dependencies/provider.py +40 -0
  258. autogen/fast_depends/library/__init__.py +10 -0
  259. autogen/fast_depends/library/model.py +46 -0
  260. autogen/fast_depends/py.typed +6 -0
  261. autogen/fast_depends/schema.py +66 -0
  262. autogen/fast_depends/use.py +272 -0
  263. autogen/fast_depends/utils.py +177 -0
  264. autogen/formatting_utils.py +83 -0
  265. autogen/function_utils.py +13 -0
  266. autogen/graph_utils.py +173 -0
  267. autogen/import_utils.py +539 -0
  268. autogen/interop/__init__.py +22 -0
  269. autogen/interop/crewai/__init__.py +7 -0
  270. autogen/interop/crewai/crewai.py +88 -0
  271. autogen/interop/interoperability.py +71 -0
  272. autogen/interop/interoperable.py +46 -0
  273. autogen/interop/langchain/__init__.py +8 -0
  274. autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
  275. autogen/interop/langchain/langchain_tool.py +78 -0
  276. autogen/interop/litellm/__init__.py +7 -0
  277. autogen/interop/litellm/litellm_config_factory.py +178 -0
  278. autogen/interop/pydantic_ai/__init__.py +7 -0
  279. autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
  280. autogen/interop/registry.py +70 -0
  281. autogen/io/__init__.py +15 -0
  282. autogen/io/base.py +151 -0
  283. autogen/io/console.py +56 -0
  284. autogen/io/processors/__init__.py +12 -0
  285. autogen/io/processors/base.py +21 -0
  286. autogen/io/processors/console_event_processor.py +61 -0
  287. autogen/io/run_response.py +294 -0
  288. autogen/io/thread_io_stream.py +63 -0
  289. autogen/io/websockets.py +214 -0
  290. autogen/json_utils.py +42 -0
  291. autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
  292. autogen/llm_clients/__init__.py +77 -0
  293. autogen/llm_clients/client_v2.py +122 -0
  294. autogen/llm_clients/models/__init__.py +55 -0
  295. autogen/llm_clients/models/content_blocks.py +389 -0
  296. autogen/llm_clients/models/unified_message.py +145 -0
  297. autogen/llm_clients/models/unified_response.py +83 -0
  298. autogen/llm_clients/openai_completions_client.py +444 -0
  299. autogen/llm_config/__init__.py +11 -0
  300. autogen/llm_config/client.py +59 -0
  301. autogen/llm_config/config.py +461 -0
  302. autogen/llm_config/entry.py +169 -0
  303. autogen/llm_config/types.py +37 -0
  304. autogen/llm_config/utils.py +223 -0
  305. autogen/logger/__init__.py +11 -0
  306. autogen/logger/base_logger.py +129 -0
  307. autogen/logger/file_logger.py +262 -0
  308. autogen/logger/logger_factory.py +42 -0
  309. autogen/logger/logger_utils.py +57 -0
  310. autogen/logger/sqlite_logger.py +524 -0
  311. autogen/math_utils.py +338 -0
  312. autogen/mcp/__init__.py +7 -0
  313. autogen/mcp/__main__.py +78 -0
  314. autogen/mcp/helpers.py +45 -0
  315. autogen/mcp/mcp_client.py +349 -0
  316. autogen/mcp/mcp_proxy/__init__.py +19 -0
  317. autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
  318. autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
  319. autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
  320. autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
  321. autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
  322. autogen/mcp/mcp_proxy/security.py +399 -0
  323. autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
  324. autogen/messages/__init__.py +7 -0
  325. autogen/messages/agent_messages.py +946 -0
  326. autogen/messages/base_message.py +108 -0
  327. autogen/messages/client_messages.py +172 -0
  328. autogen/messages/print_message.py +48 -0
  329. autogen/oai/__init__.py +61 -0
  330. autogen/oai/anthropic.py +1516 -0
  331. autogen/oai/bedrock.py +800 -0
  332. autogen/oai/cerebras.py +302 -0
  333. autogen/oai/client.py +1658 -0
  334. autogen/oai/client_utils.py +196 -0
  335. autogen/oai/cohere.py +494 -0
  336. autogen/oai/gemini.py +1045 -0
  337. autogen/oai/gemini_types.py +156 -0
  338. autogen/oai/groq.py +319 -0
  339. autogen/oai/mistral.py +311 -0
  340. autogen/oai/oai_models/__init__.py +23 -0
  341. autogen/oai/oai_models/_models.py +16 -0
  342. autogen/oai/oai_models/chat_completion.py +86 -0
  343. autogen/oai/oai_models/chat_completion_audio.py +32 -0
  344. autogen/oai/oai_models/chat_completion_message.py +97 -0
  345. autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
  346. autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
  347. autogen/oai/oai_models/completion_usage.py +59 -0
  348. autogen/oai/ollama.py +657 -0
  349. autogen/oai/openai_responses.py +451 -0
  350. autogen/oai/openai_utils.py +897 -0
  351. autogen/oai/together.py +387 -0
  352. autogen/remote/__init__.py +18 -0
  353. autogen/remote/agent.py +199 -0
  354. autogen/remote/agent_service.py +197 -0
  355. autogen/remote/errors.py +17 -0
  356. autogen/remote/httpx_client_factory.py +131 -0
  357. autogen/remote/protocol.py +37 -0
  358. autogen/remote/retry.py +102 -0
  359. autogen/remote/runtime.py +96 -0
  360. autogen/retrieve_utils.py +490 -0
  361. autogen/runtime_logging.py +161 -0
  362. autogen/testing/__init__.py +12 -0
  363. autogen/testing/messages.py +45 -0
  364. autogen/testing/test_agent.py +111 -0
  365. autogen/token_count_utils.py +280 -0
  366. autogen/tools/__init__.py +20 -0
  367. autogen/tools/contrib/__init__.py +9 -0
  368. autogen/tools/contrib/time/__init__.py +7 -0
  369. autogen/tools/contrib/time/time.py +40 -0
  370. autogen/tools/dependency_injection.py +249 -0
  371. autogen/tools/experimental/__init__.py +54 -0
  372. autogen/tools/experimental/browser_use/__init__.py +7 -0
  373. autogen/tools/experimental/browser_use/browser_use.py +154 -0
  374. autogen/tools/experimental/code_execution/__init__.py +7 -0
  375. autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
  376. autogen/tools/experimental/crawl4ai/__init__.py +7 -0
  377. autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
  378. autogen/tools/experimental/deep_research/__init__.py +7 -0
  379. autogen/tools/experimental/deep_research/deep_research.py +329 -0
  380. autogen/tools/experimental/duckduckgo/__init__.py +7 -0
  381. autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
  382. autogen/tools/experimental/firecrawl/__init__.py +7 -0
  383. autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
  384. autogen/tools/experimental/google/__init__.py +14 -0
  385. autogen/tools/experimental/google/authentication/__init__.py +11 -0
  386. autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
  387. autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
  388. autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
  389. autogen/tools/experimental/google/drive/__init__.py +9 -0
  390. autogen/tools/experimental/google/drive/drive_functions.py +124 -0
  391. autogen/tools/experimental/google/drive/toolkit.py +88 -0
  392. autogen/tools/experimental/google/model.py +17 -0
  393. autogen/tools/experimental/google/toolkit_protocol.py +19 -0
  394. autogen/tools/experimental/google_search/__init__.py +8 -0
  395. autogen/tools/experimental/google_search/google_search.py +93 -0
  396. autogen/tools/experimental/google_search/youtube_search.py +181 -0
  397. autogen/tools/experimental/messageplatform/__init__.py +17 -0
  398. autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
  399. autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
  400. autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
  401. autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
  402. autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
  403. autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
  404. autogen/tools/experimental/perplexity/__init__.py +7 -0
  405. autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
  406. autogen/tools/experimental/reliable/__init__.py +10 -0
  407. autogen/tools/experimental/reliable/reliable.py +1311 -0
  408. autogen/tools/experimental/searxng/__init__.py +7 -0
  409. autogen/tools/experimental/searxng/searxng_search.py +142 -0
  410. autogen/tools/experimental/tavily/__init__.py +7 -0
  411. autogen/tools/experimental/tavily/tavily_search.py +176 -0
  412. autogen/tools/experimental/web_search_preview/__init__.py +7 -0
  413. autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
  414. autogen/tools/experimental/wikipedia/__init__.py +7 -0
  415. autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
  416. autogen/tools/function_utils.py +412 -0
  417. autogen/tools/tool.py +188 -0
  418. autogen/tools/toolkit.py +86 -0
  419. autogen/types.py +29 -0
  420. autogen/version.py +7 -0
  421. templates/client_template/main.jinja2 +72 -0
  422. templates/config_template/config.jinja2 +7 -0
  423. templates/main.jinja2 +61 -0
@@ -0,0 +1,79 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from typing import Any, Literal
8
+
9
+ from ...llm_config import LLMConfig
10
+ from ..agent import Agent
11
+ from ..assistant_agent import ConversableAgent
12
+
13
# Default system prompt for TextAnalyzerAgent (used as the default value of its
# `system_message` constructor argument). Built from one entry per prompt line.
system_message = "\n".join(
    (
        "You are an expert in text analysis.",
        "The user will give you TEXT to analyze.",
        "The user will give you analysis INSTRUCTIONS copied twice, at both the beginning and the end.",
        "You will follow these INSTRUCTIONS in analyzing the TEXT, then give the results of your expert analysis in the format requested.",
    )
)
17
+
18
+
19
class TextAnalyzerAgent(ConversableAgent):
    """(Experimental) Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed.

    The agent registers `_analyze_in_reply` as a reply function, which expects a
    two-message conversation (text first, instructions second) and answers with
    the result of `analyze_text`.
    """

    def __init__(
        self,
        name: str = "analyzer",
        system_message: str | None = system_message,
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        llm_config: LLMConfig | dict[str, Any] | bool | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the analyzer and register its analysis reply function.

        Args:
            name (str): name of the agent.
            system_message (str): system message for the ChatCompletion inference.
            human_input_mode (str): This agent should NEVER prompt the human for input.
            llm_config (LLMConfig or dict or False): llm inference configuration.
                Please refer to [OpenAIWrapper.create](https://docs.ag2.ai/latest/docs/api-reference/autogen/OpenAIWrapper/#autogen.OpenAIWrapper.create)
                for available options.
                To disable llm-based auto reply, set to False.
            **kwargs (dict): other kwargs in [ConversableAgent](/docs/api-reference/autogen/ConversableAgent#conversableagent).
        """
        super().__init__(
            name=name,
            system_message=system_message,
            human_input_mode=human_input_mode,
            llm_config=llm_config,
            **kwargs,
        )
        # Registered for any `Agent` sender. NOTE(review): `position=2` is passed
        # straight to `register_reply`; confirm the intended ordering against
        # ConversableAgent.register_reply.
        self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply, position=2)

    def _analyze_in_reply(
        self,
        messages: list[dict[str, Any]] | None = None,
        sender: Agent | None = None,
        config: Any | None = None,
    ) -> tuple[bool, str | dict[str, Any] | None]:
        """Analyzes the given text as instructed, and returns the analysis as a message.

        Assumes exactly two messages containing the text to analyze and the analysis instructions.
        See Teachability.analyze for an example of how to use this method.

        Args:
            messages: the conversation to analyze; when None, the messages stored
                for `sender` in `self._oai_messages` are used instead.
            sender: the agent whose stored messages are looked up when `messages` is None.
            config: unused by this method.

        Returns:
            A `(True, analysis)` tuple, where `analysis` is whatever `analyze_text` returns.

        Raises:
            ValueError: if `self.llm_config` is False.
            AssertionError: if the conversation does not hold exactly two messages.
        """
        if self.llm_config is False:
            raise ValueError("TextAnalyzerAgent requires self.llm_config to be set in its base class.")
        if messages is None:
            messages = self._oai_messages[sender]  # In case of a direct call.

        # Explicit check instead of a bare `assert`: asserts are stripped under
        # `python -O`, which would let a malformed conversation through. The
        # exception type is kept as AssertionError for backward compatibility.
        if len(messages) != 2:
            raise AssertionError(
                "TextAnalyzerAgent expects exactly two messages "
                f"(the text, then the instructions); got {len(messages)}."
            )

        # Delegate to the analysis method.
        text_message, instructions_message = messages
        return True, self.analyze_text(text_message["content"], instructions_message["content"])

    def analyze_text(self, text_to_analyze: str, analysis_instructions: str) -> str | dict[str, Any] | None:
        """Analyzes the given text as instructed, and returns the analysis.

        Args:
            text_to_analyze: the text to be analyzed.
            analysis_instructions: the instructions describing the analysis to perform.

        Returns:
            The second element of the tuple returned by `self.generate_oai_reply`.
        """
        # Assemble the message: instructions, then the text, then the instructions again.
        text_section = "# TEXT\n" + text_to_analyze + "\n"
        instructions_section = "# INSTRUCTIONS\n" + analysis_instructions + "\n"
        msg_text = "\n".join([
            instructions_section,
            text_section,
            instructions_section,
        ])  # Repeat the instructions.
        # Generate and return the analysis string.
        return self.generate_oai_reply([{"role": "user", "content": msg_text}], None, None)[1]
@@ -0,0 +1,5 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
# Empty export list: a star-import of this package binds no names.
__all__: list[str] = []
@@ -0,0 +1,224 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from collections.abc import Callable, Mapping, Sequence
8
+ from typing import Any, Protocol, TypedDict, runtime_checkable
9
+
10
+ Metadata = Mapping[str, Any] | None
11
+ Vector = Sequence[float] | Sequence[int]
12
+ ItemID = str | int # chromadb doesn't support int ids, VikingDB does
13
+
14
+
15
class Document(TypedDict):
    """One record of a vector database.

    Keys:
        id: ItemID | The unique identifier of the document.
        content: str | The text content of the chunk.
        metadata: Metadata, Optional | Additional information about the document such as source, date, etc.
        embedding: Vector, Optional | The vector representation of the content.
    """

    id: ItemID
    content: str
    metadata: Metadata | None
    embedding: Vector | None
28
+
29
+
30
# QueryResults is the response from the vector database for a query/queries.
# A query is a list containing one string while queries is a list containing multiple strings.
# The response is a list of query results; each query result is a list of tuples containing
# the document and the distance.
QueryResults = list[list[tuple[Document, float]]]
35
+
36
+
37
@runtime_checkable
class VectorDB(Protocol):
    """Protocol (structural interface) for a vector database.

    A vector database is responsible for storing and retrieving documents.

    Attributes:
        active_collection: Any | The active collection in the vector database. Make get_collection faster. Default is None.
        type: str | The type of the vector database, chroma, pgvector, etc. Default is "".
        embedding_function: Callable | Maps a list of sentences to a list of embeddings,
            i.e. `embeddings = embedding_function(sentences)`. Default is None.

    Methods:
        create_collection: Callable[[str, bool, bool], Any] | Create a collection in the vector database.
        get_collection: Callable[[str], Any] | Get the collection from the vector database.
        delete_collection: Callable[[str], Any] | Delete the collection from the vector database.
        insert_docs: Callable[[List[Document], str, bool], None] | Insert documents into the collection of the vector database.
        update_docs: Callable[[List[Document], str], None] | Update documents in the collection of the vector database.
        delete_docs: Callable[[List[ItemID], str], None] | Delete documents from the collection of the vector database.
        retrieve_docs: Callable[[List[str], str, int, float], QueryResults] | Retrieve documents from the collection of the vector database based on the queries.
        get_docs_by_ids: Callable[[List[ItemID], str], List[Document]] | Retrieve documents from the collection of the vector database based on the ids.
    """

    active_collection: Any = None
    type: str = ""
    embedding_function: Callable[[list[str]], list[list[float]]] | None = (
        None  # embeddings = embedding_function(sentences)
    )

    def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any:
        """Create a collection in the vector database.

        Case 1. if the collection does not exist, create the collection.
        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
        Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
            otherwise it raises a ValueError.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to get the collection if it exists. Default is True.

        Returns:
            Any | The collection object.
        """
        ...

    def get_collection(self, collection_name: str | None = None) -> Any:
        """Get the collection from the vector database.

        Args:
            collection_name: str | The name of the collection. Default is None. If None, return the
                current active collection.

        Returns:
            Any | The collection object.
        """
        ...

    def delete_collection(self, collection_name: str) -> Any:
        """Delete the collection from the vector database.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            Any
        """
        ...

    def insert_docs(
        self, docs: list[Document], collection_name: str | None = None, upsert: bool = False, **kwargs
    ) -> None:
        """Insert documents into the collection of the vector database.

        Args:
            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
            collection_name: str | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.
            kwargs: Dict | Additional keyword arguments.

        Returns:
            None
        """
        ...

    def update_docs(self, docs: list[Document], collection_name: str | None = None, **kwargs) -> None:
        """Update documents in the collection of the vector database.

        Args:
            docs: List[Document] | A list of documents.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments.

        Returns:
            None
        """
        ...

    def delete_docs(self, ids: list[ItemID], collection_name: str | None = None, **kwargs) -> None:
        """Delete documents from the collection of the vector database.

        Args:
            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments.

        Returns:
            None
        """
        ...

    def retrieve_docs(
        self,
        queries: list[str],
        collection_name: str | None = None,
        n_results: int = 10,
        distance_threshold: float = -1,
        **kwargs: Any,
    ) -> QueryResults:
        """Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: List[str] | A list of queries. Each query is a string.
            collection_name: str | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
                returned. Don't filter with it if < 0. Default is -1.
            kwargs: Dict | Additional keyword arguments.

        Returns:
            QueryResults | The query results. Each query result is a list of list of tuples containing the document and
                the distance.
        """
        ...

    def get_docs_by_ids(
        self,
        ids: list[ItemID] | None = None,
        collection_name: str | None = None,
        include: list[str] | None = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
            collection_name: str | The name of the collection. Default is None.
            include: List[str] | The fields to include. Default is None.
                If None, will include ["metadatas", "documents"], ids will always be included. This may differ
                depending on the implementation.
            kwargs: dict | Additional keyword arguments.

        Returns:
            List[Document] | The results.
        """
        ...
183
+
184
+
185
class VectorDBFactory:
    """Factory that builds a vector database from a type name."""

    PREDEFINED_VECTOR_DB = ["chroma", "pgvector", "mongodb", "qdrant", "couchbase"]

    @staticmethod
    def create_vector_db(db_type: str, **kwargs) -> VectorDB:
        """Create a vector database.

        The type name is matched case-insensitively. Each backend module is imported only
        when its type is selected.

        Args:
            db_type: str | The type of the vector database.
            kwargs: Dict | The keyword arguments for initializing the vector database.

        Returns:
            VectorDB | The vector database.

        Raises:
            ValueError: If `db_type` does not match any supported name.
        """
        requested = db_type.lower()

        if requested in ("chroma", "chromadb"):
            from .chromadb import ChromaVectorDB

            return ChromaVectorDB(**kwargs)

        if requested in ("pgvector", "pgvectordb"):
            from .pgvectordb import PGVectorDB

            return PGVectorDB(**kwargs)

        if requested in ("mdb", "mongodb", "atlas"):
            from .mongodb import MongoDBAtlasVectorDB

            return MongoDBAtlasVectorDB(**kwargs)

        if requested in ("qdrant", "qdrantdb"):
            from .qdrant import QdrantVectorDB

            return QdrantVectorDB(**kwargs)

        if requested in ("couchbase", "couchbasedb", "capella"):
            from .couchbase import CouchbaseVectorDB

            return CouchbaseVectorDB(**kwargs)

        raise ValueError(
            f"Unsupported vector database type: {db_type}. Valid types are {VectorDBFactory.PREDEFINED_VECTOR_DB}."
        )
@@ -0,0 +1,316 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import os
8
+ from collections.abc import Callable
9
+ from typing import Any
10
+
11
+ from ....import_utils import optional_import_block, require_optional_import
12
+ from .base import Document, ItemID, QueryResults, VectorDB
13
+ from .utils import chroma_results_to_query_results, filter_results_by_distance, get_logger
14
+
15
with optional_import_block() as result:
    import chromadb
    import chromadb.errors
    import chromadb.utils.embedding_functions as ef
    from chromadb.api.models.Collection import Collection


def _version_tuple(version: str) -> tuple[int, ...]:
    """Convert a dotted version string into a tuple of ints for numeric comparison.

    Only the leading digits of each of the first three dot-separated components are used,
    so suffixes such as "rc1" are ignored; a component without leading digits counts as 0.
    """
    numbers = []
    for component in version.split(".")[:3]:
        digits = ""
        for char in component:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


# Compare versions numerically: comparing the raw strings is lexicographic, which would
# accept "0.4.9" and reject "0.10.0".
if result.is_successful and _version_tuple(chromadb.__version__) < (0, 4, 15):
    raise ImportError("Please upgrade chromadb to version 0.4.15 or later.")


# Maximum number of documents per add/upsert call; may be overridden through the environment
# (the value is then a string and is converted with int() where it is used).
CHROMADB_MAX_BATCH_SIZE = os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000)
logger = get_logger(__name__)
27
+
28
+
29
@require_optional_import("chromadb", "retrievechat")
class ChromaVectorDB(VectorDB):
    """A vector database that uses ChromaDB as the backend."""

    def __init__(
        self,
        *,
        client=None,
        path: str | None = "tmp/db",
        embedding_function: Callable | None = None,
        metadata: dict | None = None,
        **kwargs,
    ) -> None:
        """Initialize the vector database.

        Args:
            client: chromadb.Client | The client object of the vector database. Default is None.
                If provided, it will use the client object directly and ignore other arguments.
            path: str | The path to the vector database. Default is `tmp/db`. The default was `None` for version `<=0.2.24`.
                If None (and no client is given), a non-persistent `chromadb.Client` is created.
            embedding_function: Callable | The embedding function used to generate the vector representation
                of the documents. Default is None, SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") will be used.
            metadata: dict | The metadata of the vector database. Default is None. If None, it will use this
                setting: `{"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}`. For more details of
                the metadata, please refer to [distances](https://github.com/nmslib/hnswlib#supported-distances),
                [hnsw](https://github.com/chroma-core/chroma/blob/566bc80f6c8ee29f7d99b6322654f32183c368c4/chromadb/segment/impl/vector/local_hnsw.py#L184),
                and [ALGO_PARAMS](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md).
            kwargs: dict | Additional keyword arguments, forwarded to the chromadb client constructor.

        Returns:
            None
        """
        self.client = client
        self.path = path
        self.embedding_function = (
            ef.SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
            if embedding_function is None
            else embedding_function
        )
        self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}
        if not self.client:
            if self.path is not None:
                self.client = chromadb.PersistentClient(path=self.path, **kwargs)
            else:
                self.client = chromadb.Client(**kwargs)
        self.active_collection = None
        self.type = "chroma"

    def create_collection(
        self, collection_name: str, overwrite: bool = False, get_or_create: bool = True
    ) -> "Collection":
        """Create a collection in the vector database.

        Case 1. if the collection does not exist, create the collection.
        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
        Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
            otherwise it raises a ValueError.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to get the collection if it exists. Default is True.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If the collection exists and both `overwrite` and `get_or_create` are False.
        """
        try:
            if self.active_collection and self.active_collection.name == collection_name:
                collection = self.active_collection
            else:
                collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function)
        except (ValueError, chromadb.errors.ChromaError):
            # The lookup failed: treat the collection as not existing yet.
            collection = None

        if collection is None:
            return self.client.create_collection(
                collection_name,
                embedding_function=self.embedding_function,
                get_or_create=get_or_create,
                metadata=self.metadata,
            )
        if overwrite:
            self.client.delete_collection(collection_name)
            # Drop the cached handle so later get_collection calls do not return the deleted collection.
            if self.active_collection and self.active_collection.name == collection_name:
                self.active_collection = None
            return self.client.create_collection(
                collection_name,
                embedding_function=self.embedding_function,
                get_or_create=get_or_create,
                metadata=self.metadata,
            )
        if get_or_create:
            return collection
        raise ValueError(f"Collection {collection_name} already exists.")

    def get_collection(self, collection_name: str | None = None) -> "Collection":
        """Get the collection from the vector database.

        The returned collection also becomes the active collection.

        Args:
            collection_name: str | The name of the collection. Default is None. If None, return the
                current active collection.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If `collection_name` is None and there is no active collection.
        """
        if collection_name is None:
            if self.active_collection is None:
                raise ValueError("No collection is specified.")
            logger.info(f"No collection is specified. Using current active collection {self.active_collection.name}.")
        elif not (self.active_collection and self.active_collection.name == collection_name):
            # Only query the client when the requested collection is not the cached one.
            self.active_collection = self.client.get_collection(
                name=collection_name, embedding_function=self.embedding_function
            )
        return self.active_collection

    def delete_collection(self, collection_name: str) -> None:
        """Delete the collection from the vector database.

        If the deleted collection is the active one, the active collection is reset to None.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            None
        """
        self.client.delete_collection(collection_name)
        if self.active_collection and self.active_collection.name == collection_name:
            self.active_collection = None

    def _batch_insert(
        self, collection: "Collection", embeddings=None, ids=None, metadatas=None, documents=None, upsert=False
    ) -> None:
        """Add or upsert documents into `collection` in batches of at most CHROMADB_MAX_BATCH_SIZE.

        Args:
            collection: Collection | The target collection.
            embeddings: The embeddings aligned with `documents`, or None.
            ids: The ids aligned with `documents`.
            metadatas: The metadatas aligned with `documents`, or None.
            documents: The document contents. Nothing is done when this is empty or None.
            upsert: bool | Use `collection.upsert` instead of `collection.add`. Default is False.

        Raises:
            ValueError: If CHROMADB_MAX_BATCH_SIZE is not a positive integer.
        """
        if not documents:
            return
        batch_size = int(CHROMADB_MAX_BATCH_SIZE)
        if batch_size < 1:
            raise ValueError(f"CHROMADB_MAX_BATCH_SIZE must be a positive integer, got {batch_size}.")

        write = collection.upsert if upsert else collection.add
        for start in range(0, len(documents), batch_size):
            end = start + batch_size  # slicing clamps the final, shorter batch
            write(
                documents=documents[start:end],
                ids=ids[start:end],
                metadatas=metadatas[start:end] if metadatas else None,
                embeddings=embeddings[start:end] if embeddings else None,
            )

    def insert_docs(
        self, docs: list[Document], collection_name: str | None = None, upsert: bool = False, **kwargs: Any
    ) -> None:
        """Insert documents into the collection of the vector database.

        Whether embeddings and metadatas are sent is decided from the first document only.

        Args:
            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
            collection_name: str | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.
            kwargs: Dict | Additional keyword arguments. Accepted for compatibility with the
                `VectorDB` interface; they are not used by this implementation.

        Returns:
            None

        Raises:
            ValueError: If the first document has no content or no id.
        """
        if not docs:
            return
        first = docs[0]
        if first.get("content") is None:
            raise ValueError("The document content is required.")
        if first.get("id") is None:
            raise ValueError("The document id is required.")
        documents = [doc.get("content") for doc in docs]
        ids = [doc.get("id") for doc in docs]
        collection = self.get_collection(collection_name)
        if first.get("embedding") is None:
            logger.info(
                "No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding."
            )
            embeddings = None
        else:
            embeddings = [doc.get("embedding") for doc in docs]
        metadatas = None if first.get("metadata") is None else [doc.get("metadata") for doc in docs]
        self._batch_insert(collection, embeddings, ids, metadatas, documents, upsert)

    def update_docs(self, docs: list[Document], collection_name: str | None = None, **kwargs: Any) -> None:
        """Update documents in the collection of the vector database.

        Implemented as an upsert through `insert_docs`.

        Args:
            docs: List[Document] | A list of documents.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments, passed on to `insert_docs`.

        Returns:
            None
        """
        self.insert_docs(docs, collection_name, upsert=True, **kwargs)

    def delete_docs(self, ids: list[ItemID], collection_name: str | None = None, **kwargs) -> None:
        """Delete documents from the collection of the vector database.

        Args:
            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments, forwarded to the collection's `delete`.

        Returns:
            None
        """
        collection = self.get_collection(collection_name)
        collection.delete(ids, **kwargs)

    def retrieve_docs(
        self,
        queries: list[str],
        collection_name: str | None = None,
        n_results: int = 10,
        distance_threshold: float = -1,
        **kwargs: Any,
    ) -> QueryResults:
        """Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: List[str] | A list of queries. Each query is a string. A single string is
                also accepted and treated as a one-element list.
            collection_name: str | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
                returned. Don't filter with it if `< 0`. Default is -1.
            kwargs: Dict | Additional keyword arguments, forwarded to the collection's `query`.

        Returns:
            QueryResults | The query results. Each query result is a list of list of tuples containing the document and
                the distance.
        """
        collection = self.get_collection(collection_name)
        if isinstance(queries, str):
            queries = [queries]
        results = collection.query(
            query_texts=queries,
            n_results=n_results,
            **kwargs,
        )
        # Rename the field to "contents" before converting the results.
        results["contents"] = results.pop("documents")
        results = chroma_results_to_query_results(results)
        results = filter_results_by_distance(results, distance_threshold)
        return results

    @staticmethod
    def _chroma_get_results_to_list_documents(data_dict) -> list[Document]:
        """Converts a dictionary with list values to a list of Document.

        Keys whose value is None are skipped. Each remaining key is stored under its name with
        the last character removed (e.g. "ids" -> "id"). The number of results is the length of
        the first non-None value.

        Args:
            data_dict: A dictionary where keys map to lists or None.

        Returns:
            List[Document] | The list of Document. Empty if every value is None.

        Example:
            ```python
            data_dict = {
                "key1s": [1, 2, 3],
                "key2s": ["a", "b", "c"],
                "key3s": None,
                "key4s": ["x", "y", "z"],
            }

            results = [
                {"key1": 1, "key2": "a", "key4": "x"},
                {"key1": 2, "key2": "b", "key4": "y"},
                {"key1": 3, "key2": "c", "key4": "z"},
            ]
            ```
        """
        populated = [key for key in data_dict if data_dict[key] is not None]
        if not populated:
            return []

        results = []
        for i in range(len(data_dict[populated[0]])):
            # A field shorter than the first one simply contributes nothing to this row.
            results.append({key[:-1]: data_dict[key][i] for key in populated if len(data_dict[key]) > i})
        return results

    def get_docs_by_ids(
        self,
        ids: list[ItemID] | None = None,
        collection_name: str | None = None,
        include: list[str] | None = None,
        **kwargs,
    ) -> list[Document]:
        """Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
            collection_name: str | The name of the collection. Default is None.
            include: List[str] | The fields to include. Default is None.
                If None, will include ["metadatas", "documents"], ids will always be included.
            kwargs: dict | Additional keyword arguments, forwarded to the collection's `get`.

        Returns:
            List[Document] | The results.
        """
        collection = self.get_collection(collection_name)
        include = include if include else ["metadatas", "documents"]
        results = collection.get(ids, include=include, **kwargs)
        return self._chroma_get_results_to_list_documents(results)