ag2 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423)
  1. ag2-0.10.2.dist-info/METADATA +819 -0
  2. ag2-0.10.2.dist-info/RECORD +423 -0
  3. ag2-0.10.2.dist-info/WHEEL +4 -0
  4. ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
  5. ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
  6. autogen/__init__.py +88 -0
  7. autogen/_website/__init__.py +3 -0
  8. autogen/_website/generate_api_references.py +426 -0
  9. autogen/_website/generate_mkdocs.py +1216 -0
  10. autogen/_website/notebook_processor.py +475 -0
  11. autogen/_website/process_notebooks.py +656 -0
  12. autogen/_website/utils.py +413 -0
  13. autogen/a2a/__init__.py +36 -0
  14. autogen/a2a/agent_executor.py +86 -0
  15. autogen/a2a/client.py +357 -0
  16. autogen/a2a/errors.py +18 -0
  17. autogen/a2a/httpx_client_factory.py +79 -0
  18. autogen/a2a/server.py +221 -0
  19. autogen/a2a/utils.py +207 -0
  20. autogen/agentchat/__init__.py +47 -0
  21. autogen/agentchat/agent.py +180 -0
  22. autogen/agentchat/assistant_agent.py +86 -0
  23. autogen/agentchat/chat.py +325 -0
  24. autogen/agentchat/contrib/__init__.py +5 -0
  25. autogen/agentchat/contrib/agent_eval/README.md +7 -0
  26. autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
  27. autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
  28. autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
  29. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
  30. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
  31. autogen/agentchat/contrib/agent_eval/task.py +42 -0
  32. autogen/agentchat/contrib/agent_optimizer.py +432 -0
  33. autogen/agentchat/contrib/capabilities/__init__.py +5 -0
  34. autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
  35. autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
  36. autogen/agentchat/contrib/capabilities/teachability.py +393 -0
  37. autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
  38. autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
  39. autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
  40. autogen/agentchat/contrib/capabilities/transforms.py +578 -0
  41. autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
  42. autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
  43. autogen/agentchat/contrib/captainagent/__init__.py +9 -0
  44. autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
  45. autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
  46. autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
  47. autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
  48. autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
  49. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
  50. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
  51. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
  52. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
  53. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
  54. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
  55. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
  56. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
  57. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
  58. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
  59. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
  60. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
  61. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
  62. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
  63. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
  64. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
  65. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
  66. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
  67. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
  68. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
  69. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
  70. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
  71. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
  72. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
  73. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
  74. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
  75. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
  76. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
  77. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
  78. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
  79. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
  80. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
  81. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
  82. autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
  83. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
  84. autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
  85. autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
  86. autogen/agentchat/contrib/graph_rag/document.py +29 -0
  87. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
  88. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
  89. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
  90. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
  91. autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
  92. autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
  93. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
  94. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
  95. autogen/agentchat/contrib/img_utils.py +397 -0
  96. autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
  97. autogen/agentchat/contrib/llava_agent.py +189 -0
  98. autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
  99. autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
  100. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  101. autogen/agentchat/contrib/rag/__init__.py +10 -0
  102. autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
  103. autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
  104. autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
  105. autogen/agentchat/contrib/rag/query_engine.py +76 -0
  106. autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
  107. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
  108. autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
  109. autogen/agentchat/contrib/swarm_agent.py +1404 -0
  110. autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
  111. autogen/agentchat/contrib/vectordb/__init__.py +5 -0
  112. autogen/agentchat/contrib/vectordb/base.py +224 -0
  113. autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
  114. autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
  115. autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
  116. autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
  117. autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
  118. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  119. autogen/agentchat/contrib/web_surfer.py +304 -0
  120. autogen/agentchat/conversable_agent.py +4307 -0
  121. autogen/agentchat/group/__init__.py +67 -0
  122. autogen/agentchat/group/available_condition.py +91 -0
  123. autogen/agentchat/group/context_condition.py +77 -0
  124. autogen/agentchat/group/context_expression.py +238 -0
  125. autogen/agentchat/group/context_str.py +39 -0
  126. autogen/agentchat/group/context_variables.py +182 -0
  127. autogen/agentchat/group/events/transition_events.py +111 -0
  128. autogen/agentchat/group/group_tool_executor.py +324 -0
  129. autogen/agentchat/group/group_utils.py +659 -0
  130. autogen/agentchat/group/guardrails.py +179 -0
  131. autogen/agentchat/group/handoffs.py +303 -0
  132. autogen/agentchat/group/llm_condition.py +93 -0
  133. autogen/agentchat/group/multi_agent_chat.py +291 -0
  134. autogen/agentchat/group/on_condition.py +55 -0
  135. autogen/agentchat/group/on_context_condition.py +51 -0
  136. autogen/agentchat/group/patterns/__init__.py +18 -0
  137. autogen/agentchat/group/patterns/auto.py +160 -0
  138. autogen/agentchat/group/patterns/manual.py +177 -0
  139. autogen/agentchat/group/patterns/pattern.py +295 -0
  140. autogen/agentchat/group/patterns/random.py +106 -0
  141. autogen/agentchat/group/patterns/round_robin.py +117 -0
  142. autogen/agentchat/group/reply_result.py +24 -0
  143. autogen/agentchat/group/safeguards/__init__.py +21 -0
  144. autogen/agentchat/group/safeguards/api.py +241 -0
  145. autogen/agentchat/group/safeguards/enforcer.py +1158 -0
  146. autogen/agentchat/group/safeguards/events.py +140 -0
  147. autogen/agentchat/group/safeguards/validator.py +435 -0
  148. autogen/agentchat/group/speaker_selection_result.py +41 -0
  149. autogen/agentchat/group/targets/__init__.py +4 -0
  150. autogen/agentchat/group/targets/function_target.py +245 -0
  151. autogen/agentchat/group/targets/group_chat_target.py +133 -0
  152. autogen/agentchat/group/targets/group_manager_target.py +151 -0
  153. autogen/agentchat/group/targets/transition_target.py +424 -0
  154. autogen/agentchat/group/targets/transition_utils.py +6 -0
  155. autogen/agentchat/groupchat.py +1832 -0
  156. autogen/agentchat/realtime/__init__.py +3 -0
  157. autogen/agentchat/realtime/experimental/__init__.py +20 -0
  158. autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
  159. autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
  160. autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
  161. autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
  162. autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
  163. autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
  164. autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
  165. autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
  166. autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
  167. autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
  168. autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
  169. autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
  170. autogen/agentchat/realtime/experimental/function_observer.py +84 -0
  171. autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
  172. autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
  173. autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
  174. autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
  175. autogen/agentchat/realtime/experimental/websockets.py +21 -0
  176. autogen/agentchat/realtime_agent/__init__.py +21 -0
  177. autogen/agentchat/user_proxy_agent.py +114 -0
  178. autogen/agentchat/utils.py +206 -0
  179. autogen/agents/__init__.py +3 -0
  180. autogen/agents/contrib/__init__.py +10 -0
  181. autogen/agents/contrib/time/__init__.py +8 -0
  182. autogen/agents/contrib/time/time_reply_agent.py +74 -0
  183. autogen/agents/contrib/time/time_tool_agent.py +52 -0
  184. autogen/agents/experimental/__init__.py +27 -0
  185. autogen/agents/experimental/deep_research/__init__.py +7 -0
  186. autogen/agents/experimental/deep_research/deep_research.py +52 -0
  187. autogen/agents/experimental/discord/__init__.py +7 -0
  188. autogen/agents/experimental/discord/discord.py +66 -0
  189. autogen/agents/experimental/document_agent/__init__.py +19 -0
  190. autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
  191. autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
  192. autogen/agents/experimental/document_agent/document_agent.py +643 -0
  193. autogen/agents/experimental/document_agent/document_conditions.py +50 -0
  194. autogen/agents/experimental/document_agent/document_utils.py +376 -0
  195. autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
  196. autogen/agents/experimental/document_agent/parser_utils.py +134 -0
  197. autogen/agents/experimental/document_agent/url_utils.py +417 -0
  198. autogen/agents/experimental/reasoning/__init__.py +7 -0
  199. autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
  200. autogen/agents/experimental/slack/__init__.py +7 -0
  201. autogen/agents/experimental/slack/slack.py +73 -0
  202. autogen/agents/experimental/telegram/__init__.py +7 -0
  203. autogen/agents/experimental/telegram/telegram.py +76 -0
  204. autogen/agents/experimental/websurfer/__init__.py +7 -0
  205. autogen/agents/experimental/websurfer/websurfer.py +70 -0
  206. autogen/agents/experimental/wikipedia/__init__.py +7 -0
  207. autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
  208. autogen/browser_utils.py +309 -0
  209. autogen/cache/__init__.py +10 -0
  210. autogen/cache/abstract_cache_base.py +71 -0
  211. autogen/cache/cache.py +203 -0
  212. autogen/cache/cache_factory.py +88 -0
  213. autogen/cache/cosmos_db_cache.py +144 -0
  214. autogen/cache/disk_cache.py +97 -0
  215. autogen/cache/in_memory_cache.py +54 -0
  216. autogen/cache/redis_cache.py +119 -0
  217. autogen/code_utils.py +598 -0
  218. autogen/coding/__init__.py +30 -0
  219. autogen/coding/base.py +120 -0
  220. autogen/coding/docker_commandline_code_executor.py +283 -0
  221. autogen/coding/factory.py +56 -0
  222. autogen/coding/func_with_reqs.py +203 -0
  223. autogen/coding/jupyter/__init__.py +23 -0
  224. autogen/coding/jupyter/base.py +36 -0
  225. autogen/coding/jupyter/docker_jupyter_server.py +160 -0
  226. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  227. autogen/coding/jupyter/import_utils.py +82 -0
  228. autogen/coding/jupyter/jupyter_client.py +224 -0
  229. autogen/coding/jupyter/jupyter_code_executor.py +154 -0
  230. autogen/coding/jupyter/local_jupyter_server.py +164 -0
  231. autogen/coding/local_commandline_code_executor.py +341 -0
  232. autogen/coding/markdown_code_extractor.py +44 -0
  233. autogen/coding/utils.py +55 -0
  234. autogen/coding/yepcode_code_executor.py +197 -0
  235. autogen/doc_utils.py +35 -0
  236. autogen/environments/__init__.py +10 -0
  237. autogen/environments/docker_python_environment.py +365 -0
  238. autogen/environments/python_environment.py +125 -0
  239. autogen/environments/system_python_environment.py +85 -0
  240. autogen/environments/venv_python_environment.py +220 -0
  241. autogen/environments/working_directory.py +74 -0
  242. autogen/events/__init__.py +7 -0
  243. autogen/events/agent_events.py +1016 -0
  244. autogen/events/base_event.py +100 -0
  245. autogen/events/client_events.py +168 -0
  246. autogen/events/helpers.py +44 -0
  247. autogen/events/print_event.py +45 -0
  248. autogen/exception_utils.py +73 -0
  249. autogen/extensions/__init__.py +5 -0
  250. autogen/fast_depends/__init__.py +16 -0
  251. autogen/fast_depends/_compat.py +75 -0
  252. autogen/fast_depends/core/__init__.py +14 -0
  253. autogen/fast_depends/core/build.py +206 -0
  254. autogen/fast_depends/core/model.py +527 -0
  255. autogen/fast_depends/dependencies/__init__.py +15 -0
  256. autogen/fast_depends/dependencies/model.py +30 -0
  257. autogen/fast_depends/dependencies/provider.py +40 -0
  258. autogen/fast_depends/library/__init__.py +10 -0
  259. autogen/fast_depends/library/model.py +46 -0
  260. autogen/fast_depends/py.typed +6 -0
  261. autogen/fast_depends/schema.py +66 -0
  262. autogen/fast_depends/use.py +272 -0
  263. autogen/fast_depends/utils.py +177 -0
  264. autogen/formatting_utils.py +83 -0
  265. autogen/function_utils.py +13 -0
  266. autogen/graph_utils.py +173 -0
  267. autogen/import_utils.py +539 -0
  268. autogen/interop/__init__.py +22 -0
  269. autogen/interop/crewai/__init__.py +7 -0
  270. autogen/interop/crewai/crewai.py +88 -0
  271. autogen/interop/interoperability.py +71 -0
  272. autogen/interop/interoperable.py +46 -0
  273. autogen/interop/langchain/__init__.py +8 -0
  274. autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
  275. autogen/interop/langchain/langchain_tool.py +78 -0
  276. autogen/interop/litellm/__init__.py +7 -0
  277. autogen/interop/litellm/litellm_config_factory.py +178 -0
  278. autogen/interop/pydantic_ai/__init__.py +7 -0
  279. autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
  280. autogen/interop/registry.py +70 -0
  281. autogen/io/__init__.py +15 -0
  282. autogen/io/base.py +151 -0
  283. autogen/io/console.py +56 -0
  284. autogen/io/processors/__init__.py +12 -0
  285. autogen/io/processors/base.py +21 -0
  286. autogen/io/processors/console_event_processor.py +61 -0
  287. autogen/io/run_response.py +294 -0
  288. autogen/io/thread_io_stream.py +63 -0
  289. autogen/io/websockets.py +214 -0
  290. autogen/json_utils.py +42 -0
  291. autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
  292. autogen/llm_clients/__init__.py +77 -0
  293. autogen/llm_clients/client_v2.py +122 -0
  294. autogen/llm_clients/models/__init__.py +55 -0
  295. autogen/llm_clients/models/content_blocks.py +389 -0
  296. autogen/llm_clients/models/unified_message.py +145 -0
  297. autogen/llm_clients/models/unified_response.py +83 -0
  298. autogen/llm_clients/openai_completions_client.py +444 -0
  299. autogen/llm_config/__init__.py +11 -0
  300. autogen/llm_config/client.py +59 -0
  301. autogen/llm_config/config.py +461 -0
  302. autogen/llm_config/entry.py +169 -0
  303. autogen/llm_config/types.py +37 -0
  304. autogen/llm_config/utils.py +223 -0
  305. autogen/logger/__init__.py +11 -0
  306. autogen/logger/base_logger.py +129 -0
  307. autogen/logger/file_logger.py +262 -0
  308. autogen/logger/logger_factory.py +42 -0
  309. autogen/logger/logger_utils.py +57 -0
  310. autogen/logger/sqlite_logger.py +524 -0
  311. autogen/math_utils.py +338 -0
  312. autogen/mcp/__init__.py +7 -0
  313. autogen/mcp/__main__.py +78 -0
  314. autogen/mcp/helpers.py +45 -0
  315. autogen/mcp/mcp_client.py +349 -0
  316. autogen/mcp/mcp_proxy/__init__.py +19 -0
  317. autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
  318. autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
  319. autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
  320. autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
  321. autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
  322. autogen/mcp/mcp_proxy/security.py +399 -0
  323. autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
  324. autogen/messages/__init__.py +7 -0
  325. autogen/messages/agent_messages.py +946 -0
  326. autogen/messages/base_message.py +108 -0
  327. autogen/messages/client_messages.py +172 -0
  328. autogen/messages/print_message.py +48 -0
  329. autogen/oai/__init__.py +61 -0
  330. autogen/oai/anthropic.py +1516 -0
  331. autogen/oai/bedrock.py +800 -0
  332. autogen/oai/cerebras.py +302 -0
  333. autogen/oai/client.py +1658 -0
  334. autogen/oai/client_utils.py +196 -0
  335. autogen/oai/cohere.py +494 -0
  336. autogen/oai/gemini.py +1045 -0
  337. autogen/oai/gemini_types.py +156 -0
  338. autogen/oai/groq.py +319 -0
  339. autogen/oai/mistral.py +311 -0
  340. autogen/oai/oai_models/__init__.py +23 -0
  341. autogen/oai/oai_models/_models.py +16 -0
  342. autogen/oai/oai_models/chat_completion.py +86 -0
  343. autogen/oai/oai_models/chat_completion_audio.py +32 -0
  344. autogen/oai/oai_models/chat_completion_message.py +97 -0
  345. autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
  346. autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
  347. autogen/oai/oai_models/completion_usage.py +59 -0
  348. autogen/oai/ollama.py +657 -0
  349. autogen/oai/openai_responses.py +451 -0
  350. autogen/oai/openai_utils.py +897 -0
  351. autogen/oai/together.py +387 -0
  352. autogen/remote/__init__.py +18 -0
  353. autogen/remote/agent.py +199 -0
  354. autogen/remote/agent_service.py +197 -0
  355. autogen/remote/errors.py +17 -0
  356. autogen/remote/httpx_client_factory.py +131 -0
  357. autogen/remote/protocol.py +37 -0
  358. autogen/remote/retry.py +102 -0
  359. autogen/remote/runtime.py +96 -0
  360. autogen/retrieve_utils.py +490 -0
  361. autogen/runtime_logging.py +161 -0
  362. autogen/testing/__init__.py +12 -0
  363. autogen/testing/messages.py +45 -0
  364. autogen/testing/test_agent.py +111 -0
  365. autogen/token_count_utils.py +280 -0
  366. autogen/tools/__init__.py +20 -0
  367. autogen/tools/contrib/__init__.py +9 -0
  368. autogen/tools/contrib/time/__init__.py +7 -0
  369. autogen/tools/contrib/time/time.py +40 -0
  370. autogen/tools/dependency_injection.py +249 -0
  371. autogen/tools/experimental/__init__.py +54 -0
  372. autogen/tools/experimental/browser_use/__init__.py +7 -0
  373. autogen/tools/experimental/browser_use/browser_use.py +154 -0
  374. autogen/tools/experimental/code_execution/__init__.py +7 -0
  375. autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
  376. autogen/tools/experimental/crawl4ai/__init__.py +7 -0
  377. autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
  378. autogen/tools/experimental/deep_research/__init__.py +7 -0
  379. autogen/tools/experimental/deep_research/deep_research.py +329 -0
  380. autogen/tools/experimental/duckduckgo/__init__.py +7 -0
  381. autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
  382. autogen/tools/experimental/firecrawl/__init__.py +7 -0
  383. autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
  384. autogen/tools/experimental/google/__init__.py +14 -0
  385. autogen/tools/experimental/google/authentication/__init__.py +11 -0
  386. autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
  387. autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
  388. autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
  389. autogen/tools/experimental/google/drive/__init__.py +9 -0
  390. autogen/tools/experimental/google/drive/drive_functions.py +124 -0
  391. autogen/tools/experimental/google/drive/toolkit.py +88 -0
  392. autogen/tools/experimental/google/model.py +17 -0
  393. autogen/tools/experimental/google/toolkit_protocol.py +19 -0
  394. autogen/tools/experimental/google_search/__init__.py +8 -0
  395. autogen/tools/experimental/google_search/google_search.py +93 -0
  396. autogen/tools/experimental/google_search/youtube_search.py +181 -0
  397. autogen/tools/experimental/messageplatform/__init__.py +17 -0
  398. autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
  399. autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
  400. autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
  401. autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
  402. autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
  403. autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
  404. autogen/tools/experimental/perplexity/__init__.py +7 -0
  405. autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
  406. autogen/tools/experimental/reliable/__init__.py +10 -0
  407. autogen/tools/experimental/reliable/reliable.py +1311 -0
  408. autogen/tools/experimental/searxng/__init__.py +7 -0
  409. autogen/tools/experimental/searxng/searxng_search.py +142 -0
  410. autogen/tools/experimental/tavily/__init__.py +7 -0
  411. autogen/tools/experimental/tavily/tavily_search.py +176 -0
  412. autogen/tools/experimental/web_search_preview/__init__.py +7 -0
  413. autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
  414. autogen/tools/experimental/wikipedia/__init__.py +7 -0
  415. autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
  416. autogen/tools/function_utils.py +412 -0
  417. autogen/tools/tool.py +188 -0
  418. autogen/tools/toolkit.py +86 -0
  419. autogen/types.py +29 -0
  420. autogen/version.py +7 -0
  421. templates/client_template/main.jinja2 +72 -0
  422. templates/config_template/config.jinja2 +7 -0
  423. templates/main.jinja2 +61 -0
@@ -0,0 +1,927 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import os
8
+ import re
9
+ import urllib.parse
10
+ from collections.abc import Callable
11
+ from typing import Any, Optional
12
+
13
+ from ....import_utils import optional_import_block, require_optional_import
14
+ from .base import Document, ItemID, QueryResults, VectorDB
15
+ from .utils import get_logger
16
+
17
+ with optional_import_block():
18
+ import numpy as np
19
+ import pgvector # noqa: F401
20
+ import psycopg
21
+ from pgvector.psycopg import register_vector
22
+ from sentence_transformers import SentenceTransformer
23
+
24
# Maximum batch size, overridable through the PGVECTOR_MAX_BATCH_SIZE environment variable.
# Environment values are always strings while the built-in default is an int, so coerce
# to int to give the constant one consistent numeric type regardless of where it came from.
# A non-numeric override raises ValueError at import time instead of leaking a str downstream.
PGVECTOR_MAX_BATCH_SIZE = int(os.environ.get("PGVECTOR_MAX_BATCH_SIZE", 40000))
25
+ logger = get_logger(__name__)
26
+
27
+
28
+ @require_optional_import(["psycopg", "sentence_transformers", "numpy"], "retrievechat-pgvector")
29
+ class Collection:
30
+ """A Collection object for PGVector.
31
+
32
+ Attributes:
33
+ client: The PGVector client.
34
+ collection_name (str): The name of the collection. Default is "documents".
35
+ embedding_function (Callable): The embedding function used to generate the vector representation.
36
+ Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None.
37
+ Models can be chosen from:
38
+ https://huggingface.co/models?library=sentence-transformers
39
+ metadata (Optional[dict[str, Any]]): The metadata of the collection.
40
+ get_or_create (Optional): The flag indicating whether to get or create the collection.
41
+ """
42
+
43
+ def __init__(
44
+ self,
45
+ client: Any | None = None,
46
+ collection_name: str = "ag2-docs",
47
+ embedding_function: Callable[..., Any] | None = None,
48
+ metadata: Any | None = None,
49
+ get_or_create: Any | None = None,
50
+ ):
51
+ """Initialize the Collection object.
52
+
53
+ Args:
54
+ client: The PostgreSQL client.
55
+ collection_name: The name of the collection. Default is "documents".
56
+ embedding_function: The embedding function used to generate the vector representation.
57
+ metadata: The metadata of the collection.
58
+ get_or_create: The flag indicating whether to get or create the collection.
59
+
60
+ Returns:
61
+ None
62
+ """
63
+ self.client = client
64
+ self.name = self.set_collection_name(collection_name)
65
+ self.require_embeddings_or_documents = False
66
+ self.ids = []
67
+ if embedding_function:
68
+ self.embedding_function = embedding_function
69
+ else:
70
+ self.embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode
71
+ self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16}
72
+ self.documents = ""
73
+ self.get_or_create = get_or_create
74
+ # This will get the model dimension size by computing the embeddings dimensions
75
+ sentences = [
76
+ "The weather is lovely today in paradise.",
77
+ ]
78
+ embeddings = self.embedding_function(sentences)
79
+ self.dimension = len(embeddings[0])
80
+
81
def set_collection_name(self, collection_name) -> str:
    """Normalize ``collection_name`` and store it as this collection's name.

    Every hyphen in the given name is replaced with an underscore and the
    result is assigned to ``self.name``.

    Args:
        collection_name: The requested collection name.

    Returns:
        str: The normalized name now held in ``self.name``.
    """
    self.name = re.sub("-", "_", collection_name)
    return self.name
85
+
86
def add(
    self,
    ids: list[ItemID],
    documents: list[Document] | None,
    embeddings: list[Any] | None = None,
    metadatas: list[Any] | None = None,
) -> None:
    """Insert documents into the collection's table.

    One row is built per position of the zipped inputs, so the inputs are
    truncated to the shortest one. When ``embeddings`` is None, the collection's
    embedding function is called once per document to compute the vector.
    No commit is issued here; transaction handling is left to the client.

    Args:
        ids (List[ItemID]): A list of document IDs.
        documents (List): A list of documents. Although annotated as optional,
            it is always zipped with ``ids``, so passing None raises TypeError.
        embeddings (List): A list of document embeddings. Optional
        metadatas (List): A list of document metadatas. Optional

    Returns:
        None
    """
    import json  # Function-scope import: only needed to serialize metadata.

    def _serialize_metadata(metadata: Any) -> str:
        """Render one metadata entry as the text bound to the ``metadatas`` column."""
        if isinstance(metadata, (dict, list)):
            try:
                # Real JSON encoding: survives apostrophes inside values and maps
                # None/True/False to null/true/false, which quote-swapping cannot.
                return json.dumps(metadata, ensure_ascii=False)
            except (TypeError, ValueError):
                pass  # Not JSON-serializable: fall back to the legacy rendering.
        # Legacy rendering, kept for every other input type.
        return re.sub("'", '"', str(metadata))

    if embeddings is not None and metadatas is not None:
        columns = ("id", "embedding", "metadatas", "documents")
        sql_values = [
            (doc_id, embedding, _serialize_metadata(metadata), document)
            for doc_id, embedding, metadata, document in zip(ids, embeddings, metadatas, documents)
        ]
    elif embeddings is not None:
        columns = ("id", "embedding", "documents")
        sql_values = [
            (doc_id, embedding, document) for doc_id, embedding, document in zip(ids, embeddings, documents)
        ]
    elif metadatas is not None:
        columns = ("id", "metadatas", "embedding", "documents")
        sql_values = [
            (doc_id, _serialize_metadata(metadata), self.embedding_function(document), document)
            for doc_id, metadata, document in zip(ids, metadatas, documents)
        ]
    else:
        columns = ("id", "documents", "embedding")
        sql_values = [
            (doc_id, document, self.embedding_function(document)) for doc_id, document in zip(ids, documents)
        ]

    # NOTE: the collection name is interpolated into the statement text (identifiers
    # cannot be bound as parameters), so it must come from a trusted source.
    placeholders = ", ".join(["%s"] * len(columns))
    sql_string = f"INSERT INTO {self.name} ({', '.join(columns)})\nVALUES ({placeholders});\n"

    cursor = self.client.cursor()
    try:
        # Lazy %-style arguments avoid rendering the whole payload (including every
        # embedding vector) unless debug logging is actually enabled.
        logger.debug("Add SQL String:\n%s\n%s", sql_string, sql_values)
        cursor.executemany(sql_string, sql_values)
    finally:
        # Always release the cursor, even when the insert fails.
        cursor.close()
129
+
130
def upsert(
    self,
    ids: list[ItemID],
    documents: list[Document],
    embeddings: list[Any] | None = None,
    metadatas: list[Any] | None = None,
) -> None:
    """Insert documents into the collection, updating rows whose id already exists.

    One row is built per position of the zipped inputs, so the inputs are
    truncated to the shortest one. When ``embeddings`` is None, the collection's
    embedding function is called once per document to compute the vector.
    On an id conflict every non-id column supplied by the insert is overwritten
    with the new value, so a freshly computed embedding always replaces the old
    one together with the document text.
    No commit is issued here; transaction handling is left to the client.

    Args:
        ids (List[ItemID]): A list of document IDs.
        documents (List): A list of documents.
        embeddings (List): A list of document embeddings. Optional
        metadatas (List): A list of document metadatas. Optional

    Returns:
        None
    """
    import json  # Function-scope import: only needed to serialize metadata.

    def _serialize_metadata(metadata: Any) -> str:
        """Render one metadata entry as the text bound to the ``metadatas`` column."""
        if isinstance(metadata, (dict, list)):
            try:
                # Real JSON encoding: survives apostrophes inside values and maps
                # None/True/False to null/true/false, which quote-swapping cannot.
                return json.dumps(metadata, ensure_ascii=False)
            except (TypeError, ValueError):
                pass  # Not JSON-serializable: fall back to the legacy rendering.
        # Legacy rendering, kept for every other input type.
        return re.sub("'", '"', str(metadata))

    if embeddings is not None and metadatas is not None:
        columns = ("id", "embedding", "metadatas", "documents")
        sql_values = [
            (doc_id, embedding, _serialize_metadata(metadata), document)
            for doc_id, embedding, metadata, document in zip(ids, embeddings, metadatas, documents)
        ]
    elif embeddings is not None:
        columns = ("id", "embedding", "documents")
        sql_values = [
            (doc_id, embedding, document) for doc_id, embedding, document in zip(ids, embeddings, documents)
        ]
    elif metadatas is not None:
        columns = ("id", "metadatas", "embedding", "documents")
        sql_values = [
            (doc_id, _serialize_metadata(metadata), self.embedding_function(document), document)
            for doc_id, metadata, document in zip(ids, metadatas, documents)
        ]
    else:
        columns = ("id", "documents", "embedding")
        sql_values = [
            (doc_id, document, self.embedding_function(document)) for doc_id, document in zip(ids, documents)
        ]

    # EXCLUDED refers to the row proposed for insertion, so each value is bound once
    # and every inserted column (embedding included) is refreshed on conflict.
    placeholders = ", ".join(["%s"] * len(columns))
    assignments = ", ".join(f"{column} = EXCLUDED.{column}" for column in columns[1:])
    # NOTE: the collection name is interpolated into the statement text (identifiers
    # cannot be bound as parameters), so it must come from a trusted source.
    sql_string = (
        f"INSERT INTO {self.name} ({', '.join(columns)})\n"
        f"VALUES ({placeholders})\n"
        f"ON CONFLICT (id)\n"
        f"DO UPDATE SET {assignments};\n"
    )

    cursor = self.client.cursor()
    try:
        # Lazy %-style arguments avoid rendering the whole payload (including every
        # embedding vector) unless debug logging is actually enabled.
        logger.debug("Upsert SQL String:\n%s\n%s", sql_string, sql_values)
        cursor.executemany(sql_string, sql_values)
    finally:
        # Always release the cursor, even when the upsert fails.
        cursor.close()
193
+
194
+ def count(self) -> int:
195
+ """Get the total number of documents in the collection.
196
+
197
+ Returns:
198
+ int: The total number of documents.
199
+ """
200
+ cursor = self.client.cursor()
201
+ query = f"SELECT COUNT(*) FROM {self.name}"
202
+ cursor.execute(query)
203
+ total = cursor.fetchone()[0]
204
+ cursor.close()
205
+ try:
206
+ total = int(total)
207
+ except (TypeError, ValueError):
208
+ total = None
209
+ return total
210
+
211
def table_exists(self, table_name: str) -> bool:
    """Check if a table exists in the PostgreSQL database.

    The lookup goes through `information_schema.tables` and matches on the table
    name only.

    Args:
        table_name (str): The name of the table to check.

    Returns:
        bool: True if the table exists, False otherwise.
    """
    cursor = self.client.cursor()
    try:
        cursor.execute(
            """
            SELECT EXISTS (
                SELECT 1
                FROM information_schema.tables
                WHERE table_name = %s
            )
            """,
            (table_name,),
        )
        exists = cursor.fetchone()[0]
    finally:
        # The cursor was previously left open on every call.
        cursor.close()
    return bool(exists)
233
+
234
def get(
    self,
    ids: str | None = None,
    include: str | None = None,
    where: str | None = None,
    limit: int | str | None = None,
    offset: int | str | None = None,
) -> list[Document]:
    """Retrieve documents from the collection.

    Args:
        ids (Optional[List]): Document IDs to fetch. A single string is treated as one ID.
            When given, `where` is ignored.
        include (Optional): Column names to select in addition to `id` and `embedding`
            (a single string is treated as one column). Defaults to `metadatas` and
            `documents`. Columns that are not selected come back as None.
        where (Optional): Raw SQL condition placed after `WHERE`; it is interpolated as given.
        limit (Optional): The maximum number of documents to retrieve.
        offset (Optional): The offset for pagination.

    Returns:
        List: The retrieved documents. If the table or a selected column does not exist,
            the collection table is created and an empty list is returned.
    """
    # A bare string must not be iterated character by character.
    id_list = [ids] if isinstance(ids, str) else list(ids or [])
    requested = [include] if isinstance(include, str) else list(include or ["metadatas", "documents"])

    columns = ["id"]
    for column in (*requested, "embedding"):
        if column not in columns:
            columns.append(column)

    clauses = [f"SELECT {', '.join(columns)}", f"FROM {self.name}"]
    query_params = []
    if id_list:
        clauses.append(f"WHERE id IN ({', '.join(['%s'] * len(id_list))})")
        query_params.extend(id_list)
    elif where:
        clauses.append(f"WHERE {where}")
    # LIMIT/OFFSET values are bound together with the ids so the number of
    # placeholders always matches the number of parameters.
    if limit:
        clauses.append("LIMIT %s")
        query_params.append(limit)
    if offset:
        clauses.append("OFFSET %s")
        query_params.append(offset)
    query = " ".join(clauses)

    retrieved_documents = []
    cursor = self.client.cursor()
    try:
        cursor.execute(query, query_params)
        for row in cursor.fetchall():
            # Map values by column name instead of fixed positions, so a custom
            # `include` cannot shift metadata/content/embedding into the wrong field.
            record = dict(zip(columns, row))
            retrieved_documents.append(
                Document(
                    id=record["id"].strip(),
                    metadata=record.get("metadatas"),
                    content=record.get("documents"),
                    embedding=record.get("embedding"),
                )
            )
    except (psycopg.errors.UndefinedTable, psycopg.errors.UndefinedColumn) as e:
        logger.info(f"Error executing select on non-existent table: {self.name}. Creating it instead. Error: {e}")
        self.create_collection(collection_name=self.name, dimension=self.dimension)
        logger.info(f"Created table {self.name}")
    finally:
        cursor.close()
    return retrieved_documents
311
+
312
def update(self, ids: list[str], embeddings: list[Any], metadatas: list[Any], documents: list[Document]) -> None:
    """Update documents in the collection, inserting any whose id is not present yet.

    This delegates to `upsert`, which writes the `metadatas` and `documents`
    columns; the statement previously built here targeted `metadata` and
    `document` instead.

    Args:
        ids (List): A list of document IDs.
        embeddings (List): A list of document embeddings.
        metadatas (List): A list of document metadatas.
        documents (List): A list of documents.

    Returns:
        None
    """
    self.upsert(ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas)
338
+
339
+ @staticmethod
340
+ def euclidean_distance(arr1: list[float], arr2: list[float]) -> float:
341
+ """Calculate the Euclidean distance between two vectors.
342
+
343
+ Parameters:
344
+ - arr1 (List[float]): The first vector.
345
+ - arr2 (List[float]): The second vector.
346
+
347
+ Returns:
348
+ - float: The Euclidean distance between arr1 and arr2.
349
+ """
350
+ dist = np.linalg.norm(arr1 - arr2)
351
+ return dist
352
+
353
+ @staticmethod
354
+ def cosine_distance(arr1: list[float], arr2: list[float]) -> float:
355
+ """Calculate the cosine distance between two vectors.
356
+
357
+ Parameters:
358
+ - arr1 (List[float]): The first vector.
359
+ - arr2 (List[float]): The second vector.
360
+
361
+ Returns:
362
+ - float: The cosine distance between arr1 and arr2.
363
+ """
364
+ dist = np.dot(arr1, arr2) / (np.linalg.norm(arr1) * np.linalg.norm(arr2))
365
+ return dist
366
+
367
+ @staticmethod
368
+ def inner_product_distance(arr1: list[float], arr2: list[float]) -> float:
369
+ """Calculate the Euclidean distance between two vectors.
370
+
371
+ Parameters:
372
+ - arr1 (List[float]): The first vector.
373
+ - arr2 (List[float]): The second vector.
374
+
375
+ Returns:
376
+ - float: The Euclidean distance between arr1 and arr2.
377
+ """
378
+ dist = np.linalg.norm(arr1 - arr2)
379
+ return dist
380
+
381
def query(
    self,
    query_texts: list[str],
    collection_name: str | None = None,
    n_results: int | None = 10,
    distance_type: str | None = "euclidean",
    distance_threshold: float | None = -1,
    include_embedding: bool | None = False,
) -> QueryResults:
    """Query documents in the collection.

    Args:
        query_texts (List[str]): A list of query texts.
        collection_name (Optional[str]): The name of the collection. When given, it also
            becomes this object's collection name.
        n_results (int): The maximum number of results to return.
        distance_type (Optional[str]): Distance search type - "euclidean", "cosine" or
            "inner-product". None or an unknown value falls back to euclidean.
        distance_threshold (Optional[float]): Only rows whose distance is below this value
            are returned. None or a negative value disables the filter.
        include_embedding (Optional[bool]): Include embedding values in QueryResults
    Returns:
        QueryResults: One list per query text, each holding (document, distance) tuples
            ordered by distance as computed by the database.
    """
    if collection_name:
        self.name = collection_name

    metric = (distance_type or "euclidean").lower()
    operators = {"cosine": "<=>", "euclidean": "<->", "inner-product": "<#>"}
    distance_functions = {
        "cosine": self.cosine_distance,
        "euclidean": self.euclidean_distance,
        "inner-product": self.inner_product_distance,
    }
    index_function = operators.get(metric, "<->")
    compute_distance = distance_functions.get(metric, self.euclidean_distance)
    # Any negative threshold means "no filter"; previously only -1 produced valid SQL.
    apply_threshold = distance_threshold is not None and distance_threshold >= 0

    cursor = self.client.cursor()
    results = []
    try:
        for query_text in query_texts:
            vector = self.embedding_function(query_text, convert_to_tensor=False).tolist()
            distance_expression = f"embedding {index_function} '{vector!s}'"
            where_clause = f"WHERE {distance_expression} < {distance_threshold} " if apply_threshold else ""
            # ORDER BY is always present so LIMIT keeps the nearest rows even when filtering.
            query = (
                f"SELECT id, documents, embedding, metadatas "
                f"FROM {self.name} "
                f"{where_clause}"
                f"ORDER BY {distance_expression} "
                f"LIMIT {n_results}"
            )
            cursor.execute(query)
            result = []
            for row in cursor.fetchall():
                doc_id, content, embedding, metadata = row[0].strip(), row[1], row[2], row[3]
                distance = compute_distance(self.convert_string_to_array(array_string=embedding), vector)
                if include_embedding:
                    fetched_document = Document(id=doc_id, content=content, embedding=embedding, metadata=metadata)
                else:
                    fetched_document = Document(id=doc_id, content=content, metadata=metadata)
                result.append((fetched_document, distance))
            results.append(result)
    finally:
        cursor.close()
    logger.debug(f"Query Results: {results}")
    return results
451
+
452
+ @staticmethod
453
+ def convert_string_to_array(array_string: str) -> list[float]:
454
+ """Convert a string representation of an array to a list of floats.
455
+
456
+ Parameters:
457
+ - array_string (str): The string representation of the array.
458
+
459
+ Returns:
460
+ - list: A list of floats parsed from the input string. If the input is
461
+ not a string, it returns the input itself.
462
+ """
463
+ if not isinstance(array_string, str):
464
+ return array_string
465
+ array_string = array_string.strip("[]")
466
+ array = [float(num) for num in array_string.split()]
467
+ return array
468
+
469
+ def modify(self, metadata, collection_name: str | None = None) -> None:
470
+ """Modify metadata for the collection.
471
+
472
+ Args:
473
+ collection_name: The name of the collection.
474
+ metadata: The new metadata.
475
+
476
+ Returns:
477
+ None
478
+ """
479
+ if collection_name:
480
+ self.name = collection_name
481
+ cursor = self.client.cursor()
482
+ cursor.execute("UPDATE collectionsSET metadata = '%s'WHERE collection_name = '%s';", (metadata, self.name))
483
+ cursor.close()
484
+
485
def delete(self, ids: list[ItemID], collection_name: str | None = None) -> None:
    """Delete documents from the collection.

    Args:
        ids (List[ItemID]): A list of document IDs to delete. An empty list is a no-op.
        collection_name (str): The name of the collection to delete from. When given, it
            also becomes this object's collection name.

    Returns:
        None
    """
    if collection_name:
        self.name = collection_name
    if not ids:
        # "WHERE id IN ()" is not valid SQL, and there is nothing to delete anyway.
        return
    id_list = list(ids)
    id_placeholders = ", ".join(["%s"] * len(id_list))
    cursor = self.client.cursor()
    try:
        cursor.execute(f"DELETE FROM {self.name} WHERE id IN ({id_placeholders});", id_list)
    finally:
        cursor.close()
501
+
502
+ def delete_collection(self, collection_name: str | None = None) -> None:
503
+ """Delete the entire collection.
504
+
505
+ Args:
506
+ collection_name (Optional[str]): The name of the collection to delete.
507
+
508
+ Returns:
509
+ None
510
+ """
511
+ if collection_name:
512
+ self.name = collection_name
513
+ cursor = self.client.cursor()
514
+ cursor.execute(f"DROP TABLE IF EXISTS {self.name}")
515
+ cursor.close()
516
+
517
+ def create_collection(self, collection_name: str | None = None, dimension: str | int | None = None) -> None:
518
+ """Create a new collection.
519
+
520
+ Args:
521
+ collection_name (Optional[str]): The name of the new collection.
522
+ dimension (Optional[Union[str, int]]): The dimension size of the sentence embedding model
523
+
524
+ Returns:
525
+ None
526
+ """
527
+ if collection_name:
528
+ self.name = collection_name
529
+
530
+ if dimension:
531
+ self.dimension = dimension
532
+ elif self.dimension is None:
533
+ self.dimension = 384
534
+
535
+ cursor = self.client.cursor()
536
+ cursor.execute(
537
+ f"CREATE TABLE {self.name} ("
538
+ f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector({self.dimension}));"
539
+ f"CREATE INDEX "
540
+ f"ON {self.name} USING hnsw (embedding vector_l2_ops) WITH (m = {self.metadata['hnsw:M']}, "
541
+ f"ef_construction = {self.metadata['hnsw:construction_ef']});"
542
+ f"CREATE INDEX "
543
+ f"ON {self.name} USING hnsw (embedding vector_cosine_ops) WITH (m = {self.metadata['hnsw:M']}, "
544
+ f"ef_construction = {self.metadata['hnsw:construction_ef']});"
545
+ f"CREATE INDEX "
546
+ f"ON {self.name} USING hnsw (embedding vector_ip_ops) WITH (m = {self.metadata['hnsw:M']}, "
547
+ f"ef_construction = {self.metadata['hnsw:construction_ef']});"
548
+ )
549
+ cursor.close()
550
+
551
+
552
+ @require_optional_import(["pgvector", "psycopg", "sentence_transformers"], "retrievechat-pgvector")
553
+ class PGVectorDB(VectorDB):
554
+ """A vector database that uses PGVector as the backend."""
555
+
556
def __init__(
    self,
    *,
    conn: Optional["psycopg.Connection"] = None,
    connection_string: str | None = None,
    host: str | None = None,
    port: int | str | None = None,
    dbname: str | None = None,
    username: str | None = None,
    password: str | None = None,
    connect_timeout: int | None = 10,
    embedding_function: Callable = None,
    metadata: dict[str, Any] | None = None,
) -> None:
    """Set up the PGVector-backed vector database.

    All connection arguments are forwarded to `establish_connection`; the resulting
    connection is stored on `self.client` and passed to `register_vector`.

    Args:
        conn: psycopg.Connection | An existing connection object to use for the database.
            A connection object may include additional key/values:
            https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
        connection_string: "postgresql://username:password@hostname:port/database" | The PGVector
            connection string. Default is None.
        host: str | The host to connect to. Default is None.
        port: int | The port to connect to. Default is None.
        dbname: str | The database name to connect to. Default is None.
        username: str | The database username to use. Default is None.
        password: str | The database user password to use. Default is None.
        connect_timeout: int | The timeout to set for the connection. Default is 10.
        embedding_function: Callable | The embedding function used to generate the vector representation.
            Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None.
            Models can be chosen from:
            https://huggingface.co/models?library=sentence-transformers
        metadata: dict | The metadata of the vector database. Default is None. It is stored
            unchanged on `self.metadata`.
            NOTE(review): no default is filled in here when it is None - confirm where the
            HNSW defaults are applied.
            For more info: https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw
    Returns:
        None
    """
    connection_options = {
        "conn": conn,
        "connection_string": connection_string,
        "host": host,
        "port": port,
        "dbname": dbname,
        "username": username,
        "password": password,
        "connect_timeout": connect_timeout,
    }
    self.client = self.establish_connection(**connection_options)
    # The default encoder is only constructed when no embedding function was supplied.
    self.embedding_function = embedding_function or SentenceTransformer("all-MiniLM-L6-v2").encode
    self.metadata = metadata
    register_vector(self.client)
    self.active_collection = None
613
+
614
def establish_connection(
    self,
    conn: Optional["psycopg.Connection"] = None,
    connection_string: str | None = None,
    host: str | None = None,
    port: int | str | None = None,
    dbname: str | None = None,
    username: str | None = None,
    password: str | None = None,
    connect_timeout: int | None = 10,
) -> "psycopg.Connection":
    """Establishes a connection to a PostgreSQL database using psycopg.

    The first of `conn`, `connection_string` and `host` that is set decides how the
    connection is obtained. Afterwards `CREATE EXTENSION IF NOT EXISTS vector` is
    executed on it. Connections opened here use autocommit.

    Args:
        conn: An existing psycopg connection object. If provided, this connection will be used.
        connection_string: A connection URI. Its user, password, host and database parts are
            percent-encoded (already-encoded parts are not encoded twice) and its query
            string is preserved.
        host: The hostname of the PostgreSQL server. Used if connection_string is not provided.
        port: The port number to connect to at the server host. Used if connection_string is not provided.
        dbname: The database name. Used if connection_string is not provided.
        username: The username to connect as. Used if connection_string is not provided.
        password: The user's password. Used if connection_string is not provided.
        connect_timeout: Maximum wait for connection, in seconds. The default is 10 seconds.
            Only applied when connecting through `host`.

    Returns:
        A psycopg.Connection object representing the established connection.

    Raises:
        PermissionError if no credentials are supplied
        psycopg.Error: If an error occurs while trying to connect to the database.
    """
    if not (conn or connection_string or host):
        logger.error("Credentials were not supplied...")
        raise PermissionError("No connection, connection string or host was supplied.")

    def _requote(value: str) -> str:
        # Decode first so that input which is already percent-encoded is not encoded twice.
        return urllib.parse.quote(urllib.parse.unquote(value), safe="")

    try:
        if conn:
            self.client = conn
        elif connection_string:
            parsed_connection = urllib.parse.urlparse(connection_string)
            # Username, password and port are all optional in a URI; leave out what is absent.
            userinfo = ""
            if parsed_connection.username is not None:
                userinfo = _requote(parsed_connection.username)
                if parsed_connection.password is not None:
                    userinfo += f":{_requote(parsed_connection.password)}"
                userinfo += "@"
            netloc = userinfo + _requote(parsed_connection.hostname or "")
            if parsed_connection.port is not None:
                netloc += f":{parsed_connection.port}"
            connection_string_encoded = (
                f"{parsed_connection.scheme}://{netloc}/{_requote(parsed_connection.path[1:])}"
            )
            if parsed_connection.query:
                # Keep connection options such as sslmode.
                connection_string_encoded += f"?{parsed_connection.query}"
            self.client = psycopg.connect(conninfo=connection_string_encoded, autocommit=True)
        else:
            # Pass the values as keyword parameters so psycopg builds the conninfo.
            # NOTE(review): percent-encoding is assumed not to be decoded in
            # keyword/value connection strings - confirm against the libpq docs.
            connection_params = {
                "host": host,
                "port": port,
                "dbname": dbname,
                "user": username,
                "password": password,
                "connect_timeout": connect_timeout,
            }
            supplied = {key: value for key, value in connection_params.items() if value not in (None, "")}
            self.client = psycopg.connect(autocommit=True, **supplied)
        self.client.execute("CREATE EXTENSION IF NOT EXISTS vector")
    except psycopg.Error as e:
        logger.error("Error connecting to the database: %s", e)
        raise
    return self.client
690
+
691
def create_collection(
    self, collection_name: str, overwrite: bool = False, get_or_create: bool = True
) -> Collection:
    """Create a collection in the vector database, or return the existing one.

    A collection handle is looked up first (the active collection if its name matches,
    otherwise via `get_collection`). Then:
    - no handle could be obtained: a new collection is built and created;
    - `overwrite` is True: the collection is deleted, then built and created again;
    - `get_or_create` is True: the handle is returned as is;
    - otherwise a new collection is built and created unless its table already exists,
      in which case a ValueError is raised.

    Args:
        collection_name: str | The name of the collection.
        overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
        get_or_create: bool | Whether to get the collection if it exists. Default is True.

    Returns:
        Collection | The collection object.

    Raises:
        ValueError: If the table exists and neither `overwrite` nor `get_or_create` is set.
    """

    def _build_and_create() -> Collection:
        # Construct a new handle, name it and create its table.
        fresh = Collection(
            client=self.client,
            collection_name=collection_name,
            embedding_function=self.embedding_function,
            get_or_create=get_or_create,
            metadata=self.metadata,
        )
        fresh.set_collection_name(collection_name=collection_name)
        fresh.create_collection(collection_name=collection_name)
        return fresh

    try:
        if self.active_collection and self.active_collection.name == collection_name:
            existing = self.active_collection
        else:
            existing = self.get_collection(collection_name)
    except ValueError:
        existing = None

    if existing is None:
        return _build_and_create()
    if overwrite:
        self.delete_collection(collection_name)
        return _build_and_create()
    if get_or_create:
        return existing
    if existing.table_exists(table_name=collection_name):
        raise ValueError(f"Collection {collection_name} already exists.")
    return _build_and_create()
753
+
754
def get_collection(self, collection_name: str = None) -> Collection:
    """Return the collection handle for `collection_name`, making it the active one.

    Args:
        collection_name: str | The name of the collection. Default is None. If None, return the
            current active collection.

    Returns:
        Collection | The collection object.

    Raises:
        ValueError: If no name is given and there is no active collection.
    """
    if collection_name is not None:
        current = self.active_collection
        # Reuse the active handle when it already points at the requested collection.
        if not current or current.name != collection_name:
            self.active_collection = Collection(
                client=self.client,
                collection_name=collection_name,
                embedding_function=self.embedding_function,
            )
        return self.active_collection

    if self.active_collection is None:
        raise ValueError("No collection is specified.")
    logger.debug(
        f"No collection is specified. Using current active collection {self.active_collection.name}."
    )
    return self.active_collection
779
+
780
def delete_collection(self, collection_name: str) -> None:
    """Delete the collection from the vector database.

    The active collection is cleared only when it is the one being deleted. Deleting
    a different collection leaves the active collection, and its name, untouched.

    Args:
        collection_name: str | The name of the collection.

    Returns:
        None
    """
    active = self.active_collection
    if not active:
        # get_collection makes the requested collection active, so clear it afterwards.
        self.get_collection(collection_name).delete_collection(collection_name)
        if self.active_collection and self.active_collection.name == collection_name:
            self.active_collection = None
        return

    # The handle's delete_collection renames the handle to the table it drops, so
    # remember the real name and restore it if another collection was deleted.
    previous_name = active.name
    active.delete_collection(collection_name)
    if previous_name == collection_name:
        self.active_collection = None
    else:
        active.name = previous_name
796
+
797
+ def _batch_insert(
798
+ self, collection: Collection, embeddings=None, ids=None, metadatas=None, documents=None, upsert=False
799
+ ) -> None:
800
+ batch_size = int(PGVECTOR_MAX_BATCH_SIZE)
801
+ default_metadata = {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16}
802
+ default_metadatas = [default_metadata] * min(batch_size, len(documents))
803
+ for i in range(0, len(documents), min(batch_size, len(documents))):
804
+ end_idx = i + min(batch_size, len(documents) - i)
805
+ collection_kwargs = {
806
+ "documents": documents[i:end_idx],
807
+ "ids": ids[i:end_idx],
808
+ "metadatas": metadatas[i:end_idx] if metadatas else default_metadatas,
809
+ "embeddings": embeddings[i:end_idx] if embeddings else None,
810
+ }
811
+ if upsert:
812
+ collection.upsert(**collection_kwargs)
813
+ else:
814
+ collection.add(**collection_kwargs)
815
+
816
def insert_docs(self, docs: list[Document], collection_name: str = None, upsert: bool = False) -> None:
    """Insert documents into the collection of the vector database.

    Only the first document is inspected to decide whether content and id are present
    and whether embeddings and metadata are forwarded.

    Args:
        docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
        collection_name: str | The name of the collection. Default is None.
        upsert: bool | Whether to update the document if it exists. Default is False.

    Returns:
        None

    Raises:
        ValueError: If the first document has no content or no id.
    """
    if not docs:
        return
    first_doc = docs[0]
    if first_doc.get("content") is None:
        raise ValueError("The document content is required.")
    if first_doc.get("id") is None:
        raise ValueError("The document id is required.")

    contents = []
    doc_ids = []
    for doc in docs:
        contents.append(doc.get("content"))
        doc_ids.append(doc.get("id"))

    target = self.get_collection(collection_name)

    doc_embeddings = None
    if first_doc.get("embedding") is None:
        logger.debug(
            "No content embedding is provided. "
            "Will use the VectorDB's embedding function to generate the content embedding."
        )
    else:
        doc_embeddings = [doc.get("embedding") for doc in docs]

    doc_metadatas = None
    if first_doc.get("metadata") is not None:
        doc_metadatas = [doc.get("metadata") for doc in docs]

    self._batch_insert(target, doc_embeddings, doc_ids, doc_metadatas, contents, upsert)
849
+
850
def update_docs(self, docs: list[Document], collection_name: str = None) -> None:
    """Update documents in the collection by inserting them with upsert enabled.

    Args:
        docs: List[Document] | A list of documents.
        collection_name: str | The name of the collection. Default is None.

    Returns:
        None
    """
    self.insert_docs(docs=docs, collection_name=collection_name, upsert=True)
861
+
862
def delete_docs(self, ids: list[ItemID], collection_name: str = None) -> None:
    """Delete documents from the collection of the vector database.

    Args:
        ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
        collection_name: str | The name of the collection. Default is None.

    Returns:
        None
    """
    target = self.get_collection(collection_name)
    target.delete(collection_name=collection_name, ids=ids)
875
+
876
def retrieve_docs(
    self,
    queries: list[str],
    collection_name: str = None,
    n_results: int = 10,
    distance_threshold: float = -1,
) -> QueryResults:
    """Retrieve documents from the collection of the vector database based on the queries.

    Args:
        queries: List[str] | A list of queries. Each query is a string. A single string is
            wrapped into a one-element list.
        collection_name: str | The name of the collection. Default is None.
        n_results: int | The number of relevant documents to return. Default is 10.
        distance_threshold: float | The threshold for the distance score, forwarded unchanged
            to the collection's `query`. Default is -1.

    Returns:
        QueryResults | The query results as returned by the collection's `query`.
    """
    target = self.get_collection(collection_name)
    query_list = [queries] if isinstance(queries, str) else queries
    found = target.query(
        query_texts=query_list,
        n_results=n_results,
        distance_threshold=distance_threshold,
    )
    logger.debug(f"Retrieve Docs Results:\n{found}")
    return found
907
+
908
def get_docs_by_ids(
    self, ids: list[ItemID] = None, collection_name: str = None, include=None, **kwargs
) -> list[Document]:
    """Retrieve documents from the collection of the vector database based on the ids.

    Args:
        ids: List[ItemID] | A list of document ids, passed through to the collection's `get`.
            Default is None.
        collection_name: str | The name of the collection. Default is None.
        include: List[str] | The fields to include. Default is None.
            If None or empty, ["metadatas", "documents"] is used.
        kwargs: dict | Additional keyword arguments forwarded to the collection's `get`.

    Returns:
        List[Document] | The results.
    """
    target = self.get_collection(collection_name)
    fields = include or ["metadatas", "documents"]
    found = target.get(ids, include=fields, **kwargs)
    logger.debug(f"Retrieve Documents by ID Results:\n{found}")
    return found