ag2 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423):
  1. ag2-0.10.2.dist-info/METADATA +819 -0
  2. ag2-0.10.2.dist-info/RECORD +423 -0
  3. ag2-0.10.2.dist-info/WHEEL +4 -0
  4. ag2-0.10.2.dist-info/licenses/LICENSE +201 -0
  5. ag2-0.10.2.dist-info/licenses/NOTICE.md +19 -0
  6. autogen/__init__.py +88 -0
  7. autogen/_website/__init__.py +3 -0
  8. autogen/_website/generate_api_references.py +426 -0
  9. autogen/_website/generate_mkdocs.py +1216 -0
  10. autogen/_website/notebook_processor.py +475 -0
  11. autogen/_website/process_notebooks.py +656 -0
  12. autogen/_website/utils.py +413 -0
  13. autogen/a2a/__init__.py +36 -0
  14. autogen/a2a/agent_executor.py +86 -0
  15. autogen/a2a/client.py +357 -0
  16. autogen/a2a/errors.py +18 -0
  17. autogen/a2a/httpx_client_factory.py +79 -0
  18. autogen/a2a/server.py +221 -0
  19. autogen/a2a/utils.py +207 -0
  20. autogen/agentchat/__init__.py +47 -0
  21. autogen/agentchat/agent.py +180 -0
  22. autogen/agentchat/assistant_agent.py +86 -0
  23. autogen/agentchat/chat.py +325 -0
  24. autogen/agentchat/contrib/__init__.py +5 -0
  25. autogen/agentchat/contrib/agent_eval/README.md +7 -0
  26. autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
  27. autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
  28. autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
  29. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
  30. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
  31. autogen/agentchat/contrib/agent_eval/task.py +42 -0
  32. autogen/agentchat/contrib/agent_optimizer.py +432 -0
  33. autogen/agentchat/contrib/capabilities/__init__.py +5 -0
  34. autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
  35. autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
  36. autogen/agentchat/contrib/capabilities/teachability.py +393 -0
  37. autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
  38. autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
  39. autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
  40. autogen/agentchat/contrib/capabilities/transforms.py +578 -0
  41. autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
  42. autogen/agentchat/contrib/capabilities/vision_capability.py +215 -0
  43. autogen/agentchat/contrib/captainagent/__init__.py +9 -0
  44. autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
  45. autogen/agentchat/contrib/captainagent/captainagent.py +514 -0
  46. autogen/agentchat/contrib/captainagent/tool_retriever.py +334 -0
  47. autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
  48. autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
  49. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
  50. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
  51. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
  52. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
  53. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
  54. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
  55. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
  56. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
  57. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
  58. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
  59. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
  60. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
  61. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
  62. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
  63. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
  64. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
  65. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
  66. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
  67. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
  68. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
  69. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
  70. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
  71. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
  72. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
  73. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
  74. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
  75. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
  76. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
  77. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
  78. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
  79. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
  80. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
  81. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
  82. autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
  83. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
  84. autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
  85. autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
  86. autogen/agentchat/contrib/graph_rag/document.py +29 -0
  87. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +167 -0
  88. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
  89. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
  90. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
  91. autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +263 -0
  92. autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
  93. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
  94. autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
  95. autogen/agentchat/contrib/img_utils.py +397 -0
  96. autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
  97. autogen/agentchat/contrib/llava_agent.py +189 -0
  98. autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
  99. autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
  100. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  101. autogen/agentchat/contrib/rag/__init__.py +10 -0
  102. autogen/agentchat/contrib/rag/chromadb_query_engine.py +268 -0
  103. autogen/agentchat/contrib/rag/llamaindex_query_engine.py +195 -0
  104. autogen/agentchat/contrib/rag/mongodb_query_engine.py +319 -0
  105. autogen/agentchat/contrib/rag/query_engine.py +76 -0
  106. autogen/agentchat/contrib/retrieve_assistant_agent.py +59 -0
  107. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +704 -0
  108. autogen/agentchat/contrib/society_of_mind_agent.py +200 -0
  109. autogen/agentchat/contrib/swarm_agent.py +1404 -0
  110. autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
  111. autogen/agentchat/contrib/vectordb/__init__.py +5 -0
  112. autogen/agentchat/contrib/vectordb/base.py +224 -0
  113. autogen/agentchat/contrib/vectordb/chromadb.py +316 -0
  114. autogen/agentchat/contrib/vectordb/couchbase.py +405 -0
  115. autogen/agentchat/contrib/vectordb/mongodb.py +551 -0
  116. autogen/agentchat/contrib/vectordb/pgvectordb.py +927 -0
  117. autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
  118. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  119. autogen/agentchat/contrib/web_surfer.py +304 -0
  120. autogen/agentchat/conversable_agent.py +4307 -0
  121. autogen/agentchat/group/__init__.py +67 -0
  122. autogen/agentchat/group/available_condition.py +91 -0
  123. autogen/agentchat/group/context_condition.py +77 -0
  124. autogen/agentchat/group/context_expression.py +238 -0
  125. autogen/agentchat/group/context_str.py +39 -0
  126. autogen/agentchat/group/context_variables.py +182 -0
  127. autogen/agentchat/group/events/transition_events.py +111 -0
  128. autogen/agentchat/group/group_tool_executor.py +324 -0
  129. autogen/agentchat/group/group_utils.py +659 -0
  130. autogen/agentchat/group/guardrails.py +179 -0
  131. autogen/agentchat/group/handoffs.py +303 -0
  132. autogen/agentchat/group/llm_condition.py +93 -0
  133. autogen/agentchat/group/multi_agent_chat.py +291 -0
  134. autogen/agentchat/group/on_condition.py +55 -0
  135. autogen/agentchat/group/on_context_condition.py +51 -0
  136. autogen/agentchat/group/patterns/__init__.py +18 -0
  137. autogen/agentchat/group/patterns/auto.py +160 -0
  138. autogen/agentchat/group/patterns/manual.py +177 -0
  139. autogen/agentchat/group/patterns/pattern.py +295 -0
  140. autogen/agentchat/group/patterns/random.py +106 -0
  141. autogen/agentchat/group/patterns/round_robin.py +117 -0
  142. autogen/agentchat/group/reply_result.py +24 -0
  143. autogen/agentchat/group/safeguards/__init__.py +21 -0
  144. autogen/agentchat/group/safeguards/api.py +241 -0
  145. autogen/agentchat/group/safeguards/enforcer.py +1158 -0
  146. autogen/agentchat/group/safeguards/events.py +140 -0
  147. autogen/agentchat/group/safeguards/validator.py +435 -0
  148. autogen/agentchat/group/speaker_selection_result.py +41 -0
  149. autogen/agentchat/group/targets/__init__.py +4 -0
  150. autogen/agentchat/group/targets/function_target.py +245 -0
  151. autogen/agentchat/group/targets/group_chat_target.py +133 -0
  152. autogen/agentchat/group/targets/group_manager_target.py +151 -0
  153. autogen/agentchat/group/targets/transition_target.py +424 -0
  154. autogen/agentchat/group/targets/transition_utils.py +6 -0
  155. autogen/agentchat/groupchat.py +1832 -0
  156. autogen/agentchat/realtime/__init__.py +3 -0
  157. autogen/agentchat/realtime/experimental/__init__.py +20 -0
  158. autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
  159. autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
  160. autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
  161. autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
  162. autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
  163. autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
  164. autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
  165. autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
  166. autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
  167. autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
  168. autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
  169. autogen/agentchat/realtime/experimental/clients/realtime_client.py +191 -0
  170. autogen/agentchat/realtime/experimental/function_observer.py +84 -0
  171. autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
  172. autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
  173. autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
  174. autogen/agentchat/realtime/experimental/realtime_swarm.py +533 -0
  175. autogen/agentchat/realtime/experimental/websockets.py +21 -0
  176. autogen/agentchat/realtime_agent/__init__.py +21 -0
  177. autogen/agentchat/user_proxy_agent.py +114 -0
  178. autogen/agentchat/utils.py +206 -0
  179. autogen/agents/__init__.py +3 -0
  180. autogen/agents/contrib/__init__.py +10 -0
  181. autogen/agents/contrib/time/__init__.py +8 -0
  182. autogen/agents/contrib/time/time_reply_agent.py +74 -0
  183. autogen/agents/contrib/time/time_tool_agent.py +52 -0
  184. autogen/agents/experimental/__init__.py +27 -0
  185. autogen/agents/experimental/deep_research/__init__.py +7 -0
  186. autogen/agents/experimental/deep_research/deep_research.py +52 -0
  187. autogen/agents/experimental/discord/__init__.py +7 -0
  188. autogen/agents/experimental/discord/discord.py +66 -0
  189. autogen/agents/experimental/document_agent/__init__.py +19 -0
  190. autogen/agents/experimental/document_agent/chroma_query_engine.py +301 -0
  191. autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +113 -0
  192. autogen/agents/experimental/document_agent/document_agent.py +643 -0
  193. autogen/agents/experimental/document_agent/document_conditions.py +50 -0
  194. autogen/agents/experimental/document_agent/document_utils.py +376 -0
  195. autogen/agents/experimental/document_agent/inmemory_query_engine.py +214 -0
  196. autogen/agents/experimental/document_agent/parser_utils.py +134 -0
  197. autogen/agents/experimental/document_agent/url_utils.py +417 -0
  198. autogen/agents/experimental/reasoning/__init__.py +7 -0
  199. autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
  200. autogen/agents/experimental/slack/__init__.py +7 -0
  201. autogen/agents/experimental/slack/slack.py +73 -0
  202. autogen/agents/experimental/telegram/__init__.py +7 -0
  203. autogen/agents/experimental/telegram/telegram.py +76 -0
  204. autogen/agents/experimental/websurfer/__init__.py +7 -0
  205. autogen/agents/experimental/websurfer/websurfer.py +70 -0
  206. autogen/agents/experimental/wikipedia/__init__.py +7 -0
  207. autogen/agents/experimental/wikipedia/wikipedia.py +88 -0
  208. autogen/browser_utils.py +309 -0
  209. autogen/cache/__init__.py +10 -0
  210. autogen/cache/abstract_cache_base.py +71 -0
  211. autogen/cache/cache.py +203 -0
  212. autogen/cache/cache_factory.py +88 -0
  213. autogen/cache/cosmos_db_cache.py +144 -0
  214. autogen/cache/disk_cache.py +97 -0
  215. autogen/cache/in_memory_cache.py +54 -0
  216. autogen/cache/redis_cache.py +119 -0
  217. autogen/code_utils.py +598 -0
  218. autogen/coding/__init__.py +30 -0
  219. autogen/coding/base.py +120 -0
  220. autogen/coding/docker_commandline_code_executor.py +283 -0
  221. autogen/coding/factory.py +56 -0
  222. autogen/coding/func_with_reqs.py +203 -0
  223. autogen/coding/jupyter/__init__.py +23 -0
  224. autogen/coding/jupyter/base.py +36 -0
  225. autogen/coding/jupyter/docker_jupyter_server.py +160 -0
  226. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  227. autogen/coding/jupyter/import_utils.py +82 -0
  228. autogen/coding/jupyter/jupyter_client.py +224 -0
  229. autogen/coding/jupyter/jupyter_code_executor.py +154 -0
  230. autogen/coding/jupyter/local_jupyter_server.py +164 -0
  231. autogen/coding/local_commandline_code_executor.py +341 -0
  232. autogen/coding/markdown_code_extractor.py +44 -0
  233. autogen/coding/utils.py +55 -0
  234. autogen/coding/yepcode_code_executor.py +197 -0
  235. autogen/doc_utils.py +35 -0
  236. autogen/environments/__init__.py +10 -0
  237. autogen/environments/docker_python_environment.py +365 -0
  238. autogen/environments/python_environment.py +125 -0
  239. autogen/environments/system_python_environment.py +85 -0
  240. autogen/environments/venv_python_environment.py +220 -0
  241. autogen/environments/working_directory.py +74 -0
  242. autogen/events/__init__.py +7 -0
  243. autogen/events/agent_events.py +1016 -0
  244. autogen/events/base_event.py +100 -0
  245. autogen/events/client_events.py +168 -0
  246. autogen/events/helpers.py +44 -0
  247. autogen/events/print_event.py +45 -0
  248. autogen/exception_utils.py +73 -0
  249. autogen/extensions/__init__.py +5 -0
  250. autogen/fast_depends/__init__.py +16 -0
  251. autogen/fast_depends/_compat.py +75 -0
  252. autogen/fast_depends/core/__init__.py +14 -0
  253. autogen/fast_depends/core/build.py +206 -0
  254. autogen/fast_depends/core/model.py +527 -0
  255. autogen/fast_depends/dependencies/__init__.py +15 -0
  256. autogen/fast_depends/dependencies/model.py +30 -0
  257. autogen/fast_depends/dependencies/provider.py +40 -0
  258. autogen/fast_depends/library/__init__.py +10 -0
  259. autogen/fast_depends/library/model.py +46 -0
  260. autogen/fast_depends/py.typed +6 -0
  261. autogen/fast_depends/schema.py +66 -0
  262. autogen/fast_depends/use.py +272 -0
  263. autogen/fast_depends/utils.py +177 -0
  264. autogen/formatting_utils.py +83 -0
  265. autogen/function_utils.py +13 -0
  266. autogen/graph_utils.py +173 -0
  267. autogen/import_utils.py +539 -0
  268. autogen/interop/__init__.py +22 -0
  269. autogen/interop/crewai/__init__.py +7 -0
  270. autogen/interop/crewai/crewai.py +88 -0
  271. autogen/interop/interoperability.py +71 -0
  272. autogen/interop/interoperable.py +46 -0
  273. autogen/interop/langchain/__init__.py +8 -0
  274. autogen/interop/langchain/langchain_chat_model_factory.py +156 -0
  275. autogen/interop/langchain/langchain_tool.py +78 -0
  276. autogen/interop/litellm/__init__.py +7 -0
  277. autogen/interop/litellm/litellm_config_factory.py +178 -0
  278. autogen/interop/pydantic_ai/__init__.py +7 -0
  279. autogen/interop/pydantic_ai/pydantic_ai.py +172 -0
  280. autogen/interop/registry.py +70 -0
  281. autogen/io/__init__.py +15 -0
  282. autogen/io/base.py +151 -0
  283. autogen/io/console.py +56 -0
  284. autogen/io/processors/__init__.py +12 -0
  285. autogen/io/processors/base.py +21 -0
  286. autogen/io/processors/console_event_processor.py +61 -0
  287. autogen/io/run_response.py +294 -0
  288. autogen/io/thread_io_stream.py +63 -0
  289. autogen/io/websockets.py +214 -0
  290. autogen/json_utils.py +42 -0
  291. autogen/llm_clients/MIGRATION_TO_V2.md +782 -0
  292. autogen/llm_clients/__init__.py +77 -0
  293. autogen/llm_clients/client_v2.py +122 -0
  294. autogen/llm_clients/models/__init__.py +55 -0
  295. autogen/llm_clients/models/content_blocks.py +389 -0
  296. autogen/llm_clients/models/unified_message.py +145 -0
  297. autogen/llm_clients/models/unified_response.py +83 -0
  298. autogen/llm_clients/openai_completions_client.py +444 -0
  299. autogen/llm_config/__init__.py +11 -0
  300. autogen/llm_config/client.py +59 -0
  301. autogen/llm_config/config.py +461 -0
  302. autogen/llm_config/entry.py +169 -0
  303. autogen/llm_config/types.py +37 -0
  304. autogen/llm_config/utils.py +223 -0
  305. autogen/logger/__init__.py +11 -0
  306. autogen/logger/base_logger.py +129 -0
  307. autogen/logger/file_logger.py +262 -0
  308. autogen/logger/logger_factory.py +42 -0
  309. autogen/logger/logger_utils.py +57 -0
  310. autogen/logger/sqlite_logger.py +524 -0
  311. autogen/math_utils.py +338 -0
  312. autogen/mcp/__init__.py +7 -0
  313. autogen/mcp/__main__.py +78 -0
  314. autogen/mcp/helpers.py +45 -0
  315. autogen/mcp/mcp_client.py +349 -0
  316. autogen/mcp/mcp_proxy/__init__.py +19 -0
  317. autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +62 -0
  318. autogen/mcp/mcp_proxy/mcp_proxy.py +577 -0
  319. autogen/mcp/mcp_proxy/operation_grouping.py +166 -0
  320. autogen/mcp/mcp_proxy/operation_renaming.py +110 -0
  321. autogen/mcp/mcp_proxy/patch_fastapi_code_generator.py +98 -0
  322. autogen/mcp/mcp_proxy/security.py +399 -0
  323. autogen/mcp/mcp_proxy/security_schema_visitor.py +37 -0
  324. autogen/messages/__init__.py +7 -0
  325. autogen/messages/agent_messages.py +946 -0
  326. autogen/messages/base_message.py +108 -0
  327. autogen/messages/client_messages.py +172 -0
  328. autogen/messages/print_message.py +48 -0
  329. autogen/oai/__init__.py +61 -0
  330. autogen/oai/anthropic.py +1516 -0
  331. autogen/oai/bedrock.py +800 -0
  332. autogen/oai/cerebras.py +302 -0
  333. autogen/oai/client.py +1658 -0
  334. autogen/oai/client_utils.py +196 -0
  335. autogen/oai/cohere.py +494 -0
  336. autogen/oai/gemini.py +1045 -0
  337. autogen/oai/gemini_types.py +156 -0
  338. autogen/oai/groq.py +319 -0
  339. autogen/oai/mistral.py +311 -0
  340. autogen/oai/oai_models/__init__.py +23 -0
  341. autogen/oai/oai_models/_models.py +16 -0
  342. autogen/oai/oai_models/chat_completion.py +86 -0
  343. autogen/oai/oai_models/chat_completion_audio.py +32 -0
  344. autogen/oai/oai_models/chat_completion_message.py +97 -0
  345. autogen/oai/oai_models/chat_completion_message_tool_call.py +60 -0
  346. autogen/oai/oai_models/chat_completion_token_logprob.py +62 -0
  347. autogen/oai/oai_models/completion_usage.py +59 -0
  348. autogen/oai/ollama.py +657 -0
  349. autogen/oai/openai_responses.py +451 -0
  350. autogen/oai/openai_utils.py +897 -0
  351. autogen/oai/together.py +387 -0
  352. autogen/remote/__init__.py +18 -0
  353. autogen/remote/agent.py +199 -0
  354. autogen/remote/agent_service.py +197 -0
  355. autogen/remote/errors.py +17 -0
  356. autogen/remote/httpx_client_factory.py +131 -0
  357. autogen/remote/protocol.py +37 -0
  358. autogen/remote/retry.py +102 -0
  359. autogen/remote/runtime.py +96 -0
  360. autogen/retrieve_utils.py +490 -0
  361. autogen/runtime_logging.py +161 -0
  362. autogen/testing/__init__.py +12 -0
  363. autogen/testing/messages.py +45 -0
  364. autogen/testing/test_agent.py +111 -0
  365. autogen/token_count_utils.py +280 -0
  366. autogen/tools/__init__.py +20 -0
  367. autogen/tools/contrib/__init__.py +9 -0
  368. autogen/tools/contrib/time/__init__.py +7 -0
  369. autogen/tools/contrib/time/time.py +40 -0
  370. autogen/tools/dependency_injection.py +249 -0
  371. autogen/tools/experimental/__init__.py +54 -0
  372. autogen/tools/experimental/browser_use/__init__.py +7 -0
  373. autogen/tools/experimental/browser_use/browser_use.py +154 -0
  374. autogen/tools/experimental/code_execution/__init__.py +7 -0
  375. autogen/tools/experimental/code_execution/python_code_execution.py +86 -0
  376. autogen/tools/experimental/crawl4ai/__init__.py +7 -0
  377. autogen/tools/experimental/crawl4ai/crawl4ai.py +150 -0
  378. autogen/tools/experimental/deep_research/__init__.py +7 -0
  379. autogen/tools/experimental/deep_research/deep_research.py +329 -0
  380. autogen/tools/experimental/duckduckgo/__init__.py +7 -0
  381. autogen/tools/experimental/duckduckgo/duckduckgo_search.py +103 -0
  382. autogen/tools/experimental/firecrawl/__init__.py +7 -0
  383. autogen/tools/experimental/firecrawl/firecrawl_tool.py +836 -0
  384. autogen/tools/experimental/google/__init__.py +14 -0
  385. autogen/tools/experimental/google/authentication/__init__.py +11 -0
  386. autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
  387. autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
  388. autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
  389. autogen/tools/experimental/google/drive/__init__.py +9 -0
  390. autogen/tools/experimental/google/drive/drive_functions.py +124 -0
  391. autogen/tools/experimental/google/drive/toolkit.py +88 -0
  392. autogen/tools/experimental/google/model.py +17 -0
  393. autogen/tools/experimental/google/toolkit_protocol.py +19 -0
  394. autogen/tools/experimental/google_search/__init__.py +8 -0
  395. autogen/tools/experimental/google_search/google_search.py +93 -0
  396. autogen/tools/experimental/google_search/youtube_search.py +181 -0
  397. autogen/tools/experimental/messageplatform/__init__.py +17 -0
  398. autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
  399. autogen/tools/experimental/messageplatform/discord/discord.py +284 -0
  400. autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
  401. autogen/tools/experimental/messageplatform/slack/slack.py +385 -0
  402. autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
  403. autogen/tools/experimental/messageplatform/telegram/telegram.py +271 -0
  404. autogen/tools/experimental/perplexity/__init__.py +7 -0
  405. autogen/tools/experimental/perplexity/perplexity_search.py +249 -0
  406. autogen/tools/experimental/reliable/__init__.py +10 -0
  407. autogen/tools/experimental/reliable/reliable.py +1311 -0
  408. autogen/tools/experimental/searxng/__init__.py +7 -0
  409. autogen/tools/experimental/searxng/searxng_search.py +142 -0
  410. autogen/tools/experimental/tavily/__init__.py +7 -0
  411. autogen/tools/experimental/tavily/tavily_search.py +176 -0
  412. autogen/tools/experimental/web_search_preview/__init__.py +7 -0
  413. autogen/tools/experimental/web_search_preview/web_search_preview.py +120 -0
  414. autogen/tools/experimental/wikipedia/__init__.py +7 -0
  415. autogen/tools/experimental/wikipedia/wikipedia.py +284 -0
  416. autogen/tools/function_utils.py +412 -0
  417. autogen/tools/tool.py +188 -0
  418. autogen/tools/toolkit.py +86 -0
  419. autogen/types.py +29 -0
  420. autogen/version.py +7 -0
  421. templates/client_template/main.jinja2 +72 -0
  422. templates/config_template/config.jinja2 +7 -0
  423. templates/main.jinja2 +61 -0
@@ -0,0 +1,704 @@
1
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import hashlib
8
+ import os
9
+ import re
10
+ import uuid
11
+ from collections.abc import Callable
12
+ from typing import Any, Literal
13
+
14
+ from ...code_utils import extract_code
15
+ from ...formatting_utils import colored
16
+ from ...import_utils import optional_import_block, require_optional_import
17
+ from ...retrieve_utils import (
18
+ TEXT_FORMATS,
19
+ create_vector_db_from_dir,
20
+ get_files_from_dir,
21
+ query_vector_db,
22
+ split_files_to_chunks,
23
+ )
24
+ from ...token_count_utils import count_token
25
+ from .. import UserProxyAgent
26
+ from ..agent import Agent
27
+ from ..contrib.vectordb.base import Document, QueryResults, VectorDB, VectorDBFactory
28
+ from ..contrib.vectordb.utils import (
29
+ chroma_results_to_query_results,
30
+ filter_results_by_distance,
31
+ get_logger,
32
+ )
33
+
34
+ __all__ = ["RetrieveUserProxyAgent"]
35
+
36
+ with optional_import_block():
37
+ import chromadb
38
+ from IPython import get_ipython
39
+
40
+ logger = get_logger(__name__)
41
+
42
+ PROMPT_DEFAULT = """You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
43
+ context provided by the user. You should follow the following steps to answer a question:
44
+ Step 1, you estimate the user's intent based on the question and context. The intent can be a code generation task or
45
+ a question answering task.
46
+ Step 2, you reply based on the intent.
47
+ If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
48
+ If user's intent is code generation, you must obey the following rules:
49
+ Rule 1. You MUST NOT install any packages because all the packages needed are already installed.
50
+ Rule 2. You must follow the formats below to write your code:
51
+ ```language
52
+ # your code
53
+ ```
54
+
55
+ If user's intent is question answering, you must give as short an answer as possible.
56
+
57
+ User's question is: {input_question}
58
+
59
+ Context is: {input_context}
60
+
61
+ The source of the context is: {input_sources}
62
+
63
+ If you can answer the question, in the end of your answer, add the source of the context in the format of `Sources: source1, source2, ...`.
64
+ """
65
+
66
+ PROMPT_CODE = """You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the
67
+ context provided by the user.
68
+ If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
69
+ For code generation, you must obey the following rules:
70
+ Rule 1. You MUST NOT install any packages because all the packages needed are already installed.
71
+ Rule 2. You must follow the formats below to write your code:
72
+ ```language
73
+ # your code
74
+ ```
75
+
76
+ User's question is: {input_question}
77
+
78
+ Context is: {input_context}
79
+ """
80
+
81
+ PROMPT_QA = """You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
82
+ context provided by the user.
83
+ If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
84
+ You must give as short an answer as possible.
85
+
86
+ User's question is: {input_question}
87
+
88
+ Context is: {input_context}
89
+ """
90
+
91
+ HASH_LENGTH = int(os.environ.get("HASH_LENGTH", 8))
92
+ UPDATE_CONTEXT_IN_PROMPT = "you should reply exactly `UPDATE CONTEXT`"
93
+
94
+
95
+ @require_optional_import(["chromadb", "IPython"], "retrievechat")
96
+ class RetrieveUserProxyAgent(UserProxyAgent):
97
+ """(In preview) The Retrieval-Augmented User Proxy retrieves document chunks based on the embedding
98
+ similarity, and sends them along with the question to the Retrieval-Augmented Assistant
99
+ """
100
+
101
+ def __init__(
102
+ self,
103
+ name="RetrieveChatAgent", # default set to RetrieveChatAgent
104
+ human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "ALWAYS",
105
+ is_termination_msg: Callable[[dict[str, Any]], bool] | None = None,
106
+ retrieve_config: dict[str, Any] | None = None, # config for the retrieve agent
107
+ **kwargs: Any,
108
+ ):
109
+ r"""Args:
110
+ name (str): name of the agent.
111
+
112
+ human_input_mode (str): whether to ask for human inputs every time a message is received.
113
+ Possible values are "ALWAYS", "TERMINATE", "NEVER".
114
+ 1. When "ALWAYS", the agent prompts for human input every time a message is received.
115
+ Under this mode, the conversation stops when the human input is "exit",
116
+ or when is_termination_msg is True and there is no human input.
117
+ 2. When "TERMINATE", the agent only prompts for human input only when a termination
118
+ message is received or the number of auto reply reaches
119
+ the max_consecutive_auto_reply.
120
+ 3. When "NEVER", the agent will never prompt for human input. Under this mode, the
121
+ conversation stops when the number of auto reply reaches the
122
+ max_consecutive_auto_reply or when is_termination_msg is True.
123
+
124
+ is_termination_msg (function): a function that takes a message in the form of a dictionary
125
+ and returns a boolean value indicating if this received message is a termination message.
126
+ The dict can contain the following keys: "content", "role", "name", "function_call".
127
+
128
+ retrieve_config (dict or None): config for the retrieve agent.
129
+
130
+ To use default config, set to None. Otherwise, set to a dictionary with the
131
+ following keys:
132
+ - `task` (Optional, str) - the task of the retrieve chat. Possible values are
133
+ "code", "qa" and "default". System prompt will be different for different tasks.
134
+ The default value is `default`, which supports both code and qa, and provides
135
+ source information in the end of the response.
136
+ - `vector_db` (Optional, Union[str, VectorDB]) - the vector db for the retrieve chat.
137
+ If it's a string, it should be the type of the vector db, such as "chroma"; otherwise,
138
+ it should be an instance of the VectorDB protocol. Default is "chroma".
139
+ Set `None` to use the deprecated `client`.
140
+ - `db_config` (Optional, Dict) - the config for the vector db. Default is `{}`. Please make
141
+ sure you understand the config for the vector db you are using, otherwise, leave it as `{}`.
142
+ Only valid when `vector_db` is a string.
143
+ - `client` (Optional, chromadb.Client) - the chromadb client. If key not provided, a
144
+ default client `chromadb.Client()` will be used. If you want to use other
145
+ vector db, extend this class and override the `retrieve_docs` function.
146
+ *[Deprecated]* use `vector_db` instead.
147
+ - `docs_path` (Optional, Union[str, List[str]]) - the path to the docs directory. It
148
+ can also be the path to a single file, the url to a single file or a list
149
+ of directories, files and urls. Default is None, which works only if the
150
+ collection is already created.
151
+ - `extra_docs` (Optional, bool) - when true, allows adding documents with unique IDs
152
+ without overwriting existing ones; when false, it replaces existing documents
153
+ using default IDs, risking collection overwrite., when set to true it enables
154
+ the system to assign unique IDs starting from "length+i" for new document
155
+ chunks, preventing the replacement of existing documents and facilitating the
156
+ addition of more content to the collection..
157
+ By default, "extra_docs" is set to false, starting document IDs from zero.
158
+ This poses a risk as new documents might overwrite existing ones, potentially
159
+ causing unintended loss or alteration of data in the collection.
160
+ *[Deprecated]* use `new_docs` when use `vector_db` instead of `client`.
161
+ - `new_docs` (Optional, bool) - when True, only adds new documents to the collection;
162
+ when False, updates existing documents and adds new ones. Default is True.
163
+ Document id is used to determine if a document is new or existing. By default, the
164
+ id is the hash value of the content.
165
+ - `model` (Optional, str) - the model to use for the retrieve chat.
166
+ If key not provided, a default model `gpt-4` will be used.
167
+ - `chunk_token_size` (Optional, int) - the chunk token size for the retrieve chat.
168
+ If key not provided, a default size `max_tokens * 0.4` will be used.
169
+ - `context_max_tokens` (Optional, int) - the context max token size for the
170
+ retrieve chat.
171
+ If key not provided, a default size `max_tokens * 0.8` will be used.
172
+ - `chunk_mode` (Optional, str) - the chunk mode for the retrieve chat. Possible values
173
+ are "multi_lines" and "one_line". If key not provided, a default mode
174
+ `multi_lines` will be used.
175
+ - `must_break_at_empty_line` (Optional, bool) - chunk will only break at empty line
176
+ if True. Default is True.
177
+ If chunk_mode is "one_line", this parameter will be ignored.
178
+ - `embedding_model` (Optional, str) - the embedding model to use for the retrieve chat.
179
+ If key not provided, a default model `all-MiniLM-L6-v2` will be used. All available
180
+ models can be found at `https://www.sbert.net/docs/sentence_transformer/pretrained_models.html`.
181
+ The default model is a fast model. If you want to use a high performance model,
182
+ `all-mpnet-base-v2` is recommended.
183
+ *[Deprecated]* no need when use `vector_db` instead of `client`.
184
+ - `embedding_function` (Optional, Callable) - the embedding function for creating the
185
+ vector db. Default is None, SentenceTransformer with the given `embedding_model`
186
+ will be used. If you want to use OpenAI, Cohere, HuggingFace or other embedding
187
+ functions, you can pass it here,
188
+ follow the examples in `https://docs.trychroma.com/embeddings`.
189
+ - `customized_prompt` (Optional, str) - the customized prompt for the retrieve chat.
190
+ Default is None.
191
+ - `customized_answer_prefix` (Optional, str) - the customized answer prefix for the
192
+ retrieve chat. Default is "".
193
+ If not "" and the customized_answer_prefix is not in the answer,
194
+ `Update Context` will be triggered.
195
+ - `update_context` (Optional, bool) - if False, will not apply `Update Context` for
196
+ interactive retrieval. Default is True.
197
+ - `collection_name` (Optional, str) - the name of the collection.
198
+ If key not provided, a default name `ag2-docs` will be used.
199
+ - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is False.
200
+ - `overwrite` (Optional, bool) - Whether to overwrite the collection if it exists. Default is False.
201
+ Case 1. if the collection does not exist, create the collection.
202
+ Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
203
+ Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
204
+ otherwise it raise a ValueError.
205
+ - `custom_token_count_function` (Optional, Callable) - a custom function to count the
206
+ number of tokens in a string.
207
+ The function should take (text:str, model:str) as input and return the
208
+ token_count(int). the retrieve_config["model"] will be passed in the function.
209
+ Default is autogen.token_count_utils.count_token that uses tiktoken, which may
210
+ not be accurate for non-OpenAI models.
211
+ - `custom_text_split_function` (Optional, Callable) - a custom function to split a
212
+ string into a list of strings.
213
+ Default is None, will use the default function in
214
+ `autogen.retrieve_utils.split_text_to_chunks`.
215
+ - `custom_text_types` (Optional, List[str]) - a list of file types to be processed.
216
+ Default is `autogen.retrieve_utils.TEXT_FORMATS`.
217
+ This only applies to files under the directories in `docs_path`. Explicitly
218
+ included files and urls will be chunked regardless of their types.
219
+ - `recursive` (Optional, bool) - whether to search documents recursively in the
220
+ docs_path. Default is True.
221
+ - `distance_threshold` (Optional, float) - the threshold for the distance score, only
222
+ distance smaller than it will be returned. Will be ignored if < 0. Default is -1.
223
+
224
+ `**kwargs` (dict): other kwargs in [UserProxyAgent](https://docs.ag2.ai/latest/docs/api-reference/autogen/UserProxyAgent).
225
+
226
+ Example:
227
+ Example of overriding retrieve_docs - If you have set up a customized vector db, and it's
228
+ not compatible with chromadb, you can easily plug in it with below code.
229
+ *[Deprecated]* use `vector_db` instead. You can extend VectorDB and pass it to the agent.
230
+ ```python
231
+ class MyRetrieveUserProxyAgent(RetrieveUserProxyAgent):
232
+ def query_vector_db(
233
+ self,
234
+ query_texts: List[str],
235
+ n_results: int = 10,
236
+ search_string: str = "",
237
+ **kwargs: Any,
238
+ ) -> Dict[str, Union[List[str], List[List[str]]]]:
239
+ # define your own query function here
240
+ pass
241
+
242
+ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = "", **kwargs):
243
+ results = self.query_vector_db(
244
+ query_texts=[problem],
245
+ n_results=n_results,
246
+ search_string=search_string,
247
+ **kwargs: Any,
248
+ )
249
+
250
+ self._results = results
251
+ print("doc_ids: ", results["ids"])
252
+ ```
253
+ """
254
+ super().__init__(
255
+ name=name,
256
+ human_input_mode=human_input_mode,
257
+ **kwargs,
258
+ )
259
+
260
+ self._retrieve_config = {} if retrieve_config is None else retrieve_config
261
+ self._task = self._retrieve_config.get("task", "default")
262
+ self._vector_db = self._retrieve_config.get("vector_db", "chroma")
263
+ self._db_config = self._retrieve_config.get("db_config", {})
264
+ self._docs_path = self._retrieve_config.get("docs_path", None)
265
+ self._extra_docs = self._retrieve_config.get("extra_docs", False)
266
+ self._new_docs = self._retrieve_config.get("new_docs", True)
267
+ self._collection_name = self._retrieve_config.get("collection_name", "ag2-docs")
268
+ if "docs_path" not in self._retrieve_config:
269
+ logger.warning(
270
+ "docs_path is not provided in retrieve_config. "
271
+ f"Will raise ValueError if the collection `{self._collection_name}` doesn't exist. "
272
+ "Set docs_path to None to suppress this warning."
273
+ )
274
+ self._model = self._retrieve_config.get("model", "gpt-4")
275
+ self._max_tokens = self.get_max_tokens(self._model)
276
+ self._chunk_token_size = int(self._retrieve_config.get("chunk_token_size", self._max_tokens * 0.4))
277
+ self._chunk_mode = self._retrieve_config.get("chunk_mode", "multi_lines")
278
+ self._must_break_at_empty_line = self._retrieve_config.get("must_break_at_empty_line", True)
279
+ self._embedding_model = self._retrieve_config.get("embedding_model", "all-MiniLM-L6-v2")
280
+ self._embedding_function = self._retrieve_config.get("embedding_function", None)
281
+ self.customized_prompt = self._retrieve_config.get("customized_prompt", None)
282
+ self.customized_answer_prefix = self._retrieve_config.get("customized_answer_prefix", "").upper()
283
+ self.update_context = self._retrieve_config.get("update_context", True)
284
+ self._get_or_create = self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else True
285
+ self._overwrite = self._retrieve_config.get("overwrite", False)
286
+ self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token)
287
+ self.custom_text_split_function = self._retrieve_config.get("custom_text_split_function", None)
288
+ self._custom_text_types = self._retrieve_config.get("custom_text_types", TEXT_FORMATS)
289
+ self._recursive = self._retrieve_config.get("recursive", True)
290
+ self._context_max_tokens = self._retrieve_config.get("context_max_tokens", self._max_tokens * 0.8)
291
+ self._collection = self._docs_path is None # whether the collection is created
292
+ self._ipython = get_ipython()
293
+ self._doc_idx = -1 # the index of the current used doc
294
+ self._results = [] # the results of the current query
295
+ self._intermediate_answers = set() # the intermediate answers
296
+ self._doc_contents = [] # the contents of the current used doc
297
+ self._doc_ids = [] # the ids of the current used doc
298
+ self._current_docs_in_context = [] # the ids of the current context sources
299
+ self._search_string = "" # the search string used in the current query
300
+ self._distance_threshold = self._retrieve_config.get("distance_threshold", -1)
301
+ # update the termination message function
302
+ self._is_termination_msg = (
303
+ self._is_termination_msg_retrievechat if is_termination_msg is None else is_termination_msg
304
+ )
305
+ if isinstance(self._vector_db, str):
306
+ if not isinstance(self._db_config, dict):
307
+ raise ValueError("`db_config` should be a dictionary.")
308
+ if "embedding_function" in self._retrieve_config:
309
+ self._db_config["embedding_function"] = self._embedding_function
310
+ self._vector_db = VectorDBFactory.create_vector_db(db_type=self._vector_db, **self._db_config)
311
+ self._client = self._retrieve_config.get("client", None)
312
+ if self._client is None and hasattr(self._vector_db, "client"):
313
+ # Since the client arg is deprecated, let's check
314
+ # if the `vector_db` instance has a 'client' attribute.
315
+ self._client = getattr(self._vector_db, "client", None)
316
+ if self._client is None:
317
+ self._client = chromadb.Client()
318
+ self.register_reply(Agent, RetrieveUserProxyAgent._generate_retrieve_user_reply, position=2)
319
+ self.register_hook(
320
+ hookable_method="process_message_before_send",
321
+ hook=self._check_update_context_before_send,
322
+ )
323
+
324
    def _init_db(self):
        """Create/load the vector-db collection and ingest content from `docs_path`.

        Decides whether raw files must be (re-)chunked based on `new_docs`,
        `docs_path`, `get_or_create` and `overwrite`, then creates (or fetches)
        the collection and upserts any new document chunks.

        Raises:
            ValueError: when `docs_path` is missing and the collection does not
                already exist.
        """
        if not self._vector_db:
            return

        is_to_chunk = False  # whether to chunk the raw files
        if self._new_docs:
            is_to_chunk = True
        if not self._docs_path:
            try:
                self._vector_db.get_collection(self._collection_name)
                logger.warning(f"`docs_path` is not provided. Use the existing collection `{self._collection_name}`.")
                # Reuse the existing collection as-is; nothing to ingest.
                self._overwrite = False
                self._get_or_create = True
                is_to_chunk = False
            except ValueError:
                raise ValueError(
                    "`docs_path` is not provided. "
                    f"The collection `{self._collection_name}` doesn't exist either. "
                    "Please provide `docs_path` or create the collection first."
                )
        elif self._get_or_create and not self._overwrite:
            try:
                self._vector_db.get_collection(self._collection_name)
                logger.info(f"Use the existing collection `{self._collection_name}`.", color="green")
            except ValueError:
                # Collection missing: fall through to chunking + creation below.
                is_to_chunk = True
        else:
            is_to_chunk = True

        self._vector_db.active_collection = self._vector_db.create_collection(
            self._collection_name, overwrite=self._overwrite, get_or_create=self._get_or_create
        )

        docs = None
        if is_to_chunk:
            if self.custom_text_split_function is not None:
                chunks, sources = split_files_to_chunks(
                    get_files_from_dir(self._docs_path, self._custom_text_types, self._recursive),
                    custom_text_split_function=self.custom_text_split_function,
                )
            else:
                chunks, sources = split_files_to_chunks(
                    get_files_from_dir(self._docs_path, self._custom_text_types, self._recursive),
                    self._chunk_token_size,
                    self._chunk_mode,
                    self._must_break_at_empty_line,
                )
            logger.info(f"Found {len(chunks)} chunks.")

            if self._new_docs:
                # Ids already present in the collection, used below to skip
                # chunks that were ingested previously.
                all_docs_ids = {
                    doc["id"]
                    for doc in self._vector_db.get_docs_by_ids(ids=None, collection_name=self._collection_name)
                }
            else:
                all_docs_ids = set()

            # Content-hash ids make ingestion idempotent; qdrant needs UUID-shaped ids.
            chunk_ids = (
                [hashlib.blake2b(chunk.encode("utf-8")).hexdigest()[:HASH_LENGTH] for chunk in chunks]
                if self._vector_db.type != "qdrant"
                else [str(uuid.UUID(hex=hashlib.md5(chunk.encode("utf-8")).hexdigest())) for chunk in chunks]
            )
            chunk_ids_set = set(chunk_ids)
            # Keep one representative index per distinct hash (dedupes identical chunks).
            chunk_ids_set_idx = [chunk_ids.index(hash_value) for hash_value in chunk_ids_set]
            docs = [
                Document(id=chunk_ids[idx], content=chunks[idx], metadata=sources[idx])
                for idx in chunk_ids_set_idx
                if chunk_ids[idx] not in all_docs_ids
            ]

        self._vector_db.insert_docs(docs=docs, collection_name=self._collection_name, upsert=True)
395
+
396
+ def _is_termination_msg_retrievechat(self, message):
397
+ """Check if a message is a termination message.
398
+ For code generation, terminate when no code block is detected. Currently only detect python code blocks.
399
+ For question answering, terminate when don't update context, i.e., answer is given.
400
+ """
401
+ if isinstance(message, dict):
402
+ message = message.get("content")
403
+ if message is None:
404
+ return False
405
+ cb = extract_code(message)
406
+ contain_code = False
407
+ for c in cb:
408
+ # todo: support more languages
409
+ if c[0] == "python":
410
+ contain_code = True
411
+ break
412
+ update_context_case1, update_context_case2 = self._check_update_context(message)
413
+ return not (contain_code or update_context_case1 or update_context_case2)
414
+
415
    def _check_update_context_before_send(self, sender, message, recipient, silent):
        """`process_message_before_send` hook: expand a bare "UPDATE CONTEXT" reply.

        When the outgoing message text is exactly "UPDATE CONTEXT"
        (case-insensitive, surrounding whitespace ignored), replace it with a
        freshly generated message holding the next batch of retrieved context.
        Any other message passes through unchanged.
        """
        if not isinstance(message, (str, dict)):
            return message
        elif isinstance(message, dict):
            msg_text = message.get("content", message)
        else:
            msg_text = message

        if msg_text.strip().upper() == "UPDATE CONTEXT":
            doc_contents = self._get_context(self._results)

            # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the
            # next similar docs in the retrieved doc results.
            if not doc_contents:
                # Progressively widen retrieval (3x, 5x, 7x, 9x n_results) until
                # some context fits or the result set is exhausted.
                for _tmp_retrieve_count in range(1, 5):
                    self._reset(intermediate=True)
                    self.retrieve_docs(
                        self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string
                    )
                    doc_contents = self._get_context(self._results)
                    if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
                        break
            msg_text = self._generate_message(doc_contents, task=self._task)

        # Preserve the original container type: dicts are updated in place,
        # strings are replaced wholesale.
        if isinstance(message, dict):
            message["content"] = msg_text
        return message
442
+
443
+ @staticmethod
444
+ def get_max_tokens(model="gpt-3.5-turbo"):
445
+ if "32k" in model:
446
+ return 32000
447
+ elif "16k" in model:
448
+ return 16000
449
+ elif "gpt-4" in model:
450
+ return 8000
451
+ else:
452
+ return 4000
453
+
454
+ def _reset(self, intermediate=False):
455
+ self._doc_idx = -1 # the index of the current used doc
456
+ self._results = [] # the results of the current query
457
+ if not intermediate:
458
+ self._intermediate_answers = set() # the intermediate answers
459
+ self._doc_contents = [] # the contents of the current used doc
460
+ self._doc_ids = [] # the ids of the current used doc
461
+
462
    def _get_context(self, results: QueryResults):
        """Concatenate retrieved docs into a context string within the token budget.

        Walks the first query's results starting just past `self._doc_idx`,
        skipping docs already used and docs that alone exceed
        `self._context_max_tokens`, and stops once the budget is hit or
        `self.n_results` docs have been added. Updates `_doc_idx`, `_doc_ids`,
        `_doc_contents` and `_current_docs_in_context` as side effects so the
        next call resumes where this one stopped.

        Returns:
            str: newline-joined contents of the selected docs (may be empty).
        """
        doc_contents = ""
        self._current_docs_in_context = []
        current_tokens = 0
        _doc_idx = self._doc_idx
        _tmp_retrieve_count = 0
        for idx, doc in enumerate(results[0]):
            doc = doc[0]  # each result item is a (document, distance) pair
            if idx <= _doc_idx:
                # Already consumed by a previous call for this problem.
                continue
            if doc["id"] in self._doc_ids:
                continue
            _doc_tokens = self.custom_token_count_function(doc["content"], self._model)
            if _doc_tokens > self._context_max_tokens:
                # Too large to ever fit: mark as consumed and move on.
                func_print = f"Skip doc_id {doc['id']} as it is too long to fit in the context."
                print(colored(func_print, "green"), flush=True)
                self._doc_idx = idx
                continue
            if current_tokens + _doc_tokens > self._context_max_tokens:
                # Budget exhausted; do NOT advance _doc_idx so this doc is retried next time.
                break
            func_print = f"Adding content of doc {doc['id']} to context."
            print(colored(func_print, "green"), flush=True)
            current_tokens += _doc_tokens
            doc_contents += doc["content"] + "\n"
            _metadata = doc.get("metadata")
            if isinstance(_metadata, dict):
                self._current_docs_in_context.append(_metadata.get("source", ""))
            self._doc_idx = idx
            self._doc_ids.append(doc["id"])
            self._doc_contents.append(doc["content"])
            _tmp_retrieve_count += 1
            if _tmp_retrieve_count >= self.n_results:
                break
        return doc_contents
496
+
497
+ def _generate_message(self, doc_contents, task="default"):
498
+ if not doc_contents:
499
+ print(colored("No more context, will terminate.", "green"), flush=True)
500
+ return "TERMINATE"
501
+ if self.customized_prompt:
502
+ message = self.customized_prompt.format(input_question=self.problem, input_context=doc_contents)
503
+ elif task.upper() == "CODE":
504
+ message = PROMPT_CODE.format(input_question=self.problem, input_context=doc_contents)
505
+ elif task.upper() == "QA":
506
+ message = PROMPT_QA.format(input_question=self.problem, input_context=doc_contents)
507
+ elif task.upper() == "DEFAULT":
508
+ message = PROMPT_DEFAULT.format(
509
+ input_question=self.problem, input_context=doc_contents, input_sources=self._current_docs_in_context
510
+ )
511
+ else:
512
+ raise NotImplementedError(f"task {task} is not implemented.")
513
+ return message
514
+
515
+ def _check_update_context(self, message):
516
+ if isinstance(message, dict):
517
+ message = message.get("content", "")
518
+ elif not isinstance(message, str):
519
+ message = ""
520
+ update_context_case1 = "UPDATE CONTEXT" in message.upper() and UPDATE_CONTEXT_IN_PROMPT not in message
521
+ update_context_case2 = self.customized_answer_prefix and self.customized_answer_prefix not in message.upper()
522
+ return update_context_case1, update_context_case2
523
+
524
    def _generate_retrieve_user_reply(
        self,
        messages: list[dict[str, Any]] | None = None,
        sender: Agent | None = None,
        config: Any | None = None,
    ) -> tuple[bool, str | dict[str, Any] | None]:
        """In this function, we will update the context and reset the conversation based on different conditions.
        We'll update the context and reset the conversation if update_context is True and either of the following:
        (1) the last message contains "UPDATE CONTEXT",
        (2) the last message doesn't contain "UPDATE CONTEXT" and the customized_answer_prefix is not in the message.

        Returns:
            (True, new_prompt) when context was refreshed and both histories
            were cleared; (False, None) to let other registered replies run.
        """
        if config is None:
            config = self
        if messages is None:
            messages = self._oai_messages[sender]
        message = messages[-1]
        update_context_case1, update_context_case2 = self._check_update_context(message)
        if (update_context_case1 or update_context_case2) and self.update_context:
            print(colored("Updating context and resetting conversation.", "green"), flush=True)
            # extract the first sentence in the response as the intermediate answer
            _message = message.get("content", "").split("\n")[0].strip()
            _intermediate_info = re.split(r"(?<=[.!?])\s+", _message)
            self._intermediate_answers.add(_intermediate_info[0])

            if update_context_case1:
                # try to get more context from the current retrieved doc results because the results may be too long to fit
                # in the LLM context.
                doc_contents = self._get_context(self._results)

                # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the
                # next similar docs in the retrieved doc results.
                if not doc_contents:
                    # Widen retrieval progressively (3x, 5x, 7x, 9x n_results).
                    for _tmp_retrieve_count in range(1, 5):
                        self._reset(intermediate=True)
                        self.retrieve_docs(
                            self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string
                        )
                        doc_contents = self._get_context(self._results)
                        if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
                            break
            elif update_context_case2:
                # Use the current intermediate info as the query text to retrieve docs, and each time we append the top similar
                # docs in the retrieved doc results to the context.
                for _tmp_retrieve_count in range(5):
                    self._reset(intermediate=True)
                    self.retrieve_docs(
                        _intermediate_info[0], self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string
                    )
                    self._get_context(self._results)
                    doc_contents = "\n".join(self._doc_contents)  # + "\n" + "\n".join(self._intermediate_answers)
                    if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
                        break

            # Both sides start over with the refreshed context as the new prompt.
            self.clear_history()
            sender.clear_history()
            return True, self._generate_message(doc_contents, task=self._task)
        else:
            return False, None
582
+
583
    def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = ""):
        """Retrieve docs based on the given problem and assign the results to the class property `_results`.
        The retrieved docs should be type of `QueryResults` which is a list of tuples containing the document and
        the distance.

        Args:
            problem (str): the problem to be solved.
            n_results (int): the number of results to be retrieved. Default is 20.
            search_string (str): only docs that contain an exact match of this string will be retrieved. Default is "".
                Not used if the vector_db doesn't support it.

        Returns:
            None.
        """
        # Preferred path: a VectorDB-protocol backend (set up in __init__).
        if isinstance(self._vector_db, VectorDB):
            if not self._collection or not self._get_or_create:
                print("Trying to create collection.")
                self._init_db()
                self._collection = True
                self._get_or_create = True

            kwargs = {}
            if hasattr(self._vector_db, "type") and self._vector_db.type == "chroma":
                # Only chroma supports the exact-match filter.
                kwargs["where_document"] = {"$contains": search_string} if search_string else None
            results = self._vector_db.retrieve_docs(
                queries=[problem],
                n_results=n_results,
                collection_name=self._collection_name,
                distance_threshold=self._distance_threshold,
                **kwargs,
            )
            self._search_string = search_string
            self._results = results
            print("VectorDB returns doc_ids: ", [[r[0]["id"] for r in rr] for rr in results])
            return

        # Legacy path: deprecated raw chromadb `client`.
        if not self._collection or not self._get_or_create:
            print("Trying to create collection.")
            self._client = create_vector_db_from_dir(
                dir_path=self._docs_path,
                max_tokens=self._chunk_token_size,
                client=self._client,
                collection_name=self._collection_name,
                chunk_mode=self._chunk_mode,
                must_break_at_empty_line=self._must_break_at_empty_line,
                embedding_model=self._embedding_model,
                get_or_create=self._get_or_create,
                embedding_function=self._embedding_function,
                custom_text_split_function=self.custom_text_split_function,
                custom_text_types=self._custom_text_types,
                recursive=self._recursive,
                extra_docs=self._extra_docs,
            )
            self._collection = True
            self._get_or_create = True

        results = query_vector_db(
            query_texts=[problem],
            n_results=n_results,
            search_string=search_string,
            client=self._client,
            collection_name=self._collection_name,
            embedding_model=self._embedding_model,
            embedding_function=self._embedding_function,
        )
        # Normalize the raw chroma result dict into QueryResults form.
        results["contents"] = results.pop("documents")
        results = chroma_results_to_query_results(results, "distances")
        results = filter_results_by_distance(results, self._distance_threshold)

        self._search_string = search_string
        self._results = results
        print("doc_ids: ", [[r[0]["id"] for r in rr] for rr in results])
655
+
656
+ @staticmethod
657
+ def message_generator(sender, recipient, context):
658
+ """Generate an initial message with the given context for the RetrieveUserProxyAgent.
659
+
660
+ Args:
661
+ sender (Agent): the sender agent. It should be the instance of RetrieveUserProxyAgent.
662
+ recipient (Agent): the recipient agent. Usually it's the assistant agent.
663
+ context (dict): the context for the message generation. It should contain the following keys:
664
+ - `problem` (str) - the problem to be solved.
665
+ - `n_results` (int) - the number of results to be retrieved. Default is 20.
666
+ - `search_string` (str) - only docs that contain an exact match of this string will be retrieved. Default is "".
667
+
668
+ Returns:
669
+ str: the generated message ready to be sent to the recipient agent.
670
+ """
671
+ sender._reset()
672
+
673
+ problem = context.get("problem", "")
674
+ n_results = context.get("n_results", 20)
675
+ search_string = context.get("search_string", "")
676
+
677
+ sender.retrieve_docs(problem, n_results, search_string)
678
+ sender.problem = problem
679
+ sender.n_results = n_results
680
+ doc_contents = sender._get_context(sender._results)
681
+ message = sender._generate_message(doc_contents, sender._task)
682
+ return message
683
+
684
+ def run_code(self, code, **kwargs):
685
+ lang = kwargs.get("lang")
686
+ if code.startswith("!") or code.startswith("pip") or lang in ["bash", "shell", "sh"]:
687
+ return (
688
+ 0,
689
+ "You MUST NOT install any packages because all the packages needed are already installed.",
690
+ None,
691
+ )
692
+ if self._ipython is None or lang != "python":
693
+ return super().run_code(code, **kwargs)
694
+ else:
695
+ result = self._ipython.run_cell(code)
696
+ log = str(result.result)
697
+ exitcode = 0 if result.success else 1
698
+ if result.error_before_exec is not None:
699
+ log += f"\n{result.error_before_exec}"
700
+ exitcode = 1
701
+ if result.error_in_exec is not None:
702
+ log += f"\n{result.error_in_exec}"
703
+ exitcode = 1
704
+ return exitcode, log, None