PyPI - ag2 - Versions diffs - 0.9.1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl - Mend

ag2 0.9.1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ag2 might be problematic. Click here for more details.

Files changed (357) hide show

{ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info}/METADATA +264 -73
ag2-0.9.1.post0.dist-info/RECORD +392 -0
{ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info}/WHEEL +1 -2
autogen/__init__.py +89 -0
autogen/_website/__init__.py +3 -0
autogen/_website/generate_api_references.py +427 -0
autogen/_website/generate_mkdocs.py +1174 -0
autogen/_website/notebook_processor.py +476 -0
autogen/_website/process_notebooks.py +656 -0
autogen/_website/utils.py +412 -0
autogen/agentchat/__init__.py +44 -0
autogen/agentchat/agent.py +182 -0
autogen/agentchat/assistant_agent.py +85 -0
autogen/agentchat/chat.py +309 -0
autogen/agentchat/contrib/__init__.py +5 -0
autogen/agentchat/contrib/agent_eval/README.md +7 -0
autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
autogen/agentchat/contrib/agent_eval/task.py +42 -0
autogen/agentchat/contrib/agent_optimizer.py +429 -0
autogen/agentchat/contrib/capabilities/__init__.py +5 -0
autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
autogen/agentchat/contrib/capabilities/teachability.py +393 -0
autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
autogen/agentchat/contrib/capabilities/transforms.py +566 -0
autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
autogen/agentchat/contrib/capabilities/vision_capability.py +214 -0
autogen/agentchat/contrib/captainagent/__init__.py +9 -0
autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
autogen/agentchat/contrib/captainagent/captainagent.py +512 -0
autogen/agentchat/contrib/captainagent/tool_retriever.py +335 -0
autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
autogen/agentchat/contrib/graph_rag/document.py +29 -0
autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +170 -0
autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +268 -0
autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
autogen/agentchat/contrib/img_utils.py +397 -0
autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
autogen/agentchat/contrib/llava_agent.py +187 -0
autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +324 -0
autogen/agentchat/contrib/rag/__init__.py +10 -0
autogen/agentchat/contrib/rag/chromadb_query_engine.py +272 -0
autogen/agentchat/contrib/rag/llamaindex_query_engine.py +198 -0
autogen/agentchat/contrib/rag/mongodb_query_engine.py +329 -0
autogen/agentchat/contrib/rag/query_engine.py +74 -0
autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
autogen/agentchat/contrib/retrieve_user_proxy_agent.py +703 -0
autogen/agentchat/contrib/society_of_mind_agent.py +199 -0
autogen/agentchat/contrib/swarm_agent.py +1425 -0
autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
autogen/agentchat/contrib/vectordb/__init__.py +5 -0
autogen/agentchat/contrib/vectordb/base.py +232 -0
autogen/agentchat/contrib/vectordb/chromadb.py +315 -0
autogen/agentchat/contrib/vectordb/couchbase.py +407 -0
autogen/agentchat/contrib/vectordb/mongodb.py +550 -0
autogen/agentchat/contrib/vectordb/pgvectordb.py +928 -0
autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
autogen/agentchat/contrib/vectordb/utils.py +126 -0
autogen/agentchat/contrib/web_surfer.py +303 -0
autogen/agentchat/conversable_agent.py +4020 -0
autogen/agentchat/group/__init__.py +64 -0
autogen/agentchat/group/available_condition.py +91 -0
autogen/agentchat/group/context_condition.py +77 -0
autogen/agentchat/group/context_expression.py +238 -0
autogen/agentchat/group/context_str.py +41 -0
autogen/agentchat/group/context_variables.py +192 -0
autogen/agentchat/group/group_tool_executor.py +202 -0
autogen/agentchat/group/group_utils.py +591 -0
autogen/agentchat/group/handoffs.py +244 -0
autogen/agentchat/group/llm_condition.py +93 -0
autogen/agentchat/group/multi_agent_chat.py +237 -0
autogen/agentchat/group/on_condition.py +58 -0
autogen/agentchat/group/on_context_condition.py +54 -0
autogen/agentchat/group/patterns/__init__.py +18 -0
autogen/agentchat/group/patterns/auto.py +159 -0
autogen/agentchat/group/patterns/manual.py +176 -0
autogen/agentchat/group/patterns/pattern.py +288 -0
autogen/agentchat/group/patterns/random.py +106 -0
autogen/agentchat/group/patterns/round_robin.py +117 -0
autogen/agentchat/group/reply_result.py +26 -0
autogen/agentchat/group/speaker_selection_result.py +41 -0
autogen/agentchat/group/targets/__init__.py +4 -0
autogen/agentchat/group/targets/group_chat_target.py +132 -0
autogen/agentchat/group/targets/group_manager_target.py +151 -0
autogen/agentchat/group/targets/transition_target.py +413 -0
autogen/agentchat/group/targets/transition_utils.py +6 -0
autogen/agentchat/groupchat.py +1694 -0
autogen/agentchat/realtime/__init__.py +3 -0
autogen/agentchat/realtime/experimental/__init__.py +20 -0
autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
autogen/agentchat/realtime/experimental/clients/realtime_client.py +190 -0
autogen/agentchat/realtime/experimental/function_observer.py +85 -0
autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
autogen/agentchat/realtime/experimental/realtime_swarm.py +475 -0
autogen/agentchat/realtime/experimental/websockets.py +21 -0
autogen/agentchat/realtime_agent/__init__.py +21 -0
autogen/agentchat/user_proxy_agent.py +111 -0
autogen/agentchat/utils.py +206 -0
autogen/agents/__init__.py +3 -0
autogen/agents/contrib/__init__.py +10 -0
autogen/agents/contrib/time/__init__.py +8 -0
autogen/agents/contrib/time/time_reply_agent.py +73 -0
autogen/agents/contrib/time/time_tool_agent.py +51 -0
autogen/agents/experimental/__init__.py +27 -0
autogen/agents/experimental/deep_research/__init__.py +7 -0
autogen/agents/experimental/deep_research/deep_research.py +52 -0
autogen/agents/experimental/discord/__init__.py +7 -0
autogen/agents/experimental/discord/discord.py +66 -0
autogen/agents/experimental/document_agent/__init__.py +19 -0
autogen/agents/experimental/document_agent/chroma_query_engine.py +316 -0
autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +118 -0
autogen/agents/experimental/document_agent/document_agent.py +461 -0
autogen/agents/experimental/document_agent/document_conditions.py +50 -0
autogen/agents/experimental/document_agent/document_utils.py +380 -0
autogen/agents/experimental/document_agent/inmemory_query_engine.py +220 -0
autogen/agents/experimental/document_agent/parser_utils.py +130 -0
autogen/agents/experimental/document_agent/url_utils.py +426 -0
autogen/agents/experimental/reasoning/__init__.py +7 -0
autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
autogen/agents/experimental/slack/__init__.py +7 -0
autogen/agents/experimental/slack/slack.py +73 -0
autogen/agents/experimental/telegram/__init__.py +7 -0
autogen/agents/experimental/telegram/telegram.py +77 -0
autogen/agents/experimental/websurfer/__init__.py +7 -0
autogen/agents/experimental/websurfer/websurfer.py +62 -0
autogen/agents/experimental/wikipedia/__init__.py +7 -0
autogen/agents/experimental/wikipedia/wikipedia.py +90 -0
autogen/browser_utils.py +309 -0
autogen/cache/__init__.py +10 -0
autogen/cache/abstract_cache_base.py +75 -0
autogen/cache/cache.py +203 -0
autogen/cache/cache_factory.py +88 -0
autogen/cache/cosmos_db_cache.py +144 -0
autogen/cache/disk_cache.py +102 -0
autogen/cache/in_memory_cache.py +58 -0
autogen/cache/redis_cache.py +123 -0
autogen/code_utils.py +596 -0
autogen/coding/__init__.py +22 -0
autogen/coding/base.py +119 -0
autogen/coding/docker_commandline_code_executor.py +268 -0
autogen/coding/factory.py +47 -0
autogen/coding/func_with_reqs.py +202 -0
autogen/coding/jupyter/__init__.py +23 -0
autogen/coding/jupyter/base.py +36 -0
autogen/coding/jupyter/docker_jupyter_server.py +167 -0
autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
autogen/coding/jupyter/import_utils.py +82 -0
autogen/coding/jupyter/jupyter_client.py +231 -0
autogen/coding/jupyter/jupyter_code_executor.py +160 -0
autogen/coding/jupyter/local_jupyter_server.py +172 -0
autogen/coding/local_commandline_code_executor.py +405 -0
autogen/coding/markdown_code_extractor.py +45 -0
autogen/coding/utils.py +56 -0
autogen/doc_utils.py +34 -0
autogen/events/__init__.py +7 -0
autogen/events/agent_events.py +1010 -0
autogen/events/base_event.py +99 -0
autogen/events/client_events.py +167 -0
autogen/events/helpers.py +36 -0
autogen/events/print_event.py +46 -0
autogen/exception_utils.py +73 -0
autogen/extensions/__init__.py +5 -0
autogen/fast_depends/__init__.py +16 -0
autogen/fast_depends/_compat.py +80 -0
autogen/fast_depends/core/__init__.py +14 -0
autogen/fast_depends/core/build.py +225 -0
autogen/fast_depends/core/model.py +576 -0
autogen/fast_depends/dependencies/__init__.py +15 -0
autogen/fast_depends/dependencies/model.py +29 -0
autogen/fast_depends/dependencies/provider.py +39 -0
autogen/fast_depends/library/__init__.py +10 -0
autogen/fast_depends/library/model.py +46 -0
autogen/fast_depends/py.typed +6 -0
autogen/fast_depends/schema.py +66 -0
autogen/fast_depends/use.py +280 -0
autogen/fast_depends/utils.py +187 -0
autogen/formatting_utils.py +83 -0
autogen/function_utils.py +13 -0
autogen/graph_utils.py +178 -0
autogen/import_utils.py +526 -0
autogen/interop/__init__.py +22 -0
autogen/interop/crewai/__init__.py +7 -0
autogen/interop/crewai/crewai.py +88 -0
autogen/interop/interoperability.py +71 -0
autogen/interop/interoperable.py +46 -0
autogen/interop/langchain/__init__.py +8 -0
autogen/interop/langchain/langchain_chat_model_factory.py +155 -0
autogen/interop/langchain/langchain_tool.py +82 -0
autogen/interop/litellm/__init__.py +7 -0
autogen/interop/litellm/litellm_config_factory.py +113 -0
autogen/interop/pydantic_ai/__init__.py +7 -0
autogen/interop/pydantic_ai/pydantic_ai.py +168 -0
autogen/interop/registry.py +69 -0
autogen/io/__init__.py +15 -0
autogen/io/base.py +151 -0
autogen/io/console.py +56 -0
autogen/io/processors/__init__.py +12 -0
autogen/io/processors/base.py +21 -0
autogen/io/processors/console_event_processor.py +56 -0
autogen/io/run_response.py +293 -0
autogen/io/thread_io_stream.py +63 -0
autogen/io/websockets.py +213 -0
autogen/json_utils.py +43 -0
autogen/llm_config.py +379 -0
autogen/logger/__init__.py +11 -0
autogen/logger/base_logger.py +128 -0
autogen/logger/file_logger.py +261 -0
autogen/logger/logger_factory.py +42 -0
autogen/logger/logger_utils.py +57 -0
autogen/logger/sqlite_logger.py +523 -0
autogen/math_utils.py +339 -0
autogen/mcp/__init__.py +7 -0
autogen/mcp/mcp_client.py +208 -0
autogen/messages/__init__.py +7 -0
autogen/messages/agent_messages.py +948 -0
autogen/messages/base_message.py +107 -0
autogen/messages/client_messages.py +171 -0
autogen/messages/print_message.py +49 -0
autogen/oai/__init__.py +53 -0
autogen/oai/anthropic.py +714 -0
autogen/oai/bedrock.py +628 -0
autogen/oai/cerebras.py +299 -0
autogen/oai/client.py +1435 -0
autogen/oai/client_utils.py +169 -0
autogen/oai/cohere.py +479 -0
autogen/oai/gemini.py +990 -0
autogen/oai/gemini_types.py +129 -0
autogen/oai/groq.py +305 -0
autogen/oai/mistral.py +303 -0
autogen/oai/oai_models/__init__.py +11 -0
autogen/oai/oai_models/_models.py +16 -0
autogen/oai/oai_models/chat_completion.py +87 -0
autogen/oai/oai_models/chat_completion_audio.py +32 -0
autogen/oai/oai_models/chat_completion_message.py +86 -0
autogen/oai/oai_models/chat_completion_message_tool_call.py +37 -0
autogen/oai/oai_models/chat_completion_token_logprob.py +63 -0
autogen/oai/oai_models/completion_usage.py +60 -0
autogen/oai/ollama.py +643 -0
autogen/oai/openai_utils.py +881 -0
autogen/oai/together.py +370 -0
autogen/retrieve_utils.py +491 -0
autogen/runtime_logging.py +160 -0
autogen/token_count_utils.py +267 -0
autogen/tools/__init__.py +20 -0
autogen/tools/contrib/__init__.py +9 -0
autogen/tools/contrib/time/__init__.py +7 -0
autogen/tools/contrib/time/time.py +41 -0
autogen/tools/dependency_injection.py +254 -0
autogen/tools/experimental/__init__.py +43 -0
autogen/tools/experimental/browser_use/__init__.py +7 -0
autogen/tools/experimental/browser_use/browser_use.py +161 -0
autogen/tools/experimental/crawl4ai/__init__.py +7 -0
autogen/tools/experimental/crawl4ai/crawl4ai.py +153 -0
autogen/tools/experimental/deep_research/__init__.py +7 -0
autogen/tools/experimental/deep_research/deep_research.py +328 -0
autogen/tools/experimental/duckduckgo/__init__.py +7 -0
autogen/tools/experimental/duckduckgo/duckduckgo_search.py +109 -0
autogen/tools/experimental/google/__init__.py +14 -0
autogen/tools/experimental/google/authentication/__init__.py +11 -0
autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
autogen/tools/experimental/google/drive/__init__.py +9 -0
autogen/tools/experimental/google/drive/drive_functions.py +124 -0
autogen/tools/experimental/google/drive/toolkit.py +88 -0
autogen/tools/experimental/google/model.py +17 -0
autogen/tools/experimental/google/toolkit_protocol.py +19 -0
autogen/tools/experimental/google_search/__init__.py +8 -0
autogen/tools/experimental/google_search/google_search.py +93 -0
autogen/tools/experimental/google_search/youtube_search.py +181 -0
autogen/tools/experimental/messageplatform/__init__.py +17 -0
autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
autogen/tools/experimental/messageplatform/discord/discord.py +288 -0
autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
autogen/tools/experimental/messageplatform/slack/slack.py +391 -0
autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
autogen/tools/experimental/messageplatform/telegram/telegram.py +275 -0
autogen/tools/experimental/perplexity/__init__.py +7 -0
autogen/tools/experimental/perplexity/perplexity_search.py +260 -0
autogen/tools/experimental/tavily/__init__.py +7 -0
autogen/tools/experimental/tavily/tavily_search.py +183 -0
autogen/tools/experimental/web_search_preview/__init__.py +7 -0
autogen/tools/experimental/web_search_preview/web_search_preview.py +114 -0
autogen/tools/experimental/wikipedia/__init__.py +7 -0
autogen/tools/experimental/wikipedia/wikipedia.py +287 -0
autogen/tools/function_utils.py +411 -0
autogen/tools/tool.py +187 -0
autogen/tools/toolkit.py +86 -0
autogen/types.py +29 -0
autogen/version.py +7 -0
ag2-0.9.1.dist-info/RECORD +0 -6
ag2-0.9.1.dist-info/top_level.txt +0 -1
{ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/LICENSE +0 -0
{ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/NOTICE.md +0 -0

autogen/agents/experimental/document_agent/document_utils.py ADDED Viewed

@@ -0,0 +1,380 @@
+# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+#
+# SPDX-License-Identifier: Apache-2.0
+import logging
+from enum import Enum
+from pathlib import Path
+from typing import Any, Optional, Union
+from urllib.parse import urlparse
+from pydantic import BaseModel, Field
+from ....doc_utils import export_module
+from ....import_utils import optional_import_block, require_optional_import
+from .url_utils import ExtensionToFormat, InputFormat, URLAnalyzer
+with optional_import_block():
+    import requests
+    from selenium import webdriver
+    from selenium.webdriver.chrome.service import Service as ChromeService
+    from webdriver_manager.chrome import ChromeDriverManager
+# Names exported by a star-import of this module; the other definitions below are not listed here.
+__all__ = ["handle_input", "preprocess_path"]
+# Module-level logger, named after this module's import path.
+_logger = logging.getLogger(__name__)
+# Closed set of query kinds accepted by Query.query_type; RAG_QUERY is the only enabled member.
+class QueryType(Enum):
+    RAG_QUERY = "RAG_QUERY"
+    # COMMON_QUESTION = "COMMON_QUESTION"
+# Pydantic model describing one ingestion request.
+class Ingest(BaseModel):
+    # Single string holding either a filesystem path or a URL (see the field description).
+    path_or_url: str = Field(description="The path or URL of the documents to ingest.")
+# Pydantic model describing one query request.
+class Query(BaseModel):
+    # Kind of query; must be a QueryType member.
+    query_type: QueryType = Field(description="The type of query to perform for the Document Agent.")
+    # The query text itself.
+    query: str = Field(description="The query to perform for the Document Agent.")
+def is_url(url: str) -> bool:
+    """Check if the string is a valid URL.
+    It checks whether the URL has a valid scheme and network location.
+    """
+    try:
+        url = url.strip()
+        result = urlparse(url)
+        # urlparse will not raise an exception for invalid URLs, so we need to check the components
+        return_bool = bool(result.scheme and result.netloc)
+        return return_bool
+    except Exception:
+        return False
+@require_optional_import(["selenium", "webdriver_manager", "requests"], "rag")
+def _download_rendered_html(url: str) -> str:
+    """Downloads a rendered HTML page of a given URL using headless ChromeDriver.
+    Args:
+        url (str): URL of the page to download.
+    Returns:
+        str: The rendered HTML content of the page.
+    """
+    # Set up Chrome options
+    options = webdriver.ChromeOptions()
+    options.add_argument("--headless")  # Enable headless mode
+    options.add_argument("--disable-gpu")  # Disabling GPU hardware acceleration
+    options.add_argument("--no-sandbox")  # Bypass OS security model
+    options.add_argument("--disable-dev-shm-usage")  # Overcome limited resource problems
+    # Set the location of the ChromeDriver
+    service = ChromeService(ChromeDriverManager().install())
+    # Create a new instance of the Chrome driver with specified options
+    driver = webdriver.Chrome(service=service, options=options)
+    try:
+        # Open a page
+        driver.get(url)
+        # Get the rendered HTML
+        html_content = driver.page_source
+        return str(html_content)
+    finally:
+        # Close the browser
+        driver.quit()
+@require_optional_import(["requests", "selenium", "webdriver_manager"], "rag")
+def _download_binary_file(url: str, output_dir: Path) -> Path:
+    """Downloads a file directly from the given URL.
+    Uses appropriate mode (binary/text) based on file extension or content type.
+    Args:
+        url (str): URL of the file to download.
+        output_dir (Path): Directory to save the file.
+    Returns:
+        Path: Path to the saved file.
+    """
+    # Ensure output directory exists
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Use URLAnalyzer to get information about the URL
+    analyzer = URLAnalyzer(url)
+    analysis = analyzer.analyze(test_url=True, follow_redirects=True)
+    # Get file info
+    final_url = analysis.get("final_url", url)
+    file_type = analysis.get("file_type")
+    content_type = analysis.get("mime_type", "")
+    _logger.info(f"Original URL: {url}")
+    _logger.info(f"Final URL after redirects: {final_url}")
+    _logger.info(f"Detected content type: {content_type}")
+    _logger.info(f"Detected file type: {file_type}")
+    # Check if the file type is supported
+    if file_type == InputFormat.INVALID:
+        raise ValueError(f"File type is not supported: {analysis}")
+    # Parse URL components from the final URL
+    parsed_url = urlparse(final_url)
+    path = Path(parsed_url.path)
+    # Extract filename and extension from URL
+    filename = path.name
+    suffix = path.suffix.lower()
+    # For URLs without proper filename/extension, or with generic content types
+    if not filename or not suffix:
+        # Create a unique filename
+        unique_id = abs(hash(url)) % 10000
+        # Determine extension from file type
+        if file_type is not None and isinstance(file_type, InputFormat):
+            ext = _get_extension_from_file_type(file_type, content_type)
+        else:
+            ext = None
+        # Create filename
+        prefix = "image" if file_type == InputFormat.IMAGE else "download"
+        filename = f"{prefix}_{unique_id}{ext}"
+    # Ensure the filename has the correct extension
+    if suffix:
+        # Check if the extension is valid for the file type
+        current_ext = suffix[1:] if suffix.startswith(".") else suffix
+        if file_type is not None and isinstance(file_type, InputFormat):
+            if not _is_valid_extension_for_file_type(current_ext, file_type):
+                # If not, add the correct extension
+                ext = _get_extension_from_file_type(file_type, content_type)
+                filename = f"{Path(filename).stem}{ext}"
+        else:
+            ext = _get_extension_from_file_type(InputFormat.INVALID, content_type)
+            filename = f"{Path(filename).stem}{ext}"
+    else:
+        # No extension, add one based on file type
+        if file_type is not None and isinstance(file_type, InputFormat):
+            ext = _get_extension_from_file_type(file_type, content_type)
+        else:
+            ext = _get_extension_from_file_type(InputFormat.INVALID, content_type)
+        filename = f"{filename}{ext}"
+    _logger.info(f"Using filename: {filename} for URL: {url}")
+    # Create final filepath
+    filepath = output_dir / filename
+    # Determine if this is binary or text based on extension
+    suffix = Path(filename).suffix.lower()
+    text_extensions = [".md", ".txt", ".csv", ".html", ".htm", ".xml", ".json", ".adoc"]
+    is_binary = suffix not in text_extensions
+    # Download with appropriate mode
+    try:
+        if not is_binary:
+            _logger.info(f"Downloading as text file: {final_url}")
+            response = requests.get(final_url, timeout=30)
+            response.raise_for_status()
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(response.text)
+        else:
+            _logger.info(f"Downloading as binary file: {final_url}")
+            response = requests.get(final_url, stream=True, timeout=30)
+            response.raise_for_status()
+            with open(filepath, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:  # Filter out keep-alive chunks
+                        f.write(chunk)
+    except Exception as e:
+        _logger.error(f"Download failed: {e}")
+        raise
+    return filepath
+def _get_extension_from_file_type(file_type: InputFormat, content_type: str = "") -> str:
+    """Get a file extension based on the file type and content type."""
+    # Create a reverse mapping from InputFormat to a default extension
+    # We choose the first extension found for each format
+    format_to_extension = {}
+    for ext, fmt in ExtensionToFormat.items():
+        if fmt not in format_to_extension:
+            format_to_extension[fmt] = ext
+    # Special case for images: use content type to determine exact image format
+    if file_type == InputFormat.IMAGE:
+        if "jpeg" in content_type or "jpg" in content_type:
+            return ".jpeg"
+        elif "png" in content_type:
+            return ".png"
+        elif "tiff" in content_type:
+            return ".tiff"
+        elif "bmp" in content_type:
+            return ".bmp"
+        # Fallback to default image extension
+        ext = format_to_extension.get(InputFormat.IMAGE, "png")
+        return f".{ext}"
+    # For all other formats, use the default extension
+    if file_type in format_to_extension:
+        return f".{format_to_extension[file_type]}"
+    return ".bin"  # Default for unknown types
+def _is_valid_extension_for_file_type(extension: str, file_type: InputFormat) -> bool:
+    """Check if the extension is valid for the given file type."""
+    # Remove leading dot if present
+    if extension.startswith("."):
+        extension = extension[1:]
+    # Check if the extension is in URLAnalyzer.ExtensionToFormat
+    # and if it maps to the given file type
+    return extension in ExtensionToFormat and ExtensionToFormat[extension] == file_type
+@require_optional_import(["selenium", "webdriver_manager", "requests"], "rag")
+def download_url(url: Any, output_dir: Optional[Union[str, Path]] = None) -> Path:
+    """Download the content of a URL and save it as a file.
+    For direct file URLs (.md, .pdf, .docx, etc.), downloads the raw file.
+    For web pages without file extensions or .html/.htm extensions, uses Selenium to render the content.
+    """
+    url = str(url)
+    output_dir = Path(output_dir) if output_dir else Path()
+    # Use URLAnalyzer to determine what type of file the URL is
+    analyzer = URLAnalyzer(url)
+    analysis = analyzer.analyze(test_url=True, follow_redirects=True)
+    # Log the analysis result
+    _logger.info(f"URL analysis result: {analysis}")
+    # Get the final URL after redirects
+    final_url = analysis.get("final_url", url)
+    # Determine the file type
+    is_file = analysis.get("is_file", False)
+    file_type = analysis.get("file_type")
+    # If it's a direct file URL (not HTML), download it directly
+    if is_file and file_type != InputFormat.HTML and file_type != InputFormat.INVALID:
+        _logger.info("Detected direct file URL. Downloading...")
+        return _download_binary_file(url=final_url, output_dir=output_dir)
+    # If it's a web page, use Selenium to render it
+    if file_type == InputFormat.HTML or not is_file:
+        _logger.info("Detected web page. Rendering...")
+        rendered_html = _download_rendered_html(final_url)
+        # Determine filename
+        parsed_url = urlparse(final_url)
+        path = Path(parsed_url.path)
+        filename = path.name or "downloaded_content.html"
+        if not filename.endswith(".html"):
+            filename += ".html"
+        # Save the rendered HTML
+        filepath = output_dir / filename
+        with open(file=filepath, mode="w", encoding="utf-8") as f:
+            f.write(rendered_html)
+        return filepath
+    # Otherwise, try to download as a binary file
+    _logger.info("Unknown URL type. Trying to download as binary file...")
+    return _download_binary_file(url=final_url, output_dir=output_dir)
+def list_files(directory: Union[Path, str]) -> list[Path]:
+    """Recursively list all files in a directory.
+    This function will raise an exception if the directory does not exist.
+    """
+    path = Path(directory)
+    if not path.is_dir():
+        raise ValueError(f"The directory {directory} does not exist.")
+    return [f for f in path.rglob("*") if f.is_file()]
+@export_module("autogen.agents.experimental.document_agent")
+def handle_input(input_path: Union[Path, str], output_dir: Union[Path, str] = "./output") -> list[Path]:
+    """Process the input string and return the appropriate file paths"""
+    output_dir = preprocess_path(str_or_path=output_dir, is_dir=True, mk_path=True)
+    if isinstance(input_path, str) and is_url(input_path):
+        _logger.info("Detected URL. Downloading content...")
+        try:
+            return [download_url(url=input_path, output_dir=output_dir)]
+        except Exception as e:
+            raise e
+    if isinstance(input_path, str):
+        input_path = Path(input_path)
+    if not input_path.exists():
+        raise ValueError("The input provided does not exist.")
+    elif input_path.is_dir():
+        _logger.info("Detected directory. Listing files...")
+        return list_files(directory=input_path)
+    elif input_path.is_file():
+        _logger.info("Detected file. Returning file path...")
+        return [input_path]
+    else:
+        raise ValueError("The input provided is neither a URL, directory, nor a file path.")
+@export_module("autogen.agents.experimental.document_agent")
+def preprocess_path(
+    str_or_path: Union[Path, str], mk_path: bool = False, is_file: bool = False, is_dir: bool = True
+) -> Path:
+    """Preprocess the path for file operations.
+    Args:
+        str_or_path (Union[Path, str]): The path to be processed.
+        mk_path (bool, optional): Whether to create the path if it doesn't exist. Default is True.
+        is_file (bool, optional): Whether the path is a file. Default is False.
+        is_dir (bool, optional): Whether the path is a directory. Default is True.
+    Returns:
+        Path: The preprocessed path.
+    """
+    # Convert the input to a Path object if it's a string
+    temp_path = Path(str_or_path)
+    # Ensure the path is absolute
+    absolute_path = temp_path.absolute()
+    absolute_path = absolute_path.resolve()
+    if absolute_path.exists():
+        return absolute_path
+    # Check if the path should be a file or directory
+    if is_file and is_dir:
+        raise ValueError("Path cannot be both a file and a directory.")
+    # If mk_path is True, create the directory or parent directory
+    if mk_path:
+        if is_file and not absolute_path.parent.exists():
+            absolute_path.parent.mkdir(parents=True, exist_ok=True)
+        elif is_dir and not absolute_path.exists():
+            absolute_path.mkdir(parents=True, exist_ok=True)
+    # Perform checks based on is_file and is_dir flags
+    if is_file and not absolute_path.is_file():
+        raise FileNotFoundError(f"File not found: {absolute_path}")
+    elif is_dir and not absolute_path.is_dir():
+        raise NotADirectoryError(f"Directory not found: {absolute_path}")
+    return absolute_path

autogen/agents/experimental/document_agent/inmemory_query_engine.py ADDED Viewed

@@ -0,0 +1,220 @@
+# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+#
+# SPDX-License-Identifier: Apache-2.0
+import copy
+import json
+import os
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
+from pydantic import BaseModel
+from .... import ConversableAgent
+from ....agentchat.contrib.rag import RAGQueryEngine
+from ....doc_utils import export_module
+from ....llm_config import LLMConfig
+__all__ = ["InMemoryQueryEngine"]
+# REPLIES: canned strings that InMemoryQueryEngine.query returns in place of (or as a prefix to) a model answer
+QUERY_NO_INGESTIONS_REPLY = "Sorry, please ingest some documents/URLs before querying."  # Returned when a query arrives before any document has been ingested
+EMPTY_RESPONSE_REPLY = "Sorry, I couldn't find any information on that. If you haven't ingested any documents, please try that."  # Default response for queries without results (not referenced by the code visible in this module)
+ERROR_RESPONSE_REPLY = "Sorry, there was an error processing your query: "  # Prefix only: query appends the exception text when the model reply cannot be parsed
+COULD_NOT_ANSWER_REPLY = "Sorry, I couldn't answer that question from the ingested documents/URLs"  # Returned when the parsed reply has could_answer False; query appends ": " plus the model's answer text if that text is non-empty
+# Documents and Content structure: one ingested document as held in the engine's in-memory list
+class DocumentStore(BaseModel):
+    ingestation_name: str  # label used for the document in the query agent's system message; set to the source file's name on ingestion
+    content: str  # full text of the document as read from the file
+# Answer question structure: set as the query agent's response_format and used to validate its JSON reply
+class QueryAnswer(BaseModel):
+    could_answer: bool  # when True, InMemoryQueryEngine.query returns `answer` unchanged
+    answer: str  # answer text; when could_answer is False and this is non-empty, it is appended to the could-not-answer reply
+@export_module("autogen.agents.experimental")
+class InMemoryQueryEngine:
+    """
+    This engine stores ingested documents in memory and then injects them into an internal agent's system message for answering queries.
+    This implements the autogen.agentchat.contrib.rag.RAGQueryEngine protocol.
+    """
+    def __init__(
+        self,
+        llm_config: Union[LLMConfig, dict[str, Any]],
+    ) -> None:
+        # Deep copy the llm config to avoid changing the original
+        structured_config = copy.deepcopy(llm_config)
+        # The query agent will answer with a structured output
+        structured_config["response_format"] = QueryAnswer
+        # Our agents for querying
+        self._query_agent = ConversableAgent(
+            name="inmemory_query_agent",
+            llm_config=structured_config,
+        )
+        # In-memory storage for ingested documents
+        self._ingested_documents: list[DocumentStore] = []
+    def query(self, question: str, *args: Any, **kwargs: Any) -> str:
+        """Run a query against the ingested documents and return the answer."""
+        # If no documents have been ingested, return an empty response
+        if not self._ingested_documents:
+            return QUERY_NO_INGESTIONS_REPLY
+        # Put the context into the system message
+        context_parts = []
+        for i, doc in enumerate(self._ingested_documents, 1):
+            context_parts.append(f"Ingested File/URL {i} - '{doc.ingestation_name}':\n{doc.content}\n")
+        context = "\n".join(context_parts)
+        system_message = (
+            "You are a query agent tasked with answering questions based on ingested documents.\n\n"
+            "AVAILABLE DOCUMENTS:\n"
+            + "\n".join([f"- {doc.ingestation_name}" for doc in self._ingested_documents])
+            + "\n\n"
+            "When answering questions about these documents, use ONLY the information in the following context:\n\n"
+            f"{context}\n\n"
+            "IMPORTANT: The user will ask about these documents by name. When they do, provide helpful, detailed answers based on the document content above."
+        )
+        self._query_agent.update_system_message(system_message)
+        message = f"Using ONLY the document content in your system message, answer this question: {question}"
+        response = self._query_agent.run(
+            message=message,
+            max_turns=1,
+        )
+        response.process()
+        try:
+            # Get the structured output and return the answer
+            answer_object = QueryAnswer.model_validate(json.loads(response.summary))  # type: ignore[arg-type]
+            if answer_object.could_answer:
+                return answer_object.answer
+            else:
+                if answer_object.answer:
+                    return COULD_NOT_ANSWER_REPLY + ": " + answer_object.answer
+                else:
+                    return COULD_NOT_ANSWER_REPLY
+        except Exception as e:
+            # Error converting the response to the structured output
+            return ERROR_RESPONSE_REPLY + str(e)
+    def add_docs(
+        self,
+        new_doc_dir: Optional[Union[Path, str]] = None,
+        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+    ) -> None:
+        """
+        Add additional documents to the in-memory store
+        Loads new Docling-parsed Markdown files from a specified directory or a list of file paths
+        and inserts them into the in-memory store.
+        Args:
+            new_doc_dir: The directory path from which to load additional documents.
+                If provided, all eligible files in this directory are loaded.
+            new_doc_paths_or_urls: A list of file paths specifying additional documents to load.
+                Each file should be a Docling-parsed Markdown file.
+        """
+        new_doc_dir = new_doc_dir or ""
+        new_doc_paths = new_doc_paths_or_urls or []
+        self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths)
+    def _load_doc(
+        self, input_dir: Optional[Union[Path, str]], input_docs: Optional[Sequence[Union[Path, str]]]
+    ) -> None:
+        """
+        Load documents from a directory and/or a list of file paths into the in-memory store.
+        This helper method reads files using native Python file operations and stores them
+        in the in-memory document store. It supports reading text-based files, with the primary
+        intended use being for documents processed by Docling.
+        Args:
+            input_dir (Optional[Union[Path, str]]): The directory containing documents to be loaded.
+                If provided, all files in the directory will be considered.
+            input_docs (Optional[list[Union[Path, str]]]): A list of individual file paths to load.
+                Each path must point to an existing file.
+        Raises:
+            ValueError: If the specified directory does not exist.
+            ValueError: If any provided file path does not exist.
+            ValueError: If neither input_dir nor input_docs is provided.
+        """
+        if not input_dir and not input_docs:
+            raise ValueError("No input directory or docs provided!")
+        # Process directory if provided
+        if input_dir:
+            # logger.info(f"Loading docs from directory: {input_dir}")
+            if not os.path.exists(input_dir):
+                raise ValueError(f"Input directory not found: {input_dir}")
+            # Get all files from the directory
+            dir_path = Path(input_dir)
+            for file_path in dir_path.iterdir():
+                if file_path.is_file():
+                    self._read_and_store_file(file_path)
+        # Process individual files if provided
+        if input_docs:
+            for doc_path in input_docs:
+                # logger.info(f"Loading input doc: {doc_path}")
+                if not os.path.exists(doc_path):
+                    raise ValueError(f"Document file not found: {doc_path}")
+                self._read_and_store_file(doc_path)
+    def _read_and_store_file(self, file_path: Union[Path, str]) -> None:
+        """
+        Read a file and store its content in the in-memory document store.
+        Args:
+            file_path (Union[Path, str]): Path to the file to be read
+        """
+        file_path = Path(file_path)
+        try:
+            with open(file_path, "r", encoding="utf-8") as file:
+                content = file.read()
+            # Store the document in the in-memory store
+            document = DocumentStore(ingestation_name=file_path.name, content=content)
+            self._ingested_documents.append(document)
+        except Exception as e:
+            raise ValueError(f"Error reading file {file_path}: {str(e)}")
+    def init_db(
+        self,
+        new_doc_dir: Optional[Union[Path, str]] = None,
+        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> bool:
+        """Not required nor implemented for InMemoryQueryEngine"""
+        raise NotImplementedError("Method, init_db, not required nor implemented for InMemoryQueryEngine")
+    def connect_db(self, *args: Any, **kwargs: Any) -> bool:
+        """Not required nor implemented for InMemoryQueryEngine"""
+        raise NotImplementedError("Method, connect_db, not required nor implemented for InMemoryQueryEngine")
+# mypy will fail if InMemoryQueryEngine does not implement RAGQueryEngine protocol
+if TYPE_CHECKING:  # seen by static type checkers only; this branch does not run at runtime
+    from ....agentchat.contrib.rag.query_engine import RAGQueryEngine
+    def _check_implement_protocol(o: InMemoryQueryEngine) -> RAGQueryEngine:
+        return o  # returning the engine where a RAGQueryEngine is expected is what makes the type checker verify the protocol

ag2 0.9.1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl

Potentially problematic release.

ag2 0.9.1py3-none-any.whl → 0.9.1.post0py3-none-any.whl