ag2 0.9.1__py3-none-any.whl → 0.9.1.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic. Click here for more details.
- {ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info}/METADATA +264 -73
- ag2-0.9.1.post0.dist-info/RECORD +392 -0
- {ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info}/WHEEL +1 -2
- autogen/__init__.py +89 -0
- autogen/_website/__init__.py +3 -0
- autogen/_website/generate_api_references.py +427 -0
- autogen/_website/generate_mkdocs.py +1174 -0
- autogen/_website/notebook_processor.py +476 -0
- autogen/_website/process_notebooks.py +656 -0
- autogen/_website/utils.py +412 -0
- autogen/agentchat/__init__.py +44 -0
- autogen/agentchat/agent.py +182 -0
- autogen/agentchat/assistant_agent.py +85 -0
- autogen/agentchat/chat.py +309 -0
- autogen/agentchat/contrib/__init__.py +5 -0
- autogen/agentchat/contrib/agent_eval/README.md +7 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +108 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +43 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +44 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +39 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +45 -0
- autogen/agentchat/contrib/agent_eval/task.py +42 -0
- autogen/agentchat/contrib/agent_optimizer.py +429 -0
- autogen/agentchat/contrib/capabilities/__init__.py +5 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +20 -0
- autogen/agentchat/contrib/capabilities/generate_images.py +301 -0
- autogen/agentchat/contrib/capabilities/teachability.py +393 -0
- autogen/agentchat/contrib/capabilities/text_compressors.py +66 -0
- autogen/agentchat/contrib/capabilities/tools_capability.py +22 -0
- autogen/agentchat/contrib/capabilities/transform_messages.py +93 -0
- autogen/agentchat/contrib/capabilities/transforms.py +566 -0
- autogen/agentchat/contrib/capabilities/transforms_util.py +122 -0
- autogen/agentchat/contrib/capabilities/vision_capability.py +214 -0
- autogen/agentchat/contrib/captainagent/__init__.py +9 -0
- autogen/agentchat/contrib/captainagent/agent_builder.py +790 -0
- autogen/agentchat/contrib/captainagent/captainagent.py +512 -0
- autogen/agentchat/contrib/captainagent/tool_retriever.py +335 -0
- autogen/agentchat/contrib/captainagent/tools/README.md +44 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +5 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +30 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +53 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +38 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +60 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +47 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +33 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +21 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +18 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +25 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +27 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +15 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/gpt_assistant_agent.py +526 -0
- autogen/agentchat/contrib/graph_rag/__init__.py +9 -0
- autogen/agentchat/contrib/graph_rag/document.py +29 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +170 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +103 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +53 -0
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +63 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +268 -0
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +83 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +210 -0
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +93 -0
- autogen/agentchat/contrib/img_utils.py +397 -0
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +117 -0
- autogen/agentchat/contrib/llava_agent.py +187 -0
- autogen/agentchat/contrib/math_user_proxy_agent.py +464 -0
- autogen/agentchat/contrib/multimodal_conversable_agent.py +125 -0
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +324 -0
- autogen/agentchat/contrib/rag/__init__.py +10 -0
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +272 -0
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +198 -0
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +329 -0
- autogen/agentchat/contrib/rag/query_engine.py +74 -0
- autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +703 -0
- autogen/agentchat/contrib/society_of_mind_agent.py +199 -0
- autogen/agentchat/contrib/swarm_agent.py +1425 -0
- autogen/agentchat/contrib/text_analyzer_agent.py +79 -0
- autogen/agentchat/contrib/vectordb/__init__.py +5 -0
- autogen/agentchat/contrib/vectordb/base.py +232 -0
- autogen/agentchat/contrib/vectordb/chromadb.py +315 -0
- autogen/agentchat/contrib/vectordb/couchbase.py +407 -0
- autogen/agentchat/contrib/vectordb/mongodb.py +550 -0
- autogen/agentchat/contrib/vectordb/pgvectordb.py +928 -0
- autogen/agentchat/contrib/vectordb/qdrant.py +320 -0
- autogen/agentchat/contrib/vectordb/utils.py +126 -0
- autogen/agentchat/contrib/web_surfer.py +303 -0
- autogen/agentchat/conversable_agent.py +4020 -0
- autogen/agentchat/group/__init__.py +64 -0
- autogen/agentchat/group/available_condition.py +91 -0
- autogen/agentchat/group/context_condition.py +77 -0
- autogen/agentchat/group/context_expression.py +238 -0
- autogen/agentchat/group/context_str.py +41 -0
- autogen/agentchat/group/context_variables.py +192 -0
- autogen/agentchat/group/group_tool_executor.py +202 -0
- autogen/agentchat/group/group_utils.py +591 -0
- autogen/agentchat/group/handoffs.py +244 -0
- autogen/agentchat/group/llm_condition.py +93 -0
- autogen/agentchat/group/multi_agent_chat.py +237 -0
- autogen/agentchat/group/on_condition.py +58 -0
- autogen/agentchat/group/on_context_condition.py +54 -0
- autogen/agentchat/group/patterns/__init__.py +18 -0
- autogen/agentchat/group/patterns/auto.py +159 -0
- autogen/agentchat/group/patterns/manual.py +176 -0
- autogen/agentchat/group/patterns/pattern.py +288 -0
- autogen/agentchat/group/patterns/random.py +106 -0
- autogen/agentchat/group/patterns/round_robin.py +117 -0
- autogen/agentchat/group/reply_result.py +26 -0
- autogen/agentchat/group/speaker_selection_result.py +41 -0
- autogen/agentchat/group/targets/__init__.py +4 -0
- autogen/agentchat/group/targets/group_chat_target.py +132 -0
- autogen/agentchat/group/targets/group_manager_target.py +151 -0
- autogen/agentchat/group/targets/transition_target.py +413 -0
- autogen/agentchat/group/targets/transition_utils.py +6 -0
- autogen/agentchat/groupchat.py +1694 -0
- autogen/agentchat/realtime/__init__.py +3 -0
- autogen/agentchat/realtime/experimental/__init__.py +20 -0
- autogen/agentchat/realtime/experimental/audio_adapters/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +148 -0
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +139 -0
- autogen/agentchat/realtime/experimental/audio_observer.py +42 -0
- autogen/agentchat/realtime/experimental/clients/__init__.py +15 -0
- autogen/agentchat/realtime/experimental/clients/gemini/__init__.py +7 -0
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +274 -0
- autogen/agentchat/realtime/experimental/clients/oai/__init__.py +8 -0
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +220 -0
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +243 -0
- autogen/agentchat/realtime/experimental/clients/oai/utils.py +48 -0
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +190 -0
- autogen/agentchat/realtime/experimental/function_observer.py +85 -0
- autogen/agentchat/realtime/experimental/realtime_agent.py +158 -0
- autogen/agentchat/realtime/experimental/realtime_events.py +42 -0
- autogen/agentchat/realtime/experimental/realtime_observer.py +100 -0
- autogen/agentchat/realtime/experimental/realtime_swarm.py +475 -0
- autogen/agentchat/realtime/experimental/websockets.py +21 -0
- autogen/agentchat/realtime_agent/__init__.py +21 -0
- autogen/agentchat/user_proxy_agent.py +111 -0
- autogen/agentchat/utils.py +206 -0
- autogen/agents/__init__.py +3 -0
- autogen/agents/contrib/__init__.py +10 -0
- autogen/agents/contrib/time/__init__.py +8 -0
- autogen/agents/contrib/time/time_reply_agent.py +73 -0
- autogen/agents/contrib/time/time_tool_agent.py +51 -0
- autogen/agents/experimental/__init__.py +27 -0
- autogen/agents/experimental/deep_research/__init__.py +7 -0
- autogen/agents/experimental/deep_research/deep_research.py +52 -0
- autogen/agents/experimental/discord/__init__.py +7 -0
- autogen/agents/experimental/discord/discord.py +66 -0
- autogen/agents/experimental/document_agent/__init__.py +19 -0
- autogen/agents/experimental/document_agent/chroma_query_engine.py +316 -0
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +118 -0
- autogen/agents/experimental/document_agent/document_agent.py +461 -0
- autogen/agents/experimental/document_agent/document_conditions.py +50 -0
- autogen/agents/experimental/document_agent/document_utils.py +380 -0
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +220 -0
- autogen/agents/experimental/document_agent/parser_utils.py +130 -0
- autogen/agents/experimental/document_agent/url_utils.py +426 -0
- autogen/agents/experimental/reasoning/__init__.py +7 -0
- autogen/agents/experimental/reasoning/reasoning_agent.py +1178 -0
- autogen/agents/experimental/slack/__init__.py +7 -0
- autogen/agents/experimental/slack/slack.py +73 -0
- autogen/agents/experimental/telegram/__init__.py +7 -0
- autogen/agents/experimental/telegram/telegram.py +77 -0
- autogen/agents/experimental/websurfer/__init__.py +7 -0
- autogen/agents/experimental/websurfer/websurfer.py +62 -0
- autogen/agents/experimental/wikipedia/__init__.py +7 -0
- autogen/agents/experimental/wikipedia/wikipedia.py +90 -0
- autogen/browser_utils.py +309 -0
- autogen/cache/__init__.py +10 -0
- autogen/cache/abstract_cache_base.py +75 -0
- autogen/cache/cache.py +203 -0
- autogen/cache/cache_factory.py +88 -0
- autogen/cache/cosmos_db_cache.py +144 -0
- autogen/cache/disk_cache.py +102 -0
- autogen/cache/in_memory_cache.py +58 -0
- autogen/cache/redis_cache.py +123 -0
- autogen/code_utils.py +596 -0
- autogen/coding/__init__.py +22 -0
- autogen/coding/base.py +119 -0
- autogen/coding/docker_commandline_code_executor.py +268 -0
- autogen/coding/factory.py +47 -0
- autogen/coding/func_with_reqs.py +202 -0
- autogen/coding/jupyter/__init__.py +23 -0
- autogen/coding/jupyter/base.py +36 -0
- autogen/coding/jupyter/docker_jupyter_server.py +167 -0
- autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
- autogen/coding/jupyter/import_utils.py +82 -0
- autogen/coding/jupyter/jupyter_client.py +231 -0
- autogen/coding/jupyter/jupyter_code_executor.py +160 -0
- autogen/coding/jupyter/local_jupyter_server.py +172 -0
- autogen/coding/local_commandline_code_executor.py +405 -0
- autogen/coding/markdown_code_extractor.py +45 -0
- autogen/coding/utils.py +56 -0
- autogen/doc_utils.py +34 -0
- autogen/events/__init__.py +7 -0
- autogen/events/agent_events.py +1010 -0
- autogen/events/base_event.py +99 -0
- autogen/events/client_events.py +167 -0
- autogen/events/helpers.py +36 -0
- autogen/events/print_event.py +46 -0
- autogen/exception_utils.py +73 -0
- autogen/extensions/__init__.py +5 -0
- autogen/fast_depends/__init__.py +16 -0
- autogen/fast_depends/_compat.py +80 -0
- autogen/fast_depends/core/__init__.py +14 -0
- autogen/fast_depends/core/build.py +225 -0
- autogen/fast_depends/core/model.py +576 -0
- autogen/fast_depends/dependencies/__init__.py +15 -0
- autogen/fast_depends/dependencies/model.py +29 -0
- autogen/fast_depends/dependencies/provider.py +39 -0
- autogen/fast_depends/library/__init__.py +10 -0
- autogen/fast_depends/library/model.py +46 -0
- autogen/fast_depends/py.typed +6 -0
- autogen/fast_depends/schema.py +66 -0
- autogen/fast_depends/use.py +280 -0
- autogen/fast_depends/utils.py +187 -0
- autogen/formatting_utils.py +83 -0
- autogen/function_utils.py +13 -0
- autogen/graph_utils.py +178 -0
- autogen/import_utils.py +526 -0
- autogen/interop/__init__.py +22 -0
- autogen/interop/crewai/__init__.py +7 -0
- autogen/interop/crewai/crewai.py +88 -0
- autogen/interop/interoperability.py +71 -0
- autogen/interop/interoperable.py +46 -0
- autogen/interop/langchain/__init__.py +8 -0
- autogen/interop/langchain/langchain_chat_model_factory.py +155 -0
- autogen/interop/langchain/langchain_tool.py +82 -0
- autogen/interop/litellm/__init__.py +7 -0
- autogen/interop/litellm/litellm_config_factory.py +113 -0
- autogen/interop/pydantic_ai/__init__.py +7 -0
- autogen/interop/pydantic_ai/pydantic_ai.py +168 -0
- autogen/interop/registry.py +69 -0
- autogen/io/__init__.py +15 -0
- autogen/io/base.py +151 -0
- autogen/io/console.py +56 -0
- autogen/io/processors/__init__.py +12 -0
- autogen/io/processors/base.py +21 -0
- autogen/io/processors/console_event_processor.py +56 -0
- autogen/io/run_response.py +293 -0
- autogen/io/thread_io_stream.py +63 -0
- autogen/io/websockets.py +213 -0
- autogen/json_utils.py +43 -0
- autogen/llm_config.py +379 -0
- autogen/logger/__init__.py +11 -0
- autogen/logger/base_logger.py +128 -0
- autogen/logger/file_logger.py +261 -0
- autogen/logger/logger_factory.py +42 -0
- autogen/logger/logger_utils.py +57 -0
- autogen/logger/sqlite_logger.py +523 -0
- autogen/math_utils.py +339 -0
- autogen/mcp/__init__.py +7 -0
- autogen/mcp/mcp_client.py +208 -0
- autogen/messages/__init__.py +7 -0
- autogen/messages/agent_messages.py +948 -0
- autogen/messages/base_message.py +107 -0
- autogen/messages/client_messages.py +171 -0
- autogen/messages/print_message.py +49 -0
- autogen/oai/__init__.py +53 -0
- autogen/oai/anthropic.py +714 -0
- autogen/oai/bedrock.py +628 -0
- autogen/oai/cerebras.py +299 -0
- autogen/oai/client.py +1435 -0
- autogen/oai/client_utils.py +169 -0
- autogen/oai/cohere.py +479 -0
- autogen/oai/gemini.py +990 -0
- autogen/oai/gemini_types.py +129 -0
- autogen/oai/groq.py +305 -0
- autogen/oai/mistral.py +303 -0
- autogen/oai/oai_models/__init__.py +11 -0
- autogen/oai/oai_models/_models.py +16 -0
- autogen/oai/oai_models/chat_completion.py +87 -0
- autogen/oai/oai_models/chat_completion_audio.py +32 -0
- autogen/oai/oai_models/chat_completion_message.py +86 -0
- autogen/oai/oai_models/chat_completion_message_tool_call.py +37 -0
- autogen/oai/oai_models/chat_completion_token_logprob.py +63 -0
- autogen/oai/oai_models/completion_usage.py +60 -0
- autogen/oai/ollama.py +643 -0
- autogen/oai/openai_utils.py +881 -0
- autogen/oai/together.py +370 -0
- autogen/retrieve_utils.py +491 -0
- autogen/runtime_logging.py +160 -0
- autogen/token_count_utils.py +267 -0
- autogen/tools/__init__.py +20 -0
- autogen/tools/contrib/__init__.py +9 -0
- autogen/tools/contrib/time/__init__.py +7 -0
- autogen/tools/contrib/time/time.py +41 -0
- autogen/tools/dependency_injection.py +254 -0
- autogen/tools/experimental/__init__.py +43 -0
- autogen/tools/experimental/browser_use/__init__.py +7 -0
- autogen/tools/experimental/browser_use/browser_use.py +161 -0
- autogen/tools/experimental/crawl4ai/__init__.py +7 -0
- autogen/tools/experimental/crawl4ai/crawl4ai.py +153 -0
- autogen/tools/experimental/deep_research/__init__.py +7 -0
- autogen/tools/experimental/deep_research/deep_research.py +328 -0
- autogen/tools/experimental/duckduckgo/__init__.py +7 -0
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +109 -0
- autogen/tools/experimental/google/__init__.py +14 -0
- autogen/tools/experimental/google/authentication/__init__.py +11 -0
- autogen/tools/experimental/google/authentication/credentials_hosted_provider.py +43 -0
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +91 -0
- autogen/tools/experimental/google/authentication/credentials_provider.py +35 -0
- autogen/tools/experimental/google/drive/__init__.py +9 -0
- autogen/tools/experimental/google/drive/drive_functions.py +124 -0
- autogen/tools/experimental/google/drive/toolkit.py +88 -0
- autogen/tools/experimental/google/model.py +17 -0
- autogen/tools/experimental/google/toolkit_protocol.py +19 -0
- autogen/tools/experimental/google_search/__init__.py +8 -0
- autogen/tools/experimental/google_search/google_search.py +93 -0
- autogen/tools/experimental/google_search/youtube_search.py +181 -0
- autogen/tools/experimental/messageplatform/__init__.py +17 -0
- autogen/tools/experimental/messageplatform/discord/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/discord/discord.py +288 -0
- autogen/tools/experimental/messageplatform/slack/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/slack/slack.py +391 -0
- autogen/tools/experimental/messageplatform/telegram/__init__.py +7 -0
- autogen/tools/experimental/messageplatform/telegram/telegram.py +275 -0
- autogen/tools/experimental/perplexity/__init__.py +7 -0
- autogen/tools/experimental/perplexity/perplexity_search.py +260 -0
- autogen/tools/experimental/tavily/__init__.py +7 -0
- autogen/tools/experimental/tavily/tavily_search.py +183 -0
- autogen/tools/experimental/web_search_preview/__init__.py +7 -0
- autogen/tools/experimental/web_search_preview/web_search_preview.py +114 -0
- autogen/tools/experimental/wikipedia/__init__.py +7 -0
- autogen/tools/experimental/wikipedia/wikipedia.py +287 -0
- autogen/tools/function_utils.py +411 -0
- autogen/tools/tool.py +187 -0
- autogen/tools/toolkit.py +86 -0
- autogen/types.py +29 -0
- autogen/version.py +7 -0
- ag2-0.9.1.dist-info/RECORD +0 -6
- ag2-0.9.1.dist-info/top_level.txt +0 -1
- {ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/LICENSE +0 -0
- {ag2-0.9.1.dist-info → ag2-0.9.1.post0.dist-info/licenses}/NOTICE.md +0 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
|
|
9
|
+
|
|
10
|
+
from ....doc_utils import export_module
|
|
11
|
+
from ....import_utils import optional_import_block, require_optional_import
|
|
12
|
+
from ..vectordb.base import VectorDBFactory
|
|
13
|
+
|
|
14
|
+
with optional_import_block():
|
|
15
|
+
from chromadb import HttpClient
|
|
16
|
+
from chromadb.api.types import EmbeddingFunction
|
|
17
|
+
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings
|
|
18
|
+
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
|
|
19
|
+
from llama_index.core.llms import LLM
|
|
20
|
+
from llama_index.core.schema import Document as LlamaDocument
|
|
21
|
+
from llama_index.llms.openai import OpenAI
|
|
22
|
+
from llama_index.vector_stores.chroma import ChromaVectorStore
|
|
23
|
+
|
|
24
|
+
# Public API of this module.
__all__ = ["ChromaDBQueryEngine"]

# Collection used when the caller does not supply a name; reuse the same name
# across runs to query previously ingested documents.
DEFAULT_COLLECTION_NAME = "docling-parsed-docs"
EMPTY_RESPONSE_TEXT = "Empty Response"  # Indicates that the query did not return any results
EMPTY_RESPONSE_REPLY = "Sorry, I couldn't find any information on that. If you haven't ingested any documents, please try that."  # Default response for queries without results


# Set up logging
logging.basicConfig(level=logging.INFO)
# Quiet the HTTP client used by the LLM/Chroma transports; INFO-level request
# logs are noisy during ingestion and querying.
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@require_optional_import(["chromadb", "llama_index"], "rag")
|
|
38
|
+
@export_module("autogen.agentchat.contrib.rag")
|
|
39
|
+
class ChromaDBQueryEngine:
|
|
40
|
+
"""
|
|
41
|
+
This engine leverages Chromadb to persist document embeddings in a named collection
|
|
42
|
+
and LlamaIndex's VectorStoreIndex to efficiently index and retrieve documents, and generate an answer in response
|
|
43
|
+
to natural language queries. Collection can be regarded as an abstraction of group of documents in the database.
|
|
44
|
+
|
|
45
|
+
It expects a Chromadb server to be running and accessible at the specified host and port.
|
|
46
|
+
Refer to this [link](https://docs.trychroma.com/production/containers/docker) for running Chromadb in a Docker container.
|
|
47
|
+
If the host and port are not provided, the engine will create an in-memory ChromaDB client.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
def __init__( # type: ignore[no-any-unimported]
|
|
53
|
+
self,
|
|
54
|
+
host: Optional[str] = "localhost",
|
|
55
|
+
port: Optional[int] = 8000,
|
|
56
|
+
settings: Optional["Settings"] = None,
|
|
57
|
+
tenant: Optional[str] = None,
|
|
58
|
+
database: Optional[str] = None,
|
|
59
|
+
embedding_function: "Optional[EmbeddingFunction[Any]]" = None,
|
|
60
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
61
|
+
llm: Optional["LLM"] = None,
|
|
62
|
+
collection_name: Optional[str] = None,
|
|
63
|
+
) -> None:
|
|
64
|
+
"""
|
|
65
|
+
Initializes the ChromaDBQueryEngine with db_path, metadata, and embedding function and llm.
|
|
66
|
+
Args:
|
|
67
|
+
host: The host address of the ChromaDB server. Default is localhost.
|
|
68
|
+
port: The port number of the ChromaDB server. Default is 8000.
|
|
69
|
+
settings: A dictionary of settings to communicate with the chroma server. Default is None.
|
|
70
|
+
tenant: The tenant to use for this client. Defaults to the default tenant.
|
|
71
|
+
database: The database to use for this client. Defaults to the default database.
|
|
72
|
+
embedding_function: A callable that converts text into vector embeddings. Default embedding uses Sentence Transformers model all-MiniLM-L6-v2.
|
|
73
|
+
For more embeddings that ChromaDB support, please refer to [embeddings](https://docs.trychroma.com/docs/embeddings/embedding-functions)
|
|
74
|
+
metadata: A dictionary containing configuration parameters for the Chromadb collection.
|
|
75
|
+
This metadata is typically used to configure the HNSW indexing algorithm. Defaults to `{"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}`
|
|
76
|
+
For more details about the default metadata, please refer to [HNSW configuration](https://cookbook.chromadb.dev/core/configuration/#hnsw-configuration)
|
|
77
|
+
llm: LLM model used by LlamaIndex for query processing.
|
|
78
|
+
You can find more supported LLMs at [LLM](https://docs.llamaindex.ai/en/stable/module_guides/models/llms/)
|
|
79
|
+
collection_name (str): The unique name for the Chromadb collection. If omitted, a constant name will be used. Populate this to reuse previous ingested data.
|
|
80
|
+
"""
|
|
81
|
+
self.llm: LLM = llm or OpenAI(model="gpt-4o", temperature=0.0) # type: ignore[no-any-unimported]
|
|
82
|
+
if not host or not port:
|
|
83
|
+
logger.warning(
|
|
84
|
+
"Can't connect to remote Chroma client without host or port not. Using an ephemeral, in-memory client."
|
|
85
|
+
)
|
|
86
|
+
self.client = None
|
|
87
|
+
else:
|
|
88
|
+
try:
|
|
89
|
+
self.client = HttpClient(
|
|
90
|
+
host=host,
|
|
91
|
+
port=port,
|
|
92
|
+
settings=settings,
|
|
93
|
+
tenant=tenant if tenant else DEFAULT_TENANT, # type: ignore[arg-type, no-any-unimported]
|
|
94
|
+
database=database if database else DEFAULT_DATABASE, # type: ignore[arg-type, no-any-unimported]
|
|
95
|
+
)
|
|
96
|
+
except Exception as e:
|
|
97
|
+
raise ValueError(f"Failed to connect to the ChromaDB client: {e}")
|
|
98
|
+
|
|
99
|
+
self.db_config = {"client": self.client, "embedding_function": embedding_function, "metadata": metadata}
|
|
100
|
+
self.collection_name = collection_name if collection_name else DEFAULT_COLLECTION_NAME
|
|
101
|
+
|
|
102
|
+
def init_db(
|
|
103
|
+
self,
|
|
104
|
+
new_doc_dir: Optional[Union[Path, str]] = None,
|
|
105
|
+
new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
|
|
106
|
+
*args: Any,
|
|
107
|
+
**kwargs: Any,
|
|
108
|
+
) -> bool:
|
|
109
|
+
"""Initialize the database with the input documents or records.
|
|
110
|
+
It overwrites the existing collection in the database.
|
|
111
|
+
|
|
112
|
+
It takes the following steps,
|
|
113
|
+
1. Set up ChromaDB and LlamaIndex storage.
|
|
114
|
+
2. insert documents and build indexes upon them.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
new_doc_dir: a dir of input documents that are used to create the records in database.
|
|
118
|
+
new_doc_paths_or_urls:
|
|
119
|
+
a sequence of input documents that are used to create the records in database.
|
|
120
|
+
a document can be a path to a file or a url.
|
|
121
|
+
*args: Any additional arguments
|
|
122
|
+
**kwargs: Any additional keyword arguments
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
bool: True if initialization is successful
|
|
126
|
+
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
self._set_up(overwrite=True)
|
|
130
|
+
documents = self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths_or_urls)
|
|
131
|
+
self.index = VectorStoreIndex.from_documents(documents=documents, storage_context=self.storage_context)
|
|
132
|
+
return True
|
|
133
|
+
|
|
134
|
+
def connect_db(self, *args: Any, **kwargs: Any) -> bool:
|
|
135
|
+
"""Connect to the database.
|
|
136
|
+
It does not overwrite the existing collection in the database.
|
|
137
|
+
|
|
138
|
+
It takes the following steps,
|
|
139
|
+
1. Set up ChromaDB and LlamaIndex storage.
|
|
140
|
+
2. Create the llamaIndex vector store index for querying or inserting docs later
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
*args: Any additional arguments
|
|
144
|
+
**kwargs: Any additional keyword arguments
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
bool: True if connection is successful
|
|
148
|
+
"""
|
|
149
|
+
self._set_up(overwrite=False)
|
|
150
|
+
self.index = VectorStoreIndex.from_vector_store(
|
|
151
|
+
vector_store=self.vector_store, storage_context=self.storage_context
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
return True
|
|
155
|
+
|
|
156
|
+
def add_docs(
|
|
157
|
+
self,
|
|
158
|
+
new_doc_dir: Optional[Union[Path, str]] = None,
|
|
159
|
+
new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
|
|
160
|
+
*args: Any,
|
|
161
|
+
**kwargs: Any,
|
|
162
|
+
) -> None:
|
|
163
|
+
"""Add new documents to the underlying database and add to the index.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
new_doc_dir: A dir of input documents that are used to create the records in database.
|
|
167
|
+
new_doc_paths_or_urls: A sequence of input documents that are used to create the records in database. A document can be a path to a file or a url.
|
|
168
|
+
*args: Any additional arguments
|
|
169
|
+
**kwargs: Any additional keyword arguments
|
|
170
|
+
"""
|
|
171
|
+
self._validate_query_index()
|
|
172
|
+
documents = self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths_or_urls)
|
|
173
|
+
for doc in documents:
|
|
174
|
+
self.index.insert(doc)
|
|
175
|
+
|
|
176
|
+
def query(self, question: str) -> str:
|
|
177
|
+
"""
|
|
178
|
+
Retrieve information from indexed documents by processing a query using the engine's LLM.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
question: A natural language query string used to search the indexed documents.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
A string containing the response generated by LLM.
|
|
185
|
+
"""
|
|
186
|
+
self._validate_query_index()
|
|
187
|
+
self.query_engine = self.index.as_query_engine(llm=self.llm)
|
|
188
|
+
response = self.query_engine.query(question)
|
|
189
|
+
|
|
190
|
+
if str(response) == EMPTY_RESPONSE_TEXT:
|
|
191
|
+
return EMPTY_RESPONSE_REPLY
|
|
192
|
+
|
|
193
|
+
return str(response)
|
|
194
|
+
|
|
195
|
+
def get_collection_name(self) -> str:
|
|
196
|
+
"""
|
|
197
|
+
Get the name of the collection used by the query engine.
|
|
198
|
+
|
|
199
|
+
Returns:
|
|
200
|
+
The name of the collection.
|
|
201
|
+
"""
|
|
202
|
+
if self.collection_name:
|
|
203
|
+
return self.collection_name
|
|
204
|
+
else:
|
|
205
|
+
raise ValueError("Collection name not set.")
|
|
206
|
+
|
|
207
|
+
def _validate_query_index(self) -> None:
|
|
208
|
+
"""Ensures an index exists"""
|
|
209
|
+
if not hasattr(self, "index"):
|
|
210
|
+
raise Exception("Query index is not initialized. Please call init_db or connect_db first.")
|
|
211
|
+
|
|
212
|
+
    def _set_up(self, overwrite: bool) -> None:
        """
        Set up ChromaDB and LlamaIndex storage by:
        1. Initialize the ChromaDB using VectorDBFactory and create a collection with the given name.
        2. Create the LlamaIndex vector store and storage context for the collection.
        Args:
            overwrite: If True, overwrite the existing collection with the same name.
        """
        # db_config carries the (possibly None) client plus the embedding
        # function and collection metadata captured in __init__.
        self.vector_db = VectorDBFactory().create_vector_db(db_type="chroma", **self.db_config)
        self.collection = self.vector_db.create_collection(collection_name=self.collection_name, overwrite=overwrite)
        # Bridge the Chroma collection into LlamaIndex so indexes can be built over it.
        self.vector_store = ChromaVectorStore(chroma_collection=self.collection)
        self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
|
|
224
|
+
|
|
225
|
+
def _load_doc(  # type: ignore[no-any-unimported]
    self, input_dir: Optional[Union[Path, str]], input_docs: Optional[Sequence[Union[Path, str]]]
) -> Sequence["LlamaDocument"]:
    """
    Load documents from a directory and/or a sequence of file paths.

    It uses LlamaIndex's SimpleDirectoryReader that supports multiple file
    [formats](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/#supported-file-types).

    Args:
        input_dir (Optional[Union[Path, str]]): The directory containing documents to be loaded.
            If provided, all files in the directory will be considered.
        input_docs (Optional[Sequence[Union[Path, str]]]): A sequence of individual file paths to load.
            Each path must point to an existing file.

    Returns:
        A sequence of documents loaded as LlamaDocument objects.

    Raises:
        ValueError: If the specified directory does not exist.
        ValueError: If any provided file path does not exist.
        ValueError: If neither input_dir nor input_docs is provided.
    """
    # Annotated for mypy and for consistency with LlamaIndexQueryEngine._load_doc.
    loaded_documents: list["LlamaDocument"] = []  # type: ignore[no-any-unimported]
    if input_dir:
        logger.info(f"Loading docs from directory: {input_dir}")
        if not os.path.exists(input_dir):
            raise ValueError(f"Input directory not found: {input_dir}")
        loaded_documents.extend(SimpleDirectoryReader(input_dir=input_dir).load_data())

    if input_docs:
        # Validate every path up front so we fail before loading anything.
        for doc in input_docs:
            logger.info(f"Loading input doc: {doc}")
            if not os.path.exists(doc):
                raise ValueError(f"Document file not found: {doc}")
        # Load all files in a single reader pass once validation succeeded.
        loaded_documents.extend(SimpleDirectoryReader(input_files=input_docs).load_data())  # type: ignore[arg-type]

    if not input_dir and not input_docs:
        raise ValueError("No input directory or docs provided!")

    return loaded_documents
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# mypy will fail if ChromaDBQueryEngine does not implement RAGQueryEngine protocol
if TYPE_CHECKING:
    from .query_engine import RAGQueryEngine

    # Static-only conformance check: returning a ChromaDBQueryEngine where a
    # RAGQueryEngine is expected forces mypy to verify the protocol is
    # structurally satisfied. Guarded by TYPE_CHECKING, so it never runs.
    def _check_implement_protocol(o: ChromaDBQueryEngine) -> RAGQueryEngine:
        return o
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
|
|
9
|
+
|
|
10
|
+
from ....doc_utils import export_module
|
|
11
|
+
from ....import_utils import optional_import_block, require_optional_import
|
|
12
|
+
|
|
13
|
+
with optional_import_block():
|
|
14
|
+
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
|
|
15
|
+
from llama_index.core.llms import LLM
|
|
16
|
+
from llama_index.core.schema import Document as LlamaDocument
|
|
17
|
+
from llama_index.core.vector_stores.types import BasePydanticVectorStore
|
|
18
|
+
from llama_index.llms.openai import OpenAI
|
|
19
|
+
|
|
20
|
+
__all__ = ["LlamaIndexQueryEngine"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Sentinel string LlamaIndex query engines return when nothing matches.
EMPTY_RESPONSE_TEXT = "Empty Response"  # Indicates that the query did not return any results
# User-facing fallback substituted by query() when the sentinel above is seen.
EMPTY_RESPONSE_REPLY = "Sorry, I couldn't find any information on that. If you haven't ingested any documents, please try that."  # Default response for queries without results
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Set up logging
# NOTE(review): logging.basicConfig() at import time configures the
# process-wide root logger, which is unusual for a library module — confirm
# this side effect is intentional.
logging.basicConfig(level=logging.INFO)
# Silence the chatty httpx client used by the underlying LLM HTTP calls.
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)  # module-level logger, stdlib convention
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@require_optional_import("llama_index", "rag")
@export_module("autogen.agentchat.contrib.rag")
class LlamaIndexQueryEngine:
    """
    This engine leverages LlamaIndex's VectorStoreIndex to efficiently index and retrieve documents, and generate an answer in response
    to natural language queries. It can use any LlamaIndex [vector store](https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores/).

    By default the engine will use OpenAI's GPT-4o model (use the `llm` parameter to change that).
    """

    def __init__(  # type: ignore[no-any-unimported]
        self,
        vector_store: "BasePydanticVectorStore",
        llm: Optional["LLM"] = None,
        file_reader_class: Optional[type["SimpleDirectoryReader"]] = None,
    ) -> None:
        """
        Initializes the LlamaIndexQueryEngine with the given vector store.

        Args:
            vector_store: The vector store to use for indexing and querying documents.
            llm: LLM model used by LlamaIndex for query processing. You can find more supported LLMs at [LLM](https://docs.llamaindex.ai/en/stable/module_guides/models/llms/).
            file_reader_class: The file reader class to use for loading documents. Only SimpleDirectoryReader is currently supported.
        """
        # Default to OpenAI's GPT-4o when no LLM is supplied.
        self.llm: LLM = llm or OpenAI(model="gpt-4o", temperature=0.0)  # type: ignore[no-any-unimported]
        self.vector_store = vector_store
        self.file_reader_class = file_reader_class if file_reader_class else SimpleDirectoryReader

    def init_db(
        self,
        new_doc_dir: Optional[Union[Path, str]] = None,
        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
        *args: Any,
        **kwargs: Any,
    ) -> bool:
        """Initialize the database with the input documents or records.

        It takes the following steps:
        1. Set up LlamaIndex storage context.
        2. Insert documents and build an index upon them.

        Args:
            new_doc_dir: A dir of input documents that are used to create the records in database.
            new_doc_paths_or_urls: A sequence of input documents that are used to create the records in database. A document can be a Path to a file or a url.
            *args: Any additional arguments
            **kwargs: Any additional keyword arguments

        Returns:
            bool: True if initialization is successful
        """
        self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
        documents = self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths_or_urls)
        # Building the index also persists document vectors into the backing store.
        self.index = VectorStoreIndex.from_documents(documents=documents, storage_context=self.storage_context)
        return True

    def connect_db(self, *args: Any, **kwargs: Any) -> bool:
        """Connect to the database.

        It sets up the LlamaIndex storage and creates an index from the existing vector store.

        Args:
            *args: Any additional arguments
            **kwargs: Any additional keyword arguments

        Returns:
            bool: True if connection is successful
        """
        self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
        # Reuse vectors already present in the store instead of re-ingesting.
        self.index = VectorStoreIndex.from_vector_store(
            vector_store=self.vector_store, storage_context=self.storage_context
        )
        return True

    def add_docs(
        self,
        new_doc_dir: Optional[Union[Path, str]] = None,
        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Add new documents to the underlying database and add to the index.

        Args:
            new_doc_dir: A dir of input documents that are used to create the records in database.
            new_doc_paths_or_urls: A sequence of input documents that are used to create the records in database. A document can be a Path to a file or a url.
            *args: Any additional arguments
            **kwargs: Any additional keyword arguments
        """
        self._validate_query_index()
        documents = self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths_or_urls)
        for doc in documents:
            self.index.insert(doc)

    def query(self, question: str) -> str:
        """
        Retrieve information from indexed documents by processing a query using the engine's LLM.

        Args:
            question: A natural language query string used to search the indexed documents.

        Returns:
            A string containing the response generated by LLM.
        """
        self._validate_query_index()
        self.query_engine = self.index.as_query_engine(llm=self.llm)
        response = self.query_engine.query(question)

        # LlamaIndex signals "no results" via a sentinel string; translate it
        # into a friendlier, user-facing reply.
        if str(response) == EMPTY_RESPONSE_TEXT:
            return EMPTY_RESPONSE_REPLY

        return str(response)

    def _validate_query_index(self) -> None:
        """Ensure an index exists.

        Raises:
            RuntimeError: If neither ``init_db`` nor ``connect_db`` has been
                called, i.e. ``self.index`` does not exist yet.
        """
        # RuntimeError (a subclass of Exception) instead of bare Exception so
        # callers can catch this specific failure; ``except Exception`` callers
        # remain unaffected.
        if not hasattr(self, "index"):
            raise RuntimeError("Query index is not initialized. Please call init_db or connect_db first.")

    def _load_doc(  # type: ignore[no-any-unimported]
        self, input_dir: Optional[Union[Path, str]], input_docs: Optional[Sequence[Union[Path, str]]]
    ) -> Sequence["LlamaDocument"]:
        """
        Load documents from a directory and/or a sequence of file paths.

        Defaults to LlamaIndex's SimpleDirectoryReader, which supports multiple file
        [formats](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/#supported-file-types).

        Args:
            input_dir (Optional[Union[Path, str]]): The directory containing documents to be loaded.
                If provided, all files in the directory will be considered.
            input_docs (Optional[Sequence[Union[Path, str]]]): A sequence of individual file paths to load.
                Each path must point to an existing file.

        Returns:
            A sequence of documents loaded as LlamaDocument objects.

        Raises:
            ValueError: If the specified directory does not exist.
            ValueError: If any provided file path does not exist.
            ValueError: If neither input_dir nor input_docs is provided.
        """
        loaded_documents: list["LlamaDocument"] = []  # type: ignore[no-any-unimported]
        if input_dir:
            logger.info(f"Loading docs from directory: {input_dir}")
            if not os.path.exists(input_dir):
                raise ValueError(f"Input directory not found: {input_dir}")
            loaded_documents.extend(self.file_reader_class(input_dir=input_dir).load_data())  # type: ignore[operator]

        if input_docs:
            # Validate every path up front so we fail before loading anything.
            for doc in input_docs:
                logger.info(f"Loading input doc: {doc}")
                if not os.path.exists(doc):
                    raise ValueError(f"Document file not found: {doc}")
            # Load all files in a single reader pass once validation succeeded.
            loaded_documents.extend(self.file_reader_class(input_files=input_docs).load_data())  # type: ignore[operator, arg-type]

        if not input_dir and not input_docs:
            raise ValueError("No input directory or docs provided!")

        return loaded_documents
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# mypy will fail if LlamaIndexQueryEngine does not implement RAGQueryEngine protocol
if TYPE_CHECKING:
    from .query_engine import RAGQueryEngine

    # Static-only conformance check: returning a LlamaIndexQueryEngine where a
    # RAGQueryEngine is expected forces mypy to verify the protocol is
    # structurally satisfied. Guarded by TYPE_CHECKING, so it never runs.
    def _check_implement_protocol(o: LlamaIndexQueryEngine) -> RAGQueryEngine:
        return o
|