ag2 0.9.6__py3-none-any.whl → 0.9.8.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ag2 might be problematic.
- {ag2-0.9.6.dist-info → ag2-0.9.8.post1.dist-info}/METADATA +102 -75
- ag2-0.9.8.post1.dist-info/RECORD +387 -0
- autogen/__init__.py +1 -2
- autogen/_website/generate_api_references.py +4 -5
- autogen/_website/generate_mkdocs.py +9 -15
- autogen/_website/notebook_processor.py +13 -14
- autogen/_website/process_notebooks.py +10 -10
- autogen/_website/utils.py +5 -4
- autogen/agentchat/agent.py +13 -13
- autogen/agentchat/assistant_agent.py +7 -6
- autogen/agentchat/contrib/agent_eval/agent_eval.py +3 -3
- autogen/agentchat/contrib/agent_eval/critic_agent.py +3 -3
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +3 -3
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +3 -3
- autogen/agentchat/contrib/agent_optimizer.py +3 -3
- autogen/agentchat/contrib/capabilities/generate_images.py +11 -11
- autogen/agentchat/contrib/capabilities/teachability.py +15 -15
- autogen/agentchat/contrib/capabilities/transforms.py +17 -18
- autogen/agentchat/contrib/capabilities/transforms_util.py +5 -5
- autogen/agentchat/contrib/capabilities/vision_capability.py +4 -3
- autogen/agentchat/contrib/captainagent/agent_builder.py +30 -30
- autogen/agentchat/contrib/captainagent/captainagent.py +22 -21
- autogen/agentchat/contrib/captainagent/tool_retriever.py +2 -3
- autogen/agentchat/contrib/gpt_assistant_agent.py +9 -9
- autogen/agentchat/contrib/graph_rag/document.py +3 -3
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +3 -3
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +6 -6
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +3 -3
- autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +5 -11
- autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +6 -6
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +7 -7
- autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +6 -6
- autogen/agentchat/contrib/img_utils.py +1 -1
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +11 -11
- autogen/agentchat/contrib/llava_agent.py +18 -4
- autogen/agentchat/contrib/math_user_proxy_agent.py +11 -11
- autogen/agentchat/contrib/multimodal_conversable_agent.py +8 -8
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +6 -5
- autogen/agentchat/contrib/rag/chromadb_query_engine.py +22 -26
- autogen/agentchat/contrib/rag/llamaindex_query_engine.py +14 -17
- autogen/agentchat/contrib/rag/mongodb_query_engine.py +27 -37
- autogen/agentchat/contrib/rag/query_engine.py +7 -5
- autogen/agentchat/contrib/retrieve_assistant_agent.py +5 -5
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +8 -7
- autogen/agentchat/contrib/society_of_mind_agent.py +15 -14
- autogen/agentchat/contrib/swarm_agent.py +76 -98
- autogen/agentchat/contrib/text_analyzer_agent.py +7 -7
- autogen/agentchat/contrib/vectordb/base.py +10 -18
- autogen/agentchat/contrib/vectordb/chromadb.py +2 -1
- autogen/agentchat/contrib/vectordb/couchbase.py +18 -20
- autogen/agentchat/contrib/vectordb/mongodb.py +6 -5
- autogen/agentchat/contrib/vectordb/pgvectordb.py +40 -41
- autogen/agentchat/contrib/vectordb/qdrant.py +5 -5
- autogen/agentchat/contrib/web_surfer.py +20 -19
- autogen/agentchat/conversable_agent.py +311 -295
- autogen/agentchat/group/context_str.py +1 -3
- autogen/agentchat/group/context_variables.py +15 -25
- autogen/agentchat/group/group_tool_executor.py +10 -10
- autogen/agentchat/group/group_utils.py +15 -15
- autogen/agentchat/group/guardrails.py +7 -7
- autogen/agentchat/group/handoffs.py +19 -36
- autogen/agentchat/group/multi_agent_chat.py +7 -7
- autogen/agentchat/group/on_condition.py +4 -7
- autogen/agentchat/group/on_context_condition.py +4 -7
- autogen/agentchat/group/patterns/auto.py +8 -7
- autogen/agentchat/group/patterns/manual.py +7 -6
- autogen/agentchat/group/patterns/pattern.py +13 -12
- autogen/agentchat/group/patterns/random.py +3 -3
- autogen/agentchat/group/patterns/round_robin.py +3 -3
- autogen/agentchat/group/reply_result.py +2 -4
- autogen/agentchat/group/speaker_selection_result.py +5 -5
- autogen/agentchat/group/targets/group_chat_target.py +7 -6
- autogen/agentchat/group/targets/group_manager_target.py +4 -4
- autogen/agentchat/group/targets/transition_target.py +2 -1
- autogen/agentchat/groupchat.py +58 -61
- autogen/agentchat/realtime/experimental/audio_adapters/twilio_audio_adapter.py +4 -4
- autogen/agentchat/realtime/experimental/audio_adapters/websocket_audio_adapter.py +4 -4
- autogen/agentchat/realtime/experimental/clients/gemini/client.py +7 -7
- autogen/agentchat/realtime/experimental/clients/oai/base_client.py +8 -8
- autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py +6 -6
- autogen/agentchat/realtime/experimental/clients/realtime_client.py +10 -9
- autogen/agentchat/realtime/experimental/realtime_agent.py +10 -9
- autogen/agentchat/realtime/experimental/realtime_observer.py +3 -3
- autogen/agentchat/realtime/experimental/realtime_swarm.py +44 -44
- autogen/agentchat/user_proxy_agent.py +10 -9
- autogen/agentchat/utils.py +3 -3
- autogen/agents/contrib/time/time_reply_agent.py +6 -5
- autogen/agents/contrib/time/time_tool_agent.py +2 -1
- autogen/agents/experimental/deep_research/deep_research.py +3 -3
- autogen/agents/experimental/discord/discord.py +2 -2
- autogen/agents/experimental/document_agent/chroma_query_engine.py +29 -44
- autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py +9 -14
- autogen/agents/experimental/document_agent/document_agent.py +15 -16
- autogen/agents/experimental/document_agent/document_conditions.py +3 -3
- autogen/agents/experimental/document_agent/document_utils.py +5 -9
- autogen/agents/experimental/document_agent/inmemory_query_engine.py +14 -20
- autogen/agents/experimental/document_agent/parser_utils.py +4 -4
- autogen/agents/experimental/document_agent/url_utils.py +14 -23
- autogen/agents/experimental/reasoning/reasoning_agent.py +33 -33
- autogen/agents/experimental/slack/slack.py +2 -2
- autogen/agents/experimental/telegram/telegram.py +2 -3
- autogen/agents/experimental/websurfer/websurfer.py +4 -4
- autogen/agents/experimental/wikipedia/wikipedia.py +5 -7
- autogen/browser_utils.py +8 -8
- autogen/cache/abstract_cache_base.py +5 -5
- autogen/cache/cache.py +12 -12
- autogen/cache/cache_factory.py +4 -4
- autogen/cache/cosmos_db_cache.py +9 -9
- autogen/cache/disk_cache.py +6 -6
- autogen/cache/in_memory_cache.py +4 -4
- autogen/cache/redis_cache.py +4 -4
- autogen/code_utils.py +18 -18
- autogen/coding/base.py +6 -6
- autogen/coding/docker_commandline_code_executor.py +9 -9
- autogen/coding/func_with_reqs.py +7 -6
- autogen/coding/jupyter/base.py +3 -3
- autogen/coding/jupyter/docker_jupyter_server.py +3 -4
- autogen/coding/jupyter/import_utils.py +3 -3
- autogen/coding/jupyter/jupyter_client.py +5 -5
- autogen/coding/jupyter/jupyter_code_executor.py +3 -4
- autogen/coding/jupyter/local_jupyter_server.py +2 -6
- autogen/coding/local_commandline_code_executor.py +8 -7
- autogen/coding/markdown_code_extractor.py +1 -2
- autogen/coding/utils.py +1 -2
- autogen/doc_utils.py +3 -2
- autogen/environments/docker_python_environment.py +19 -29
- autogen/environments/python_environment.py +8 -17
- autogen/environments/system_python_environment.py +3 -4
- autogen/environments/venv_python_environment.py +8 -12
- autogen/environments/working_directory.py +1 -2
- autogen/events/agent_events.py +106 -109
- autogen/events/base_event.py +6 -5
- autogen/events/client_events.py +15 -14
- autogen/events/helpers.py +1 -1
- autogen/events/print_event.py +4 -5
- autogen/fast_depends/_compat.py +10 -15
- autogen/fast_depends/core/build.py +17 -36
- autogen/fast_depends/core/model.py +64 -113
- autogen/fast_depends/dependencies/model.py +2 -1
- autogen/fast_depends/dependencies/provider.py +3 -2
- autogen/fast_depends/library/model.py +4 -4
- autogen/fast_depends/schema.py +7 -7
- autogen/fast_depends/use.py +17 -25
- autogen/fast_depends/utils.py +10 -30
- autogen/formatting_utils.py +6 -6
- autogen/graph_utils.py +1 -4
- autogen/import_utils.py +13 -13
- autogen/interop/crewai/crewai.py +2 -2
- autogen/interop/interoperable.py +2 -2
- autogen/interop/langchain/langchain_chat_model_factory.py +3 -2
- autogen/interop/langchain/langchain_tool.py +2 -6
- autogen/interop/litellm/litellm_config_factory.py +6 -7
- autogen/interop/pydantic_ai/pydantic_ai.py +4 -7
- autogen/interop/registry.py +2 -1
- autogen/io/base.py +5 -5
- autogen/io/run_response.py +33 -32
- autogen/io/websockets.py +6 -5
- autogen/json_utils.py +1 -2
- autogen/llm_config/__init__.py +11 -0
- autogen/llm_config/client.py +58 -0
- autogen/llm_config/config.py +384 -0
- autogen/llm_config/entry.py +154 -0
- autogen/logger/base_logger.py +4 -3
- autogen/logger/file_logger.py +2 -1
- autogen/logger/logger_factory.py +2 -2
- autogen/logger/logger_utils.py +2 -2
- autogen/logger/sqlite_logger.py +3 -2
- autogen/math_utils.py +4 -5
- autogen/mcp/__main__.py +6 -6
- autogen/mcp/helpers.py +4 -4
- autogen/mcp/mcp_client.py +170 -29
- autogen/mcp/mcp_proxy/fastapi_code_generator_helpers.py +3 -4
- autogen/mcp/mcp_proxy/mcp_proxy.py +23 -26
- autogen/mcp/mcp_proxy/operation_grouping.py +4 -5
- autogen/mcp/mcp_proxy/operation_renaming.py +6 -10
- autogen/mcp/mcp_proxy/security.py +2 -3
- autogen/messages/agent_messages.py +96 -98
- autogen/messages/base_message.py +6 -5
- autogen/messages/client_messages.py +15 -14
- autogen/messages/print_message.py +4 -5
- autogen/oai/__init__.py +1 -2
- autogen/oai/anthropic.py +42 -41
- autogen/oai/bedrock.py +68 -57
- autogen/oai/cerebras.py +26 -25
- autogen/oai/client.py +118 -138
- autogen/oai/client_utils.py +3 -3
- autogen/oai/cohere.py +34 -11
- autogen/oai/gemini.py +40 -17
- autogen/oai/gemini_types.py +11 -12
- autogen/oai/groq.py +22 -10
- autogen/oai/mistral.py +17 -11
- autogen/oai/oai_models/__init__.py +14 -2
- autogen/oai/oai_models/_models.py +2 -2
- autogen/oai/oai_models/chat_completion.py +13 -14
- autogen/oai/oai_models/chat_completion_message.py +11 -9
- autogen/oai/oai_models/chat_completion_message_tool_call.py +26 -3
- autogen/oai/oai_models/chat_completion_token_logprob.py +3 -4
- autogen/oai/oai_models/completion_usage.py +8 -9
- autogen/oai/ollama.py +22 -10
- autogen/oai/openai_responses.py +40 -17
- autogen/oai/openai_utils.py +159 -85
- autogen/oai/together.py +29 -14
- autogen/retrieve_utils.py +6 -7
- autogen/runtime_logging.py +5 -4
- autogen/token_count_utils.py +7 -4
- autogen/tools/contrib/time/time.py +0 -1
- autogen/tools/dependency_injection.py +5 -6
- autogen/tools/experimental/browser_use/browser_use.py +10 -10
- autogen/tools/experimental/code_execution/python_code_execution.py +5 -7
- autogen/tools/experimental/crawl4ai/crawl4ai.py +12 -15
- autogen/tools/experimental/deep_research/deep_research.py +9 -8
- autogen/tools/experimental/duckduckgo/duckduckgo_search.py +5 -11
- autogen/tools/experimental/firecrawl/firecrawl_tool.py +98 -115
- autogen/tools/experimental/google/authentication/credentials_local_provider.py +1 -1
- autogen/tools/experimental/google/drive/drive_functions.py +4 -4
- autogen/tools/experimental/google/drive/toolkit.py +5 -5
- autogen/tools/experimental/google_search/google_search.py +5 -5
- autogen/tools/experimental/google_search/youtube_search.py +5 -5
- autogen/tools/experimental/messageplatform/discord/discord.py +8 -12
- autogen/tools/experimental/messageplatform/slack/slack.py +14 -20
- autogen/tools/experimental/messageplatform/telegram/telegram.py +8 -12
- autogen/tools/experimental/perplexity/perplexity_search.py +18 -29
- autogen/tools/experimental/reliable/reliable.py +68 -74
- autogen/tools/experimental/searxng/searxng_search.py +20 -19
- autogen/tools/experimental/tavily/tavily_search.py +12 -19
- autogen/tools/experimental/web_search_preview/web_search_preview.py +13 -7
- autogen/tools/experimental/wikipedia/wikipedia.py +7 -10
- autogen/tools/function_utils.py +7 -7
- autogen/tools/tool.py +6 -5
- autogen/types.py +2 -2
- autogen/version.py +1 -1
- ag2-0.9.6.dist-info/RECORD +0 -421
- autogen/llm_config.py +0 -385
- {ag2-0.9.6.dist-info → ag2-0.9.8.post1.dist-info}/WHEEL +0 -0
- {ag2-0.9.6.dist-info → ag2-0.9.8.post1.dist-info}/licenses/LICENSE +0 -0
- {ag2-0.9.6.dist-info → ag2-0.9.8.post1.dist-info}/licenses/NOTICE.md +0 -0
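Most of the churn in the diffs below is mechanical: `Optional[X]` and `Union[X, Y]` annotations are rewritten as PEP 604 unions (`X | None`, `X | Y`), `Sequence` moves from `typing` to `collections.abc`, redundant `"r"` file modes are dropped, and docstring summaries are pulled up onto the opening `"""` line. The monolithic `autogen/llm_config.py` is also replaced by an `autogen/llm_config/` package (`client.py`, `config.py`, `entry.py`). A minimal sketch of the annotation change, using illustrative names rather than code from the package (PEP 604 unions in signatures require Python 3.10+ at runtime):

```python
from collections.abc import Sequence  # 0.9.8 style: Sequence comes from collections.abc
from pathlib import Path

# 0.9.6 style needed typing imports:
#     from typing import Optional, Sequence, Union
#     def add_docs(new_doc_dir: Optional[Union[Path, str]] = None, ...) -> None: ...


# 0.9.8 style: the same signature written as PEP 604 unions
def add_docs(
    new_doc_dir: Path | str | None = None,
    new_doc_paths_or_urls: Sequence[Path | str] | None = None,
) -> None:
    """Add documents from a directory and/or a list of paths (illustrative stub)."""
```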
--- a/autogen/agents/experimental/deep_research/deep_research.py
+++ b/autogen/agents/experimental/deep_research/deep_research.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Any, Optional, Union
+from typing import Any
 
 from .... import ConversableAgent
 from ....doc_utils import export_module
@@ -21,8 +21,8 @@ class DeepResearchAgent(ConversableAgent):
     def __init__(
         self,
         name: str,
-        llm_config: Optional[Union[LLMConfig, dict[str, Any]]] = None,
-        system_message: Optional[Union[str, list[str]]] = DEFAULT_PROMPT,
+        llm_config: LLMConfig | dict[str, Any] | None = None,
+        system_message: str | list[str] | None = DEFAULT_PROMPT,
         max_web_steps: int = 30,
         **kwargs: Any,
     ) -> None:
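Under the new signature, `llm_config` accepts an `LLMConfig`, a plain dict, or `None`. A minimal construction sketch, assuming the usual `autogen.agents.experimental` export; the model name and config shape are illustrative, not taken from the diff:

```python
from autogen import LLMConfig
from autogen.agents.experimental import DeepResearchAgent

llm_config = LLMConfig(api_type="openai", model="gpt-4o")  # illustrative model choice

agent = DeepResearchAgent(
    name="researcher",
    llm_config=llm_config,  # LLMConfig | dict[str, Any] | None per the hunk above
    max_web_steps=10,       # default is 30
)
```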
--- a/autogen/agents/experimental/discord/discord.py
+++ b/autogen/agents/experimental/discord/discord.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Any, Optional
+from typing import Any
 
 from .... import ConversableAgent
 from ....doc_utils import export_module
@@ -24,7 +24,7 @@ class DiscordAgent(ConversableAgent):
     def __init__(
         self,
         name: str,
-        system_message: Optional[str] = None,
+        system_message: str | None = None,
         *,
         bot_token: str,
         channel_name: str,
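The same pattern applies to `DiscordAgent`: `system_message` becomes `str | None`. A hedged sketch; the token and channel values are placeholders, and the remaining keyword-only parameters are cut off in the hunk above:

```python
from autogen.agents.experimental import DiscordAgent

agent = DiscordAgent(
    name="discord_agent",
    llm_config={"api_type": "openai", "model": "gpt-4o"},  # illustrative
    bot_token="YOUR_BOT_TOKEN",  # placeholder secret
    channel_name="general",      # placeholder channel
    guild_name="my-server",      # assumed remaining required kwarg, truncated in the hunk
)
```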
--- a/autogen/agents/experimental/document_agent/chroma_query_engine.py
+++ b/autogen/agents/experimental/document_agent/chroma_query_engine.py
@@ -4,8 +4,9 @@
 
 import logging
 import os
+from collections.abc import Sequence
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Optional
 
 from pydantic import BaseModel
 
@@ -40,8 +41,7 @@ logger = logging.getLogger(__name__)
 @require_optional_import(["chromadb", "llama_index"], "rag")
 @export_module("autogen.agents.experimental")
 class VectorChromaQueryEngine:
-    """
-    This engine leverages Chromadb to persist document embeddings in a named collection
+    """This engine leverages Chromadb to persist document embeddings in a named collection
     and LlamaIndex's VectorStoreIndex to efficiently index and retrieve documents, and generate an answer in response
     to natural language queries. The Chromadb collection serves as the storage layer, while
     the collection name uniquely identifies the set of documents within the persistent database.
@@ -51,14 +51,14 @@ class VectorChromaQueryEngine:
 
     def __init__(  # type: ignore[no-any-unimported]
         self,
-        db_path: Optional[str] = None,
-        embedding_function: "Optional[EmbeddingFunction[Any]]" = None,
-        metadata: Optional[dict[str, Any]] = None,
+        db_path: str | None = None,
+        embedding_function: "EmbeddingFunction[Any] | None" = None,
+        metadata: dict[str, Any] | None = None,
         llm: Optional["LLM"] = None,
-        collection_name: Optional[str] = None,
+        collection_name: str | None = None,
     ) -> None:
-        """
-        Initializes the VectorChromaQueryEngine with db_path, metadata, and embedding function and llm.
+        """Initializes the VectorChromaQueryEngine with db_path, metadata, and embedding function and llm.
+
         Args:
             db_path: The file system path where Chromadb will store its persistent data.
                 If not specified, the default directory "./chroma" is used.
@@ -79,15 +79,12 @@ class VectorChromaQueryEngine:
             "hnsw:M": 32,
         }
         self.client = chromadb.PersistentClient(path=db_path or "./chroma")
-        self.collection_name: Optional[str] = collection_name
+        self.collection_name: str | None = collection_name
 
         self.connect_db()
 
     def connect_db(self, *args: Any, **kwargs: Any) -> bool:
-        """
-        Establish a connection to the Chromadb database and initialize the collection.
-        """
-
+        """Establish a connection to the Chromadb database and initialize the collection."""
        self.collection_name = self.collection_name or DEFAULT_COLLECTION_NAME
 
         if self._collection_exists(self.collection_name):
@@ -106,8 +103,7 @@ class VectorChromaQueryEngine:
         return True
 
     def query(self, question: str) -> str:
-        """
-        Retrieve information from indexed documents by processing a natural language query.
+        """Retrieve information from indexed documents by processing a natural language query.
 
         Args:
             question: A natural language query string used to search the indexed documents.
@@ -126,11 +122,10 @@ class VectorChromaQueryEngine:
 
     def add_docs(
         self,
-        new_doc_dir: Optional[Union[Path, str]] = None,
-        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+        new_doc_dir: Path | str | None = None,
+        new_doc_paths_or_urls: Sequence[Path | str] | None = None,
     ) -> None:
-        """
-        Add additional documents to the existing vector index.
+        """Add additional documents to the existing vector index.
 
         Loads new Docling-parsed Markdown files from a specified directory or a list of file paths
         and inserts them into the current index for future queries.
@@ -149,10 +144,9 @@ class VectorChromaQueryEngine:
         self.index.insert(doc)
 
     def _load_doc(  # type: ignore[no-any-unimported]
-        self, input_dir: Optional[Union[Path, str]], input_docs: Optional[Sequence[Union[Path, str]]]
+        self, input_dir: Path | str | None, input_docs: Sequence[Path | str] | None
     ) -> list["LlamaDocument"]:
-        """
-        Load documents from a directory and/or a list of file paths.
+        """Load documents from a directory and/or a list of file paths.
 
         This helper method reads Docling-parsed Markdown files using LlamaIndex's
         SimpleDirectoryReader. It supports multiple file [formats]((https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/#supported-file-types)),
@@ -194,8 +188,7 @@ class VectorChromaQueryEngine:
     def _create_index(  # type: ignore[no-any-unimported]
         self, collection: "Collection"
     ) -> "VectorStoreIndex":
-        """
-        Build a vector index for document retrieval using a Chromadb collection.
+        """Build a vector index for document retrieval using a Chromadb collection.
 
         Wraps the provided Chromadb collection into a vector store and uses LlamaIndex's
         StorageContext to create a VectorStoreIndex from the collection.
@@ -214,8 +207,7 @@ class VectorChromaQueryEngine:
         return index
 
     def _collection_exists(self, collection_name: str) -> bool:
-        """
-        Check if a collection with the given name exists in the database.
+        """Check if a collection with the given name exists in the database.
 
         Args:
             collection_name (str): The name of the collection to check.
@@ -227,8 +219,7 @@ class VectorChromaQueryEngine:
         return any(col == collection_name for col in existing_collections)
 
     def get_collection_name(self) -> str:
-        """
-        Get the name of the collection used by the query engine.
+        """Get the name of the collection used by the query engine.
 
         Returns:
             The name of the collection.
@@ -245,8 +236,8 @@ class VectorChromaQueryEngine:
 
     def init_db(
         self,
-        new_doc_dir: Optional[Union[Path, str]] = None,
-        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+        new_doc_dir: Path | str | None = None,
+        new_doc_paths_or_urls: Sequence[Path | str] | None = None,
         *args: Any,
         **kwargs: Any,
     ) -> bool:
@@ -262,30 +253,25 @@ class AnswerWithCitations(BaseModel):  # type: ignore[no-any-unimported]
 @require_optional_import(["chromadb", "llama_index"], "rag")
 @export_module("autogen.agents.experimental")
 class VectorChromaCitationQueryEngine(VectorChromaQueryEngine):
-    """
-    This engine leverages VectorChromaQueryEngine and CitationQueryEngine to answer queries with citations.
-    """
+    """This engine leverages VectorChromaQueryEngine and CitationQueryEngine to answer queries with citations."""
 
     def __init__(  # type: ignore[no-any-unimported]
         self,
-        db_path: Optional[str] = None,
-        embedding_function: "Optional[EmbeddingFunction[Any]]" = None,
-        metadata: Optional[dict[str, Any]] = None,
+        db_path: str | None = None,
+        embedding_function: "EmbeddingFunction[Any] | None" = None,
+        metadata: dict[str, Any] | None = None,
         llm: Optional["LLM"] = None,
-        collection_name: Optional[str] = None,
+        collection_name: str | None = None,
         enable_query_citations: bool = False,
         citation_chunk_size: int = 512,
     ) -> None:
-        """
-        see parent class VectorChromaQueryEngine.
-        """
+        """See parent class VectorChromaQueryEngine."""
         super().__init__(db_path, embedding_function, metadata, llm, collection_name)
         self.enable_query_citations = enable_query_citations
         self.citation_chunk_size = citation_chunk_size
 
     def query_with_citations(self, query: str) -> AnswerWithCitations:
-        """
-        Query the index with the given query and return the answer along with citations.
+        """Query the index with the given query and return the answer along with citations.
 
         Args:
             query (str): The query to be answered.
@@ -294,7 +280,6 @@ class VectorChromaCitationQueryEngine(VectorChromaQueryEngine):
         Returns:
             AnswerWithCitations: An object containing the answer and citations.
         """
-
         query_engine = CitationQueryEngine.from_args(
             index=self.index,
             citation_chunk_size=self.citation_chunk_size,
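The hunks above touch only annotations and docstrings; behavior is unchanged. A usage sketch of the engine (requires the `rag` optional dependencies, chromadb and llama_index; paths and names are placeholders):

```python
from autogen.agents.experimental import VectorChromaQueryEngine

engine = VectorChromaQueryEngine(
    db_path="./chroma",         # matches the default shown in the hunk
    collection_name="my-docs",  # illustrative; DEFAULT_COLLECTION_NAME when omitted
)

engine.add_docs(new_doc_paths_or_urls=["./parsed/report.md"])  # Docling-parsed Markdown
print(engine.query("What does the report conclude?"))
print(engine.get_collection_name())
```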
--- a/autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py
+++ b/autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py
@@ -4,7 +4,7 @@
 
 import logging
 from pathlib import Path
-from typing import Literal, Optional, Union
+from typing import Literal
 
 from .... import ConversableAgent
 from ....agentchat.contrib.rag.query_engine import RAGQueryEngine
@@ -30,22 +30,19 @@ You are an expert in parsing and understanding text. You can use {DOCLING_PARSE_
 
 @export_module("autogen.agents.experimental")
 class DoclingDocIngestAgent(ConversableAgent):
-    """
-    A DoclingDocIngestAgent is a swarm agent that ingests documents using the docling_parse_docs tool.
-    """
+    """A DoclingDocIngestAgent is a swarm agent that ingests documents using the docling_parse_docs tool."""
 
     def __init__(
         self,
-        name: Optional[str] = None,
-        llm_config: Optional[Union[LLMConfig, dict, Literal[False]]] = None,  # type: ignore[type-arg]
-        parsed_docs_path: Optional[Union[Path, str]] = None,
-        query_engine: Optional[RAGQueryEngine] = None,
+        name: str | None = None,
+        llm_config: LLMConfig | dict | Literal[False] | None = None,  # type: ignore[type-arg]
+        parsed_docs_path: Path | str | None = None,
+        query_engine: RAGQueryEngine | None = None,
         return_agent_success: str = "TaskManagerAgent",
         return_agent_error: str = "ErrorManagerAgent",
-        collection_name: Optional[str] = None,
+        collection_name: str | None = None,
     ):
-        """
-        Initialize the DoclingDocIngestAgent.
+        """Initialize the DoclingDocIngestAgent.
 
         Args:
             name: The name of the DoclingDocIngestAgent.
@@ -64,8 +61,7 @@ class DoclingDocIngestAgent(ConversableAgent):
         self._query_engine = query_engine or VectorChromaQueryEngine(collection_name=collection_name)
 
         def data_ingest_task(context_variables: ContextVariables) -> ReplyResult:
-            """
-            A tool for Swarm agent to ingests documents using the docling_parse_docs to parse documents to markdown
+            """A tool for Swarm agent to ingests documents using the docling_parse_docs to parse documents to markdown
             and add them to the docling_query_engine.
 
             Args:
@@ -74,7 +70,6 @@ class DoclingDocIngestAgent(ConversableAgent):
             Returns:
                 ReplyResult: The result of the task.
             """
-
             try:
                 input_file_path = ""
                 tasks = context_variables.get("DocumentsToIngest", [])
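Every constructor argument here is now an explicit `X | None` union, and the hunk confirms that a `VectorChromaQueryEngine` is built from `collection_name` when no `query_engine` is passed. A construction sketch with placeholder values:

```python
from autogen.agents.experimental import DoclingDocIngestAgent

ingest_agent = DoclingDocIngestAgent(
    name="doc_ingest",
    llm_config={"api_type": "openai", "model": "gpt-4o"},  # illustrative
    parsed_docs_path="./parsed_docs",                      # placeholder
    collection_name="ingested-docs",                       # used for the default query engine
)
```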
--- a/autogen/agents/experimental/document_agent/document_agent.py
+++ b/autogen/agents/experimental/document_agent/document_agent.py
@@ -5,7 +5,7 @@
 import logging
 from copy import deepcopy
 from pathlib import Path
-from typing import Annotated, Any, Optional, Union, cast
+from typing import Annotated, Any, cast
 
 from pydantic import BaseModel, Field
 
@@ -123,7 +123,7 @@ class DocumentTask(BaseModel):
 class DocumentTriageAgent(ConversableAgent):
     """The DocumentTriageAgent is responsible for deciding what type of task to perform from user requests."""
 
-    def __init__(self, llm_config: Optional[Union[LLMConfig, dict[str, Any]]] = None):
+    def __init__(self, llm_config: LLMConfig | dict[str, Any] | None = None):
         # Add the structured message to the LLM configuration
         structured_config_list = deepcopy(llm_config)
         structured_config_list["response_format"] = DocumentTask  # type: ignore[index]
@@ -145,20 +145,19 @@ class DocumentTriageAgent(ConversableAgent):
 
 @export_module("autogen.agents.experimental")
 class DocAgent(ConversableAgent):
-    """
-    The DocAgent is responsible for ingest and querying documents.
+    """The DocAgent is responsible for ingest and querying documents.
 
     Internally, it generates a group chat with a set of agents to ingest, query, and summarize.
     """
 
     def __init__(
         self,
-        name: Optional[str] = None,
-        llm_config: Optional[Union[LLMConfig, dict[str, Any]]] = None,
-        system_message: Optional[str] = None,
-        parsed_docs_path: Optional[Union[str, Path]] = None,
-        collection_name: Optional[str] = None,
-        query_engine: Optional[RAGQueryEngine] = None,
+        name: str | None = None,
+        llm_config: LLMConfig | dict[str, Any] | None = None,
+        system_message: str | None = None,
+        parsed_docs_path: str | Path | None = None,
+        collection_name: str | None = None,
+        query_engine: RAGQueryEngine | None = None,
     ):
         """Initialize the DocAgent.
 
@@ -564,14 +563,14 @@ class DocAgent(ConversableAgent):
         self.register_reply([Agent, None], DocAgent.generate_inner_group_chat_reply)
 
         self.documents_ingested: list[str] = []
-        self._group_chat_context_variables: Optional[ContextVariables] = None
+        self._group_chat_context_variables: ContextVariables | None = None
 
     def generate_inner_group_chat_reply(
         self,
-        messages: Optional[Union[list[dict[str, Any]], str]] = None,
-        sender: Optional[Agent] = None,
-        config: Optional[OpenAIWrapper] = None,
-    ) -> tuple[bool, Optional[Union[str, dict[str, Any]]]]:
+        messages: list[dict[str, Any]] | str | None = None,
+        sender: Agent | None = None,
+        config: OpenAIWrapper | None = None,
+    ) -> tuple[bool, str | dict[str, Any] | None]:
         """Reply function that generates the inner group chat reply for the DocAgent.
 
         Args:
@@ -628,7 +627,7 @@ class DocAgent(ConversableAgent):
 
         return True, chat_result.summary
 
-    def _get_document_input_message(self, messages: Optional[Union[list[dict[str, Any]], str]]) -> str:  # type: ignore[type-arg]
+    def _get_document_input_message(self, messages: list[dict[str, Any]] | str | None) -> str:  # type: ignore[type-arg]
         """Gets and validates the input message(s) for the document agent.
 
         Args:
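`DocAgent` keeps its inner group chat wiring; only the annotations change, and `generate_inner_group_chat_reply` stays registered as its reply function, so the agent is used like any other `ConversableAgent`. A minimal construction sketch with placeholder values:

```python
from autogen.agents.experimental import DocAgent

doc_agent = DocAgent(
    name="doc_agent",
    llm_config={"api_type": "openai", "model": "gpt-4o"},  # illustrative
    parsed_docs_path="./parsed_docs",                      # placeholder
    collection_name="docagent-store",                      # illustrative
)
```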
--- a/autogen/agents/experimental/document_agent/document_conditions.py
+++ b/autogen/agents/experimental/document_agent/document_conditions.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import TYPE_CHECKING, Any, List, cast
+from typing import TYPE_CHECKING, Any, cast
 
 from ....agentchat.group.available_condition import AvailableCondition
 from .document_utils import Ingest, Query
@@ -40,9 +40,9 @@ class SummaryTaskAvailableCondition(AvailableCondition):
             True if all conditions are met (ready for summary), False otherwise
         """
         # Get variables from context with appropriate casting
-        documents_to_ingest: List[Ingest] = cast(List[Ingest], agent.context_variables.get(self.documents_var, []))
+        documents_to_ingest: list[Ingest] = cast(list[Ingest], agent.context_variables.get(self.documents_var, []))
 
-        queries_to_run: List[Query] = cast(List[Query], agent.context_variables.get(self.queries_var, []))
+        queries_to_run: list[Query] = cast(list[Query], agent.context_variables.get(self.queries_var, []))
 
         completed_task_count = bool(agent.context_variables.get(self.completed_var, 0))
 
--- a/autogen/agents/experimental/document_agent/document_utils.py
+++ b/autogen/agents/experimental/document_agent/document_utils.py
@@ -5,7 +5,7 @@
 import logging
 from enum import Enum
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any
 from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field
@@ -244,7 +244,7 @@ def _is_valid_extension_for_file_type(extension: str, file_type: InputFormat) -> bool:
 
 
 @require_optional_import(["selenium", "webdriver_manager", "requests"], "rag")
-def download_url(url: Any, output_dir: Optional[Union[str, Path]] = None) -> Path:
+def download_url(url: Any, output_dir: str | Path | None = None) -> Path:
     """Download the content of a URL and save it as a file.
 
     For direct file URLs (.md, .pdf, .docx, etc.), downloads the raw file.
@@ -296,7 +296,7 @@ def download_url(url: Any, output_dir: Optional[Union[str, Path]] = None) -> Path:
         return _download_binary_file(url=final_url, output_dir=output_dir)
 
 
-def list_files(directory: Union[Path, str]) -> list[Path]:
+def list_files(directory: Path | str) -> list[Path]:
     """Recursively list all files in a directory.
 
     This function will raise an exception if the directory does not exist.
@@ -310,9 +310,8 @@ def list_files(directory: Union[Path, str]) -> list[Path]:
 
 
 @export_module("autogen.agents.experimental.document_agent")
-def handle_input(input_path: Union[Path, str], output_dir: Union[Path, str] = "./output") -> list[Path]:
+def handle_input(input_path: Path | str, output_dir: Path | str = "./output") -> list[Path]:
     """Process the input string and return the appropriate file paths"""
-
     output_dir = preprocess_path(str_or_path=output_dir, is_dir=True, mk_path=True)
     if isinstance(input_path, str) and is_url(input_path):
         _logger.info("Detected URL. Downloading content...")
@@ -336,9 +335,7 @@ def handle_input(input_path: Union[Path, str], output_dir: Union[Path, str] = "./output") -> list[Path]:
 
 
 @export_module("autogen.agents.experimental.document_agent")
-def preprocess_path(
-    str_or_path: Union[Path, str], mk_path: bool = False, is_file: bool = False, is_dir: bool = True
-) -> Path:
+def preprocess_path(str_or_path: Path | str, mk_path: bool = False, is_file: bool = False, is_dir: bool = True) -> Path:
     """Preprocess the path for file operations.
 
     Args:
@@ -350,7 +347,6 @@ def preprocess_path(
     Returns:
         Path: The preprocessed path.
     """
-
     # Convert the input to a Path object if it's a string
     temp_path = Path(str_or_path)
 
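The helper signatures above are now plain `Path | str` unions. A sketch of the path helpers, which are exported from `autogen.agents.experimental.document_agent` per the decorators shown (the input file is a placeholder; URL inputs additionally need the `rag` extras for `download_url`):

```python
from pathlib import Path

from autogen.agents.experimental.document_agent import handle_input, preprocess_path

# preprocess_path keeps its keyword flags; only the annotations changed.
out_dir: Path = preprocess_path("./output", mk_path=True, is_dir=True)

# Accepts a local file, a directory, or a URL string; returns the files to process.
files: list[Path] = handle_input("./report.pdf", output_dir=out_dir)
```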
--- a/autogen/agents/experimental/document_agent/inmemory_query_engine.py
+++ b/autogen/agents/experimental/document_agent/inmemory_query_engine.py
@@ -5,8 +5,9 @@
 import copy
 import json
 import os
+from collections.abc import Sequence
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any
 
 from pydantic import BaseModel
 
@@ -38,15 +39,14 @@ class QueryAnswer(BaseModel):
 
 @export_module("autogen.agents.experimental")
 class InMemoryQueryEngine:
-    """
-    This engine stores ingested documents in memory and then injects them into an internal agent's system message for answering queries.
+    """This engine stores ingested documents in memory and then injects them into an internal agent's system message for answering queries.
 
     This implements the autogen.agentchat.contrib.rag.RAGQueryEngine protocol.
     """
 
     def __init__(
         self,
-        llm_config: Union[LLMConfig, dict[str, Any]],
+        llm_config: LLMConfig | dict[str, Any],
     ) -> None:
         # Deep copy the llm config to avoid changing the original
         structured_config = copy.deepcopy(llm_config)
@@ -65,7 +65,6 @@ class InMemoryQueryEngine:
 
     def query(self, question: str, *args: Any, **kwargs: Any) -> str:
         """Run a query against the ingested documents and return the answer."""
-
         # If no documents have been ingested, return an empty response
         if not self._ingested_documents:
             return QUERY_NO_INGESTIONS_REPLY
@@ -116,11 +115,10 @@ class InMemoryQueryEngine:
 
     def add_docs(
         self,
-        new_doc_dir: Optional[Union[Path, str]] = None,
-        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+        new_doc_dir: Path | str | None = None,
+        new_doc_paths_or_urls: Sequence[Path | str] | None = None,
     ) -> None:
-        """
-        Add additional documents to the in-memory store
+        """Add additional documents to the in-memory store
 
         Loads new Docling-parsed Markdown files from a specified directory or a list of file paths
         and inserts them into the in-memory store.
@@ -135,11 +133,8 @@ class InMemoryQueryEngine:
         new_doc_paths = new_doc_paths_or_urls or []
         self._load_doc(input_dir=new_doc_dir, input_docs=new_doc_paths)
 
-    def _load_doc(
-        self, input_dir: Optional[Union[Path, str]], input_docs: Optional[Sequence[Union[Path, str]]]
-    ) -> None:
-        """
-        Load documents from a directory and/or a list of file paths into the in-memory store.
+    def _load_doc(self, input_dir: Path | str | None, input_docs: Sequence[Path | str] | None) -> None:
+        """Load documents from a directory and/or a list of file paths into the in-memory store.
 
         This helper method reads files using native Python file operations and stores them
         in the in-memory document store. It supports reading text-based files, with the primary
@@ -179,16 +174,15 @@ class InMemoryQueryEngine:
                 raise ValueError(f"Document file not found: {doc_path}")
             self._read_and_store_file(doc_path)
 
-    def _read_and_store_file(self, file_path: Union[Path, str]) -> None:
-        """
-        Read a file and store its content in the in-memory document store.
+    def _read_and_store_file(self, file_path: Path | str) -> None:
+        """Read a file and store its content in the in-memory document store.
 
         Args:
             file_path (Union[Path, str]): Path to the file to be read
         """
         file_path = Path(file_path)
         try:
-            with open(file_path, "r", encoding="utf-8") as file:
+            with open(file_path, encoding="utf-8") as file:
                 content = file.read()
 
             # Store the document in the in-memory store
@@ -199,8 +193,8 @@ class InMemoryQueryEngine:
 
     def init_db(
         self,
-        new_doc_dir: Optional[Union[Path, str]] = None,
-        new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
+        new_doc_dir: Path | str | None = None,
+        new_doc_paths_or_urls: Sequence[Path | str] | None = None,
         *args: Any,
         **kwargs: Any,
     ) -> bool:
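Note that `InMemoryQueryEngine.__init__` still requires `llm_config`: the union carries no `| None` and no default. A usage sketch with placeholder values:

```python
from autogen.agents.experimental import InMemoryQueryEngine

engine = InMemoryQueryEngine(llm_config={"api_type": "openai", "model": "gpt-4o"})  # illustrative

engine.init_db(new_doc_paths_or_urls=["./parsed/notes.md"])  # placeholder path
print(engine.query("Summarize the notes."))
```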
--- a/autogen/agents/experimental/document_agent/parser_utils.py
+++ b/autogen/agents/experimental/document_agent/parser_utils.py
@@ -7,7 +7,7 @@ import logging
 import os
 import time
 from pathlib import Path
-from typing import Annotated, Optional, Union
+from typing import Annotated
 
 from ....doc_utils import export_module
 from ....import_utils import optional_import_block, require_optional_import
@@ -27,9 +27,9 @@ logger.setLevel(logging.DEBUG)
 @require_optional_import(["docling"], "rag")
 @export_module("autogen.agents.experimental.document_agent")
 def docling_parse_docs(  # type: ignore[no-any-unimported]
-    input_file_path: Annotated[Union[Path, str], "Path to the input file or directory"],
-    output_dir_path: Annotated[Optional[Union[Path, str]], "Path to the output directory"] = None,
-    output_formats: Annotated[Optional[list[str]], "List of output formats (markdown, json)"] = None,
+    input_file_path: Annotated[Path | str, "Path to the input file or directory"],
+    output_dir_path: Annotated[Path | str | None, "Path to the output directory"] = None,
+    output_formats: Annotated[list[str] | None, "List of output formats (markdown, json)"] = None,
     table_output_format: str = "html",
 ) -> list[Path]:
     """Convert documents into a Deep Search document format using EasyOCR
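A call sketch for the parsing tool (requires the `rag` extra for docling; file paths are placeholders):

```python
from autogen.agents.experimental.document_agent import docling_parse_docs

output_files = docling_parse_docs(
    input_file_path="./report.pdf",  # placeholder
    output_dir_path="./parsed",      # placeholder; None is allowed per the new signature
    output_formats=["markdown", "json"],
    table_output_format="html",      # default shown in the hunk
)
print(output_files)  # list[Path] of generated files
```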
--- a/autogen/agents/experimental/document_agent/url_utils.py
+++ b/autogen/agents/experimental/document_agent/url_utils.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import Enum
-from typing import Any, Optional, Tuple
+from typing import Any
 from urllib.parse import urlparse
 
 from ....import_utils import optional_import_block, require_optional_import
@@ -84,9 +84,7 @@ ExtensionToFormat = {
 
 
 class URLAnalyzer:
-    """
-    A class that analyzes URLs to determine if they point to web pages or files.
-    """
+    """A class that analyzes URLs to determine if they point to web pages or files."""
 
     # Mapping of input formats to their corresponding MIME types
     FormatToMimeType: dict[InputFormat, list[str]] = {
@@ -125,22 +123,20 @@ class URLAnalyzer:
         MimeTypeToFormat[mime_type] = format_type
 
     def __init__(self, url: str):
-        """
-        Initialize the URLAnalyzer with a URL.
+        """Initialize the URLAnalyzer with a URL.
 
         Args:
             url (str): The URL to analyze
         """
         self.url = url
-        self.analysis_result: Optional[dict[str, Any]] = None
-        self.final_url: Optional[str] = None
+        self.analysis_result: dict[str, Any] | None = None
+        self.final_url: str | None = None
         self.redirect_chain: list[str] = []
 
     def analyze(
         self, test_url: bool = False, follow_redirects: bool = True, prioritize_extension: bool = True
     ) -> dict[str, Any]:
-        """
-        Analyze the URL to determine if it points to a web page or a file.
+        """Analyze the URL to determine if it points to a web page or a file.
 
         Args:
             test_url (bool): Whether to test the URL by making a request
@@ -213,8 +209,7 @@ class URLAnalyzer:
         return result
 
     def _analyze_by_extension(self, url: str) -> dict[str, Any]:
-        """
-        Analyze URL based on its file extension.
+        """Analyze URL based on its file extension.
 
         Args:
             url (str): The URL to analyze
@@ -247,9 +242,8 @@ class URLAnalyzer:
         }
 
     @require_optional_import(["requests"], "rag")
-    def _analyze_by_request(self, follow_redirects: bool = True) -> Optional[dict[str, Any]]:
-        """
-        Analyze URL by making a HEAD request to check Content-Type.
+    def _analyze_by_request(self, follow_redirects: bool = True) -> dict[str, Any] | None:
+        """Analyze URL by making a HEAD request to check Content-Type.
 
         Args:
             follow_redirects (bool): Whether to follow redirects
@@ -346,9 +340,8 @@ class URLAnalyzer:
             # If the request fails for any other reason
             return {"is_file": False, "file_type": InputFormat.INVALID, "mime_type": None, "error": str(e)}
 
-    def get_result(self) -> Optional[dict[str, Any]]:
-        """
-        Get the last analysis result, or None if the URL hasn't been analyzed yet.
+    def get_result(self) -> dict[str, Any] | None:
+        """Get the last analysis result, or None if the URL hasn't been analyzed yet.
 
         Returns:
             Optional[dict]: The analysis result or None
@@ -356,8 +349,7 @@ class URLAnalyzer:
         return self.analysis_result
 
     def get_redirect_info(self) -> dict[str, Any]:
-        """
-        Get information about redirects that occurred during the last request.
+        """Get information about redirects that occurred during the last request.
 
         Returns:
             dict: Information about redirects
@@ -380,9 +372,8 @@ class URLAnalyzer:
         }
 
     @require_optional_import(["requests"], "rag")
-    def follow_redirects(self) -> Tuple[str, list[str]]:
-        """
-        Follow redirects for the URL without analyzing content types.
+    def follow_redirects(self) -> tuple[str, list[str]]:
+        """Follow redirects for the URL without analyzing content types.
 
         Returns:
             Tuple[str, list[str]]: The final URL and the redirect chain