agno 1.7.10__py3-none-any.whl → 1.7.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +13 -0
- agno/app/fastapi/app.py +3 -1
- agno/app/fastapi/async_router.py +1 -1
- agno/app/playground/app.py +1 -0
- agno/document/chunking/semantic.py +1 -3
- agno/document/reader/markdown_reader.py +2 -7
- agno/document/reader/text_reader.py +2 -2
- agno/embedder/google.py +17 -5
- agno/knowledge/agent.py +4 -5
- agno/knowledge/gcs/pdf.py +105 -1
- agno/knowledge/markdown.py +15 -2
- agno/knowledge/website.py +4 -1
- agno/media.py +2 -0
- agno/models/aws/bedrock.py +51 -21
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +81 -0
- agno/models/google/gemini.py +56 -19
- agno/models/openai/chat.py +8 -1
- agno/models/openai/responses.py +57 -23
- agno/models/qwen/__init__.py +5 -0
- agno/reasoning/default.py +7 -1
- agno/reasoning/helpers.py +7 -1
- agno/run/response.py +4 -0
- agno/run/team.py +4 -0
- agno/storage/dynamodb.py +18 -22
- agno/storage/in_memory.py +234 -0
- agno/team/team.py +175 -67
- agno/tools/brandfetch.py +210 -0
- agno/tools/bravesearch.py +7 -7
- agno/tools/calculator.py +8 -8
- agno/tools/discord.py +11 -11
- agno/tools/github.py +10 -18
- agno/tools/trafilatura.py +372 -0
- agno/tools/youtube.py +12 -11
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/milvus/milvus.py +89 -1
- agno/workflow/workflow.py +3 -0
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/METADATA +4 -1
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/RECORD +43 -37
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/WHEEL +0 -0
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/entry_points.txt +0 -0
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/licenses/LICENSE +0 -0
- {agno-1.7.10.dist-info → agno-1.7.12.dist-info}/top_level.txt +0 -0
agno/agent/agent.py
CHANGED
|
@@ -5885,6 +5885,9 @@ class Agent:
|
|
|
5885
5885
|
telemetry=self.telemetry,
|
|
5886
5886
|
debug_mode=self.debug_mode,
|
|
5887
5887
|
debug_level=self.debug_level,
|
|
5888
|
+
session_state=self.session_state,
|
|
5889
|
+
context=self.context,
|
|
5890
|
+
extra_data=self.extra_data,
|
|
5888
5891
|
)
|
|
5889
5892
|
is_deepseek = is_deepseek_reasoning_model(reasoning_model)
|
|
5890
5893
|
is_groq = is_groq_reasoning_model(reasoning_model)
|
|
@@ -5974,6 +5977,9 @@ class Agent:
|
|
|
5974
5977
|
telemetry=self.telemetry,
|
|
5975
5978
|
debug_mode=self.debug_mode,
|
|
5976
5979
|
debug_level=self.debug_level,
|
|
5980
|
+
session_state=self.session_state,
|
|
5981
|
+
context=self.context,
|
|
5982
|
+
extra_data=self.extra_data,
|
|
5977
5983
|
)
|
|
5978
5984
|
|
|
5979
5985
|
# Validate reasoning agent
|
|
@@ -6108,6 +6114,9 @@ class Agent:
|
|
|
6108
6114
|
telemetry=self.telemetry,
|
|
6109
6115
|
debug_mode=self.debug_mode,
|
|
6110
6116
|
debug_level=self.debug_level,
|
|
6117
|
+
session_state=self.session_state,
|
|
6118
|
+
context=self.context,
|
|
6119
|
+
extra_data=self.extra_data,
|
|
6111
6120
|
)
|
|
6112
6121
|
is_deepseek = is_deepseek_reasoning_model(reasoning_model)
|
|
6113
6122
|
is_groq = is_groq_reasoning_model(reasoning_model)
|
|
@@ -6197,6 +6206,9 @@ class Agent:
|
|
|
6197
6206
|
telemetry=self.telemetry,
|
|
6198
6207
|
debug_mode=self.debug_mode,
|
|
6199
6208
|
debug_level=self.debug_level,
|
|
6209
|
+
session_state=self.session_state,
|
|
6210
|
+
context=self.context,
|
|
6211
|
+
extra_data=self.extra_data,
|
|
6200
6212
|
)
|
|
6201
6213
|
|
|
6202
6214
|
# Validate reasoning agent
|
|
@@ -7443,6 +7455,7 @@ class Agent:
|
|
|
7443
7455
|
if citation.url # Only include citations with valid URLs
|
|
7444
7456
|
)
|
|
7445
7457
|
if md_content: # Only create panel if there are citations
|
|
7458
|
+
md_content = md_content.strip()
|
|
7446
7459
|
citations_panel = create_panel(
|
|
7447
7460
|
content=Markdown(md_content),
|
|
7448
7461
|
title="Citations",
|
agno/app/fastapi/app.py
CHANGED
|
@@ -81,6 +81,7 @@ class FastAPIApp(BaseAPIApp):
|
|
|
81
81
|
workflow.app_id = self.app_id
|
|
82
82
|
if not workflow.workflow_id:
|
|
83
83
|
workflow.workflow_id = generate_id(workflow.name)
|
|
84
|
+
workflow.initialize_workflow()
|
|
84
85
|
|
|
85
86
|
def get_router(self) -> APIRouter:
|
|
86
87
|
return get_sync_router(agents=self.agents, teams=self.teams, workflows=self.workflows)
|
|
@@ -95,6 +96,7 @@ class FastAPIApp(BaseAPIApp):
|
|
|
95
96
|
host: str = "localhost",
|
|
96
97
|
port: int = 7777,
|
|
97
98
|
reload: bool = False,
|
|
99
|
+
workers: Optional[int] = None,
|
|
98
100
|
**kwargs,
|
|
99
101
|
):
|
|
100
102
|
self.set_app_id()
|
|
@@ -102,4 +104,4 @@ class FastAPIApp(BaseAPIApp):
|
|
|
102
104
|
|
|
103
105
|
log_info(f"Starting API on {host}:{port}")
|
|
104
106
|
|
|
105
|
-
uvicorn.run(app=app, host=host, port=port, reload=reload, **kwargs)
|
|
107
|
+
uvicorn.run(app=app, host=host, port=port, reload=reload, workers=workers, **kwargs)
|
agno/app/fastapi/async_router.py
CHANGED
agno/app/playground/app.py
CHANGED
|
@@ -87,6 +87,7 @@ class Playground:
|
|
|
87
87
|
workflow.app_id = self.app_id
|
|
88
88
|
if not workflow.workflow_id:
|
|
89
89
|
workflow.workflow_id = generate_id(workflow.name)
|
|
90
|
+
workflow.initialize_workflow()
|
|
90
91
|
|
|
91
92
|
def set_app_id(self) -> str:
|
|
92
93
|
# If app_id is already set, keep it instead of overriding with UUID
|
|
@@ -14,9 +14,7 @@ except ImportError:
|
|
|
14
14
|
class SemanticChunking(ChunkingStrategy):
|
|
15
15
|
"""Chunking strategy that splits text into semantic chunks using chonkie"""
|
|
16
16
|
|
|
17
|
-
def __init__(
|
|
18
|
-
self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: Optional[float] = 0.5
|
|
19
|
-
):
|
|
17
|
+
def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
|
|
20
18
|
self.embedder = embedder or OpenAIEmbedder(id="text-embedding-3-small") # type: ignore
|
|
21
19
|
self.chunk_size = chunk_size
|
|
22
20
|
self.similarity_threshold = similarity_threshold
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import uuid
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import IO, Any, List,
|
|
4
|
+
from typing import IO, Any, List, Union
|
|
5
5
|
|
|
6
6
|
from agno.document.base import Document
|
|
7
|
-
from agno.document.chunking.markdown import MarkdownChunking
|
|
8
|
-
from agno.document.chunking.strategy import ChunkingStrategy
|
|
9
7
|
from agno.document.reader.base import Reader
|
|
10
8
|
from agno.utils.log import log_info, logger
|
|
11
9
|
|
|
@@ -13,9 +11,6 @@ from agno.utils.log import log_info, logger
|
|
|
13
11
|
class MarkdownReader(Reader):
|
|
14
12
|
"""Reader for Markdown files"""
|
|
15
13
|
|
|
16
|
-
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = MarkdownChunking()) -> None:
|
|
17
|
-
super().__init__(chunking_strategy=chunking_strategy)
|
|
18
|
-
|
|
19
14
|
def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
|
|
20
15
|
try:
|
|
21
16
|
if isinstance(file, Path):
|
|
@@ -30,7 +25,7 @@ class MarkdownReader(Reader):
|
|
|
30
25
|
file.seek(0)
|
|
31
26
|
file_contents = file.read().decode("utf-8")
|
|
32
27
|
|
|
33
|
-
documents = [Document(name=file_name, id=str(
|
|
28
|
+
documents = [Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)]
|
|
34
29
|
if self.chunk:
|
|
35
30
|
chunked_documents = []
|
|
36
31
|
for document in documents:
|
|
@@ -28,7 +28,7 @@ class TextReader(Reader):
|
|
|
28
28
|
documents = [
|
|
29
29
|
Document(
|
|
30
30
|
name=file_name,
|
|
31
|
-
id=str(
|
|
31
|
+
id=str(uuid.uuid4()),
|
|
32
32
|
content=file_contents,
|
|
33
33
|
)
|
|
34
34
|
]
|
|
@@ -67,7 +67,7 @@ class TextReader(Reader):
|
|
|
67
67
|
|
|
68
68
|
document = Document(
|
|
69
69
|
name=file_name,
|
|
70
|
-
id=str(
|
|
70
|
+
id=str(uuid.uuid4()),
|
|
71
71
|
content=file_contents,
|
|
72
72
|
)
|
|
73
73
|
|
agno/embedder/google.py
CHANGED
|
@@ -23,6 +23,10 @@ class GeminiEmbedder(Embedder):
|
|
|
23
23
|
request_params: Optional[Dict[str, Any]] = None
|
|
24
24
|
client_params: Optional[Dict[str, Any]] = None
|
|
25
25
|
gemini_client: Optional[GeminiClient] = None
|
|
26
|
+
# Vertex AI parameters
|
|
27
|
+
vertexai: bool = False
|
|
28
|
+
project_id: Optional[str] = None
|
|
29
|
+
location: Optional[str] = None
|
|
26
30
|
|
|
27
31
|
@property
|
|
28
32
|
def client(self):
|
|
@@ -30,13 +34,21 @@ class GeminiEmbedder(Embedder):
|
|
|
30
34
|
return self.gemini_client
|
|
31
35
|
|
|
32
36
|
_client_params: Dict[str, Any] = {}
|
|
37
|
+
vertexai = self.vertexai or getenv("GOOGLE_GENAI_USE_VERTEXAI", "false").lower() == "true"
|
|
33
38
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if self.api_key:
|
|
39
|
+
if not vertexai:
|
|
40
|
+
self.api_key = self.api_key or getenv("GOOGLE_API_KEY")
|
|
41
|
+
if not self.api_key:
|
|
42
|
+
log_error("GOOGLE_API_KEY not set. Please set the GOOGLE_API_KEY environment variable.")
|
|
39
43
|
_client_params["api_key"] = self.api_key
|
|
44
|
+
else:
|
|
45
|
+
log_info("Using Vertex AI API for embeddings")
|
|
46
|
+
_client_params["vertexai"] = True
|
|
47
|
+
_client_params["project"] = self.project_id or getenv("GOOGLE_CLOUD_PROJECT")
|
|
48
|
+
_client_params["location"] = self.location or getenv("GOOGLE_CLOUD_LOCATION")
|
|
49
|
+
|
|
50
|
+
_client_params = {k: v for k, v in _client_params.items() if v is not None}
|
|
51
|
+
|
|
40
52
|
if self.client_params:
|
|
41
53
|
_client_params.update(self.client_params)
|
|
42
54
|
|
agno/knowledge/agent.py
CHANGED
|
@@ -2,7 +2,7 @@ import asyncio
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Set, Tuple
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel, ConfigDict,
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
6
6
|
|
|
7
7
|
from agno.document import Document
|
|
8
8
|
from agno.document.chunking.fixed import FixedSizeChunking
|
|
@@ -24,8 +24,7 @@ class AgentKnowledge(BaseModel):
|
|
|
24
24
|
# Number of documents to optimize the vector db on
|
|
25
25
|
optimize_on: Optional[int] = 1000
|
|
26
26
|
|
|
27
|
-
chunking_strategy: ChunkingStrategy =
|
|
28
|
-
|
|
27
|
+
chunking_strategy: Optional[ChunkingStrategy] = None
|
|
29
28
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
30
29
|
|
|
31
30
|
valid_metadata_filters: Set[str] = None # type: ignore
|
|
@@ -33,7 +32,7 @@ class AgentKnowledge(BaseModel):
|
|
|
33
32
|
@model_validator(mode="after")
|
|
34
33
|
def update_reader(self) -> "AgentKnowledge":
|
|
35
34
|
if self.reader is not None and self.reader.chunking_strategy is None:
|
|
36
|
-
self.reader.chunking_strategy = self.chunking_strategy
|
|
35
|
+
self.reader.chunking_strategy = self.chunking_strategy or FixedSizeChunking()
|
|
37
36
|
return self
|
|
38
37
|
|
|
39
38
|
@property
|
|
@@ -237,7 +236,7 @@ class AgentKnowledge(BaseModel):
|
|
|
237
236
|
self._load_init(recreate=False, upsert=upsert)
|
|
238
237
|
if self.vector_db is None:
|
|
239
238
|
return
|
|
240
|
-
|
|
239
|
+
|
|
241
240
|
log_info("Loading knowledge base")
|
|
242
241
|
# Upsert documents if upsert is True
|
|
243
242
|
if upsert and self.vector_db.upsert_available():
|
agno/knowledge/gcs/pdf.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import AsyncIterator, Iterator, List
|
|
1
|
+
from typing import AsyncIterator, Iterator, List, Optional, Dict, Any
|
|
2
2
|
|
|
3
3
|
from agno.document import Document
|
|
4
4
|
from agno.document.reader.gcs.pdf_reader import GCSPDFReader
|
|
5
5
|
from agno.knowledge.gcs.base import GCSKnowledgeBase
|
|
6
|
+
from agno.utils.log import log_debug, log_info
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class GCSPDFKnowledgeBase(GCSKnowledgeBase):
|
|
@@ -19,3 +20,106 @@ class GCSPDFKnowledgeBase(GCSKnowledgeBase):
|
|
|
19
20
|
for blob in self.gcs_blobs:
|
|
20
21
|
if blob.name.endswith(".pdf"):
|
|
21
22
|
yield await self.reader.async_read(blob=blob)
|
|
23
|
+
|
|
24
|
+
def load(
|
|
25
|
+
self,
|
|
26
|
+
recreate: bool = False,
|
|
27
|
+
upsert: bool = False,
|
|
28
|
+
skip_existing: bool = True,
|
|
29
|
+
) -> None:
|
|
30
|
+
"""Load the knowledge base to the vector db
|
|
31
|
+
Args:
|
|
32
|
+
recreate (bool): If True, recreates the collection in the vector db. Defaults to False.
|
|
33
|
+
upsert (bool): If True, upserts documents to the vector db. Defaults to False.
|
|
34
|
+
skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
|
|
35
|
+
"""
|
|
36
|
+
self._load_init(recreate, upsert)
|
|
37
|
+
if self.vector_db is None:
|
|
38
|
+
return
|
|
39
|
+
|
|
40
|
+
log_info("Loading knowledge base")
|
|
41
|
+
num_documents = 0
|
|
42
|
+
for document_list in self.document_lists:
|
|
43
|
+
documents_to_load = document_list
|
|
44
|
+
|
|
45
|
+
# Track metadata for filtering capabilities and collect metadata for filters
|
|
46
|
+
filters_metadata: Optional[Dict[str, Any]] = None
|
|
47
|
+
for doc in document_list:
|
|
48
|
+
if doc.meta_data:
|
|
49
|
+
self._track_metadata_structure(doc.meta_data)
|
|
50
|
+
# Use the first non-None metadata for filters
|
|
51
|
+
if filters_metadata is None:
|
|
52
|
+
filters_metadata = doc.meta_data
|
|
53
|
+
|
|
54
|
+
# Skip processing if no documents in this batch
|
|
55
|
+
if not documents_to_load:
|
|
56
|
+
log_debug("Skipping empty document batch")
|
|
57
|
+
continue
|
|
58
|
+
|
|
59
|
+
# Upsert documents if upsert is True and vector db supports upsert
|
|
60
|
+
if upsert and self.vector_db.upsert_available():
|
|
61
|
+
self.vector_db.upsert(documents=documents_to_load, filters=filters_metadata)
|
|
62
|
+
# Insert documents
|
|
63
|
+
else:
|
|
64
|
+
# Filter out documents which already exist in the vector db
|
|
65
|
+
if skip_existing:
|
|
66
|
+
log_debug("Filtering out existing documents before insertion.")
|
|
67
|
+
documents_to_load = self.filter_existing_documents(document_list)
|
|
68
|
+
|
|
69
|
+
if documents_to_load:
|
|
70
|
+
self.vector_db.insert(documents=documents_to_load, filters=filters_metadata)
|
|
71
|
+
|
|
72
|
+
num_documents += len(documents_to_load)
|
|
73
|
+
log_info(f"Added {num_documents} documents to knowledge base")
|
|
74
|
+
|
|
75
|
+
async def aload(
|
|
76
|
+
self,
|
|
77
|
+
recreate: bool = False,
|
|
78
|
+
upsert: bool = False,
|
|
79
|
+
skip_existing: bool = True,
|
|
80
|
+
) -> None:
|
|
81
|
+
"""Load the knowledge base to the vector db asynchronously
|
|
82
|
+
Args:
|
|
83
|
+
recreate (bool): If True, recreates the collection in the vector db. Defaults to False.
|
|
84
|
+
upsert (bool): If True, upserts documents to the vector db. Defaults to False.
|
|
85
|
+
skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
|
|
86
|
+
"""
|
|
87
|
+
await self._aload_init(recreate, upsert)
|
|
88
|
+
if self.vector_db is None:
|
|
89
|
+
return
|
|
90
|
+
|
|
91
|
+
log_info("Loading knowledge base")
|
|
92
|
+
num_documents = 0
|
|
93
|
+
document_iterator = self.async_document_lists
|
|
94
|
+
async for document_list in document_iterator: # type: ignore
|
|
95
|
+
documents_to_load = document_list
|
|
96
|
+
|
|
97
|
+
# Track metadata for filtering capabilities and collect metadata for filters
|
|
98
|
+
filters_metadata: Optional[Dict[str, Any]] = None
|
|
99
|
+
for doc in document_list:
|
|
100
|
+
if doc.meta_data:
|
|
101
|
+
self._track_metadata_structure(doc.meta_data)
|
|
102
|
+
# Use the first non-None metadata for filters
|
|
103
|
+
if filters_metadata is None:
|
|
104
|
+
filters_metadata = doc.meta_data
|
|
105
|
+
|
|
106
|
+
# Skip processing if no documents in this batch
|
|
107
|
+
if not documents_to_load:
|
|
108
|
+
log_debug("Skipping empty document batch")
|
|
109
|
+
continue
|
|
110
|
+
|
|
111
|
+
# Upsert documents if upsert is True and vector db supports upsert
|
|
112
|
+
if upsert and self.vector_db.upsert_available():
|
|
113
|
+
await self.vector_db.async_upsert(documents=documents_to_load, filters=filters_metadata)
|
|
114
|
+
# Insert documents
|
|
115
|
+
else:
|
|
116
|
+
# Filter out documents which already exist in the vector db
|
|
117
|
+
if skip_existing:
|
|
118
|
+
log_debug("Filtering out existing documents before insertion.")
|
|
119
|
+
documents_to_load = await self.async_filter_existing_documents(document_list)
|
|
120
|
+
|
|
121
|
+
if documents_to_load:
|
|
122
|
+
await self.vector_db.async_insert(documents=documents_to_load, filters=filters_metadata)
|
|
123
|
+
|
|
124
|
+
num_documents += len(documents_to_load)
|
|
125
|
+
log_info(f"Added {num_documents} documents to knowledge base")
|
agno/knowledge/markdown.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
|
|
2
|
+
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union, cast
|
|
3
|
+
|
|
4
|
+
from pydantic import model_validator
|
|
3
5
|
|
|
4
6
|
from agno.document import Document
|
|
7
|
+
from agno.document.chunking.markdown import MarkdownChunking
|
|
5
8
|
from agno.document.reader.markdown_reader import MarkdownReader
|
|
6
9
|
from agno.knowledge.agent import AgentKnowledge
|
|
7
10
|
from agno.utils.log import log_info, logger
|
|
@@ -10,11 +13,18 @@ from agno.utils.log import log_info, logger
|
|
|
10
13
|
class MarkdownKnowledgeBase(AgentKnowledge):
|
|
11
14
|
path: Optional[Union[str, Path, List[Dict[str, Union[str, Dict[str, Any]]]]]] = None
|
|
12
15
|
formats: List[str] = [".md"]
|
|
13
|
-
reader: MarkdownReader =
|
|
16
|
+
reader: Optional[MarkdownReader] = None
|
|
17
|
+
|
|
18
|
+
@model_validator(mode="after")
|
|
19
|
+
def set_reader(self) -> "MarkdownKnowledgeBase":
|
|
20
|
+
if self.reader is None:
|
|
21
|
+
self.reader = MarkdownReader(chunking_strategy=self.chunking_strategy or MarkdownChunking())
|
|
22
|
+
return self
|
|
14
23
|
|
|
15
24
|
@property
|
|
16
25
|
def document_lists(self) -> Iterator[List[Document]]:
|
|
17
26
|
"""Iterate over text files and yield lists of documents."""
|
|
27
|
+
self.reader = cast(MarkdownReader, self.reader)
|
|
18
28
|
if self.path is None:
|
|
19
29
|
raise ValueError("Path is not set")
|
|
20
30
|
|
|
@@ -49,6 +59,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
|
|
|
49
59
|
@property
|
|
50
60
|
async def async_document_lists(self) -> AsyncIterator[List[Document]]:
|
|
51
61
|
"""Iterate over text files and yield lists of documents asynchronously."""
|
|
62
|
+
self.reader = cast(MarkdownReader, self.reader)
|
|
52
63
|
if self.path is None:
|
|
53
64
|
raise ValueError("Path is not set")
|
|
54
65
|
|
|
@@ -85,6 +96,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
|
|
|
85
96
|
skip_existing: bool = True,
|
|
86
97
|
) -> None:
|
|
87
98
|
"""Load documents from a single text file with specific metadata into the vector DB."""
|
|
99
|
+
self.reader = cast(MarkdownReader, self.reader)
|
|
88
100
|
|
|
89
101
|
_file_path = Path(path) if isinstance(path, str) else path
|
|
90
102
|
|
|
@@ -117,6 +129,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
|
|
|
117
129
|
skip_existing: bool = True,
|
|
118
130
|
) -> None:
|
|
119
131
|
"""Load documents from a single text file with specific metadata into the vector DB."""
|
|
132
|
+
self.reader = cast(MarkdownReader, self.reader)
|
|
120
133
|
|
|
121
134
|
_file_path = Path(path) if isinstance(path, str) else path
|
|
122
135
|
|
agno/knowledge/website.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
|
|
|
4
4
|
from pydantic import model_validator
|
|
5
5
|
|
|
6
6
|
from agno.document import Document
|
|
7
|
+
from agno.document.chunking.fixed import FixedSizeChunking
|
|
7
8
|
from agno.document.reader.website_reader import WebsiteReader
|
|
8
9
|
from agno.knowledge.agent import AgentKnowledge
|
|
9
10
|
from agno.utils.log import log_debug, log_info, logger
|
|
@@ -21,7 +22,9 @@ class WebsiteKnowledgeBase(AgentKnowledge):
|
|
|
21
22
|
def set_reader(self) -> "WebsiteKnowledgeBase":
|
|
22
23
|
if self.reader is None:
|
|
23
24
|
self.reader = WebsiteReader(
|
|
24
|
-
max_depth=self.max_depth,
|
|
25
|
+
max_depth=self.max_depth,
|
|
26
|
+
max_links=self.max_links,
|
|
27
|
+
chunking_strategy=self.chunking_strategy or FixedSizeChunking(),
|
|
25
28
|
)
|
|
26
29
|
return self
|
|
27
30
|
|
agno/media.py
CHANGED
|
@@ -319,6 +319,8 @@ class File(BaseModel):
|
|
|
319
319
|
mime_type: Optional[str] = None
|
|
320
320
|
# External file object (e.g. GeminiFile, must be a valid object as expected by the model you are using)
|
|
321
321
|
external: Optional[Any] = None
|
|
322
|
+
format: Optional[str] = None # E.g. `pdf`, `txt`, `csv`, `xml`, etc.
|
|
323
|
+
name: Optional[str] = None # Name of the file, mandatory for AWS Bedrock document input
|
|
322
324
|
|
|
323
325
|
@model_validator(mode="before")
|
|
324
326
|
@classmethod
|
agno/models/aws/bedrock.py
CHANGED
|
@@ -27,6 +27,11 @@ except ImportError:
|
|
|
27
27
|
AIOBOTO3_AVAILABLE = False
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
BEDROCK_SUPPORTED_IMAGE_FORMATS = ["png", "jpeg", "webp", "gif"]
|
|
31
|
+
BEDROCK_SUPPORTED_VIDEO_FORMATS = ["mp4", "mov", "mkv", "webm", "flv", "mpeg", "mpg", "wmv", "three_gp"]
|
|
32
|
+
BEDROCK_SUPPORTED_FILE_FORMATS = ["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"]
|
|
33
|
+
|
|
34
|
+
|
|
30
35
|
@dataclass
|
|
31
36
|
class AwsBedrock(Model):
|
|
32
37
|
"""
|
|
@@ -262,11 +267,16 @@ class AwsBedrock(Model):
|
|
|
262
267
|
|
|
263
268
|
if message.images:
|
|
264
269
|
for image in message.images:
|
|
265
|
-
if not image.content
|
|
266
|
-
raise ValueError("Image content
|
|
270
|
+
if not image.content:
|
|
271
|
+
raise ValueError("Image content is required for AWS Bedrock.")
|
|
272
|
+
if not image.format:
|
|
273
|
+
raise ValueError("Image format is required for AWS Bedrock.")
|
|
267
274
|
|
|
268
|
-
if image.format not in
|
|
269
|
-
raise ValueError(
|
|
275
|
+
if image.format not in BEDROCK_SUPPORTED_IMAGE_FORMATS:
|
|
276
|
+
raise ValueError(
|
|
277
|
+
f"Unsupported image format: {image.format}. "
|
|
278
|
+
f"Supported formats: {BEDROCK_SUPPORTED_IMAGE_FORMATS}"
|
|
279
|
+
)
|
|
270
280
|
|
|
271
281
|
formatted_message["content"].append(
|
|
272
282
|
{
|
|
@@ -283,21 +293,16 @@ class AwsBedrock(Model):
|
|
|
283
293
|
|
|
284
294
|
if message.videos:
|
|
285
295
|
for video in message.videos:
|
|
286
|
-
if not video.content
|
|
287
|
-
raise ValueError("Video content
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
"mpg",
|
|
297
|
-
"wmv",
|
|
298
|
-
"three_gp",
|
|
299
|
-
]:
|
|
300
|
-
raise ValueError(f"Unsupported video format: {video.format}")
|
|
296
|
+
if not video.content:
|
|
297
|
+
raise ValueError("Video content is required for AWS Bedrock.")
|
|
298
|
+
if not video.format:
|
|
299
|
+
raise ValueError("Video format is required for AWS Bedrock.")
|
|
300
|
+
|
|
301
|
+
if video.format not in BEDROCK_SUPPORTED_VIDEO_FORMATS:
|
|
302
|
+
raise ValueError(
|
|
303
|
+
f"Unsupported video format: {video.format}. "
|
|
304
|
+
f"Supported formats: {BEDROCK_SUPPORTED_VIDEO_FORMATS}"
|
|
305
|
+
)
|
|
301
306
|
|
|
302
307
|
formatted_message["content"].append(
|
|
303
308
|
{
|
|
@@ -309,8 +314,33 @@ class AwsBedrock(Model):
|
|
|
309
314
|
}
|
|
310
315
|
}
|
|
311
316
|
)
|
|
312
|
-
|
|
313
|
-
|
|
317
|
+
|
|
318
|
+
if message.files:
|
|
319
|
+
for file in message.files:
|
|
320
|
+
if not file.content:
|
|
321
|
+
raise ValueError("File content is required for AWS Bedrock document input.")
|
|
322
|
+
if not file.format:
|
|
323
|
+
raise ValueError("File format is required for AWS Bedrock document input.")
|
|
324
|
+
if not file.name:
|
|
325
|
+
raise ValueError("File name is required for AWS Bedrock document input.")
|
|
326
|
+
|
|
327
|
+
if file.format not in BEDROCK_SUPPORTED_FILE_FORMATS:
|
|
328
|
+
raise ValueError(
|
|
329
|
+
f"Unsupported file format: {file.format}. "
|
|
330
|
+
f"Supported formats: {BEDROCK_SUPPORTED_FILE_FORMATS}"
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
formatted_message["content"].append(
|
|
334
|
+
{
|
|
335
|
+
"document": {
|
|
336
|
+
"format": file.format,
|
|
337
|
+
"name": file.name,
|
|
338
|
+
"source": {
|
|
339
|
+
"bytes": file.content,
|
|
340
|
+
},
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
)
|
|
314
344
|
|
|
315
345
|
formatted_messages.append(formatted_message)
|
|
316
346
|
# TODO: Add caching: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-call.html
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from os import getenv
|
|
3
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from agno.exceptions import ModelProviderError
|
|
8
|
+
from agno.models.openai.like import OpenAILike
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class DashScope(OpenAILike):
|
|
13
|
+
"""
|
|
14
|
+
A class for interacting with Qwen models via DashScope API.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
id (str): The model id. Defaults to "qwen-plus".
|
|
18
|
+
name (str): The model name. Defaults to "Qwen".
|
|
19
|
+
provider (str): The provider name. Defaults to "Qwen".
|
|
20
|
+
api_key (Optional[str]): The DashScope API key.
|
|
21
|
+
base_url (str): The base URL. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
|
|
22
|
+
enable_thinking (Optional[bool]): Enable thinking process (DashScope native parameter). Defaults to None.
|
|
23
|
+
include_thoughts (Optional[bool]): Include thinking process in response (alternative parameter). Defaults to None.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
id: str = "qwen-plus"
|
|
27
|
+
name: str = "Qwen"
|
|
28
|
+
provider: str = "Dashscope"
|
|
29
|
+
|
|
30
|
+
api_key: Optional[str] = getenv("DASHSCOPE_API_KEY") or getenv("QWEN_API_KEY")
|
|
31
|
+
base_url: str = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
|
32
|
+
|
|
33
|
+
# Thinking parameters
|
|
34
|
+
enable_thinking: Optional[bool] = None
|
|
35
|
+
include_thoughts: Optional[bool] = None
|
|
36
|
+
|
|
37
|
+
# DashScope supports structured outputs
|
|
38
|
+
supports_native_structured_outputs: bool = True
|
|
39
|
+
supports_json_schema_outputs: bool = True
|
|
40
|
+
|
|
41
|
+
def _get_client_params(self) -> Dict[str, Any]:
|
|
42
|
+
if not self.api_key:
|
|
43
|
+
self.api_key = getenv("DASHSCOPE_API_KEY")
|
|
44
|
+
if not self.api_key:
|
|
45
|
+
raise ModelProviderError(
|
|
46
|
+
message="DASHSCOPE_API_KEY not set. Please set the DASHSCOPE_API_KEY environment variable.",
|
|
47
|
+
model_name=self.name,
|
|
48
|
+
model_id=self.id,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Define base client params
|
|
52
|
+
base_params = {
|
|
53
|
+
"api_key": self.api_key,
|
|
54
|
+
"organization": self.organization,
|
|
55
|
+
"base_url": self.base_url,
|
|
56
|
+
"timeout": self.timeout,
|
|
57
|
+
"max_retries": self.max_retries,
|
|
58
|
+
"default_headers": self.default_headers,
|
|
59
|
+
"default_query": self.default_query,
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
# Create client_params dict with non-None values
|
|
63
|
+
client_params = {k: v for k, v in base_params.items() if v is not None}
|
|
64
|
+
|
|
65
|
+
# Add additional client params if provided
|
|
66
|
+
if self.client_params:
|
|
67
|
+
client_params.update(self.client_params)
|
|
68
|
+
return client_params
|
|
69
|
+
|
|
70
|
+
def get_request_params(
|
|
71
|
+
self,
|
|
72
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
73
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
74
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
75
|
+
) -> Dict[str, Any]:
|
|
76
|
+
params = super().get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice)
|
|
77
|
+
|
|
78
|
+
should_include_thoughts = self.enable_thinking or self.include_thoughts
|
|
79
|
+
if should_include_thoughts:
|
|
80
|
+
params["extra_body"] = {"enable_thinking": True}
|
|
81
|
+
return params
|