agno 1.7.9__py3-none-any.whl → 1.7.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. agno/agent/agent.py +1 -1
  2. agno/app/fastapi/app.py +3 -1
  3. agno/app/fastapi/async_router.py +1 -1
  4. agno/app/playground/app.py +1 -0
  5. agno/document/chunking/semantic.py +1 -3
  6. agno/document/reader/markdown_reader.py +2 -7
  7. agno/document/reader/pdf_reader.py +69 -13
  8. agno/document/reader/text_reader.py +2 -2
  9. agno/knowledge/agent.py +70 -75
  10. agno/knowledge/markdown.py +15 -2
  11. agno/knowledge/pdf.py +32 -8
  12. agno/knowledge/pdf_url.py +13 -5
  13. agno/knowledge/website.py +4 -1
  14. agno/media.py +2 -0
  15. agno/models/aws/bedrock.py +51 -21
  16. agno/models/dashscope/__init__.py +5 -0
  17. agno/models/dashscope/dashscope.py +81 -0
  18. agno/models/openai/chat.py +3 -0
  19. agno/models/openai/responses.py +53 -7
  20. agno/models/qwen/__init__.py +5 -0
  21. agno/run/response.py +4 -0
  22. agno/run/team.py +4 -0
  23. agno/storage/in_memory.py +234 -0
  24. agno/team/team.py +25 -9
  25. agno/tools/brandfetch.py +210 -0
  26. agno/tools/github.py +46 -18
  27. agno/tools/trafilatura.py +372 -0
  28. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  29. agno/vectordb/milvus/milvus.py +89 -1
  30. agno/vectordb/weaviate/weaviate.py +84 -18
  31. agno/workflow/workflow.py +3 -0
  32. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/METADATA +5 -1
  33. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/RECORD +37 -31
  34. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/WHEEL +0 -0
  35. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/entry_points.txt +0 -0
  36. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/licenses/LICENSE +0 -0
  37. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/top_level.txt +0 -0
agno/agent/agent.py CHANGED
@@ -6872,7 +6872,7 @@ class Agent:
6872
6872
  document_name = query.replace(" ", "_").replace("?", "").replace("!", "").replace(".", "")
6873
6873
  document_content = json.dumps({"query": query, "result": result})
6874
6874
  log_info(f"Adding document to knowledge base: {document_name}: {document_content}")
6875
- self.knowledge.add_document_to_knowledge_base(
6875
+ self.knowledge.load_document(
6876
6876
  document=Document(
6877
6877
  name=document_name,
6878
6878
  content=document_content,
agno/app/fastapi/app.py CHANGED
@@ -81,6 +81,7 @@ class FastAPIApp(BaseAPIApp):
81
81
  workflow.app_id = self.app_id
82
82
  if not workflow.workflow_id:
83
83
  workflow.workflow_id = generate_id(workflow.name)
84
+ workflow.initialize_workflow()
84
85
 
85
86
  def get_router(self) -> APIRouter:
86
87
  return get_sync_router(agents=self.agents, teams=self.teams, workflows=self.workflows)
@@ -95,6 +96,7 @@ class FastAPIApp(BaseAPIApp):
95
96
  host: str = "localhost",
96
97
  port: int = 7777,
97
98
  reload: bool = False,
99
+ workers: Optional[int] = None,
98
100
  **kwargs,
99
101
  ):
100
102
  self.set_app_id()
@@ -102,4 +104,4 @@ class FastAPIApp(BaseAPIApp):
102
104
 
103
105
  log_info(f"Starting API on {host}:{port}")
104
106
 
105
- uvicorn.run(app=app, host=host, port=port, reload=reload, **kwargs)
107
+ uvicorn.run(app=app, host=host, port=port, reload=reload, workers=workers, **kwargs)
@@ -231,7 +231,7 @@ def get_async_router(
231
231
 
232
232
  return base64_images, base64_audios, base64_videos
233
233
 
234
- def team_process_file(
234
+ async def team_process_file(
235
235
  files: List[UploadFile],
236
236
  ):
237
237
  base64_images: List[Image] = []
@@ -87,6 +87,7 @@ class Playground:
87
87
  workflow.app_id = self.app_id
88
88
  if not workflow.workflow_id:
89
89
  workflow.workflow_id = generate_id(workflow.name)
90
+ workflow.initialize_workflow()
90
91
 
91
92
  def set_app_id(self) -> str:
92
93
  # If app_id is already set, keep it instead of overriding with UUID
@@ -14,9 +14,7 @@ except ImportError:
14
14
  class SemanticChunking(ChunkingStrategy):
15
15
  """Chunking strategy that splits text into semantic chunks using chonkie"""
16
16
 
17
- def __init__(
18
- self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: Optional[float] = 0.5
19
- ):
17
+ def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
20
18
  self.embedder = embedder or OpenAIEmbedder(id="text-embedding-3-small") # type: ignore
21
19
  self.chunk_size = chunk_size
22
20
  self.similarity_threshold = similarity_threshold
@@ -1,11 +1,9 @@
1
1
  import asyncio
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import IO, Any, List, Optional, Union
4
+ from typing import IO, Any, List, Union
5
5
 
6
6
  from agno.document.base import Document
7
- from agno.document.chunking.markdown import MarkdownChunking
8
- from agno.document.chunking.strategy import ChunkingStrategy
9
7
  from agno.document.reader.base import Reader
10
8
  from agno.utils.log import log_info, logger
11
9
 
@@ -13,9 +11,6 @@ from agno.utils.log import log_info, logger
13
11
  class MarkdownReader(Reader):
14
12
  """Reader for Markdown files"""
15
13
 
16
- def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = MarkdownChunking()) -> None:
17
- super().__init__(chunking_strategy=chunking_strategy)
18
-
19
14
  def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
20
15
  try:
21
16
  if isinstance(file, Path):
@@ -30,7 +25,7 @@ class MarkdownReader(Reader):
30
25
  file.seek(0)
31
26
  file_contents = file.read().decode("utf-8")
32
27
 
33
- documents = [Document(name=file_name, id=str({uuid.uuid4()}), content=file_contents)]
28
+ documents = [Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)]
34
29
  if self.chunk:
35
30
  chunked_documents = []
36
31
  for document in documents:
@@ -7,7 +7,7 @@ from uuid import uuid4
7
7
  from agno.document.base import Document
8
8
  from agno.document.reader.base import Reader
9
9
  from agno.utils.http import async_fetch_with_retry, fetch_with_retry
10
- from agno.utils.log import log_info, logger
10
+ from agno.utils.log import log_error, log_info, logger
11
11
 
12
12
  try:
13
13
  from pypdf import PdfReader as DocumentReader # noqa: F401
@@ -177,6 +177,7 @@ class BasePDFReader(Reader):
177
177
  split_on_pages: bool = True,
178
178
  page_start_numbering_format: Optional[str] = None,
179
179
  page_end_numbering_format: Optional[str] = None,
180
+ password: Optional[str] = None,
180
181
  **kwargs,
181
182
  ):
182
183
  if page_start_numbering_format is None:
@@ -187,6 +188,7 @@ class BasePDFReader(Reader):
187
188
  self.split_on_pages = split_on_pages
188
189
  self.page_start_numbering_format = page_start_numbering_format
189
190
  self.page_end_numbering_format = page_end_numbering_format
191
+ self.password = password
190
192
 
191
193
  super().__init__(**kwargs)
192
194
 
@@ -196,6 +198,28 @@ class BasePDFReader(Reader):
196
198
  chunked_documents.extend(self.chunk_document(document))
197
199
  return chunked_documents
198
200
 
201
+ def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
202
+ if not doc_reader.is_encrypted:
203
+ return True
204
+
205
+ # Use provided password or fall back to instance password
206
+ pdf_password = password or self.password
207
+ if not pdf_password:
208
+ logger.error(f"PDF {doc_name} is password protected but no password provided")
209
+ return False
210
+
211
+ try:
212
+ decrypted_pdf = doc_reader.decrypt(pdf_password)
213
+ if decrypted_pdf:
214
+ log_info(f"Successfully decrypted PDF {doc_name} with user password")
215
+ return True
216
+ else:
217
+ log_error(f"Failed to decrypt PDF {doc_name}: incorrect password")
218
+ return False
219
+ except Exception as e:
220
+ log_error(f"Error decrypting PDF {doc_name}: {e}")
221
+ return False
222
+
199
223
  def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
200
224
  if self.split_on_pages:
201
225
  shift = page_number_shift if page_number_shift is not None else 1
@@ -282,7 +306,7 @@ class BasePDFReader(Reader):
282
306
  class PDFReader(BasePDFReader):
283
307
  """Reader for PDF files"""
284
308
 
285
- def read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
309
+ def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
286
310
  try:
287
311
  if isinstance(pdf, str):
288
312
  doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
@@ -299,10 +323,14 @@ class PDFReader(BasePDFReader):
299
323
  logger.error(f"Error reading PDF: {e}")
300
324
  return []
301
325
 
326
+ # Handle PDF decryption
327
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
328
+ return []
329
+
302
330
  # Read and chunk.
303
331
  return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
304
332
 
305
- async def async_read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
333
+ async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
306
334
  try:
307
335
  if isinstance(pdf, str):
308
336
  doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
@@ -319,6 +347,10 @@ class PDFReader(BasePDFReader):
319
347
  logger.error(f"Error reading PDF: {e}")
320
348
  return []
321
349
 
350
+ # Handle PDF decryption
351
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
352
+ return []
353
+
322
354
  # Read and chunk.
323
355
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
324
356
 
@@ -326,11 +358,11 @@ class PDFReader(BasePDFReader):
326
358
  class PDFUrlReader(BasePDFReader):
327
359
  """Reader for PDF files from URL"""
328
360
 
329
- def __init__(self, proxy: Optional[str] = None, **kwargs):
330
- super().__init__(**kwargs)
361
+ def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
362
+ super().__init__(password=password, **kwargs)
331
363
  self.proxy = proxy
332
364
 
333
- def read(self, url: str) -> List[Document]:
365
+ def read(self, url: str, password: Optional[str] = None) -> List[Document]:
334
366
  if not url:
335
367
  raise ValueError("No url provided")
336
368
 
@@ -344,10 +376,14 @@ class PDFUrlReader(BasePDFReader):
344
376
  doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
345
377
  pdf_reader = DocumentReader(BytesIO(response.content))
346
378
 
379
+ # Handle PDF decryption
380
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
381
+ return []
382
+
347
383
  # Read and chunk.
348
384
  return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
349
385
 
350
- async def async_read(self, url: str) -> List[Document]:
386
+ async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
351
387
  if not url:
352
388
  raise ValueError("No url provided")
353
389
 
@@ -364,6 +400,10 @@ class PDFUrlReader(BasePDFReader):
364
400
  doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
365
401
  pdf_reader = DocumentReader(BytesIO(response.content))
366
402
 
403
+ # Handle PDF decryption
404
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
405
+ return []
406
+
367
407
  # Read and chunk.
368
408
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
369
409
 
@@ -371,7 +411,7 @@ class PDFUrlReader(BasePDFReader):
371
411
  class PDFImageReader(BasePDFReader):
372
412
  """Reader for PDF files with text and images extraction"""
373
413
 
374
- def read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
414
+ def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
375
415
  if not pdf:
376
416
  raise ValueError("No pdf provided")
377
417
 
@@ -386,10 +426,14 @@ class PDFImageReader(BasePDFReader):
386
426
  log_info(f"Reading: {doc_name}")
387
427
  pdf_reader = DocumentReader(pdf)
388
428
 
429
+ # Handle PDF decryption
430
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
431
+ return []
432
+
389
433
  # Read and chunk.
390
434
  return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
391
435
 
392
- async def async_read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
436
+ async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
393
437
  if not pdf:
394
438
  raise ValueError("No pdf provided")
395
439
 
@@ -404,6 +448,10 @@ class PDFImageReader(BasePDFReader):
404
448
  log_info(f"Reading: {doc_name}")
405
449
  pdf_reader = DocumentReader(pdf)
406
450
 
451
+ # Handle PDF decryption
452
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
453
+ return []
454
+
407
455
  # Read and chunk.
408
456
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
409
457
 
@@ -411,11 +459,11 @@ class PDFImageReader(BasePDFReader):
411
459
  class PDFUrlImageReader(BasePDFReader):
412
460
  """Reader for PDF files from URL with text and images extraction"""
413
461
 
414
- def __init__(self, proxy: Optional[str] = None, **kwargs):
415
- super().__init__(**kwargs)
462
+ def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
463
+ super().__init__(password=password, **kwargs)
416
464
  self.proxy = proxy
417
465
 
418
- def read(self, url: str) -> List[Document]:
466
+ def read(self, url: str, password: Optional[str] = None) -> List[Document]:
419
467
  if not url:
420
468
  raise ValueError("No url provided")
421
469
 
@@ -430,10 +478,14 @@ class PDFUrlImageReader(BasePDFReader):
430
478
  doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
431
479
  pdf_reader = DocumentReader(BytesIO(response.content))
432
480
 
481
+ # Handle PDF decryption
482
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
483
+ return []
484
+
433
485
  # Read and chunk.
434
486
  return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
435
487
 
436
- async def async_read(self, url: str) -> List[Document]:
488
+ async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
437
489
  if not url:
438
490
  raise ValueError("No url provided")
439
491
 
@@ -451,5 +503,9 @@ class PDFUrlImageReader(BasePDFReader):
451
503
  doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
452
504
  pdf_reader = DocumentReader(BytesIO(response.content))
453
505
 
506
+ # Handle PDF decryption
507
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
508
+ return []
509
+
454
510
  # Read and chunk.
455
511
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
@@ -28,7 +28,7 @@ class TextReader(Reader):
28
28
  documents = [
29
29
  Document(
30
30
  name=file_name,
31
- id=str({uuid.uuid4()}),
31
+ id=str(uuid.uuid4()),
32
32
  content=file_contents,
33
33
  )
34
34
  ]
@@ -67,7 +67,7 @@ class TextReader(Reader):
67
67
 
68
68
  document = Document(
69
69
  name=file_name,
70
- id=str({uuid.uuid4()}),
70
+ id=str(uuid.uuid4()),
71
71
  content=file_contents,
72
72
  )
73
73
 
agno/knowledge/agent.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
2
2
  from pathlib import Path
3
3
  from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Set, Tuple
4
4
 
5
- from pydantic import BaseModel, ConfigDict, Field, model_validator
5
+ from pydantic import BaseModel, ConfigDict, model_validator
6
6
 
7
7
  from agno.document import Document
8
8
  from agno.document.chunking.fixed import FixedSizeChunking
@@ -24,8 +24,7 @@ class AgentKnowledge(BaseModel):
24
24
  # Number of documents to optimize the vector db on
25
25
  optimize_on: Optional[int] = 1000
26
26
 
27
- chunking_strategy: ChunkingStrategy = Field(default_factory=FixedSizeChunking)
28
-
27
+ chunking_strategy: Optional[ChunkingStrategy] = None
29
28
  model_config = ConfigDict(arbitrary_types_allowed=True)
30
29
 
31
30
  valid_metadata_filters: Set[str] = None # type: ignore
@@ -33,7 +32,7 @@ class AgentKnowledge(BaseModel):
33
32
  @model_validator(mode="after")
34
33
  def update_reader(self) -> "AgentKnowledge":
35
34
  if self.reader is not None and self.reader.chunking_strategy is None:
36
- self.reader.chunking_strategy = self.chunking_strategy
35
+ self.reader.chunking_strategy = self.chunking_strategy or FixedSizeChunking()
37
36
  return self
38
37
 
39
38
  @property
@@ -50,6 +49,53 @@ class AgentKnowledge(BaseModel):
50
49
  """
51
50
  raise NotImplementedError
52
51
 
52
+ def _upsert_warning(self, upsert) -> None:
53
+ """Log a warning if upsert is not available"""
54
+ if upsert and self.vector_db is not None and not self.vector_db.upsert_available():
55
+ log_info(
56
+ f"Vector db '{self.vector_db.__class__.__module__}' does not support upsert. Falling back to insert."
57
+ )
58
+
59
+ def _load_init(self, recreate: bool, upsert: bool) -> None:
60
+ """Initial setup for loading knowledge base"""
61
+ if self.vector_db is None:
62
+ logger.warning("No vector db provided")
63
+ return
64
+
65
+ if recreate:
66
+ log_info("Dropping collection")
67
+ self.vector_db.drop()
68
+
69
+ if not self.vector_db.exists():
70
+ log_info("Creating collection")
71
+ self.vector_db.create()
72
+
73
+ self._upsert_warning(upsert)
74
+
75
+ async def _aload_init(self, recreate: bool, upsert: bool) -> None:
76
+ """Initial async setup for loading knowledge base"""
77
+ if self.vector_db is None:
78
+ logger.warning("No vector db provided")
79
+ return
80
+
81
+ if recreate:
82
+ log_info("Dropping collection")
83
+ try:
84
+ await self.vector_db.async_drop()
85
+ except NotImplementedError:
86
+ logger.warning("Vector db does not support async drop, falling back to sync drop")
87
+ self.vector_db.drop()
88
+
89
+ if not self.vector_db.exists():
90
+ log_info("Creating collection")
91
+ try:
92
+ await self.vector_db.async_create()
93
+ except NotImplementedError:
94
+ logger.warning("Vector db does not support async create, falling back to sync create")
95
+ self.vector_db.create()
96
+
97
+ self._upsert_warning(upsert)
98
+
53
99
  def search(
54
100
  self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
55
101
  ) -> List[Document]:
@@ -80,7 +126,7 @@ class AgentKnowledge(BaseModel):
80
126
  try:
81
127
  return await self.vector_db.async_search(query=query, limit=_num_documents, filters=filters)
82
128
  except NotImplementedError:
83
- logger.info("Vector db does not support async search")
129
+ log_info("Vector db does not support async search")
84
130
  return self.search(query=query, num_documents=_num_documents, filters=filters)
85
131
  except Exception as e:
86
132
  logger.error(f"Error searching for documents: {e}")
@@ -99,18 +145,10 @@ class AgentKnowledge(BaseModel):
99
145
  upsert (bool): If True, upserts documents to the vector db. Defaults to False.
100
146
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
101
147
  """
148
+ self._load_init(recreate, upsert)
102
149
  if self.vector_db is None:
103
- logger.warning("No vector db provided")
104
150
  return
105
151
 
106
- if recreate:
107
- log_info("Dropping collection")
108
- self.vector_db.drop()
109
-
110
- if not self.vector_db.exists():
111
- log_info("Creating collection")
112
- self.vector_db.create()
113
-
114
152
  log_info("Loading knowledge base")
115
153
  num_documents = 0
116
154
  for document_list in self.document_lists:
@@ -123,8 +161,7 @@ class AgentKnowledge(BaseModel):
123
161
 
124
162
  # Upsert documents if upsert is True and vector db supports upsert
125
163
  if upsert and self.vector_db.upsert_available():
126
- for doc in document_list:
127
- self.vector_db.upsert(documents=[doc], filters=doc.meta_data)
164
+ self.vector_db.upsert(documents=documents_to_load, filters=doc.meta_data)
128
165
  # Insert documents
129
166
  else:
130
167
  # Filter out documents which already exist in the vector db
@@ -133,11 +170,10 @@ class AgentKnowledge(BaseModel):
133
170
  documents_to_load = self.filter_existing_documents(document_list)
134
171
 
135
172
  if documents_to_load:
136
- for doc in documents_to_load:
137
- self.vector_db.insert(documents=[doc], filters=doc.meta_data)
173
+ self.vector_db.insert(documents=documents_to_load, filters=doc.meta_data)
138
174
 
139
175
  num_documents += len(documents_to_load)
140
- log_info(f"Added {len(documents_to_load)} documents to knowledge base")
176
+ log_info(f"Added {num_documents} documents to knowledge base")
141
177
 
142
178
  async def aload(
143
179
  self,
@@ -152,19 +188,10 @@ class AgentKnowledge(BaseModel):
152
188
  upsert (bool): If True, upserts documents to the vector db. Defaults to False.
153
189
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
154
190
  """
155
-
191
+ await self._aload_init(recreate, upsert)
156
192
  if self.vector_db is None:
157
- logger.warning("No vector db provided")
158
193
  return
159
194
 
160
- if recreate:
161
- log_info("Dropping collection")
162
- await self.vector_db.async_drop()
163
-
164
- if not await self.vector_db.async_exists():
165
- log_info("Creating collection")
166
- await self.vector_db.async_create()
167
-
168
195
  log_info("Loading knowledge base")
169
196
  num_documents = 0
170
197
  document_iterator = self.async_document_lists
@@ -177,8 +204,7 @@ class AgentKnowledge(BaseModel):
177
204
 
178
205
  # Upsert documents if upsert is True and vector db supports upsert
179
206
  if upsert and self.vector_db.upsert_available():
180
- for doc in document_list:
181
- await self.vector_db.async_upsert(documents=[doc], filters=doc.meta_data)
207
+ await self.vector_db.async_upsert(documents=documents_to_load, filters=doc.meta_data)
182
208
  # Insert documents
183
209
  else:
184
210
  # Filter out documents which already exist in the vector db
@@ -187,11 +213,10 @@ class AgentKnowledge(BaseModel):
187
213
  documents_to_load = await self.async_filter_existing_documents(document_list)
188
214
 
189
215
  if documents_to_load:
190
- for doc in documents_to_load:
191
- await self.vector_db.async_insert(documents=[doc], filters=doc.meta_data)
216
+ await self.vector_db.async_insert(documents=documents_to_load, filters=doc.meta_data)
192
217
 
193
218
  num_documents += len(documents_to_load)
194
- log_info(f"Added {len(documents_to_load)} documents to knowledge base")
219
+ log_info(f"Added {num_documents} documents to knowledge base")
195
220
 
196
221
  def load_documents(
197
222
  self,
@@ -208,15 +233,11 @@ class AgentKnowledge(BaseModel):
208
233
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
209
234
  filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to limit results during querying. Defaults to None.
210
235
  """
211
-
212
- log_info("Loading knowledge base")
236
+ self._load_init(recreate=False, upsert=upsert)
213
237
  if self.vector_db is None:
214
- logger.warning("No vector db provided")
215
238
  return
216
239
 
217
- log_debug("Creating collection")
218
- self.vector_db.create()
219
-
240
+ log_info("Loading knowledge base")
220
241
  # Upsert documents if upsert is True
221
242
  if upsert and self.vector_db.upsert_available():
222
243
  self.vector_db.upsert(documents=documents, filters=filters)
@@ -251,17 +272,11 @@ class AgentKnowledge(BaseModel):
251
272
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
252
273
  filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to limit results during querying. Defaults to None.
253
274
  """
254
- log_info("Loading knowledge base")
275
+ await self._aload_init(recreate=False, upsert=upsert)
255
276
  if self.vector_db is None:
256
- logger.warning("No vector db provided")
257
277
  return
258
278
 
259
- log_debug("Creating collection")
260
- try:
261
- await self.vector_db.async_create()
262
- except NotImplementedError:
263
- logger.warning("Vector db does not support async create")
264
- self.vector_db.create()
279
+ log_info("Loading knowledge base")
265
280
 
266
281
  # Upsert documents if upsert is True
267
282
  if upsert and self.vector_db.upsert_available():
@@ -302,7 +317,7 @@ class AgentKnowledge(BaseModel):
302
317
  else:
303
318
  log_info("No new documents to load")
304
319
 
305
- def add_document_to_knowledge_base(
320
+ def load_document(
306
321
  self,
307
322
  document: Document,
308
323
  upsert: bool = False,
@@ -414,8 +429,6 @@ class AgentKnowledge(BaseModel):
414
429
  Returns:
415
430
  List[Document]: Filtered list of documents that don't exist in the database
416
431
  """
417
- from agno.utils.log import log_debug, log_info
418
-
419
432
  if not self.vector_db:
420
433
  log_debug("No vector database configured, skipping document filtering")
421
434
  return documents
@@ -556,20 +569,9 @@ class AgentKnowledge(BaseModel):
556
569
  self._track_metadata_structure(metadata)
557
570
 
558
571
  # 3. Prepare vector DB
572
+ self._load_init(recreate, upsert=False)
559
573
  if self.vector_db is None:
560
- logger.warning("Cannot load file: No vector db provided.")
561
574
  return False
562
-
563
- # Recreate collection if requested
564
- if recreate:
565
- # log_info(f"Recreating collection.")
566
- self.vector_db.drop()
567
-
568
- # Create collection if it doesn't exist
569
- if not self.vector_db.exists():
570
- # log_info(f"Collection does not exist. Creating.")
571
- self.vector_db.create()
572
-
573
575
  return True
574
576
 
575
577
  async def aprepare_load(
@@ -604,20 +606,9 @@ class AgentKnowledge(BaseModel):
604
606
  self._track_metadata_structure(metadata)
605
607
 
606
608
  # 3. Prepare vector DB
609
+ await self._aload_init(recreate, upsert=False)
607
610
  if self.vector_db is None:
608
- logger.warning("Cannot load file: No vector db provided.")
609
611
  return False
610
-
611
- # Recreate collection if requested
612
- if recreate:
613
- log_info("Recreating collection.")
614
- await self.vector_db.async_drop()
615
-
616
- # Create collection if it doesn't exist
617
- if not await self.vector_db.async_exists():
618
- log_info("Collection does not exist. Creating.")
619
- await self.vector_db.async_create()
620
-
621
612
  return True
622
613
 
623
614
  def process_documents(
@@ -642,6 +633,8 @@ class AgentKnowledge(BaseModel):
642
633
 
643
634
  log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")
644
635
 
636
+ self._upsert_warning(upsert)
637
+
645
638
  # Decide loading strategy: upsert or insert (with optional skip)
646
639
  if upsert and self.vector_db.upsert_available(): # type: ignore
647
640
  log_debug(f"Upserting {len(documents)} documents.") # type: ignore
@@ -681,6 +674,8 @@ class AgentKnowledge(BaseModel):
681
674
  logger.warning(f"No documents were read from {source_info}")
682
675
  return
683
676
 
677
+ self._upsert_warning(upsert)
678
+
684
679
  log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")
685
680
 
686
681
  # Decide loading strategy: upsert or insert (with optional skip)
@@ -1,7 +1,10 @@
1
1
  from pathlib import Path
2
- from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
2
+ from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union, cast
3
+
4
+ from pydantic import model_validator
3
5
 
4
6
  from agno.document import Document
7
+ from agno.document.chunking.markdown import MarkdownChunking
5
8
  from agno.document.reader.markdown_reader import MarkdownReader
6
9
  from agno.knowledge.agent import AgentKnowledge
7
10
  from agno.utils.log import log_info, logger
@@ -10,11 +13,18 @@ from agno.utils.log import log_info, logger
10
13
  class MarkdownKnowledgeBase(AgentKnowledge):
11
14
  path: Optional[Union[str, Path, List[Dict[str, Union[str, Dict[str, Any]]]]]] = None
12
15
  formats: List[str] = [".md"]
13
- reader: MarkdownReader = MarkdownReader()
16
+ reader: Optional[MarkdownReader] = None
17
+
18
+ @model_validator(mode="after")
19
+ def set_reader(self) -> "MarkdownKnowledgeBase":
20
+ if self.reader is None:
21
+ self.reader = MarkdownReader(chunking_strategy=self.chunking_strategy or MarkdownChunking())
22
+ return self
14
23
 
15
24
  @property
16
25
  def document_lists(self) -> Iterator[List[Document]]:
17
26
  """Iterate over text files and yield lists of documents."""
27
+ self.reader = cast(MarkdownReader, self.reader)
18
28
  if self.path is None:
19
29
  raise ValueError("Path is not set")
20
30
 
@@ -49,6 +59,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
49
59
  @property
50
60
  async def async_document_lists(self) -> AsyncIterator[List[Document]]:
51
61
  """Iterate over text files and yield lists of documents asynchronously."""
62
+ self.reader = cast(MarkdownReader, self.reader)
52
63
  if self.path is None:
53
64
  raise ValueError("Path is not set")
54
65
 
@@ -85,6 +96,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
85
96
  skip_existing: bool = True,
86
97
  ) -> None:
87
98
  """Load documents from a single text file with specific metadata into the vector DB."""
99
+ self.reader = cast(MarkdownReader, self.reader)
88
100
 
89
101
  _file_path = Path(path) if isinstance(path, str) else path
90
102
 
@@ -117,6 +129,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
117
129
  skip_existing: bool = True,
118
130
  ) -> None:
119
131
  """Load documents from a single text file with specific metadata into the vector DB."""
132
+ self.reader = cast(MarkdownReader, self.reader)
120
133
 
121
134
  _file_path = Path(path) if isinstance(path, str) else path
122
135