agno 2.4.4-py3-none-any.whl → 2.4.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +47 -39
- agno/db/surrealdb/models.py +1 -1
- agno/knowledge/chunking/agentic.py +1 -5
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/document.py +22 -42
- agno/knowledge/chunking/fixed.py +1 -5
- agno/knowledge/chunking/markdown.py +9 -25
- agno/knowledge/chunking/recursive.py +1 -3
- agno/knowledge/chunking/row.py +3 -2
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +19 -0
- agno/knowledge/knowledge.py +181 -24
- agno/knowledge/reader/text_reader.py +1 -1
- agno/learn/stores/learned_knowledge.py +108 -131
- agno/team/team.py +27 -20
- agno/tools/seltz.py +134 -0
- agno/utils/print_response/agent.py +8 -8
- agno/utils/print_response/team.py +8 -8
- {agno-2.4.4.dist-info → agno-2.4.6.dist-info}/METADATA +36 -58
- {agno-2.4.4.dist-info → agno-2.4.6.dist-info}/RECORD +23 -22
- {agno-2.4.4.dist-info → agno-2.4.6.dist-info}/WHEEL +0 -0
- {agno-2.4.4.dist-info → agno-2.4.6.dist-info}/licenses/LICENSE +0 -0
- {agno-2.4.4.dist-info → agno-2.4.6.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
@@ -823,7 +823,13 @@ class Knowledge:
                     log_warning(f"Invalid filter key: {key} - not present in knowledge base")
 
         elif isinstance(filters, List):
-            # Validate
+            # Validate list filters against known metadata keys
+            if valid_metadata_filters is None or not valid_metadata_filters:
+                # Can't validate keys without metadata - return original list
+                log_warning("No valid metadata filters tracked yet. Cannot validate list filter keys.")
+                return filters, []
+
+            valid_list_filters: List[FilterExpr] = []
             for i, filter_item in enumerate(filters):
                 if not isinstance(filter_item, FilterExpr):
                     log_warning(
@@ -832,9 +838,23 @@ class Knowledge:
                         f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
                         f"AND(...), OR(...), NOT(...) from agno.filters"
                     )
-
-
-
+                    continue
+
+                # Check if filter has a key attribute and validate it
+                if hasattr(filter_item, "key"):
+                    key = filter_item.key
+                    base_key = key.split(".")[-1] if "." in key else key
+                    if base_key in valid_metadata_filters or key in valid_metadata_filters:
+                        valid_list_filters.append(filter_item)
+                    else:
+                        invalid_keys.append(key)
+                        log_warning(f"Invalid filter key: {key} - not present in knowledge base")
+                else:
+                    # Complex filters (AND, OR, NOT) - keep them as-is
+                    # They contain nested filters that will be validated by the vector DB
+                    valid_list_filters.append(filter_item)
+
+            return valid_list_filters, invalid_keys
 
         return valid_filters, invalid_keys
 
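The key detail in this hunk is how a filter key is checked: a dotted key such as meta_data.user_id is reduced to its last segment before the membership test, and composite expressions without a key attribute pass through for the vector DB to evaluate. A minimal standalone sketch of that check, independent of agno's classes (the EQ dataclass and the valid_metadata_filters set below are illustrative stand-ins):

    from dataclasses import dataclass
    from typing import List, Set, Tuple

    @dataclass
    class EQ:  # stand-in for a FilterExpr leaf; only the "key" attribute matters here
        key: str
        value: str

    def split_valid_invalid(filters: List[object], valid_metadata_filters: Set[str]) -> Tuple[List[object], List[str]]:
        """Keep filters whose key (or its last dotted segment) is a known metadata key."""
        kept, invalid = [], []
        for f in filters:
            if hasattr(f, "key"):
                base_key = f.key.split(".")[-1] if "." in f.key else f.key
                if base_key in valid_metadata_filters or f.key in valid_metadata_filters:
                    kept.append(f)
                else:
                    invalid.append(f.key)
            else:
                kept.append(f)  # composite AND/OR/NOT: defer validation to the vector DB
        return kept, invalid

    kept, invalid = split_valid_invalid(
        [EQ("meta_data.user_id", "u1"), EQ("color", "red")], {"user_id"}
    )
    print([f.key for f in kept], invalid)  # ['meta_data.user_id'] ['color']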
@@ -1541,7 +1561,49 @@ class Knowledge:
         # 6. Chunk documents if needed
         if reader and not reader.chunk:
             read_documents = await reader.chunk_documents_async(read_documents)
-
+
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        await self.vector_db.async_upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        await self.vector_db.async_insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            await self._aupdate_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
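The grouping step above is a plain dict-of-lists keyed by each chunk's source URL, falling back to the parent content URL and finally to "unknown". A minimal sketch of the same bucketing, with dictionaries standing in for agno's Document objects (which carry the URL in meta_data):

    from typing import Dict, List, Optional

    def group_by_source(read_documents: List[dict], fallback_url: Optional[str]) -> Dict[str, List[dict]]:
        """Bucket chunks by the URL they were crawled from, falling back to the parent content URL."""
        docs_by_source: Dict[str, List[dict]] = {}
        for doc in read_documents:
            source_url = doc.get("meta_data", {}).get("url") or fallback_url or "unknown"
            docs_by_source.setdefault(source_url, []).append(doc)
        return docs_by_source

    pages = [
        {"meta_data": {"url": "https://docs.example.com/a"}, "content": "..."},
        {"meta_data": {"url": "https://docs.example.com/b"}, "content": "..."},
        {"meta_data": {}, "content": "..."},  # no per-page URL: falls back to the parent URL
    ]
    print({k: len(v) for k, v in group_by_source(pages, "https://docs.example.com").items()})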
@@ -1648,7 +1710,48 @@ class Knowledge:
         if reader:
             read_documents = self._chunk_documents_sync(reader, read_documents)
 
-        # 7.
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        self.vector_db.upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        self.vector_db.insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
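The sync path mirrors the async one: for each source, upsert when the vector store supports it and upsert was requested, otherwise insert, and isolate failures so one bad page does not abort the remaining sources. A compact sketch of that control flow; the VectorStore protocol below is an illustrative stand-in, not agno's VectorDb interface:

    from typing import Dict, List, Protocol

    class VectorStore(Protocol):  # illustrative stand-in, not agno's VectorDb API
        def upsert_available(self) -> bool: ...
        def upsert(self, content_hash: str, documents: List[dict], metadata: dict) -> None: ...
        def insert(self, content_hash: str, documents: List[dict], filters: dict) -> None: ...

    def index_sources(store: VectorStore, docs_by_source: Dict[str, List[dict]],
                      metadata: dict, upsert: bool) -> None:
        """Index each crawled source on its own; a failing page is logged and skipped."""
        for source_url, source_docs in docs_by_source.items():
            doc_hash = f"hash-of-{source_url}"  # placeholder for the real per-document hash
            try:
                if store.upsert_available() and upsert:
                    store.upsert(doc_hash, source_docs, metadata)
                else:
                    store.insert(doc_hash, source_docs, filters=metadata)
            except Exception as exc:
                print(f"Error indexing {source_url}: {exc}")  # stand-in for log_error
                continue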
@@ -1900,11 +2003,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 await self._aprocess_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
@@ -1961,11 +2064,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
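The inline comments state the intent of both hunks: a topic that is already indexed (or handled by the LightRag path) should be skipped with continue rather than ending the pass over the remaining topics. A toy loop showing that difference:

    topics = ["pricing", "security", "roadmap"]
    already_indexed = {"pricing"}

    for topic in topics:
        if topic in already_indexed:
            continue  # skip only this topic; the loop keeps going
        print(f"indexing {topic}")
    # -> indexing security
    # -> indexing roadmap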
@@ -3896,6 +3999,42 @@ class Knowledge:
         hash_input = ":".join(hash_parts)
         return hashlib.sha256(hash_input.encode()).hexdigest()
 
+    def _build_document_content_hash(self, document: Document, content: Content) -> str:
+        """
+        Build content hash for a specific document.
+
+        Used for multi-page readers (like WebsiteReader) where each crawled page
+        should have its own unique content hash based on its actual URL.
+
+        Args:
+            document: The document to build the hash for
+            content: The original content object (for fallback name/description)
+
+        Returns:
+            A unique hash string for this specific document
+        """
+        hash_parts = []
+
+        if content.name:
+            hash_parts.append(content.name)
+        if content.description:
+            hash_parts.append(content.description)
+
+        # Use document's own URL if available (set by WebsiteReader)
+        doc_url = document.meta_data.get("url") if document.meta_data else None
+        if doc_url:
+            hash_parts.append(str(doc_url))
+        elif content.url:
+            hash_parts.append(content.url)
+        elif content.path:
+            hash_parts.append(str(content.path))
+        else:
+            # Fallback: use content hash for uniqueness
+            hash_parts.append(hashlib.sha256(document.content.encode()).hexdigest()[:16])
+
+        hash_input = ":".join(hash_parts)
+        return hashlib.sha256(hash_input.encode()).hexdigest()
+
     def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
         """
         Safely ensure a field is a string, handling various edge cases.
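The effect of the new helper is that two pages crawled from the same parent content no longer share one hash, because the page's own URL (when the reader recorded one) goes into the hash input. A standalone sketch of the same recipe using plain strings instead of agno's Document and Content objects:

    import hashlib
    from typing import Optional

    def page_content_hash(name: Optional[str], description: Optional[str], page_url: Optional[str],
                          parent_url: Optional[str], page_text: str) -> str:
        """Mirror of the recipe above: name/description, then the most specific URL available."""
        parts = [p for p in (name, description) if p]
        if page_url:
            parts.append(page_url)
        elif parent_url:
            parts.append(parent_url)
        else:
            parts.append(hashlib.sha256(page_text.encode()).hexdigest()[:16])
        return hashlib.sha256(":".join(parts).encode()).hexdigest()

    a = page_content_hash("docs", None, "https://example.com/a", "https://example.com", "page a")
    b = page_content_hash("docs", None, "https://example.com/b", "https://example.com", "page b")
    assert a != b  # distinct pages from the same crawl now get distinct hashes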
@@ -4451,12 +4590,10 @@ class Knowledge:
     # ========================================================================
 
     # Shared context strings
-
-        "You have
-        "
-        "
-        "If the user question is ambiguous (e.g., 'the candidate') do NOT ask clarifying questions first—search the "
-        "knowledge base to identify the relevant documents.\n"
+    _SEARCH_KNOWLEDGE_INSTRUCTIONS = (
+        "You have a knowledge base you can search using the search_knowledge_base tool. "
+        "Search before answering questions—don't assume you know the answer. "
+        "For ambiguous questions, search first rather than asking for clarification."
     )
 
     _AGENTIC_FILTER_INSTRUCTION_TEMPLATE = """
@@ -4499,7 +4636,7 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         Returns:
             Context string to add to system prompt.
         """
-        context_parts: List[str] = [self.
+        context_parts: List[str] = [self._SEARCH_KNOWLEDGE_INSTRUCTIONS]
 
         # Add filter instructions if agentic filters are enabled
         if enable_agentic_filters:
@@ -4507,7 +4644,7 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             if valid_filters:
                 context_parts.append(self._get_agentic_filter_instructions(valid_filters))
 
-        return "\n".join(context_parts)
+        return "<knowledge_base>\n" + "\n".join(context_parts) + "\n</knowledge_base>"
 
     async def abuild_context(
         self,
@@ -4526,7 +4663,7 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         Returns:
             Context string to add to system prompt.
         """
-        context_parts: List[str] = [self.
+        context_parts: List[str] = [self._SEARCH_KNOWLEDGE_INSTRUCTIONS]
 
         # Add filter instructions if agentic filters are enabled
        if enable_agentic_filters:
@@ -4534,7 +4671,7 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             if valid_filters:
                 context_parts.append(self._get_agentic_filter_instructions(valid_filters))
 
-        return "\n".join(context_parts)
+        return "<knowledge_base>\n" + "\n".join(context_parts) + "\n</knowledge_base>"
 
     def get_tools(
         self,
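Both build_context and abuild_context now wrap the assembled instructions in a <knowledge_base> tag before the string is appended to the system prompt. A quick sketch of the resulting shape; the instruction text and the filter line are abbreviated, illustrative versions of the constants above:

    context_parts = [
        "You have a knowledge base you can search using the search_knowledge_base tool. ...",
        "Valid filter keys: ['user_id', 'doc_type']",  # illustrative; only added when agentic filters are enabled
    ]
    context = "<knowledge_base>\n" + "\n".join(context_parts) + "\n</knowledge_base>"
    print(context)
    # <knowledge_base>
    # You have a knowledge base you can search using the search_knowledge_base tool. ...
    # Valid filter keys: ['user_id', 'doc_type']
    # </knowledge_base>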
@@ -4627,7 +4764,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             retrieval_timer = Timer()
             retrieval_timer.start()
 
-
+            try:
+                docs = self.search(query=query, filters=knowledge_filters)
+            except Exception as e:
+                retrieval_timer.stop()
+                log_warning(f"Knowledge search failed: {e}")
+                return f"Error searching knowledge base: {type(e).__name__}"
 
             if run_response is not None and docs:
                 references = MessageReferences(
@@ -4659,7 +4801,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             retrieval_timer = Timer()
             retrieval_timer.start()
 
-
+            try:
+                docs = await self.asearch(query=query, filters=knowledge_filters)
+            except Exception as e:
+                retrieval_timer.stop()
+                log_warning(f"Knowledge search failed: {e}")
+                return f"Error searching knowledge base: {type(e).__name__}"
 
             if run_response is not None and docs:
                 references = MessageReferences(
@@ -4737,7 +4884,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             retrieval_timer = Timer()
             retrieval_timer.start()
 
-
+            try:
+                docs = self.search(query=query, filters=search_filters)
+            except Exception as e:
+                retrieval_timer.stop()
+                log_warning(f"Knowledge search failed: {e}")
+                return f"Error searching knowledge base: {type(e).__name__}"
 
             if run_response is not None and docs:
                 references = MessageReferences(
@@ -4791,7 +4943,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
             retrieval_timer = Timer()
             retrieval_timer.start()
 
-
+            try:
+                docs = await self.asearch(query=query, filters=search_filters)
+            except Exception as e:
+                retrieval_timer.stop()
+                log_warning(f"Knowledge search failed: {e}")
+                return f"Error searching knowledge base: {type(e).__name__}"
 
             if run_response is not None and docs:
                 references = MessageReferences(
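All four retrieval helpers gain the same guard: the search call is wrapped so a vector-store failure stops the timer, logs a warning, and returns a short error string to the model instead of raising out of the tool call. A minimal sketch of that pattern with a stubbed search function; the timing and logging below are simplified stand-ins for agno's Timer and log_warning:

    import time

    def guarded_search(search_fn, query: str, filters=None) -> object:
        """Return documents, or a short error string the model can read if the backend fails."""
        start = time.perf_counter()
        try:
            docs = search_fn(query=query, filters=filters)
        except Exception as e:
            print(f"Knowledge search failed: {e}")  # stand-in for log_warning
            return f"Error searching knowledge base: {type(e).__name__}"
        finally:
            print(f"retrieval took {time.perf_counter() - start:.3f}s")
        return docs

    def flaky_search(query, filters=None):
        raise ConnectionError("vector store unreachable")

    print(guarded_search(flaky_search, "what changed in 2.4.6?"))
    # -> Error searching knowledge base: ConnectionError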
agno/knowledge/reader/text_reader.py
CHANGED
@@ -110,7 +110,7 @@ class TextReader(Reader):
         chunked_documents = self.chunk_document(document)
 
         if not chunked_documents:
-            return [
+            return []
 
         tasks = [process_chunk(chunk_doc) for chunk_doc in chunked_documents]
         return await asyncio.gather(*tasks)