agno 2.4.5__py3-none-any.whl → 2.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agno/agent/agent.py CHANGED
@@ -858,8 +858,9 @@ class Agent:
             return
 
         # Handle learning=True: create default LearningMachine
+        # Enables user_profile (structured fields) and user_memory (unstructured observations)
         if self.learning is True:
-            self._learning = LearningMachine(db=self.db, model=self.model, user_profile=True)
+            self._learning = LearningMachine(db=self.db, model=self.model, user_profile=True, user_memory=True)
             return
 
         # Handle learning=LearningMachine(...): inject dependencies
@@ -48,7 +48,7 @@ def surrealize_dates(record: dict) -> dict:
         if isinstance(value, date):
             copy[key] = datetime.combine(value, datetime.min.time()).replace(tzinfo=timezone.utc)
         elif key in ["created_at", "updated_at"] and isinstance(value, (int, float)):
-            copy[key] = datetime.fromtimestamp(value).replace(tzinfo=timezone.utc)
+            copy[key] = datetime.fromtimestamp(value, tz=timezone.utc)
         elif key in ["created_at", "updated_at"] and isinstance(value, str):
             # Handle ISO string format - convert back to datetime object for SurrealDB
             try:
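The removed line interpreted the epoch value in the host's local timezone and only then labelled the naive result as UTC, shifting the stored instant; the new form converts directly. A minimal stdlib illustration (not part of the package):

```python
from datetime import datetime, timezone

ts = 1_700_000_000
# Old: naive local-time conversion, then the UTC label is bolted on afterwards,
# so the stored instant shifts by the host's UTC offset.
old = datetime.fromtimestamp(ts).replace(tzinfo=timezone.utc)
# New: convert the epoch timestamp straight to an aware UTC datetime.
new = datetime.fromtimestamp(ts, tz=timezone.utc)
print(old, new)  # identical only when the host clock is already UTC
```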
@@ -55,11 +55,7 @@ class AgenticChunking(ChunkingStrategy):
             chunk = remaining_text[:break_point].strip()
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
-            chunk_id = None
-            if document.id:
-                chunk_id = f"{document.id}_{chunk_number}"
-            elif document.name:
-                chunk_id = f"{document.name}_{chunk_number}"
+            chunk_id = self._generate_chunk_id(document, chunk_number, chunk)
             meta_data["chunk_size"] = len(chunk)
             chunks.append(
                 Document(
@@ -82,7 +82,7 @@ class CodeChunking(ChunkingStrategy):
         for i, chunk in enumerate(chunks, 1):
             meta_data = document.meta_data.copy()
             meta_data["chunk"] = i
-            chunk_id = f"{document.id}_{i}" if document.id else None
+            chunk_id = self._generate_chunk_id(document, i, chunk.text)
             meta_data["chunk_size"] = len(chunk.text)
 
             chunked_documents.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk.text))
@@ -38,17 +38,10 @@ class DocumentChunking(ChunkingStrategy):
             if current_chunk:
                 meta_data = chunk_meta_data.copy()
                 meta_data["chunk"] = chunk_number
-                chunk_id = None
-                if document.id:
-                    chunk_id = f"{document.id}_{chunk_number}"
-                elif document.name:
-                    chunk_id = f"{document.name}_{chunk_number}"
-                meta_data["chunk_size"] = len("\n\n".join(current_chunk))
-                chunks.append(
-                    Document(
-                        id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk)
-                    )
-                )
+                chunk_content = "\n\n".join(current_chunk)
+                chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+                meta_data["chunk_size"] = len(chunk_content)
+                chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
                 chunk_number += 1
                 current_chunk = []
                 current_size = 0
@@ -70,18 +63,15 @@ class DocumentChunking(ChunkingStrategy):
                 if current_chunk:
                     meta_data = chunk_meta_data.copy()
                     meta_data["chunk"] = chunk_number
-                    chunk_id = None
-                    if document.id:
-                        chunk_id = f"{document.id}_{chunk_number}"
-                    elif document.name:
-                        chunk_id = f"{document.name}_{chunk_number}"
-                    meta_data["chunk_size"] = len(" ".join(current_chunk))
+                    chunk_content = " ".join(current_chunk)
+                    chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+                    meta_data["chunk_size"] = len(chunk_content)
                     chunks.append(
                         Document(
                             id=chunk_id,
                             name=document.name,
                             meta_data=meta_data,
-                            content=" ".join(current_chunk),
+                            content=chunk_content,
                         )
                     )
                     chunk_number += 1
@@ -94,18 +84,11 @@ class DocumentChunking(ChunkingStrategy):
             else:
                 meta_data = chunk_meta_data.copy()
                 meta_data["chunk"] = chunk_number
-                chunk_id = None
-                if document.id:
-                    chunk_id = f"{document.id}_{chunk_number}"
-                elif document.name:
-                    chunk_id = f"{document.name}_{chunk_number}"
-                meta_data["chunk_size"] = len("\n\n".join(current_chunk))
+                chunk_content = "\n\n".join(current_chunk)
+                chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+                meta_data["chunk_size"] = len(chunk_content)
                 if current_chunk:
-                    chunks.append(
-                        Document(
-                            id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk)
-                        )
-                    )
+                    chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
                     chunk_number += 1
                 current_chunk = [para]
                 current_size = para_size
@@ -113,15 +96,10 @@ class DocumentChunking(ChunkingStrategy):
         if current_chunk:
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
-            chunk_id = None
-            if document.id:
-                chunk_id = f"{document.id}_{chunk_number}"
-            elif document.name:
-                chunk_id = f"{document.name}_{chunk_number}"
-            meta_data["chunk_size"] = len("\n\n".join(current_chunk))
-            chunks.append(
-                Document(id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk))
-            )
+            chunk_content = "\n\n".join(current_chunk)
+            chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+            meta_data["chunk_size"] = len(chunk_content)
+            chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
 
         # Handle overlap if specified
         if self.overlap > 0:
@@ -131,11 +109,11 @@ class DocumentChunking(ChunkingStrategy):
                     # Add overlap from previous chunk
                     prev_text = chunks[i - 1].content[-self.overlap :]
                     meta_data = chunk_meta_data.copy()
-                    meta_data["chunk"] = chunk_number
-                    chunk_id = None
-                    if document.id:
-                        chunk_id = f"{document.id}_{chunk_number}"
+                    # Use the chunk's existing metadata and ID instead of stale chunk_number
+                    meta_data["chunk"] = chunks[i].meta_data["chunk"]
+                    chunk_id = chunks[i].id
                     meta_data["chunk_size"] = len(prev_text + chunks[i].content)
+
                     if prev_text:
                         overlapped_chunks.append(
                             Document(
@@ -145,6 +123,8 @@ class DocumentChunking(ChunkingStrategy):
                                 content=prev_text + chunks[i].content,
                             )
                         )
+                    else:
+                        overlapped_chunks.append(chunks[i])
                 else:
                     overlapped_chunks.append(chunks[i])
             chunks = overlapped_chunks
@@ -38,11 +38,7 @@ class FixedSizeChunking(ChunkingStrategy):
             chunk = content[start:end]
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
-            chunk_id = None
-            if document.id:
-                chunk_id = f"{document.id}_{chunk_number}"
-            elif document.name:
-                chunk_id = f"{document.name}_{chunk_number}"
+            chunk_id = self._generate_chunk_id(document, chunk_number, chunk)
             meta_data["chunk_size"] = len(chunk)
             chunked_documents.append(
                 Document(
@@ -267,11 +267,7 @@ class MarkdownChunking(ChunkingStrategy):
                 for sub_chunk in sub_chunks:
                     meta_data = chunk_meta_data.copy()
                     meta_data["chunk"] = chunk_number
-                    chunk_id = None
-                    if document.id:
-                        chunk_id = f"{document.id}_{chunk_number}"
-                    elif document.name:
-                        chunk_id = f"{document.name}_{chunk_number}"
+                    chunk_id = self._generate_chunk_id(document, chunk_number, sub_chunk)
                     meta_data["chunk_size"] = len(sub_chunk)
 
                     chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=sub_chunk))
@@ -282,19 +278,12 @@ class MarkdownChunking(ChunkingStrategy):
             else:
                 meta_data = chunk_meta_data.copy()
                 meta_data["chunk"] = chunk_number
-                chunk_id = None
-                if document.id:
-                    chunk_id = f"{document.id}_{chunk_number}"
-                elif document.name:
-                    chunk_id = f"{document.name}_{chunk_number}"
-                meta_data["chunk_size"] = len("\n\n".join(current_chunk))
+                chunk_content = "\n\n".join(current_chunk)
+                chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+                meta_data["chunk_size"] = len(chunk_content)
 
                 if current_chunk:
-                    chunks.append(
-                        Document(
-                            id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk)
-                        )
-                    )
+                    chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
                     chunk_number += 1
 
                 current_chunk = [section]
@@ -304,15 +293,10 @@ class MarkdownChunking(ChunkingStrategy):
         if current_chunk and not self.split_on_headings:
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
-            chunk_id = None
-            if document.id:
-                chunk_id = f"{document.id}_{chunk_number}"
-            elif document.name:
-                chunk_id = f"{document.name}_{chunk_number}"
-            meta_data["chunk_size"] = len("\n\n".join(current_chunk))
-            chunks.append(
-                Document(id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk))
-            )
+            chunk_content = "\n\n".join(current_chunk)
+            chunk_id = self._generate_chunk_id(document, chunk_number, chunk_content)
+            meta_data["chunk_size"] = len(chunk_content)
+            chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
 
         # Handle overlap if specified
         if self.overlap > 0:
@@ -46,9 +46,7 @@ class RecursiveChunking(ChunkingStrategy):
             chunk = self.clean_text(content[start:end])
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
-            chunk_id = None
-            if document.id:
-                chunk_id = f"{document.id}_{chunk_number}"
+            chunk_id = self._generate_chunk_id(document, chunk_number, chunk)
             chunk_number += 1
             meta_data["chunk_size"] = len(chunk)
             chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk))
@@ -33,7 +33,8 @@ class RowChunking(ChunkingStrategy):
 
             if chunk_content:  # Skip empty rows
                 meta_data = document.meta_data.copy()
-                meta_data["row_number"] = start_index + i  # Preserve logical row numbering
-                chunk_id = f"{document.id}_row_{start_index + i}" if document.id else None
+                row_number = start_index + i
+                meta_data["row_number"] = row_number  # Preserve logical row numbering
+                chunk_id = self._generate_chunk_id(document, row_number, chunk_content, prefix="row")
                 chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk_content))
         return chunks
@@ -160,7 +160,7 @@ class SemanticChunking(ChunkingStrategy):
         for i, chunk in enumerate(chunks, 1):
             meta_data = document.meta_data.copy()
             meta_data["chunk"] = i
-            chunk_id = f"{document.id}_{i}" if document.id else None
+            chunk_id = self._generate_chunk_id(document, i, chunk.text)
             meta_data["chunk_size"] = len(chunk.text)
 
             chunked_documents.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk.text))
@@ -1,3 +1,4 @@
+import hashlib
 from abc import ABC, abstractmethod
 from enum import Enum
 from typing import List, Optional
@@ -12,6 +13,24 @@ class ChunkingStrategy(ABC):
     def chunk(self, document: Document) -> List[Document]:
         raise NotImplementedError
 
+    def _generate_chunk_id(
+        self, document: Document, chunk_number: int, content: Optional[str] = None, prefix: Optional[str] = None
+    ) -> Optional[str]:
+        """Generate a deterministic ID for the chunk."""
+        suffix = f"_{prefix}_{chunk_number}" if prefix else f"_{chunk_number}"
+
+        if document.id:
+            return f"{document.id}{suffix}"
+        elif document.name:
+            return f"{document.name}{suffix}"
+        else:
+            # Hash the chunk content for a deterministic ID when no identifier exists
+            hash_source = content if content else document.content
+            if hash_source:
+                content_hash = hashlib.md5(hash_source.encode("utf-8")).hexdigest()[:12]  # nosec B324
+                return f"chunk_{content_hash}{suffix}"
+            return None
+
     async def achunk(self, document: Document) -> List[Document]:
         """Async version of chunk. Override for truly async implementations."""
         return self.chunk(document)
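To make the new ID scheme concrete, here is what the helper above produces in each branch (a plain-stdlib stand-in, not the library code; the example identifiers are made up):

```python
import hashlib

# document.id = "doc-123", chunk 1             -> "doc-123_1"
# only document.name = "faq.md", chunk 1       -> "faq.md_1"
# document.id = "doc-123", prefix="row", row 5 -> "doc-123_row_5"
# Neither id nor name: fall back to hashing the chunk text itself.
chunk_text = "Example chunk text"
print(f"chunk_{hashlib.md5(chunk_text.encode('utf-8')).hexdigest()[:12]}_1")
```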
@@ -823,7 +823,13 @@ class Knowledge:
                     log_warning(f"Invalid filter key: {key} - not present in knowledge base")
 
         elif isinstance(filters, List):
-            # Validate that list contains FilterExpr instances
+            # Validate list filters against known metadata keys
+            if valid_metadata_filters is None or not valid_metadata_filters:
+                # Can't validate keys without metadata - return original list
+                log_warning("No valid metadata filters tracked yet. Cannot validate list filter keys.")
+                return filters, []
+
+            valid_list_filters: List[FilterExpr] = []
             for i, filter_item in enumerate(filters):
                 if not isinstance(filter_item, FilterExpr):
                     log_warning(
@@ -832,9 +838,23 @@ class Knowledge:
                         f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
                        f"AND(...), OR(...), NOT(...) from agno.filters"
                     )
-                    # Filter expressions are already validated, return empty dict/list
-                    # The actual filtering happens in the vector_db layer
-                    return filters, []
+                    continue
+
+                # Check if filter has a key attribute and validate it
+                if hasattr(filter_item, "key"):
+                    key = filter_item.key
+                    base_key = key.split(".")[-1] if "." in key else key
+                    if base_key in valid_metadata_filters or key in valid_metadata_filters:
+                        valid_list_filters.append(filter_item)
+                    else:
+                        invalid_keys.append(key)
+                        log_warning(f"Invalid filter key: {key} - not present in knowledge base")
+                else:
+                    # Complex filters (AND, OR, NOT) - keep them as-is
+                    # They contain nested filters that will be validated by the vector DB
+                    valid_list_filters.append(filter_item)
+
+            return valid_list_filters, invalid_keys
 
         return valid_filters, invalid_keys
 
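In short, simple expressions whose key is unknown are now dropped and reported, while composite AND/OR/NOT expressions pass through for the vector DB to validate. A self-contained sketch of that flow with stand-in filter types (not agno's real FilterExpr classes):

```python
from dataclasses import dataclass, field
from typing import Any, List, Set, Tuple

@dataclass
class EQ:            # stand-in for a simple filter expression carrying a key
    key: str
    value: Any

@dataclass
class AND:           # stand-in for a composite filter: no top-level `key` attribute
    filters: List[Any] = field(default_factory=list)

def validate(filters: List[Any], valid_metadata_filters: Set[str]) -> Tuple[List[Any], List[str]]:
    valid, invalid_keys = [], []
    for f in filters:
        if hasattr(f, "key"):
            base_key = f.key.split(".")[-1] if "." in f.key else f.key
            if base_key in valid_metadata_filters or f.key in valid_metadata_filters:
                valid.append(f)
            else:
                invalid_keys.append(f.key)
        else:
            valid.append(f)  # composite filters are validated later by the vector DB
    return valid, invalid_keys

kept, bad = validate([EQ("author", "jane"), EQ("typo_key", "x"), AND([EQ("year", 2024)])], {"author", "year"})
print(kept, bad)  # EQ('author') and AND(...) are kept; 'typo_key' is reported as invalid
```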
@@ -1541,7 +1561,49 @@ class Knowledge:
         # 6. Chunk documents if needed
         if reader and not reader.chunk:
             read_documents = await reader.chunk_documents_async(read_documents)
-        # 7. Prepare and insert the content in the vector database
+
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        await self.vector_db.async_upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        await self.vector_db.async_insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            await self._aupdate_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
@@ -1648,7 +1710,48 @@ class Knowledge:
         if reader:
             read_documents = self._chunk_documents_sync(reader, read_documents)
 
-        # 7. Prepare and insert the content in the vector database
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        self.vector_db.upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        self.vector_db.insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
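The effect of the grouping step in both the async and sync paths above is that a crawl returning pages from several URLs is indexed per page rather than as one blob. A stand-alone illustration with plain dicts in place of agno Document objects:

```python
# Stand-in documents; the real objects are agno Documents with a meta_data dict.
docs = [
    {"meta_data": {"url": "https://docs.example.com/a"}, "content": "page a, chunk 1"},
    {"meta_data": {"url": "https://docs.example.com/b"}, "content": "page b, chunk 1"},
    {"meta_data": {"url": "https://docs.example.com/a"}, "content": "page a, chunk 2"},
]

docs_by_source: dict = {}
for doc in docs:
    source_url = (doc.get("meta_data") or {}).get("url") or "unknown"
    docs_by_source.setdefault(source_url, []).append(doc)

print({url: len(chunks) for url, chunks in docs_by_source.items()})
# {'https://docs.example.com/a': 2, 'https://docs.example.com/b': 1}
# Each URL then gets its own content hash, so skip_if_exists is checked per page.
```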
@@ -1900,11 +2003,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
-                return
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 await self._aprocess_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-                return
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
@@ -1961,11 +2064,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
-                return
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-                return
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
@@ -3896,6 +3999,42 @@ class Knowledge:
         hash_input = ":".join(hash_parts)
         return hashlib.sha256(hash_input.encode()).hexdigest()
 
+    def _build_document_content_hash(self, document: Document, content: Content) -> str:
+        """
+        Build content hash for a specific document.
+
+        Used for multi-page readers (like WebsiteReader) where each crawled page
+        should have its own unique content hash based on its actual URL.
+
+        Args:
+            document: The document to build the hash for
+            content: The original content object (for fallback name/description)
+
+        Returns:
+            A unique hash string for this specific document
+        """
+        hash_parts = []
+
+        if content.name:
+            hash_parts.append(content.name)
+        if content.description:
+            hash_parts.append(content.description)
+
+        # Use document's own URL if available (set by WebsiteReader)
+        doc_url = document.meta_data.get("url") if document.meta_data else None
+        if doc_url:
+            hash_parts.append(str(doc_url))
+        elif content.url:
+            hash_parts.append(content.url)
+        elif content.path:
+            hash_parts.append(str(content.path))
+        else:
+            # Fallback: use content hash for uniqueness
+            hash_parts.append(hashlib.sha256(document.content.encode()).hexdigest()[:16])
+
+        hash_input = ":".join(hash_parts)
+        return hashlib.sha256(hash_input.encode()).hexdigest()
+
     def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
         """
         Safely ensure a field is a string, handling various edge cases.
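A quick stdlib illustration of why the new helper matters: two pages crawled under the same Content get distinct hashes because their URLs differ (the names and URLs below are made up):

```python
import hashlib

def doc_hash(name: str, url: str) -> str:
    # Same recipe as above: join the available parts with ":" and SHA-256 the result.
    return hashlib.sha256(":".join([name, url]).encode()).hexdigest()

print(doc_hash("docs", "https://docs.example.com/a")[:12])
print(doc_hash("docs", "https://docs.example.com/b")[:12])
# Distinct hashes per page, so skip_if_exists and upserts track each page independently.
```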
@@ -4625,7 +4764,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-        docs = self.search(query=query, filters=knowledge_filters)
+        try:
+            docs = self.search(query=query, filters=knowledge_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4657,7 +4801,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-        docs = await self.asearch(query=query, filters=knowledge_filters)
+        try:
+            docs = await self.asearch(query=query, filters=knowledge_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4735,7 +4884,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-        docs = self.search(query=query, filters=search_filters)
+        try:
+            docs = self.search(query=query, filters=search_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4789,7 +4943,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-        docs = await self.asearch(query=query, filters=search_filters)
+        try:
+            docs = await self.asearch(query=query, filters=search_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -110,7 +110,7 @@ class TextReader(Reader):
             chunked_documents = self.chunk_document(document)
 
             if not chunked_documents:
-                return [document]
+                return []
 
             tasks = [process_chunk(chunk_doc) for chunk_doc in chunked_documents]
             return await asyncio.gather(*tasks)