agno 2.2.10__py3-none-any.whl → 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. agno/agent/agent.py +75 -48
  2. agno/db/dynamo/utils.py +1 -1
  3. agno/db/firestore/utils.py +1 -1
  4. agno/db/gcs_json/utils.py +1 -1
  5. agno/db/in_memory/utils.py +1 -1
  6. agno/db/json/utils.py +1 -1
  7. agno/db/mongo/utils.py +3 -3
  8. agno/db/mysql/mysql.py +1 -1
  9. agno/db/mysql/utils.py +1 -1
  10. agno/db/postgres/utils.py +1 -1
  11. agno/db/redis/utils.py +1 -1
  12. agno/db/singlestore/singlestore.py +1 -1
  13. agno/db/singlestore/utils.py +1 -1
  14. agno/db/sqlite/async_sqlite.py +1 -1
  15. agno/db/sqlite/sqlite.py +1 -1
  16. agno/db/sqlite/utils.py +1 -1
  17. agno/filters.py +354 -0
  18. agno/knowledge/chunking/agentic.py +8 -9
  19. agno/knowledge/chunking/strategy.py +59 -15
  20. agno/knowledge/embedder/sentence_transformer.py +6 -2
  21. agno/knowledge/knowledge.py +43 -22
  22. agno/knowledge/reader/base.py +6 -2
  23. agno/knowledge/utils.py +20 -0
  24. agno/models/anthropic/claude.py +45 -9
  25. agno/models/base.py +4 -0
  26. agno/os/app.py +23 -7
  27. agno/os/interfaces/slack/router.py +53 -33
  28. agno/os/interfaces/slack/slack.py +9 -1
  29. agno/os/router.py +25 -1
  30. agno/os/routers/health.py +5 -3
  31. agno/os/routers/knowledge/knowledge.py +43 -17
  32. agno/os/routers/knowledge/schemas.py +4 -3
  33. agno/run/agent.py +11 -1
  34. agno/run/base.py +3 -2
  35. agno/session/agent.py +10 -5
  36. agno/team/team.py +57 -18
  37. agno/tools/file_generation.py +4 -4
  38. agno/tools/gmail.py +179 -0
  39. agno/tools/parallel.py +314 -0
  40. agno/utils/agent.py +22 -17
  41. agno/utils/gemini.py +15 -5
  42. agno/utils/knowledge.py +12 -5
  43. agno/utils/log.py +1 -0
  44. agno/utils/models/claude.py +2 -1
  45. agno/utils/print_response/agent.py +5 -4
  46. agno/utils/print_response/team.py +5 -4
  47. agno/vectordb/base.py +2 -4
  48. agno/vectordb/cassandra/cassandra.py +12 -5
  49. agno/vectordb/chroma/chromadb.py +10 -4
  50. agno/vectordb/clickhouse/clickhousedb.py +12 -4
  51. agno/vectordb/couchbase/couchbase.py +12 -3
  52. agno/vectordb/lancedb/lance_db.py +69 -144
  53. agno/vectordb/langchaindb/langchaindb.py +13 -4
  54. agno/vectordb/lightrag/lightrag.py +8 -3
  55. agno/vectordb/llamaindex/llamaindexdb.py +10 -4
  56. agno/vectordb/milvus/milvus.py +16 -5
  57. agno/vectordb/mongodb/mongodb.py +14 -3
  58. agno/vectordb/pgvector/pgvector.py +73 -15
  59. agno/vectordb/pineconedb/pineconedb.py +6 -2
  60. agno/vectordb/qdrant/qdrant.py +25 -13
  61. agno/vectordb/redis/redisdb.py +37 -30
  62. agno/vectordb/singlestore/singlestore.py +9 -4
  63. agno/vectordb/surrealdb/surrealdb.py +13 -3
  64. agno/vectordb/upstashdb/upstashdb.py +8 -5
  65. agno/vectordb/weaviate/weaviate.py +29 -12
  66. agno/workflow/step.py +3 -2
  67. agno/workflow/types.py +20 -1
  68. agno/workflow/workflow.py +103 -14
  69. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/METADATA +4 -1
  70. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/RECORD +73 -71
  71. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/WHEEL +0 -0
  72. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/licenses/LICENSE +0 -0
  73. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/top_level.txt +0 -0
agno/filters.py ADDED
@@ -0,0 +1,354 @@
+ """Search filter expressions for filtering knowledge base documents and search results.
+
+ This module provides a set of filter operators for constructing complex search queries
+ that can be applied to knowledge bases, vector databases, and other searchable content.
+
+ Filter Types:
+     - Comparison: EQ (equals), GT (greater than), LT (less than)
+     - Inclusion: IN (value in list)
+     - Logical: AND, OR, NOT
+
+ Example:
+     >>> from agno.filters import EQ, GT, IN, AND, OR, NOT
+     >>>
+     >>> # Simple equality filter
+     >>> filter = EQ("category", "technology")
+     >>>
+     >>> # Complex filter with multiple conditions
+     >>> filter = AND(
+     ...     EQ("status", "published"),
+     ...     GT("views", 1000),
+     ...     IN("category", ["tech", "science"])
+     ... )
+     >>>
+     >>> # Using OR logic
+     >>> filter = OR(EQ("priority", "high"), EQ("urgent", True))
+     >>>
+     >>> # Negating conditions
+     >>> filter = NOT(EQ("status", "archived"))
+     >>>
+     >>> # Complex nested logic
+     >>> filter = OR(
+     ...     AND(EQ("type", "article"), GT("word_count", 500)),
+     ...     AND(EQ("type", "tutorial"), NOT(EQ("difficulty", "beginner")))
+     ... )
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, List
+
+ # ============================================================
+ # Base Expression
+ # ============================================================
+
+
+ class FilterExpr:
+     """Base class for all filter expressions.
+
+     Filters can be combined using AND, OR, and NOT classes:
+         - AND: Combine filters where both expressions must be true
+         - OR: Combine filters where either expression can be true
+         - NOT: Negate a filter expression
+
+     Example:
+         >>> # Create complex filters using AND, OR, NOT
+         >>> filter = OR(AND(EQ("status", "active"), GT("age", 18)), EQ("role", "admin"))
+         >>> # Equivalent to: (status == "active" AND age > 18) OR role == "admin"
+     """
+
+     # Logical operator overloads
+     def __or__(self, other: FilterExpr) -> OR:
+         """Combine two filters with OR logic."""
+         return OR(self, other)
+
+     def __and__(self, other: FilterExpr) -> AND:
+         """Combine two filters with AND logic."""
+         return AND(self, other)
+
+     def __invert__(self) -> NOT:
+         """Negate a filter."""
+         return NOT(self)
+
+     def to_dict(self) -> dict:
+         """Convert the filter expression to a dictionary representation."""
+         raise NotImplementedError("Subclasses must implement to_dict()")
+
+     def __repr__(self) -> str:
+         return f"{self.__class__.__name__}({self.__dict__})"
+
+
+ # ============================================================
+ # Comparison & Inclusion Filters
+ # ============================================================
+
+
+ class EQ(FilterExpr):
+     """Equality filter - matches documents where a field equals a specific value.
+
+     Args:
+         key: The field name to compare
+         value: The value to match against
+
+     Example:
+         >>> # Match documents where status is "published"
+         >>> filter = EQ("status", "published")
+         >>>
+         >>> # Match documents where author_id is 123
+         >>> filter = EQ("author_id", 123)
+     """
+
+     def __init__(self, key: str, value: Any):
+         self.key = key
+         self.value = value
+
+     def to_dict(self) -> dict:
+         return {"op": "EQ", "key": self.key, "value": self.value}
+
+
+ class IN(FilterExpr):
+     """Inclusion filter - matches documents where a field's value is in a list of values.
+
+     Args:
+         key: The field name to check
+         values: List of acceptable values
+
+     Example:
+         >>> # Match documents where category is either "tech", "science", or "engineering"
+         >>> filter = IN("category", ["tech", "science", "engineering"])
+         >>>
+         >>> # Match documents where status is either "draft" or "published"
+         >>> filter = IN("status", ["draft", "published"])
+     """
+
+     def __init__(self, key: str, values: List[Any]):
+         self.key = key
+         self.values = values
+
+     def to_dict(self) -> dict:
+         return {"op": "IN", "key": self.key, "values": self.values}
+
+
+ class GT(FilterExpr):
+     """Greater than filter - matches documents where a field's value is greater than a threshold.
+
+     Args:
+         key: The field name to compare
+         value: The threshold value
+
+     Example:
+         >>> # Match documents where age is greater than 18
+         >>> filter = GT("age", 18)
+         >>>
+         >>> # Match documents where price is greater than 100.0
+         >>> filter = GT("price", 100.0)
+         >>>
+         >>> # Match documents created after a certain timestamp
+         >>> filter = GT("created_at", 1234567890)
+     """
+
+     def __init__(self, key: str, value: Any):
+         self.key = key
+         self.value = value
+
+     def to_dict(self) -> dict:
+         return {"op": "GT", "key": self.key, "value": self.value}
+
+
+ class LT(FilterExpr):
+     """Less than filter - matches documents where a field's value is less than a threshold.
+
+     Args:
+         key: The field name to compare
+         value: The threshold value
+
+     Example:
+         >>> # Match documents where age is less than 65
+         >>> filter = LT("age", 65)
+         >>>
+         >>> # Match documents where price is less than 50.0
+         >>> filter = LT("price", 50.0)
+         >>>
+         >>> # Match documents created before a certain timestamp
+         >>> filter = LT("created_at", 1234567890)
+     """
+
+     def __init__(self, key: str, value: Any):
+         self.key = key
+         self.value = value
+
+     def to_dict(self) -> dict:
+         return {"op": "LT", "key": self.key, "value": self.value}
+
+
+ # ============================================================
+ # Logical Operators
+ # ============================================================
+
+
+ class AND(FilterExpr):
+     """Logical AND operator - matches documents where ALL expressions are true.
+
+     Combines multiple filter expressions where every expression must be satisfied
+     for a document to match.
+
+     Args:
+         *expressions: Variable number of FilterExpr expressions to combine with AND logic
+
+     Example:
+         >>> # Match documents where status is "published" AND age > 18
+         >>> filter = AND(EQ("status", "published"), GT("age", 18))
+         >>>
+         >>> # Multiple expressions
+         >>> filter = AND(
+         ...     EQ("status", "active"),
+         ...     GT("score", 80),
+         ...     IN("category", ["tech", "science"])
+         ... )
+     """
+
+     def __init__(self, *expressions: FilterExpr):
+         self.expressions = list(expressions)
+
+     def to_dict(self) -> dict:
+         return {"op": "AND", "conditions": [e.to_dict() for e in self.expressions]}
+
+
+ class OR(FilterExpr):
+     """Logical OR operator - matches documents where ANY expression is true.
+
+     Combines multiple filter expressions where at least one expression must be satisfied
+     for a document to match.
+
+     Args:
+         *expressions: Variable number of FilterExpr expressions to combine with OR logic
+
+     Example:
+         >>> # Match documents where status is "published" OR status is "archived"
+         >>> filter = OR(EQ("status", "published"), EQ("status", "archived"))
+         >>>
+         >>> # Complex: Match VIP users OR users with high score
+         >>> filter = OR(
+         ...     EQ("membership", "VIP"),
+         ...     GT("score", 1000)
+         ... )
+     """
+
+     def __init__(self, *expressions: FilterExpr):
+         self.expressions = list(expressions)
+
+     def to_dict(self) -> dict:
+         return {"op": "OR", "conditions": [e.to_dict() for e in self.expressions]}
+
+
+ class NOT(FilterExpr):
+     """Logical NOT operator - matches documents where the expression is NOT true.
+
+     Negates a filter expression, matching documents that don't satisfy the expression.
+
+     Args:
+         expression: The FilterExpr expression to negate
+
+     Example:
+         >>> # Match documents where status is NOT "draft"
+         >>> filter = NOT(EQ("status", "draft"))
+         >>>
+         >>> # Exclude inactive users with low scores
+         >>> filter = NOT(AND(EQ("status", "inactive"), LT("score", 10)))
+         >>>
+         >>> # Match users who are NOT in the blocked list
+         >>> filter = NOT(IN("user_id", [101, 102, 103]))
+     """
+
+     def __init__(self, expression: FilterExpr):
+         self.expression = expression
+
+     def to_dict(self) -> dict:
+         return {"op": "NOT", "condition": self.expression.to_dict()}
+
+
+ # ============================================================
+ # Deserialization
+ # ============================================================
+
+
+ def from_dict(filter_dict: dict) -> FilterExpr:
+     """Reconstruct a FilterExpr object from its dictionary representation.
+
+     This function deserializes filter expressions that were serialized using the
+     to_dict() method, enabling filters to be passed through JSON APIs and reconstructed
+     on the server side.
+
+     Args:
+         filter_dict: Dictionary representation of a filter expression with an "op" key
+
+     Returns:
+         FilterExpr: The reconstructed filter expression object
+
+     Raises:
+         ValueError: If the filter dictionary has an invalid structure or unknown operator
+
+     Example:
+         >>> # Serialize and deserialize a simple filter
+         >>> original = EQ("status", "published")
+         >>> serialized = original.to_dict()
+         >>> # {"op": "EQ", "key": "status", "value": "published"}
+         >>> reconstructed = from_dict(serialized)
+         >>>
+         >>> # Complex filter with nested expressions
+         >>> complex_filter = OR(AND(EQ("type", "article"), GT("views", 1000)), IN("priority", ["high", "urgent"]))
+         >>> serialized = complex_filter.to_dict()
+         >>> reconstructed = from_dict(serialized)
+         >>>
+         >>> # From JSON API
+         >>> import json
+         >>> json_str = '{"op": "AND", "conditions": [{"op": "EQ", "key": "status", "value": "active"}, {"op": "GT", "key": "age", "value": 18}]}'
+         >>> filter_dict = json.loads(json_str)
+         >>> filter_expr = from_dict(filter_dict)
+     """
+     if not isinstance(filter_dict, dict) or "op" not in filter_dict:
+         raise ValueError(f"Invalid filter dictionary: must contain 'op' key. Got: {filter_dict}")
+
+     op = filter_dict["op"]
+
+     # Comparison and inclusion operators
+     if op == "EQ":
+         if "key" not in filter_dict or "value" not in filter_dict:
+             raise ValueError(f"EQ filter requires 'key' and 'value' fields. Got: {filter_dict}")
+         return EQ(filter_dict["key"], filter_dict["value"])
+
+     elif op == "IN":
+         if "key" not in filter_dict or "values" not in filter_dict:
+             raise ValueError(f"IN filter requires 'key' and 'values' fields. Got: {filter_dict}")
+         return IN(filter_dict["key"], filter_dict["values"])
+
+     elif op == "GT":
+         if "key" not in filter_dict or "value" not in filter_dict:
+             raise ValueError(f"GT filter requires 'key' and 'value' fields. Got: {filter_dict}")
+         return GT(filter_dict["key"], filter_dict["value"])
+
+     elif op == "LT":
+         if "key" not in filter_dict or "value" not in filter_dict:
+             raise ValueError(f"LT filter requires 'key' and 'value' fields. Got: {filter_dict}")
+         return LT(filter_dict["key"], filter_dict["value"])
+
+     # Logical operators
+     elif op == "AND":
+         if "conditions" not in filter_dict:
+             raise ValueError(f"AND filter requires 'conditions' field. Got: {filter_dict}")
+         conditions = [from_dict(cond) for cond in filter_dict["conditions"]]
+         return AND(*conditions)
+
+     elif op == "OR":
+         if "conditions" not in filter_dict:
+             raise ValueError(f"OR filter requires 'conditions' field. Got: {filter_dict}")
+         conditions = [from_dict(cond) for cond in filter_dict["conditions"]]
+         return OR(*conditions)
+
+     elif op == "NOT":
+         if "condition" not in filter_dict:
+             raise ValueError(f"NOT filter requires 'condition' field. Got: {filter_dict}")
+         return NOT(from_dict(filter_dict["condition"]))
+
+     else:
+         raise ValueError(f"Unknown filter operator: {op}")
agno/knowledge/chunking/agentic.py CHANGED
@@ -20,13 +20,12 @@ class AgenticChunking(ChunkingStrategy):
              except Exception:
                  raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
              model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
-
-         self.max_chunk_size = max_chunk_size
+         self.chunk_size = max_chunk_size
          self.model = model

      def chunk(self, document: Document) -> List[Document]:
          """Split text into chunks using LLM to determine natural breakpoints based on context"""
-         if len(document.content) <= self.max_chunk_size:
+         if len(document.content) <= self.chunk_size:
              return [document]

          chunks: List[Document] = []
@@ -35,22 +34,22 @@ class AgenticChunking(ChunkingStrategy):
          chunk_number = 1

          while remaining_text:
-             # Ask model to find a good breakpoint within max_chunk_size
-             prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.max_chunk_size} characters.
+             # Ask model to find a good breakpoint within chunk_size
+             prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
              Consider semantic completeness, paragraph boundaries, and topic transitions.
              Return only the character position number of where to break the text:

-             {remaining_text[: self.max_chunk_size]}"""
+             {remaining_text[: self.chunk_size]}"""

              try:
                  response = self.model.response([Message(role="user", content=prompt)])
                  if response and response.content:
-                     break_point = min(int(response.content.strip()), self.max_chunk_size)
+                     break_point = min(int(response.content.strip()), self.chunk_size)
                  else:
-                     break_point = self.max_chunk_size
+                     break_point = self.chunk_size
              except Exception:
                  # Fallback to max size if model fails
-                 break_point = self.max_chunk_size
+                 break_point = self.chunk_size

              # Extract chunk and update remaining text
              chunk = remaining_text[:break_point].strip()
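The only behavioural change here is the internal rename from self.max_chunk_size to self.chunk_size; the constructor still takes max_chunk_size, which the factory change below relies on. A hedged usage sketch, assuming Document(content=...) from agno.knowledge.document.base and the openai package for the default model:

from agno.knowledge.chunking.agentic import AgenticChunking
from agno.knowledge.document.base import Document

# max_chunk_size is still the constructor argument; 2.2.12 stores it as self.chunk_size.
chunker = AgenticChunking(max_chunk_size=2000)

# Documents shorter than chunk_size are returned unchanged; longer ones trigger
# model calls (the default OpenAIChat model) to pick natural breakpoints.
doc = Document(content="Short note that fits in a single chunk.")
print(len(chunker.chunk(doc)))  # -> 1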
agno/knowledge/chunking/strategy.py CHANGED
@@ -1,6 +1,6 @@
  from abc import ABC, abstractmethod
  from enum import Enum
- from typing import List
+ from typing import List, Optional

  from agno.knowledge.document.base import Document

@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
      """Factory for creating chunking strategy instances."""

      @classmethod
-     def create_strategy(cls, strategy_type: ChunkingStrategyType, **kwargs) -> ChunkingStrategy:
+     def create_strategy(
+         cls,
+         strategy_type: ChunkingStrategyType,
+         chunk_size: Optional[int] = None,
+         overlap: Optional[int] = None,
+         **kwargs,
+     ) -> ChunkingStrategy:
          """Create an instance of the chunking strategy with the given parameters."""
          strategy_map = {
              ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
              ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
              ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
          }
-         return strategy_map[strategy_type](**kwargs)
+         return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)

      @classmethod
-     def _create_agentic_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_agentic_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.agentic import AgenticChunking

-         # Map chunk_size to max_chunk_size for AgenticChunking
-         if "chunk_size" in kwargs and "max_chunk_size" not in kwargs:
-             kwargs["max_chunk_size"] = kwargs.pop("chunk_size")
+         # AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
+         if chunk_size is not None:
+             kwargs["max_chunk_size"] = chunk_size
+         # Remove overlap since AgenticChunking doesn't support it
          return AgenticChunking(**kwargs)

      @classmethod
-     def _create_document_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_document_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.document import DocumentChunking

+         # DocumentChunking accepts both chunk_size and overlap
+         if chunk_size is not None:
+             kwargs["chunk_size"] = chunk_size
+         if overlap is not None:
+             kwargs["overlap"] = overlap
          return DocumentChunking(**kwargs)

      @classmethod
-     def _create_recursive_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_recursive_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.recursive import RecursiveChunking

+         # RecursiveChunking accepts both chunk_size and overlap
+         if chunk_size is not None:
+             kwargs["chunk_size"] = chunk_size
+         if overlap is not None:
+             kwargs["overlap"] = overlap
          return RecursiveChunking(**kwargs)

      @classmethod
-     def _create_semantic_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_semantic_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.semantic import SemanticChunking

+         # SemanticChunking accepts chunk_size but not overlap
+         if chunk_size is not None:
+             kwargs["chunk_size"] = chunk_size
+         # Remove overlap since SemanticChunking doesn't support it
          return SemanticChunking(**kwargs)

      @classmethod
-     def _create_fixed_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_fixed_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.fixed import FixedSizeChunking

+         # FixedSizeChunking accepts both chunk_size and overlap
+         if chunk_size is not None:
+             kwargs["chunk_size"] = chunk_size
+         if overlap is not None:
+             kwargs["overlap"] = overlap
          return FixedSizeChunking(**kwargs)

      @classmethod
-     def _create_row_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_row_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.row import RowChunking

-         # Remove chunk_size if present since RowChunking doesn't use it
-         kwargs.pop("chunk_size", None)
+         # RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
          return RowChunking(**kwargs)

      @classmethod
-     def _create_markdown_chunking(cls, **kwargs) -> ChunkingStrategy:
+     def _create_markdown_chunking(
+         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> ChunkingStrategy:
          from agno.knowledge.chunking.markdown import MarkdownChunking

+         # MarkdownChunking accepts both chunk_size and overlap
+         if chunk_size is not None:
+             kwargs["chunk_size"] = chunk_size
+         if overlap is not None:
+             kwargs["overlap"] = overlap
          return MarkdownChunking(**kwargs)
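The factory now threads chunk_size and overlap through as explicit parameters instead of guessing from **kwargs, and each _create_* helper forwards only what its strategy supports. A sketch of the call path, using only enum members and the import path that appear in the diff above:

from agno.knowledge.chunking.strategy import ChunkingStrategyFactory, ChunkingStrategyType

# MarkdownChunking supports both parameters, so both are forwarded.
markdown = ChunkingStrategyFactory.create_strategy(
    ChunkingStrategyType.MARKDOWN_CHUNKER, chunk_size=1500, overlap=100
)

# AgenticChunking only understands max_chunk_size, so chunk_size is remapped and
# overlap is silently dropped (the default model requires the openai package).
agentic = ChunkingStrategyFactory.create_strategy(
    ChunkingStrategyType.AGENTIC_CHUNKER, chunk_size=1500, overlap=100
)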
agno/knowledge/embedder/sentence_transformer.py CHANGED
@@ -25,10 +25,14 @@ class SentenceTransformerEmbedder(Embedder):
      prompt: Optional[str] = None
      normalize_embeddings: bool = False

-     def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
-         if not self.sentence_transformer_client:
+     def __post_init__(self):
+         # Initialize the SentenceTransformer model eagerly to avoid race conditions in async contexts
+         if self.sentence_transformer_client is None:
              self.sentence_transformer_client = SentenceTransformer(model_name_or_path=self.id)

+     def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
+         if self.sentence_transformer_client is None:
+             raise RuntimeError("SentenceTransformer model not initialized")
          model = self.sentence_transformer_client
          embedding = model.encode(text, prompt=self.prompt, normalize_embeddings=self.normalize_embeddings)
          try:
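Because the embedder is a dataclass, the new __post_init__ builds the SentenceTransformer client once at construction time instead of lazily inside get_embedding, which avoids double-initialisation races in async code. A hedged sketch; the model name and the sentence-transformers dependency are assumptions, only the `id` field and get_embedding come from the diff:

from agno.knowledge.embedder.sentence_transformer import SentenceTransformerEmbedder

# The client is created here, in __post_init__, not on the first get_embedding() call.
embedder = SentenceTransformerEmbedder(id="sentence-transformers/all-MiniLM-L6-v2")
vector = embedder.get_embedding("What changed between agno 2.2.10 and 2.2.12?")
print(len(vector))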
agno/knowledge/knowledge.py CHANGED
@@ -13,6 +13,7 @@ from httpx import AsyncClient

  from agno.db.base import AsyncBaseDb, BaseDb
  from agno.db.schemas.knowledge import KnowledgeRow
+ from agno.filters import FilterExpr
  from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
  from agno.knowledge.document import Document
  from agno.knowledge.reader import Reader, ReaderFactory
@@ -403,7 +404,7 @@ class Knowledge:

          if path.is_file():
              if self._should_include_file(str(path), include, exclude):
-                 log_info(f"Adding file {path} due to include/exclude filters")
+                 log_debug(f"Adding file {path} due to include/exclude filters")

                  await self._add_to_contents_db(content)
                  if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
@@ -1392,7 +1393,7 @@ class Knowledge:
          self,
          query: str,
          max_results: Optional[int] = None,
-         filters: Optional[Dict[str, Any]] = None,
+         filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
          search_type: Optional[str] = None,
      ) -> List[Document]:
          """Returns relevant documents matching a query"""
@@ -1423,7 +1424,7 @@ class Knowledge:
          self,
          query: str,
          max_results: Optional[int] = None,
-         filters: Optional[Dict[str, Any]] = None,
+         filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
          search_type: Optional[str] = None,
      ) -> List[Document]:
          """Returns relevant documents matching a query"""
@@ -1465,38 +1466,58 @@ class Knowledge:
              self.valid_metadata_filters.update(await self._aget_filters_from_db())
          return self.valid_metadata_filters

-     def _validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+     def _validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
+         """Internal method to validate filters against known metadata keys."""
          if not filters:
-             return {}, []
+             return None, []

-         valid_filters: Dict[str, Any] = {}
+         valid_filters: Optional[Dict[str, Any]] = None
          invalid_keys = []

-         # If no metadata filters tracked yet, all keys are considered invalid
-         if self.valid_metadata_filters is None:
-             invalid_keys = list(filters.keys())
-             log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
-             return {}, invalid_keys
-
-         for key, value in filters.items():
-             # Handle both normal keys and prefixed keys like meta_data.key
-             base_key = key.split(".")[-1] if "." in key else key
-             if base_key in self.valid_metadata_filters or key in self.valid_metadata_filters:
-                 valid_filters[key] = value
-             else:
-                 invalid_keys.append(key)
-                 log_debug(f"Invalid filter key: {key} - not present in knowledge base")
+         if isinstance(filters, dict):
+             # If no metadata filters tracked yet, all keys are considered invalid
+             if self.valid_metadata_filters is None:
+                 invalid_keys = list(filters.keys())
+                 log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
+                 return None, invalid_keys
+
+             valid_filters = {}
+             for key, value in filters.items():
+                 # Handle both normal keys and prefixed keys like meta_data.key
+                 base_key = key.split(".")[-1] if "." in key else key
+                 if base_key in self.valid_metadata_filters or key in self.valid_metadata_filters:
+                     valid_filters[key] = value
+                 else:
+                     invalid_keys.append(key)
+                     log_debug(f"Invalid filter key: {key} - not present in knowledge base")
+
+         elif isinstance(filters, List):
+             # Validate that list contains FilterExpr instances
+             for i, filter_item in enumerate(filters):
+                 if not isinstance(filter_item, FilterExpr):
+                     log_warning(
+                         f"Invalid filter at index {i}: expected FilterExpr instance, "
+                         f"got {type(filter_item).__name__}. "
+                         f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
+                         f"AND(...), OR(...), NOT(...) from agno.filters"
+                     )
+
+             # Filter expressions are already validated, return empty dict/list
+             # The actual filtering happens in the vector_db layer
+             return filters, []

          return valid_filters, invalid_keys

-     def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+     def validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
          if self.valid_metadata_filters is None:
              self.valid_metadata_filters = set()
              self.valid_metadata_filters.update(self._get_filters_from_db())

          return self._validate_filters(filters)

-     async def async_validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+     async def async_validate_filters(
+         self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]
+     ) -> Tuple[Any, List[str]]:
          if self.valid_metadata_filters is None:
              self.valid_metadata_filters = set()
              self.valid_metadata_filters.update(await self._aget_filters_from_db())
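The net effect in knowledge.py is that filters may now be either the existing metadata dict or a list of FilterExpr objects; the latter skip key validation and are handed straight to the vector db layer. A hedged sketch, where `knowledge` stands for an already-configured Knowledge instance and the method name `search` is an assumption based on the signatures shown above:

from agno.filters import AND, EQ, GT

# Existing behaviour: dict filters are validated against known metadata keys.
docs = knowledge.search("vector database tuning", max_results=5, filters={"category": "tech"})

# New in 2.2.12: FilterExpr lists are accepted and passed through to the vector db.
docs = knowledge.search(
    "vector database tuning",
    max_results=5,
    filters=[AND(EQ("category", "tech"), GT("year", 2023))],
)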
agno/knowledge/reader/base.py CHANGED
@@ -44,11 +44,15 @@ class Reader:
          self.max_results = max_results
          self.encoding = encoding

-     def set_chunking_strategy_from_string(self, strategy_name: str, **kwargs) -> None:
+     def set_chunking_strategy_from_string(
+         self, strategy_name: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+     ) -> None:
          """Set the chunking strategy from a string name."""
          try:
              strategy_type = ChunkingStrategyType.from_string(strategy_name)
-             self.chunking_strategy = ChunkingStrategyFactory.create_strategy(strategy_type, **kwargs)
+             self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
+                 strategy_type, chunk_size=chunk_size, overlap=overlap, **kwargs
+             )
          except ValueError as e:
              raise ValueError(f"Failed to set chunking strategy: {e}")
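Readers gain the same explicit parameters, so a strategy chosen by name can be sized without packing values into **kwargs. A minimal sketch; `reader` is any concrete Reader instance (for example one obtained from ReaderFactory), and the "markdown" strategy name is an assumption about what ChunkingStrategyType.from_string accepts:

# Sizes are forwarded to the factory, which passes them only to strategies that support them.
reader.set_chunking_strategy_from_string("markdown", chunk_size=1200, overlap=100)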