agno 2.2.10__py3-none-any.whl → 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +75 -48
- agno/db/dynamo/utils.py +1 -1
- agno/db/firestore/utils.py +1 -1
- agno/db/gcs_json/utils.py +1 -1
- agno/db/in_memory/utils.py +1 -1
- agno/db/json/utils.py +1 -1
- agno/db/mongo/utils.py +3 -3
- agno/db/mysql/mysql.py +1 -1
- agno/db/mysql/utils.py +1 -1
- agno/db/postgres/utils.py +1 -1
- agno/db/redis/utils.py +1 -1
- agno/db/singlestore/singlestore.py +1 -1
- agno/db/singlestore/utils.py +1 -1
- agno/db/sqlite/async_sqlite.py +1 -1
- agno/db/sqlite/sqlite.py +1 -1
- agno/db/sqlite/utils.py +1 -1
- agno/filters.py +354 -0
- agno/knowledge/chunking/agentic.py +8 -9
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/knowledge.py +43 -22
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/utils.py +20 -0
- agno/models/anthropic/claude.py +45 -9
- agno/models/base.py +4 -0
- agno/os/app.py +23 -7
- agno/os/interfaces/slack/router.py +53 -33
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/router.py +25 -1
- agno/os/routers/health.py +5 -3
- agno/os/routers/knowledge/knowledge.py +43 -17
- agno/os/routers/knowledge/schemas.py +4 -3
- agno/run/agent.py +11 -1
- agno/run/base.py +3 -2
- agno/session/agent.py +10 -5
- agno/team/team.py +57 -18
- agno/tools/file_generation.py +4 -4
- agno/tools/gmail.py +179 -0
- agno/tools/parallel.py +314 -0
- agno/utils/agent.py +22 -17
- agno/utils/gemini.py +15 -5
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/models/claude.py +2 -1
- agno/utils/print_response/agent.py +5 -4
- agno/utils/print_response/team.py +5 -4
- agno/vectordb/base.py +2 -4
- agno/vectordb/cassandra/cassandra.py +12 -5
- agno/vectordb/chroma/chromadb.py +10 -4
- agno/vectordb/clickhouse/clickhousedb.py +12 -4
- agno/vectordb/couchbase/couchbase.py +12 -3
- agno/vectordb/lancedb/lance_db.py +69 -144
- agno/vectordb/langchaindb/langchaindb.py +13 -4
- agno/vectordb/lightrag/lightrag.py +8 -3
- agno/vectordb/llamaindex/llamaindexdb.py +10 -4
- agno/vectordb/milvus/milvus.py +16 -5
- agno/vectordb/mongodb/mongodb.py +14 -3
- agno/vectordb/pgvector/pgvector.py +73 -15
- agno/vectordb/pineconedb/pineconedb.py +6 -2
- agno/vectordb/qdrant/qdrant.py +25 -13
- agno/vectordb/redis/redisdb.py +37 -30
- agno/vectordb/singlestore/singlestore.py +9 -4
- agno/vectordb/surrealdb/surrealdb.py +13 -3
- agno/vectordb/upstashdb/upstashdb.py +8 -5
- agno/vectordb/weaviate/weaviate.py +29 -12
- agno/workflow/step.py +3 -2
- agno/workflow/types.py +20 -1
- agno/workflow/workflow.py +103 -14
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/METADATA +4 -1
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/RECORD +73 -71
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/WHEEL +0 -0
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/top_level.txt +0 -0
agno/filters.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Search filter expressions for filtering knowledge base documents and search results.
|
|
2
|
+
|
|
3
|
+
This module provides a set of filter operators for constructing complex search queries
|
|
4
|
+
that can be applied to knowledge bases, vector databases, and other searchable content.
|
|
5
|
+
|
|
6
|
+
Filter Types:
|
|
7
|
+
- Comparison: EQ (equals), GT (greater than), LT (less than)
|
|
8
|
+
- Inclusion: IN (value in list)
|
|
9
|
+
- Logical: AND, OR, NOT
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
>>> from agno.filters import EQ, GT, IN, AND, OR, NOT
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Simple equality filter
|
|
15
|
+
>>> filter = EQ("category", "technology")
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Complex filter with multiple conditions
|
|
18
|
+
>>> filter = AND(
|
|
19
|
+
... EQ("status", "published"),
|
|
20
|
+
... GT("views", 1000),
|
|
21
|
+
... IN("category", ["tech", "science"])
|
|
22
|
+
... )
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Using OR logic
|
|
25
|
+
>>> filter = OR(EQ("priority", "high"), EQ("urgent", True))
|
|
26
|
+
>>>
|
|
27
|
+
>>> # Negating conditions
|
|
28
|
+
>>> filter = NOT(EQ("status", "archived"))
|
|
29
|
+
>>>
|
|
30
|
+
>>> # Complex nested logic
|
|
31
|
+
>>> filter = OR(
|
|
32
|
+
... AND(EQ("type", "article"), GT("word_count", 500)),
|
|
33
|
+
... AND(EQ("type", "tutorial"), NOT(EQ("difficulty", "beginner")))
|
|
34
|
+
... )
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
from typing import Any, List
|
|
40
|
+
|
|
41
|
+
# ============================================================
|
|
42
|
+
# Base Expression
|
|
43
|
+
# ============================================================
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class FilterExpr:
    """Abstract base for all search-filter expressions.

    Concrete subclasses (EQ, IN, GT, LT, AND, OR, NOT) implement
    ``to_dict`` so a filter tree can be serialized for transport.
    Expressions also compose through Python operator overloads:

    - ``a | b``  -> OR(a, b)
    - ``a & b``  -> AND(a, b)
    - ``~a``     -> NOT(a)

    Example:
        >>> # (status == "active" AND age > 18) OR role == "admin"
        >>> expr = OR(AND(EQ("status", "active"), GT("age", 18)), EQ("role", "admin"))
        >>> # Same tree built with operators:
        >>> expr = (EQ("status", "active") & GT("age", 18)) | EQ("role", "admin")
    """

    def __or__(self, other: FilterExpr) -> OR:
        """Return an OR expression combining this filter with ``other``."""
        return OR(self, other)

    def __and__(self, other: FilterExpr) -> AND:
        """Return an AND expression combining this filter with ``other``."""
        return AND(self, other)

    def __invert__(self) -> NOT:
        """Return a NOT expression negating this filter."""
        return NOT(self)

    def to_dict(self) -> dict:
        """Serialize this expression to a plain dictionary (subclass responsibility)."""
        raise NotImplementedError("Subclasses must implement to_dict()")

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.__dict__})"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ============================================================
|
|
82
|
+
# Comparison & Inclusion Filters
|
|
83
|
+
# ============================================================
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class EQ(FilterExpr):
    """Exact-match filter: true when field ``key`` equals ``value``.

    Args:
        key: Name of the metadata field to test.
        value: Value the field must equal.

    Example:
        >>> # Documents whose status is "published"
        >>> EQ("status", "published")
        >>> # Documents whose author_id is 123
        >>> EQ("author_id", 123)
    """

    def __init__(self, key: str, value: Any):
        self.key = key
        self.value = value

    def to_dict(self) -> dict:
        return {"op": "EQ", "key": self.key, "value": self.value}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class IN(FilterExpr):
    """Membership filter: true when field ``key``'s value appears in ``values``.

    Args:
        key: Name of the metadata field to test.
        values: The list of acceptable values.

    Example:
        >>> # Documents whose category is one of tech / science / engineering
        >>> IN("category", ["tech", "science", "engineering"])
        >>> # Documents whose status is draft or published
        >>> IN("status", ["draft", "published"])
    """

    def __init__(self, key: str, values: List[Any]):
        self.key = key
        self.values = values

    def to_dict(self) -> dict:
        return {"op": "IN", "key": self.key, "values": self.values}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class GT(FilterExpr):
    """Threshold filter: true when field ``key`` is strictly greater than ``value``.

    Args:
        key: Name of the metadata field to compare.
        value: The lower-bound threshold.

    Example:
        >>> GT("age", 18)               # age greater than 18
        >>> GT("price", 100.0)          # price greater than 100.0
        >>> GT("created_at", 1234567890)  # created after a timestamp
    """

    def __init__(self, key: str, value: Any):
        self.key = key
        self.value = value

    def to_dict(self) -> dict:
        return {"op": "GT", "key": self.key, "value": self.value}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class LT(FilterExpr):
    """Threshold filter: true when field ``key`` is strictly less than ``value``.

    Args:
        key: Name of the metadata field to compare.
        value: The upper-bound threshold.

    Example:
        >>> LT("age", 65)               # age less than 65
        >>> LT("price", 50.0)           # price less than 50.0
        >>> LT("created_at", 1234567890)  # created before a timestamp
    """

    def __init__(self, key: str, value: Any):
        self.key = key
        self.value = value

    def to_dict(self) -> dict:
        return {"op": "LT", "key": self.key, "value": self.value}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ============================================================
|
|
185
|
+
# Logical Operators
|
|
186
|
+
# ============================================================
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class AND(FilterExpr):
    """Conjunction: true only when every child expression matches.

    Args:
        *expressions: FilterExpr objects that must all be satisfied.

    Example:
        >>> AND(EQ("status", "published"), GT("age", 18))
        >>> AND(
        ...     EQ("status", "active"),
        ...     GT("score", 80),
        ...     IN("category", ["tech", "science"]),
        ... )
    """

    def __init__(self, *expressions: FilterExpr):
        self.expressions = [*expressions]

    def to_dict(self) -> dict:
        return {"op": "AND", "conditions": [expr.to_dict() for expr in self.expressions]}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class OR(FilterExpr):
    """Disjunction: true when at least one child expression matches.

    Args:
        *expressions: FilterExpr objects, any of which may be satisfied.

    Example:
        >>> OR(EQ("status", "published"), EQ("status", "archived"))
        >>> # VIP members or high scorers
        >>> OR(EQ("membership", "VIP"), GT("score", 1000))
    """

    def __init__(self, *expressions: FilterExpr):
        self.expressions = [*expressions]

    def to_dict(self) -> dict:
        return {"op": "OR", "conditions": [expr.to_dict() for expr in self.expressions]}
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class NOT(FilterExpr):
    """Negation: true when the wrapped expression does NOT match.

    Args:
        expression: The FilterExpr to negate.

    Example:
        >>> NOT(EQ("status", "draft"))
        >>> NOT(AND(EQ("status", "inactive"), LT("score", 10)))
        >>> NOT(IN("user_id", [101, 102, 103]))
    """

    def __init__(self, expression: FilterExpr):
        self.expression = expression

    def to_dict(self) -> dict:
        return {"op": "NOT", "condition": self.expression.to_dict()}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# ============================================================
|
|
271
|
+
# Deserialization
|
|
272
|
+
# ============================================================
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def from_dict(filter_dict: dict) -> FilterExpr:
    """Reconstruct a FilterExpr object from its dictionary representation.

    Deserializes filter expressions produced by ``to_dict()``, enabling
    filters to be passed through JSON APIs and rebuilt on the server side.

    Args:
        filter_dict: Dictionary representation of a filter expression. Must
            contain an ``"op"`` key naming the operator, plus that operator's
            required fields (``key``/``value`` for EQ/GT/LT, ``key``/``values``
            for IN, ``conditions`` for AND/OR, ``condition`` for NOT).

    Returns:
        FilterExpr: The reconstructed filter expression object.

    Raises:
        ValueError: If the dictionary is malformed, a required field is
            missing, ``conditions`` is not a list, or the operator is unknown.

    Example:
        >>> reconstructed = from_dict({"op": "EQ", "key": "status", "value": "published"})
        >>> # Round-trip a nested filter:
        >>> tree = OR(AND(EQ("type", "article"), GT("views", 1000)), IN("priority", ["high", "urgent"]))
        >>> rebuilt = from_dict(tree.to_dict())
    """
    if not isinstance(filter_dict, dict) or "op" not in filter_dict:
        raise ValueError(f"Invalid filter dictionary: must contain 'op' key. Got: {filter_dict}")

    op = filter_dict["op"]

    # Leaf comparison operators share the same shape; dispatch via lookup
    # instead of four duplicated branches.
    if op in ("EQ", "GT", "LT"):
        if "key" not in filter_dict or "value" not in filter_dict:
            raise ValueError(f"{op} filter requires 'key' and 'value' fields. Got: {filter_dict}")
        constructor = {"EQ": EQ, "GT": GT, "LT": LT}[op]
        return constructor(filter_dict["key"], filter_dict["value"])

    elif op == "IN":
        if "key" not in filter_dict or "values" not in filter_dict:
            raise ValueError(f"IN filter requires 'key' and 'values' fields. Got: {filter_dict}")
        return IN(filter_dict["key"], filter_dict["values"])

    # Logical operators recurse into their child expressions.
    elif op in ("AND", "OR"):
        if "conditions" not in filter_dict:
            raise ValueError(f"{op} filter requires 'conditions' field. Got: {filter_dict}")
        raw_conditions = filter_dict["conditions"]
        # Guard against a non-list payload (e.g. a string), which would
        # otherwise surface as an opaque TypeError or iterate characters.
        if not isinstance(raw_conditions, list):
            raise ValueError(f"{op} filter 'conditions' must be a list. Got: {raw_conditions}")
        conditions = [from_dict(cond) for cond in raw_conditions]
        return AND(*conditions) if op == "AND" else OR(*conditions)

    elif op == "NOT":
        if "condition" not in filter_dict:
            raise ValueError(f"NOT filter requires 'condition' field. Got: {filter_dict}")
        return NOT(from_dict(filter_dict["condition"]))

    else:
        raise ValueError(f"Unknown filter operator: {op}")
|
|
@@ -20,13 +20,12 @@ class AgenticChunking(ChunkingStrategy):
|
|
|
20
20
|
except Exception:
|
|
21
21
|
raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
|
|
22
22
|
model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
|
|
23
|
-
|
|
24
|
-
self.max_chunk_size = max_chunk_size
|
|
23
|
+
self.chunk_size = max_chunk_size
|
|
25
24
|
self.model = model
|
|
26
25
|
|
|
27
26
|
def chunk(self, document: Document) -> List[Document]:
|
|
28
27
|
"""Split text into chunks using LLM to determine natural breakpoints based on context"""
|
|
29
|
-
if len(document.content) <= self.
|
|
28
|
+
if len(document.content) <= self.chunk_size:
|
|
30
29
|
return [document]
|
|
31
30
|
|
|
32
31
|
chunks: List[Document] = []
|
|
@@ -35,22 +34,22 @@ class AgenticChunking(ChunkingStrategy):
|
|
|
35
34
|
chunk_number = 1
|
|
36
35
|
|
|
37
36
|
while remaining_text:
|
|
38
|
-
# Ask model to find a good breakpoint within
|
|
39
|
-
prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.
|
|
37
|
+
# Ask model to find a good breakpoint within chunk_size
|
|
38
|
+
prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
|
|
40
39
|
Consider semantic completeness, paragraph boundaries, and topic transitions.
|
|
41
40
|
Return only the character position number of where to break the text:
|
|
42
41
|
|
|
43
|
-
{remaining_text[: self.
|
|
42
|
+
{remaining_text[: self.chunk_size]}"""
|
|
44
43
|
|
|
45
44
|
try:
|
|
46
45
|
response = self.model.response([Message(role="user", content=prompt)])
|
|
47
46
|
if response and response.content:
|
|
48
|
-
break_point = min(int(response.content.strip()), self.
|
|
47
|
+
break_point = min(int(response.content.strip()), self.chunk_size)
|
|
49
48
|
else:
|
|
50
|
-
break_point = self.
|
|
49
|
+
break_point = self.chunk_size
|
|
51
50
|
except Exception:
|
|
52
51
|
# Fallback to max size if model fails
|
|
53
|
-
break_point = self.
|
|
52
|
+
break_point = self.chunk_size
|
|
54
53
|
|
|
55
54
|
# Extract chunk and update remaining text
|
|
56
55
|
chunk = remaining_text[:break_point].strip()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import List
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from agno.knowledge.document.base import Document
|
|
6
6
|
|
|
@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
|
|
|
60
60
|
"""Factory for creating chunking strategy instances."""
|
|
61
61
|
|
|
62
62
|
@classmethod
|
|
63
|
-
def create_strategy(
|
|
63
|
+
def create_strategy(
|
|
64
|
+
cls,
|
|
65
|
+
strategy_type: ChunkingStrategyType,
|
|
66
|
+
chunk_size: Optional[int] = None,
|
|
67
|
+
overlap: Optional[int] = None,
|
|
68
|
+
**kwargs,
|
|
69
|
+
) -> ChunkingStrategy:
|
|
64
70
|
"""Create an instance of the chunking strategy with the given parameters."""
|
|
65
71
|
strategy_map = {
|
|
66
72
|
ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
|
|
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
|
|
|
71
77
|
ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
|
|
72
78
|
ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
|
|
73
79
|
}
|
|
74
|
-
return strategy_map[strategy_type](**kwargs)
|
|
80
|
+
return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)
|
|
75
81
|
|
|
76
82
|
@classmethod
|
|
77
|
-
def _create_agentic_chunking(
|
|
83
|
+
def _create_agentic_chunking(
|
|
84
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
85
|
+
) -> ChunkingStrategy:
|
|
78
86
|
from agno.knowledge.chunking.agentic import AgenticChunking
|
|
79
87
|
|
|
80
|
-
#
|
|
81
|
-
if
|
|
82
|
-
kwargs["max_chunk_size"] =
|
|
88
|
+
# AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
|
|
89
|
+
if chunk_size is not None:
|
|
90
|
+
kwargs["max_chunk_size"] = chunk_size
|
|
91
|
+
# Remove overlap since AgenticChunking doesn't support it
|
|
83
92
|
return AgenticChunking(**kwargs)
|
|
84
93
|
|
|
85
94
|
@classmethod
|
|
86
|
-
def _create_document_chunking(
|
|
95
|
+
def _create_document_chunking(
|
|
96
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
97
|
+
) -> ChunkingStrategy:
|
|
87
98
|
from agno.knowledge.chunking.document import DocumentChunking
|
|
88
99
|
|
|
100
|
+
# DocumentChunking accepts both chunk_size and overlap
|
|
101
|
+
if chunk_size is not None:
|
|
102
|
+
kwargs["chunk_size"] = chunk_size
|
|
103
|
+
if overlap is not None:
|
|
104
|
+
kwargs["overlap"] = overlap
|
|
89
105
|
return DocumentChunking(**kwargs)
|
|
90
106
|
|
|
91
107
|
@classmethod
|
|
92
|
-
def _create_recursive_chunking(
|
|
108
|
+
def _create_recursive_chunking(
|
|
109
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
110
|
+
) -> ChunkingStrategy:
|
|
93
111
|
from agno.knowledge.chunking.recursive import RecursiveChunking
|
|
94
112
|
|
|
113
|
+
# RecursiveChunking accepts both chunk_size and overlap
|
|
114
|
+
if chunk_size is not None:
|
|
115
|
+
kwargs["chunk_size"] = chunk_size
|
|
116
|
+
if overlap is not None:
|
|
117
|
+
kwargs["overlap"] = overlap
|
|
95
118
|
return RecursiveChunking(**kwargs)
|
|
96
119
|
|
|
97
120
|
@classmethod
|
|
98
|
-
def _create_semantic_chunking(
|
|
121
|
+
def _create_semantic_chunking(
|
|
122
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
123
|
+
) -> ChunkingStrategy:
|
|
99
124
|
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
100
125
|
|
|
126
|
+
# SemanticChunking accepts chunk_size but not overlap
|
|
127
|
+
if chunk_size is not None:
|
|
128
|
+
kwargs["chunk_size"] = chunk_size
|
|
129
|
+
# Remove overlap since SemanticChunking doesn't support it
|
|
101
130
|
return SemanticChunking(**kwargs)
|
|
102
131
|
|
|
103
132
|
@classmethod
|
|
104
|
-
def _create_fixed_chunking(
|
|
133
|
+
def _create_fixed_chunking(
|
|
134
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
135
|
+
) -> ChunkingStrategy:
|
|
105
136
|
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
106
137
|
|
|
138
|
+
# FixedSizeChunking accepts both chunk_size and overlap
|
|
139
|
+
if chunk_size is not None:
|
|
140
|
+
kwargs["chunk_size"] = chunk_size
|
|
141
|
+
if overlap is not None:
|
|
142
|
+
kwargs["overlap"] = overlap
|
|
107
143
|
return FixedSizeChunking(**kwargs)
|
|
108
144
|
|
|
109
145
|
@classmethod
|
|
110
|
-
def _create_row_chunking(
|
|
146
|
+
def _create_row_chunking(
|
|
147
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
148
|
+
) -> ChunkingStrategy:
|
|
111
149
|
from agno.knowledge.chunking.row import RowChunking
|
|
112
150
|
|
|
113
|
-
#
|
|
114
|
-
kwargs.pop("chunk_size", None)
|
|
151
|
+
# RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
|
|
115
152
|
return RowChunking(**kwargs)
|
|
116
153
|
|
|
117
154
|
@classmethod
|
|
118
|
-
def _create_markdown_chunking(
|
|
155
|
+
def _create_markdown_chunking(
|
|
156
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
157
|
+
) -> ChunkingStrategy:
|
|
119
158
|
from agno.knowledge.chunking.markdown import MarkdownChunking
|
|
120
159
|
|
|
160
|
+
# MarkdownChunking accepts both chunk_size and overlap
|
|
161
|
+
if chunk_size is not None:
|
|
162
|
+
kwargs["chunk_size"] = chunk_size
|
|
163
|
+
if overlap is not None:
|
|
164
|
+
kwargs["overlap"] = overlap
|
|
121
165
|
return MarkdownChunking(**kwargs)
|
|
@@ -25,10 +25,14 @@ class SentenceTransformerEmbedder(Embedder):
|
|
|
25
25
|
prompt: Optional[str] = None
|
|
26
26
|
normalize_embeddings: bool = False
|
|
27
27
|
|
|
28
|
-
def
|
|
29
|
-
|
|
28
|
+
def __post_init__(self):
|
|
29
|
+
# Initialize the SentenceTransformer model eagerly to avoid race conditions in async contexts
|
|
30
|
+
if self.sentence_transformer_client is None:
|
|
30
31
|
self.sentence_transformer_client = SentenceTransformer(model_name_or_path=self.id)
|
|
31
32
|
|
|
33
|
+
def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
|
|
34
|
+
if self.sentence_transformer_client is None:
|
|
35
|
+
raise RuntimeError("SentenceTransformer model not initialized")
|
|
32
36
|
model = self.sentence_transformer_client
|
|
33
37
|
embedding = model.encode(text, prompt=self.prompt, normalize_embeddings=self.normalize_embeddings)
|
|
34
38
|
try:
|
agno/knowledge/knowledge.py
CHANGED
|
@@ -13,6 +13,7 @@ from httpx import AsyncClient
|
|
|
13
13
|
|
|
14
14
|
from agno.db.base import AsyncBaseDb, BaseDb
|
|
15
15
|
from agno.db.schemas.knowledge import KnowledgeRow
|
|
16
|
+
from agno.filters import FilterExpr
|
|
16
17
|
from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
|
|
17
18
|
from agno.knowledge.document import Document
|
|
18
19
|
from agno.knowledge.reader import Reader, ReaderFactory
|
|
@@ -403,7 +404,7 @@ class Knowledge:
|
|
|
403
404
|
|
|
404
405
|
if path.is_file():
|
|
405
406
|
if self._should_include_file(str(path), include, exclude):
|
|
406
|
-
|
|
407
|
+
log_debug(f"Adding file {path} due to include/exclude filters")
|
|
407
408
|
|
|
408
409
|
await self._add_to_contents_db(content)
|
|
409
410
|
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
@@ -1392,7 +1393,7 @@ class Knowledge:
|
|
|
1392
1393
|
self,
|
|
1393
1394
|
query: str,
|
|
1394
1395
|
max_results: Optional[int] = None,
|
|
1395
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
1396
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
1396
1397
|
search_type: Optional[str] = None,
|
|
1397
1398
|
) -> List[Document]:
|
|
1398
1399
|
"""Returns relevant documents matching a query"""
|
|
@@ -1423,7 +1424,7 @@ class Knowledge:
|
|
|
1423
1424
|
self,
|
|
1424
1425
|
query: str,
|
|
1425
1426
|
max_results: Optional[int] = None,
|
|
1426
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
1427
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
1427
1428
|
search_type: Optional[str] = None,
|
|
1428
1429
|
) -> List[Document]:
|
|
1429
1430
|
"""Returns relevant documents matching a query"""
|
|
@@ -1465,38 +1466,58 @@ class Knowledge:
|
|
|
1465
1466
|
self.valid_metadata_filters.update(await self._aget_filters_from_db())
|
|
1466
1467
|
return self.valid_metadata_filters
|
|
1467
1468
|
|
|
1468
|
-
def _validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[
|
|
1469
|
+
def _validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
|
|
1470
|
+
"""Internal method to validate filters against known metadata keys."""
|
|
1469
1471
|
if not filters:
|
|
1470
|
-
return
|
|
1472
|
+
return None, []
|
|
1471
1473
|
|
|
1472
|
-
valid_filters: Dict[str, Any] =
|
|
1474
|
+
valid_filters: Optional[Dict[str, Any]] = None
|
|
1473
1475
|
invalid_keys = []
|
|
1474
1476
|
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1477
|
+
if isinstance(filters, dict):
|
|
1478
|
+
# If no metadata filters tracked yet, all keys are considered invalid
|
|
1479
|
+
if self.valid_metadata_filters is None:
|
|
1480
|
+
invalid_keys = list(filters.keys())
|
|
1481
|
+
log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
|
|
1482
|
+
return None, invalid_keys
|
|
1483
|
+
|
|
1484
|
+
valid_filters = {}
|
|
1485
|
+
for key, value in filters.items():
|
|
1486
|
+
# Handle both normal keys and prefixed keys like meta_data.key
|
|
1487
|
+
base_key = key.split(".")[-1] if "." in key else key
|
|
1488
|
+
if base_key in self.valid_metadata_filters or key in self.valid_metadata_filters:
|
|
1489
|
+
valid_filters[key] = value
|
|
1490
|
+
else:
|
|
1491
|
+
invalid_keys.append(key)
|
|
1492
|
+
log_debug(f"Invalid filter key: {key} - not present in knowledge base")
|
|
1493
|
+
|
|
1494
|
+
elif isinstance(filters, List):
|
|
1495
|
+
# Validate that list contains FilterExpr instances
|
|
1496
|
+
for i, filter_item in enumerate(filters):
|
|
1497
|
+
if not isinstance(filter_item, FilterExpr):
|
|
1498
|
+
log_warning(
|
|
1499
|
+
f"Invalid filter at index {i}: expected FilterExpr instance, "
|
|
1500
|
+
f"got {type(filter_item).__name__}. "
|
|
1501
|
+
f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
|
|
1502
|
+
f"AND(...), OR(...), NOT(...) from agno.filters"
|
|
1503
|
+
)
|
|
1504
|
+
|
|
1505
|
+
# Filter expressions are already validated, return empty dict/list
|
|
1506
|
+
# The actual filtering happens in the vector_db layer
|
|
1507
|
+
return filters, []
|
|
1489
1508
|
|
|
1490
1509
|
return valid_filters, invalid_keys
|
|
1491
1510
|
|
|
1492
|
-
def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[
|
|
1511
|
+
def validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
|
|
1493
1512
|
if self.valid_metadata_filters is None:
|
|
1494
1513
|
self.valid_metadata_filters = set()
|
|
1495
1514
|
self.valid_metadata_filters.update(self._get_filters_from_db())
|
|
1496
1515
|
|
|
1497
1516
|
return self._validate_filters(filters)
|
|
1498
1517
|
|
|
1499
|
-
async def async_validate_filters(
|
|
1518
|
+
async def async_validate_filters(
|
|
1519
|
+
self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]
|
|
1520
|
+
) -> Tuple[Any, List[str]]:
|
|
1500
1521
|
if self.valid_metadata_filters is None:
|
|
1501
1522
|
self.valid_metadata_filters = set()
|
|
1502
1523
|
self.valid_metadata_filters.update(await self._aget_filters_from_db())
|
agno/knowledge/reader/base.py
CHANGED
|
@@ -44,11 +44,15 @@ class Reader:
|
|
|
44
44
|
self.max_results = max_results
|
|
45
45
|
self.encoding = encoding
|
|
46
46
|
|
|
47
|
-
def set_chunking_strategy_from_string(
|
|
47
|
+
def set_chunking_strategy_from_string(
|
|
48
|
+
self, strategy_name: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
49
|
+
) -> None:
|
|
48
50
|
"""Set the chunking strategy from a string name."""
|
|
49
51
|
try:
|
|
50
52
|
strategy_type = ChunkingStrategyType.from_string(strategy_name)
|
|
51
|
-
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
53
|
+
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
54
|
+
strategy_type, chunk_size=chunk_size, overlap=overlap, **kwargs
|
|
55
|
+
)
|
|
52
56
|
except ValueError as e:
|
|
53
57
|
raise ValueError(f"Failed to set chunking strategy: {e}")
|
|
54
58
|
|