agno 2.3.22__py3-none-any.whl → 2.3.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +28 -1
- agno/agent/remote.py +1 -1
- agno/db/mongo/mongo.py +9 -1
- agno/db/mysql/async_mysql.py +5 -7
- agno/db/mysql/mysql.py +5 -7
- agno/db/mysql/schemas.py +39 -21
- agno/db/postgres/async_postgres.py +10 -2
- agno/db/postgres/postgres.py +5 -7
- agno/db/postgres/schemas.py +39 -21
- agno/db/singlestore/schemas.py +41 -21
- agno/db/singlestore/singlestore.py +14 -3
- agno/db/sqlite/async_sqlite.py +7 -2
- agno/db/sqlite/schemas.py +36 -21
- agno/db/sqlite/sqlite.py +3 -7
- agno/knowledge/chunking/markdown.py +94 -8
- agno/knowledge/chunking/semantic.py +2 -2
- agno/knowledge/knowledge.py +215 -207
- agno/models/base.py +32 -8
- agno/models/google/gemini.py +27 -4
- agno/os/routers/agents/router.py +1 -1
- agno/os/routers/evals/evals.py +2 -2
- agno/os/routers/knowledge/knowledge.py +21 -5
- agno/os/routers/knowledge/schemas.py +1 -1
- agno/os/routers/memory/memory.py +4 -4
- agno/os/routers/session/session.py +2 -2
- agno/os/routers/teams/router.py +2 -2
- agno/os/routers/traces/traces.py +3 -3
- agno/os/routers/workflows/router.py +1 -1
- agno/os/schema.py +1 -1
- agno/os/utils.py +1 -1
- agno/remote/base.py +1 -1
- agno/team/remote.py +1 -1
- agno/team/team.py +24 -4
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +150 -13
- agno/tools/crawl4ai.py +3 -0
- agno/tools/file.py +14 -13
- agno/tools/function.py +15 -2
- agno/tools/mcp/mcp.py +1 -0
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/python.py +14 -6
- agno/tools/toolkit.py +122 -23
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/chromadb.py +1 -1
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +1 -1
- agno/vectordb/milvus/milvus.py +1 -1
- agno/vectordb/mongodb/mongodb.py +13 -3
- agno/vectordb/pgvector/pgvector.py +1 -1
- agno/vectordb/pineconedb/pineconedb.py +2 -2
- agno/vectordb/qdrant/qdrant.py +1 -1
- agno/vectordb/redis/redisdb.py +2 -2
- agno/vectordb/singlestore/singlestore.py +1 -1
- agno/vectordb/surrealdb/surrealdb.py +2 -2
- agno/vectordb/weaviate/weaviate.py +1 -1
- agno/workflow/remote.py +1 -1
- agno/workflow/workflow.py +14 -0
- {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/METADATA +1 -1
- {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/RECORD +62 -62
- {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/WHEEL +0 -0
- {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
@@ -34,7 +34,14 @@ class KnowledgeContentOrigin(Enum):
 
 @dataclass
 class Knowledge:
-    """Knowledge class
+    """Knowledge class
+
+    Args:
+        vector_db: Vector database for storing and searching embeddings (required)
+        contents_db: Optional contents database for metadata tracking and filter validation.
+            When configured, enables validation of agentic filter keys.
+            When not configured, filters are passed directly to vector_db.
+    """
 
     name: Optional[str] = None
     description: Optional[str] = None
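The new docstring splits responsibilities between the two backends: the vector db holds embeddings, the optional contents db tracks metadata and backs filter validation. A minimal construction sketch under agno 2.x-style imports (the PgVector/PostgresDb classes and their constructor arguments are assumptions for illustration; substitute the databases you actually use):

# Illustrative sketch only; class names and constructor args are assumed.
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.pgvector import PgVector   # assumed vector db backend
from agno.db.postgres import PostgresDb       # assumed contents db backend

db_url = "postgresql+psycopg://ai:ai@localhost:5432/ai"
vector_db = PgVector(table_name="knowledge_vectors", db_url=db_url)

# With a contents DB: content metadata is tracked and agentic filter keys are validated.
knowledge = Knowledge(name="docs", vector_db=vector_db, contents_db=PostgresDb(db_url=db_url))

# Without a contents DB: still works; filters are passed directly to vector_db.
knowledge_no_tracking = Knowledge(name="docs-light", vector_db=vector_db)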
@@ -2027,13 +2034,7 @@ class Knowledge:
                 self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata or {})
 
             return content_row.to_dict()
-
-        else:
-            if self.name:
-                log_warning(f"Contents DB not found for knowledge base: {self.name}")
-            else:
-                log_warning("Contents DB not found for knowledge base")
-            return None
+        return None
 
     async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
         if self.contents_db:
@@ -2073,91 +2074,145 @@ class Knowledge:
                 self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata or {})
 
             return content_row.to_dict()
+        return None
 
-
-
-
-
-
+    def _prepare_lightrag_path_data(self, content: Content) -> Optional[Tuple[bytes, str, str]]:
+        """Prepare file data from a path for LightRAG upload.
+
+        Returns:
+            Tuple of (file_content, filename, file_type) or None if preparation fails.
+        """
+        if content.file_data is None:
+            log_warning("No file data provided")
+
+        if content.path is None:
+            log_error("No path provided for content")
+            return None
+
+        path = Path(content.path)
+        log_info(f"Uploading file to LightRAG from path: {path}")
+
+        with open(path, "rb") as f:
+            file_content = f.read()
+
+        file_type = content.file_type or path.suffix
+        return (file_content, path.name, file_type)
+
+    def _prepare_lightrag_url_data(self, content: Content) -> Optional[Tuple[str, str]]:
+        """Prepare text data from a URL for LightRAG upload.
+
+        Returns:
+            Tuple of (file_source, text) or None if preparation fails.
+        """
+        log_info(f"Uploading file to LightRAG from URL: {content.url}")
+
+        reader = content.reader or self.website_reader
+        if reader is None:
+            log_error("No URL reader available")
+            return None
+
+        reader.chunk = False
+        read_documents = reader.read(content.url, name=content.name)
+        if not content.id:
+            content.id = generate_id(content.content_hash or "")
+        self._prepare_documents_for_insert(read_documents, content.id)
+
+        if not read_documents:
+            log_error("No documents read from URL")
            return None
 
+        return (content.url or "", read_documents[0].content)
+
+    def _prepare_lightrag_topic_data(self, content: Content) -> Optional[Tuple[str, str]]:
+        """Prepare text data from topics for LightRAG upload.
+
+        Returns:
+            Tuple of (file_source, text) or None if preparation fails.
+        """
+        log_info(f"Uploading file to LightRAG: {content.name}")
+
+        if content.reader is None:
+            log_error("No reader available for topic content")
+            return None
+
+        if not content.topics:
+            log_error("No topics available for content")
+            return None
+
+        read_documents = content.reader.read(content.topics)
+        if not read_documents:
+            log_warning(f"No documents found for LightRAG upload: {content.name}")
+            return None
+
+        return (content.topics[0], read_documents[0].content)
+
+    def _prepare_lightrag_file_data(self, content: Content) -> Optional[Tuple[Union[str, bytes], str, Optional[str]]]:
+        """Prepare file data from file_data content for LightRAG upload.
+
+        Returns:
+            Tuple of (file_content, filename, content_type) or None if preparation fails.
+        """
+        filename = content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
+        log_info(f"Uploading file to LightRAG: {filename}")
+
+        if not (content.file_data and content.file_data.content):
+            log_warning(f"No file data available for LightRAG upload: {content.name}")
+            return None
+
+        return (content.file_data.content, filename, content.file_data.type)
+
     async def _process_lightrag_content_async(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
         from agno.vectordb import VectorDb
 
         self.vector_db = cast(VectorDb, self.vector_db)
 
         await self._add_to_contents_db_async(content)
-        if content_type == KnowledgeContentOrigin.PATH:
-            if content.file_data is None:
-                log_warning("No file data provided")
-
-            if content.path is None:
-                log_error("No path provided for content")
-                return
 
-
-
-            log_info(f"Uploading file to LightRAG from path: {path}")
+        if content_type == KnowledgeContentOrigin.PATH:
             try:
-
-
-
+                path_data = self._prepare_lightrag_path_data(content)
+                if path_data is None:
+                    content.status = ContentStatus.FAILED
+                    await self._aupdate_content(content)
+                    return
 
-
-                file_type = content.file_type or path.suffix
+                file_content, filename, file_type = path_data
 
                 if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
                     result = await self.vector_db.insert_file_bytes(
                         file_content=file_content,
-                        filename=
+                        filename=filename,
                         content_type=file_type,
-                        send_metadata=True,
+                        send_metadata=True,
                     )
-
                 else:
                     log_error("Vector database does not support file insertion")
                     content.status = ContentStatus.FAILED
                     await self._aupdate_content(content)
                     return
+
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
-                return
 
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
                 await self._aupdate_content(content)
-                return
 
         elif content_type == KnowledgeContentOrigin.URL:
-            log_info(f"Uploading file to LightRAG from URL: {content.url}")
             try:
-
-                if
-                    log_error("No URL reader available")
+                url_data = self._prepare_lightrag_url_data(content)
+                if url_data is None:
                     content.status = ContentStatus.FAILED
                     await self._aupdate_content(content)
                     return
 
-
-                read_documents = reader.read(content.url, name=content.name)
-                if not content.id:
-                    content.id = generate_id(content.content_hash or "")
-                self._prepare_documents_for_insert(read_documents, content.id)
-
-                if not read_documents:
-                    log_error("No documents read from URL")
-                    content.status = ContentStatus.FAILED
-                    await self._aupdate_content(content)
-                    return
+                file_source, text = url_data
 
                 if self.vector_db and hasattr(self.vector_db, "insert_text"):
-                    result = await self.vector_db.insert_text(
-                        file_source=content.url,
-                        text=read_documents[0].content,
-                    )
+                    result = await self.vector_db.insert_text(file_source=file_source, text=text)
                 else:
                     log_error("Vector database does not support text insertion")
                     content.status = ContentStatus.FAILED
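The four new _prepare_lightrag_* helpers share one contract: return a tuple of upload-ready data, or None when preparation fails so the caller can mark the content as failed. A standalone sketch of that pattern (illustrative names and a hypothetical path, not agno's API):

from pathlib import Path
from typing import Optional, Tuple

def prepare_path_data(path_str: Optional[str]) -> Optional[Tuple[bytes, str, str]]:
    # Return (file_content, filename, file_type), or None if preparation fails.
    if path_str is None:
        return None
    path = Path(path_str)
    if not path.is_file():
        return None
    return path.read_bytes(), path.name, path.suffix

data = prepare_path_data("docs/report.pdf")  # hypothetical path
if data is None:
    print("preparation failed; caller would mark the content FAILED")
else:
    file_content, filename, file_type = data
    print(f"ready to upload {filename} ({file_type}, {len(file_content)} bytes)")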
@@ -2167,77 +2222,58 @@ class Knowledge:
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
-                return
 
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
                 await self._aupdate_content(content)
-                return
 
         elif content_type == KnowledgeContentOrigin.CONTENT:
-
-
-
-            log_info(f"Uploading file to LightRAG: {filename}")
-
-            # Use the content from file_data
-            if content.file_data and content.file_data.content:
-                if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
-                    result = await self.vector_db.insert_file_bytes(
-                        file_content=content.file_data.content,
-                        filename=filename,
-                        content_type=content.file_data.type,
-                        send_metadata=True,  # Enable metadata so server knows the file type
-                    )
-                else:
-                    log_error("Vector database does not support file insertion")
-                    content.status = ContentStatus.FAILED
-                    await self._aupdate_content(content)
-                    return
-                content.external_id = result
-                content.status = ContentStatus.COMPLETED
-                await self._aupdate_content(content)
-            else:
-                log_warning(f"No file data available for LightRAG upload: {content.name}")
-                return
+            file_data = self._prepare_lightrag_file_data(content)
+            if file_data is None:
+                return
 
-
-            log_info(f"Uploading file to LightRAG: {content.name}")
+            file_content_data, filename_data, content_type_str = file_data
 
-            if
-
+            if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
+                result = await self.vector_db.insert_file_bytes(
+                    file_content=file_content_data,
+                    filename=filename_data,
+                    content_type=content_type_str,
+                    send_metadata=True,
+                )
+            else:
+                log_error("Vector database does not support file insertion")
                 content.status = ContentStatus.FAILED
                 await self._aupdate_content(content)
                 return
 
-
-
+            content.external_id = result
+            content.status = ContentStatus.COMPLETED
+            await self._aupdate_content(content)
+
+        elif content_type == KnowledgeContentOrigin.TOPIC:
+            topic_data = self._prepare_lightrag_topic_data(content)
+            if topic_data is None:
                 content.status = ContentStatus.FAILED
                 await self._aupdate_content(content)
                 return
 
-
-
-
-
-                        file_source=content.topics[0],
-                        text=read_documents[0].content,
-                    )
-                else:
-                    log_error("Vector database does not support text insertion")
-                    content.status = ContentStatus.FAILED
-                    await self._aupdate_content(content)
-                    return
-                content.external_id = result
-                content.status = ContentStatus.COMPLETED
-                await self._aupdate_content(content)
-                return
+            file_source, text = topic_data
+
+            if self.vector_db and hasattr(self.vector_db, "insert_text"):
+                result = await self.vector_db.insert_text(file_source=file_source, text=text)
             else:
-
+                log_error("Vector database does not support text insertion")
+                content.status = ContentStatus.FAILED
+                await self._aupdate_content(content)
                 return
 
+            content.external_id = result
+            content.status = ContentStatus.COMPLETED
+            await self._aupdate_content(content)
+
     def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
         """Synchronously process LightRAG content. Uses asyncio.run() only for LightRAG-specific async methods."""
         from agno.vectordb import VectorDb
@@ -2245,31 +2281,22 @@ class Knowledge:
         self.vector_db = cast(VectorDb, self.vector_db)
 
         self._add_to_contents_db(content)
-        if content_type == KnowledgeContentOrigin.PATH:
-            if content.file_data is None:
-                log_warning("No file data provided")
 
-
-                log_error("No path provided for content")
-                return
-
-            path = Path(content.path)
-
-            log_info(f"Uploading file to LightRAG from path: {path}")
+        if content_type == KnowledgeContentOrigin.PATH:
             try:
-
-
-
+                path_data = self._prepare_lightrag_path_data(content)
+                if path_data is None:
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
 
-
-                file_type = content.file_type or path.suffix
+                file_content, filename, file_type = path_data
 
                 if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
-                    # LightRAG only has async methods, use asyncio.run() here
                     result = asyncio.run(
                         self.vector_db.insert_file_bytes(
                             file_content=file_content,
-                            filename=
+                            filename=filename,
                             content_type=file_type,
                             send_metadata=True,
                         )
@@ -2279,48 +2306,29 @@ class Knowledge:
                     content.status = ContentStatus.FAILED
                     self._update_content(content)
                     return
+
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
-                return
 
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
                 self._update_content(content)
-                return
 
         elif content_type == KnowledgeContentOrigin.URL:
-            log_info(f"Uploading file to LightRAG from URL: {content.url}")
             try:
-
-                if
-                    log_error("No URL reader available")
+                url_data = self._prepare_lightrag_url_data(content)
+                if url_data is None:
                     content.status = ContentStatus.FAILED
                     self._update_content(content)
                     return
 
-
-                read_documents = reader.read(content.url, name=content.name)
-                if not content.id:
-                    content.id = generate_id(content.content_hash or "")
-                self._prepare_documents_for_insert(read_documents, content.id)
-
-                if not read_documents:
-                    log_error("No documents read from URL")
-                    content.status = ContentStatus.FAILED
-                    self._update_content(content)
-                    return
+                file_source, text = url_data
 
                 if self.vector_db and hasattr(self.vector_db, "insert_text"):
-
-                    result = asyncio.run(
-                        self.vector_db.insert_text(
-                            file_source=content.url,
-                            text=read_documents[0].content,
-                        )
-                    )
+                    result = asyncio.run(self.vector_db.insert_text(file_source=file_source, text=text))
                 else:
                     log_error("Vector database does not support text insertion")
                     content.status = ContentStatus.FAILED
@@ -2330,83 +2338,60 @@ class Knowledge:
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
-                return
 
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
                 self._update_content(content)
-                return
 
         elif content_type == KnowledgeContentOrigin.CONTENT:
-
-
-
-            log_info(f"Uploading file to LightRAG: {filename}")
+            file_data = self._prepare_lightrag_file_data(content)
+            if file_data is None:
+                return
 
-
-
-
-
-
-
-
-
-
-                        send_metadata=True,
-                    )
+            file_content_data, filename_data, content_type_str = file_data
+
+            if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
+                result = asyncio.run(
+                    self.vector_db.insert_file_bytes(
+                        file_content=file_content_data,
+                        filename=filename_data,
+                        content_type=content_type_str,
+                        send_metadata=True,
                     )
-
-                    log_error("Vector database does not support file insertion")
-                    content.status = ContentStatus.FAILED
-                    self._update_content(content)
-                    return
-                content.external_id = result
-                content.status = ContentStatus.COMPLETED
-                self._update_content(content)
+                )
             else:
-
-                return
-
-        elif content_type == KnowledgeContentOrigin.TOPIC:
-            log_info(f"Uploading file to LightRAG: {content.name}")
-
-            if content.reader is None:
-                log_error("No reader available for topic content")
+                log_error("Vector database does not support file insertion")
                 content.status = ContentStatus.FAILED
                 self._update_content(content)
                 return
 
-
-
+            content.external_id = result
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+
+        elif content_type == KnowledgeContentOrigin.TOPIC:
+            topic_data = self._prepare_lightrag_topic_data(content)
+            if topic_data is None:
                 content.status = ContentStatus.FAILED
                 self._update_content(content)
                 return
 
-
-
-
-
-                result = asyncio.run(
-                    self.vector_db.insert_text(
-                        file_source=content.topics[0],
-                        text=read_documents[0].content,
-                    )
-                )
-            else:
-                log_error("Vector database does not support text insertion")
-                content.status = ContentStatus.FAILED
-                self._update_content(content)
-                return
-            content.external_id = result
-            content.status = ContentStatus.COMPLETED
-            self._update_content(content)
-            return
+            file_source, text = topic_data
+
+            if self.vector_db and hasattr(self.vector_db, "insert_text"):
+                result = asyncio.run(self.vector_db.insert_text(file_source=file_source, text=text))
             else:
-
+                log_error("Vector database does not support text insertion")
+                content.status = ContentStatus.FAILED
+                self._update_content(content)
                 return
 
+            content.external_id = result
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+
     def search(
         self,
         query: str,
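The synchronous processor mirrors the async one but bridges to the async-only vector db methods with asyncio.run(), as a removed inline comment ("LightRAG only has async methods") noted. A minimal sketch of that bridging pattern with a stand-in backend (not agno's classes):

import asyncio

class AsyncOnlyBackend:
    """Stand-in for a vector db that only exposes async insert methods."""

    async def insert_text(self, file_source: str, text: str) -> str:
        await asyncio.sleep(0)  # pretend to do I/O
        return f"external-id-for-{file_source}"

def insert_text_sync(backend: AsyncOnlyBackend, file_source: str, text: str) -> str:
    # Synchronous caller drives the coroutine to completion with asyncio.run().
    # Note: asyncio.run() fails inside an already-running event loop, which is
    # why the async code path above awaits the coroutine directly instead.
    return asyncio.run(backend.insert_text(file_source=file_source, text=text))

print(insert_text_sync(AsyncOnlyBackend(), "https://example.com", "page text"))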
@@ -2473,8 +2458,16 @@ class Knowledge:
             return []
 
     def get_valid_filters(self) -> Set[str]:
+        """Get set of valid filter keys from ContentsDB metadata.
+
+        Returns:
+            Set of metadata keys available for filtering. Empty set if ContentsDB not configured.
+
+        Note:
+            When ContentsDB is not configured, filtering still works - filters are passed
+            directly to the vector database without validation.
+        """
         if self.contents_db is None:
-            log_warning("No contents db provided. This is required for filtering.")
             return set()
         contents, _ = self.get_content()
         valid_filters: Set[str] = set()
@@ -2486,7 +2479,9 @@ class Knowledge:
 
     async def async_get_valid_filters(self) -> Set[str]:
         if self.contents_db is None:
-
+            log_info(
+                "ContentsDB not configured. For improved filter validation and reliability, consider adding a ContentsDB."
+            )
             return set()
         contents, _ = await self.aget_content()
         valid_filters: Set[str] = set()
@@ -2506,13 +2501,10 @@ class Knowledge:
         invalid_keys = []
 
         if isinstance(filters, dict):
-            # If no metadata filters tracked yet, all
+            # If no metadata filters tracked yet, pass all filters through without validation
             if valid_metadata_filters is None or not valid_metadata_filters:
-
-
-                    f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}"
-                )
-                return {}, invalid_keys
+                log_debug("No metadata filter validation available. Passing filters to vector DB without validation.")
+                return filters, []
 
             for key, value in filters.items():
                 # Handle both normal keys and prefixed keys like meta_data.key
@@ -2542,10 +2534,26 @@ class Knowledge:
     def validate_filters(
         self, filters: Union[Dict[str, Any], List[FilterExpr]]
     ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
+        """Validate filters against known metadata keys from ContentsDB.
+
+        Args:
+            filters: Filters to validate
+
+        Returns:
+            Tuple of (valid_filters, invalid_keys)
+
+        Note:
+            When ContentsDB is not configured, returns (filters, []) - all filters
+            are considered valid and passed through without validation.
+        """
+        if self.contents_db is None:
+            log_info(
+                "ContentsDB not configured. For improved filter validation and reliability, consider adding a ContentsDB."
+            )
+            return filters, []
         valid_filters_from_db = self.get_valid_filters()
 
         valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)
-
         return valid_filters, invalid_keys
 
     async def async_validate_filters(