agno 2.4.6__py3-none-any.whl → 2.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. agno/agent/agent.py +5 -1
  2. agno/db/base.py +2 -0
  3. agno/db/postgres/postgres.py +5 -5
  4. agno/db/singlestore/singlestore.py +4 -5
  5. agno/db/sqlite/sqlite.py +4 -4
  6. agno/knowledge/embedder/aws_bedrock.py +325 -106
  7. agno/knowledge/knowledge.py +83 -1853
  8. agno/knowledge/loaders/__init__.py +29 -0
  9. agno/knowledge/loaders/azure_blob.py +423 -0
  10. agno/knowledge/loaders/base.py +187 -0
  11. agno/knowledge/loaders/gcs.py +267 -0
  12. agno/knowledge/loaders/github.py +415 -0
  13. agno/knowledge/loaders/s3.py +281 -0
  14. agno/knowledge/loaders/sharepoint.py +439 -0
  15. agno/knowledge/reader/website_reader.py +2 -2
  16. agno/knowledge/remote_knowledge.py +151 -0
  17. agno/knowledge/reranker/aws_bedrock.py +299 -0
  18. agno/learn/machine.py +5 -6
  19. agno/learn/stores/session_context.py +10 -2
  20. agno/models/azure/openai_chat.py +6 -11
  21. agno/models/neosantara/__init__.py +5 -0
  22. agno/models/neosantara/neosantara.py +42 -0
  23. agno/models/utils.py +5 -0
  24. agno/os/app.py +4 -1
  25. agno/os/interfaces/agui/router.py +1 -1
  26. agno/os/routers/components/components.py +2 -0
  27. agno/os/routers/knowledge/knowledge.py +0 -1
  28. agno/os/routers/registry/registry.py +340 -192
  29. agno/os/routers/workflows/router.py +7 -1
  30. agno/os/schema.py +104 -0
  31. agno/registry/registry.py +4 -0
  32. agno/run/workflow.py +3 -0
  33. agno/session/workflow.py +1 -1
  34. agno/skills/utils.py +100 -2
  35. agno/team/team.py +6 -3
  36. agno/tools/mcp/mcp.py +26 -1
  37. agno/vectordb/lancedb/lance_db.py +22 -7
  38. agno/workflow/__init__.py +4 -0
  39. agno/workflow/cel.py +299 -0
  40. agno/workflow/condition.py +280 -58
  41. agno/workflow/loop.py +177 -46
  42. agno/workflow/parallel.py +75 -4
  43. agno/workflow/router.py +260 -44
  44. agno/workflow/step.py +14 -7
  45. agno/workflow/steps.py +43 -0
  46. agno/workflow/workflow.py +104 -46
  47. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/METADATA +25 -37
  48. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/RECORD +51 -39
  49. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/WHEEL +0 -0
  50. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/licenses/LICENSE +0 -0
  51. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/top_level.txt +0 -0
agno/knowledge/loaders/sharepoint.py
@@ -0,0 +1,439 @@
+"""SharePoint content loader for Knowledge.
+
+Provides methods for loading content from Microsoft SharePoint.
+"""
+
+# mypy: disable-error-code="attr-defined"
+
+from io import BytesIO
+from typing import Dict, List, Optional, cast
+
+import httpx
+from httpx import AsyncClient
+
+from agno.knowledge.content import Content, ContentStatus
+from agno.knowledge.loaders.base import BaseLoader
+from agno.knowledge.reader import Reader
+from agno.knowledge.remote_content.config import RemoteContentConfig, SharePointConfig
+from agno.knowledge.remote_content.remote_content import SharePointContent
+from agno.utils.log import log_error, log_info, log_warning
+
+
+class SharePointLoader(BaseLoader):
+    """Loader for SharePoint content."""
+
+    # ==========================================
+    # SHAREPOINT HELPERS (shared between sync/async)
+    # ==========================================
+
+    def _validate_sharepoint_config(
+        self,
+        content: Content,
+        config: Optional[RemoteContentConfig],
+    ) -> Optional[SharePointConfig]:
+        """Validate and extract SharePoint config.
+
+        Returns:
+            SharePointConfig if valid, None otherwise
+        """
+        remote_content: SharePointContent = cast(SharePointContent, content.remote_content)
+        sp_config = cast(SharePointConfig, config) if isinstance(config, SharePointConfig) else None
+
+        if sp_config is None:
+            log_error(f"SharePoint config not found for config_id: {remote_content.config_id}")
+            return None
+
+        return sp_config
+
+    def _get_sharepoint_access_token(self, sp_config: SharePointConfig) -> Optional[str]:
+        """Get an access token for Microsoft Graph API using client credentials flow.
+
+        Requires the `msal` package: pip install msal
+        """
+        try:
+            from msal import ConfidentialClientApplication  # type: ignore
+        except ImportError:
+            raise ImportError("The `msal` package is not installed. Please install it via `pip install msal`.")
+
+        authority = f"https://login.microsoftonline.com/{sp_config.tenant_id}"
+        app = ConfidentialClientApplication(
+            sp_config.client_id,
+            authority=authority,
+            client_credential=sp_config.client_secret,
+        )
+
+        scopes = ["https://graph.microsoft.com/.default"]
+        result = app.acquire_token_for_client(scopes=scopes)
+
+        if "access_token" in result:
+            return result["access_token"]
+        else:
+            log_error(f"Failed to acquire SharePoint token: {result.get('error_description', result.get('error'))}")
+            return None
+
+    def _get_sharepoint_site_id(self, hostname: str, site_path: Optional[str], access_token: str) -> Optional[str]:
+        """Get the SharePoint site ID using Microsoft Graph API (sync)."""
+        if site_path:
+            url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:/{site_path}"
+        else:
+            url = f"https://graph.microsoft.com/v1.0/sites/{hostname}"
+
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        try:
+            response = httpx.get(url, headers=headers)
+            response.raise_for_status()
+            return response.json().get("id")
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to get SharePoint site ID: {e.response.status_code} - {e.response.text}")
+            return None
+
+    async def _aget_sharepoint_site_id(
+        self, hostname: str, site_path: Optional[str], access_token: str
+    ) -> Optional[str]:
+        """Get the SharePoint site ID using Microsoft Graph API (async)."""
+        if site_path:
+            url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:/{site_path}"
+        else:
+            url = f"https://graph.microsoft.com/v1.0/sites/{hostname}"
+
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(url, headers=headers)
+                response.raise_for_status()
+                return response.json().get("id")
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to get SharePoint site ID: {e.response.status_code} - {e.response.text}")
+            return None
+
+    def _list_sharepoint_folder_items(self, site_id: str, folder_path: str, access_token: str) -> List[dict]:
+        """List all items in a SharePoint folder (sync)."""
+        folder_path = folder_path.lstrip("/")
+        url: Optional[str] = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{folder_path}:/children"
+        headers = {"Authorization": f"Bearer {access_token}"}
+        items: List[dict] = []
+
+        try:
+            while url:
+                response = httpx.get(url, headers=headers)
+                response.raise_for_status()
+                data = response.json()
+                items.extend(data.get("value", []))
+                url = data.get("@odata.nextLink")
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to list SharePoint folder: {e.response.status_code} - {e.response.text}")
+
+        return items
+
+    async def _alist_sharepoint_folder_items(self, site_id: str, folder_path: str, access_token: str) -> List[dict]:
+        """List all items in a SharePoint folder (async)."""
+        folder_path = folder_path.lstrip("/")
+        url: Optional[str] = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{folder_path}:/children"
+        headers = {"Authorization": f"Bearer {access_token}"}
+        items: List[dict] = []
+
+        try:
+            async with httpx.AsyncClient() as client:
+                while url:
+                    response = await client.get(url, headers=headers)
+                    response.raise_for_status()
+                    data = response.json()
+                    items.extend(data.get("value", []))
+                    url = data.get("@odata.nextLink")
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to list SharePoint folder: {e.response.status_code} - {e.response.text}")
+
+        return items
+
+    def _download_sharepoint_file(self, site_id: str, file_path: str, access_token: str) -> Optional[BytesIO]:
+        """Download a file from SharePoint (sync)."""
+        file_path = file_path.lstrip("/")
+        url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{file_path}:/content"
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        try:
+            response = httpx.get(url, headers=headers, follow_redirects=True)
+            response.raise_for_status()
+            return BytesIO(response.content)
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to download SharePoint file {file_path}: {e.response.status_code} - {e.response.text}")
+            return None
+
+    async def _adownload_sharepoint_file(self, site_id: str, file_path: str, access_token: str) -> Optional[BytesIO]:
+        """Download a file from SharePoint (async)."""
+        file_path = file_path.lstrip("/")
+        url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{file_path}:/content"
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(url, headers=headers, follow_redirects=True)
+                response.raise_for_status()
+                return BytesIO(response.content)
+        except httpx.HTTPStatusError as e:
+            log_error(f"Failed to download SharePoint file {file_path}: {e.response.status_code} - {e.response.text}")
+            return None
+
+    def _build_sharepoint_metadata(
+        self,
+        sp_config: SharePointConfig,
+        site_id: str,
+        file_path: str,
+        file_name: str,
+    ) -> Dict[str, str]:
+        """Build SharePoint-specific metadata dictionary."""
+        return {
+            "source_type": "sharepoint",
+            "source_config_id": sp_config.id,
+            "source_config_name": sp_config.name,
+            "sharepoint_hostname": sp_config.hostname,
+            "sharepoint_site_id": site_id,
+            "sharepoint_path": file_path,
+            "sharepoint_filename": file_name,
+        }
+
+    def _build_sharepoint_virtual_path(self, hostname: str, site_id: str, file_path: str) -> str:
+        """Build virtual path for SharePoint content."""
+        return f"sharepoint://{hostname}/{site_id}/{file_path}"
+
+    def _get_sharepoint_path_to_process(self, remote_content: SharePointContent) -> str:
+        """Get the path to process from remote content."""
+        return (remote_content.file_path or remote_content.folder_path or "").strip("/")
+
+    # ==========================================
+    # SHAREPOINT LOADERS
+    # ==========================================
+
+    async def _aload_from_sharepoint(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        config: Optional[RemoteContentConfig] = None,
+    ):
+        """Load content from SharePoint (async).
+
+        Requires the SharePoint config to contain tenant_id, client_id, client_secret, and hostname.
+        """
+        remote_content: SharePointContent = cast(SharePointContent, content.remote_content)
+        sp_config = self._validate_sharepoint_config(content, config)
+        if sp_config is None:
+            return
+
+        # Get access token
+        access_token = self._get_sharepoint_access_token(sp_config)
+        if not access_token:
+            return
+
+        # Get site ID
+        site_id: Optional[str] = sp_config.site_id
+        if not site_id:
+            site_path = remote_content.site_path or sp_config.site_path
+            site_id = await self._aget_sharepoint_site_id(sp_config.hostname, site_path, access_token)
+            if not site_id:
+                log_error(f"Failed to get SharePoint site ID for {sp_config.hostname}/{site_path}")
+                return
+
+        # Identify files to download
+        files_to_process: List[tuple] = []
+        path_to_process = self._get_sharepoint_path_to_process(remote_content)
+
+        # Helper function to recursively list all files in a folder
+        async def list_files_recursive(folder: str) -> List[tuple]:
+            """Recursively list all files in a SharePoint folder."""
+            files: List[tuple] = []
+            items = await self._alist_sharepoint_folder_items(site_id, folder, access_token)  # type: ignore
+            for item in items:
+                if "file" in item:
+                    item_path = f"{folder}/{item['name']}"
+                    files.append((item_path, item["name"]))
+                elif "folder" in item:
+                    subdir_path = f"{folder}/{item['name']}"
+                    subdir_files = await list_files_recursive(subdir_path)
+                    files.extend(subdir_files)
+            return files
+
+        if path_to_process:
+            try:
+                async with AsyncClient() as client:
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{path_to_process}"
+                    headers = {"Authorization": f"Bearer {access_token}"}
+                    response = await client.get(url, headers=headers, timeout=30.0)
+                    response.raise_for_status()
+                    item_data = response.json()
+
+                    if "folder" in item_data:
+                        files_to_process = await list_files_recursive(path_to_process)
+                    elif "file" in item_data:
+                        files_to_process.append((path_to_process, item_data["name"]))
+                    else:
+                        log_warning(f"SharePoint path {path_to_process} is neither file nor folder")
+                        return
+            except Exception as e:
+                log_error(f"Error checking SharePoint path {path_to_process}: {e}")
+                return
+
+        if not files_to_process:
+            log_warning(f"No files found at SharePoint path: {path_to_process}")
+            return
+
+        log_info(f"Processing {len(files_to_process)} file(s) from SharePoint")
+        is_folder_upload = len(files_to_process) > 1
+
+        for file_path, file_name in files_to_process:
+            # Build metadata and virtual path using helpers
+            virtual_path = self._build_sharepoint_virtual_path(sp_config.hostname, site_id, file_path)
+            sharepoint_metadata = self._build_sharepoint_metadata(sp_config, site_id, file_path, file_name)
+            merged_metadata = self._merge_metadata(sharepoint_metadata, content.metadata)
+
+            # Compute content name using base helper
+            content_name = self._compute_content_name(
+                file_path, file_name, content.name, path_to_process, is_folder_upload
+            )
+
+            # Create content entry using base helper
+            content_entry = self._create_content_entry(
+                content, content_name, virtual_path, merged_metadata, "sharepoint", is_folder_upload
+            )
+
+            await self._ainsert_contents_db(content_entry)
+
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                await self._aupdate_content(content_entry)
+                continue
+
+            # Select reader and download file
+            reader = self._select_reader_by_uri(file_name, content.reader)
+            reader = cast(Reader, reader)
+
+            file_content = await self._adownload_sharepoint_file(site_id, file_path, access_token)
+            if not file_content:
+                content_entry.status = ContentStatus.FAILED
+                await self._aupdate_content(content_entry)
+                continue
+
+            # Read the content
+            read_documents = await reader.async_read(file_content, name=file_name)
+
+            # Prepare and insert to vector database
+            self._prepare_documents_for_insert(read_documents, content_entry.id)
+            await self._ahandle_vector_db_insert(content_entry, read_documents, upsert)
+
+    def _load_from_sharepoint(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        config: Optional[RemoteContentConfig] = None,
+    ):
+        """Load content from SharePoint (sync).
+
+        Requires the SharePoint config to contain tenant_id, client_id, client_secret, and hostname.
+        """
+        remote_content: SharePointContent = cast(SharePointContent, content.remote_content)
+        sp_config = self._validate_sharepoint_config(content, config)
+        if sp_config is None:
+            return
+
+        # Get access token
+        access_token = self._get_sharepoint_access_token(sp_config)
+        if not access_token:
+            return
+
+        # Get site ID
+        site_id: Optional[str] = sp_config.site_id
+        if not site_id:
+            site_path = remote_content.site_path or sp_config.site_path
+            site_id = self._get_sharepoint_site_id(sp_config.hostname, site_path, access_token)
+            if not site_id:
+                log_error(f"Failed to get SharePoint site ID for {sp_config.hostname}/{site_path}")
+                return
+
+        # Identify files to download
+        files_to_process: List[tuple] = []
+        path_to_process = self._get_sharepoint_path_to_process(remote_content)
+
+        # Helper function to recursively list all files in a folder
+        def list_files_recursive(folder: str) -> List[tuple]:
+            """Recursively list all files in a SharePoint folder."""
+            files: List[tuple] = []
+            items = self._list_sharepoint_folder_items(site_id, folder, access_token)  # type: ignore
+            for item in items:
+                if "file" in item:
+                    item_path = f"{folder}/{item['name']}"
+                    files.append((item_path, item["name"]))
+                elif "folder" in item:
+                    subdir_path = f"{folder}/{item['name']}"
+                    subdir_files = list_files_recursive(subdir_path)
+                    files.extend(subdir_files)
+            return files
+
+        if path_to_process:
+            try:
+                with httpx.Client() as client:
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{path_to_process}"
+                    headers = {"Authorization": f"Bearer {access_token}"}
+                    response = client.get(url, headers=headers, timeout=30.0)
+                    response.raise_for_status()
+                    item_data = response.json()
+
+                    if "folder" in item_data:
+                        files_to_process = list_files_recursive(path_to_process)
+                    elif "file" in item_data:
+                        files_to_process.append((path_to_process, item_data["name"]))
+                    else:
+                        log_warning(f"SharePoint path {path_to_process} is neither file nor folder")
+                        return
+            except Exception as e:
+                log_error(f"Error checking SharePoint path {path_to_process}: {e}")
+                return
+
+        if not files_to_process:
+            log_warning(f"No files found at SharePoint path: {path_to_process}")
+            return
+
+        log_info(f"Processing {len(files_to_process)} file(s) from SharePoint")
+        is_folder_upload = len(files_to_process) > 1
+
+        for file_path, file_name in files_to_process:
+            # Build metadata and virtual path using helpers
+            virtual_path = self._build_sharepoint_virtual_path(sp_config.hostname, site_id, file_path)
+            sharepoint_metadata = self._build_sharepoint_metadata(sp_config, site_id, file_path, file_name)
+            merged_metadata = self._merge_metadata(sharepoint_metadata, content.metadata)
+
+            # Compute content name using base helper
+            content_name = self._compute_content_name(
+                file_path, file_name, content.name, path_to_process, is_folder_upload
+            )
+
+            # Create content entry using base helper
+            content_entry = self._create_content_entry(
+                content, content_name, virtual_path, merged_metadata, "sharepoint", is_folder_upload
+            )
+
+            self._insert_contents_db(content_entry)
+
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                self._update_content(content_entry)
+                continue
+
+            # Select reader and download file
+            reader = self._select_reader_by_uri(file_name, content.reader)
+            reader = cast(Reader, reader)
+
+            file_content = self._download_sharepoint_file(site_id, file_path, access_token)
+            if not file_content:
+                content_entry.status = ContentStatus.FAILED
+                self._update_content(content_entry)
+                continue

+            # Read the content
+            read_documents = reader.read(file_content, name=file_name)
+
+            # Prepare and insert to vector database
+            self._prepare_documents_for_insert(read_documents, content_entry.id)
+            self._handle_vector_db_insert(content_entry, read_documents, upsert)
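
Note on the flow above: the loader acquires a Graph token via the MSAL client-credentials flow, resolves the site ID from hostname and site path, lists the target folder while following @odata.nextLink pages, and downloads each file before handing it to a reader. Below is a minimal standalone sketch of that same sequence using msal and httpx directly, not the agno API; the tenant, client, hostname, site path, and folder values are placeholders.

from io import BytesIO
from typing import Dict, List, Optional

import httpx
from msal import ConfidentialClientApplication

GRAPH = "https://graph.microsoft.com/v1.0"

# Placeholder values for illustration only.
TENANT_ID = "00000000-0000-0000-0000-000000000000"
CLIENT_ID = "my-app-client-id"
CLIENT_SECRET = "my-app-client-secret"
HOSTNAME = "contoso.sharepoint.com"
SITE_PATH = "sites/engineering"
FOLDER = "Shared Documents/reports"


def get_token() -> str:
    # Client-credentials flow, mirroring _get_sharepoint_access_token above.
    app = ConfidentialClientApplication(
        CLIENT_ID,
        authority=f"https://login.microsoftonline.com/{TENANT_ID}",
        client_credential=CLIENT_SECRET,
    )
    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    if "access_token" not in result:
        raise RuntimeError(result.get("error_description", "token acquisition failed"))
    return result["access_token"]


def list_folder(site_id: str, folder: str, headers: Dict[str, str]) -> List[dict]:
    # Paged listing that follows @odata.nextLink, as _list_sharepoint_folder_items does.
    url: Optional[str] = f"{GRAPH}/sites/{site_id}/drive/root:/{folder}:/children"
    items: List[dict] = []
    while url:
        resp = httpx.get(url, headers=headers)
        resp.raise_for_status()
        data = resp.json()
        items.extend(data.get("value", []))
        url = data.get("@odata.nextLink")
    return items


def download(site_id: str, path: str, headers: Dict[str, str]) -> BytesIO:
    # The /content endpoint redirects to a download URL, hence follow_redirects=True.
    resp = httpx.get(f"{GRAPH}/sites/{site_id}/drive/root:/{path}:/content", headers=headers, follow_redirects=True)
    resp.raise_for_status()
    return BytesIO(resp.content)


if __name__ == "__main__":
    headers = {"Authorization": f"Bearer {get_token()}"}
    # Resolve the site ID from hostname + site path, as _get_sharepoint_site_id does.
    resp = httpx.get(f"{GRAPH}/sites/{HOSTNAME}:/{SITE_PATH}", headers=headers)
    resp.raise_for_status()
    site_id = resp.json()["id"]
    for item in list_folder(site_id, FOLDER, headers):
        if "file" in item:
            payload = download(site_id, f"{FOLDER}/{item['name']}", headers)
            print(item["name"], len(payload.getvalue()), "bytes")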

agno/knowledge/reader/website_reader.py
@@ -7,7 +7,7 @@ from urllib.parse import urljoin, urlparse
 
 import httpx
 
-from agno.knowledge.chunking.semantic import SemanticChunking
+from agno.knowledge.chunking.fixed import FixedSizeChunking
 from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
@@ -32,7 +32,7 @@ class WebsiteReader(Reader):
 
     def __init__(
         self,
-        chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
+        chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(),
         max_depth: int = 3,
         max_links: int = 10,
         timeout: int = 10,
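
The two hunks above change WebsiteReader's default chunking strategy from SemanticChunking to FixedSizeChunking. Callers who want the 2.4.6 behaviour can presumably still opt in explicitly; a minimal sketch, assuming only the import paths and constructor parameters visible in this diff:

from agno.knowledge.chunking.semantic import SemanticChunking
from agno.knowledge.reader.website_reader import WebsiteReader

# Restore the 2.4.6 default by passing SemanticChunking explicitly.
reader = WebsiteReader(
    chunking_strategy=SemanticChunking(),
    max_depth=3,   # crawl depth, per the constructor signature above
    max_links=10,  # maximum links to follow
    timeout=10,    # request timeout in seconds
)

The rationale is not stated in the diff, but switching the default plausibly avoids pulling semantic chunking's embedding dependency into every website read.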

agno/knowledge/remote_knowledge.py
@@ -0,0 +1,151 @@
+"""Remote content loading for Knowledge.
+
+Provides methods for loading content from cloud storage providers:
+- S3, GCS, SharePoint, GitHub, Azure Blob Storage
+
+This module contains the RemoteKnowledge class which combines all loader
+capabilities through inheritance. The Knowledge class inherits from this
+to gain remote content loading capabilities.
+"""
+
+from typing import List, Optional
+
+from agno.knowledge.content import Content
+from agno.knowledge.loaders.azure_blob import AzureBlobLoader
+from agno.knowledge.loaders.gcs import GCSLoader
+from agno.knowledge.loaders.github import GitHubLoader
+from agno.knowledge.loaders.s3 import S3Loader
+from agno.knowledge.loaders.sharepoint import SharePointLoader
+from agno.knowledge.remote_content.config import RemoteContentConfig
+from agno.knowledge.remote_content.remote_content import (
+    AzureBlobContent,
+    GCSContent,
+    GitHubContent,
+    S3Content,
+    SharePointContent,
+)
+from agno.utils.log import log_warning
+
+
+class RemoteKnowledge(S3Loader, GCSLoader, SharePointLoader, GitHubLoader, AzureBlobLoader):
+    """Base class providing remote content loading capabilities.
+
+    Inherits from all provider-specific loaders:
+    - S3Loader: AWS S3 content loading
+    - GCSLoader: Google Cloud Storage content loading
+    - SharePointLoader: Microsoft SharePoint content loading
+    - GitHubLoader: GitHub repository content loading
+    - AzureBlobLoader: Azure Blob Storage content loading
+
+    Knowledge inherits from this class and provides:
+    - content_sources: List[RemoteContentConfig]
+    - vector_db, contents_db attributes
+    - _should_skip(), _select_reader_by_uri(), _prepare_documents_for_insert() methods
+    - _ahandle_vector_db_insert(), _handle_vector_db_insert() methods
+    - _ainsert_contents_db(), _insert_contents_db() methods
+    - _aupdate_content(), _update_content() methods
+    - _build_content_hash() method
+    """
+
+    # These attributes are provided by the Knowledge subclass
+    content_sources: Optional[List[RemoteContentConfig]]
+
+    # ==========================================
+    # REMOTE CONTENT DISPATCHERS
+    # ==========================================
+
+    async def _aload_from_remote_content(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+    ):
+        """Async dispatcher for remote content loading.
+
+        Routes to the appropriate provider-specific loader based on content type.
+        """
+        if content.remote_content is None:
+            log_warning("No remote content provided for content")
+            return
+
+        remote_content = content.remote_content
+
+        # Look up config if config_id is provided
+        config = None
+        if hasattr(remote_content, "config_id") and remote_content.config_id:
+            config = self._get_remote_config_by_id(remote_content.config_id)
+            if config is None:
+                log_warning(f"No config found for config_id: {remote_content.config_id}")
+
+        if isinstance(remote_content, S3Content):
+            await self._aload_from_s3(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, GCSContent):
+            await self._aload_from_gcs(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, SharePointContent):
+            await self._aload_from_sharepoint(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, GitHubContent):
+            await self._aload_from_github(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, AzureBlobContent):
+            await self._aload_from_azure_blob(content, upsert, skip_if_exists, config)
+
+        else:
+            log_warning(f"Unsupported remote content type: {type(remote_content)}")
+
+    def _load_from_remote_content(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+    ):
+        """Sync dispatcher for remote content loading.
+
+        Routes to the appropriate provider-specific loader based on content type.
+        """
+        if content.remote_content is None:
+            log_warning("No remote content provided for content")
+            return
+
+        remote_content = content.remote_content
+
+        # Look up config if config_id is provided
+        config = None
+        if hasattr(remote_content, "config_id") and remote_content.config_id:
+            config = self._get_remote_config_by_id(remote_content.config_id)
+            if config is None:
+                log_warning(f"No config found for config_id: {remote_content.config_id}")
+
+        if isinstance(remote_content, S3Content):
+            self._load_from_s3(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, GCSContent):
+            self._load_from_gcs(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, SharePointContent):
+            self._load_from_sharepoint(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, GitHubContent):
+            self._load_from_github(content, upsert, skip_if_exists, config)
+
+        elif isinstance(remote_content, AzureBlobContent):
+            self._load_from_azure_blob(content, upsert, skip_if_exists, config)
+
+        else:
+            log_warning(f"Unsupported remote content type: {type(remote_content)}")
+
+    # ==========================================
+    # REMOTE CONFIG HELPERS
+    # ==========================================
+
+    def _get_remote_configs(self) -> List[RemoteContentConfig]:
+        """Return configured remote content sources."""
+        return self.content_sources or []
+
+    def _get_remote_config_by_id(self, config_id: str) -> Optional[RemoteContentConfig]:
+        """Get a remote content config by its ID."""
+        if not self.content_sources:
+            return None
+        return next((c for c in self.content_sources if c.id == config_id), None)
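
RemoteKnowledge dispatches on the concrete remote_content type and resolves the matching RemoteContentConfig by config_id before delegating to a provider loader. The following stand-in sketch isolates that route-by-type plus lookup-by-id pattern; the dataclasses are illustrative substitutes, not the agno classes.

from dataclasses import dataclass
from typing import List, Optional, Union


@dataclass
class S3Src:  # stand-in for S3Content
    config_id: str
    key: str


@dataclass
class SharePointSrc:  # stand-in for SharePointContent
    config_id: str
    file_path: str


@dataclass
class SourceConfig:  # stand-in for RemoteContentConfig
    id: str
    name: str


class Dispatcher:
    def __init__(self, content_sources: Optional[List[SourceConfig]] = None):
        self.content_sources = content_sources

    def _get_config_by_id(self, config_id: str) -> Optional[SourceConfig]:
        # Same next()-based lookup used by _get_remote_config_by_id above.
        if not self.content_sources:
            return None
        return next((c for c in self.content_sources if c.id == config_id), None)

    def load(self, remote: Union[S3Src, SharePointSrc]) -> str:
        config = self._get_config_by_id(remote.config_id)
        if config is None:
            return "warn: no config found"
        # Route by concrete content type, mirroring _load_from_remote_content.
        if isinstance(remote, S3Src):
            return f"load from S3 ({config.name})"
        if isinstance(remote, SharePointSrc):
            return f"load from SharePoint ({config.name})"
        return "warn: unsupported remote content type"


d = Dispatcher([SourceConfig(id="sp-1", name="Team Site")])
print(d.load(SharePointSrc(config_id="sp-1", file_path="Shared Documents/report.pdf")))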