agno 2.4.6__py3-none-any.whl → 2.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. agno/agent/agent.py +5 -1
  2. agno/db/base.py +2 -0
  3. agno/db/postgres/postgres.py +5 -5
  4. agno/db/singlestore/singlestore.py +4 -5
  5. agno/db/sqlite/sqlite.py +4 -4
  6. agno/knowledge/embedder/aws_bedrock.py +325 -106
  7. agno/knowledge/knowledge.py +83 -1853
  8. agno/knowledge/loaders/__init__.py +29 -0
  9. agno/knowledge/loaders/azure_blob.py +423 -0
  10. agno/knowledge/loaders/base.py +187 -0
  11. agno/knowledge/loaders/gcs.py +267 -0
  12. agno/knowledge/loaders/github.py +415 -0
  13. agno/knowledge/loaders/s3.py +281 -0
  14. agno/knowledge/loaders/sharepoint.py +439 -0
  15. agno/knowledge/reader/website_reader.py +2 -2
  16. agno/knowledge/remote_knowledge.py +151 -0
  17. agno/knowledge/reranker/aws_bedrock.py +299 -0
  18. agno/learn/machine.py +5 -6
  19. agno/learn/stores/session_context.py +10 -2
  20. agno/models/azure/openai_chat.py +6 -11
  21. agno/models/neosantara/__init__.py +5 -0
  22. agno/models/neosantara/neosantara.py +42 -0
  23. agno/models/utils.py +5 -0
  24. agno/os/app.py +4 -1
  25. agno/os/interfaces/agui/router.py +1 -1
  26. agno/os/routers/components/components.py +2 -0
  27. agno/os/routers/knowledge/knowledge.py +0 -1
  28. agno/os/routers/registry/registry.py +340 -192
  29. agno/os/routers/workflows/router.py +7 -1
  30. agno/os/schema.py +104 -0
  31. agno/registry/registry.py +4 -0
  32. agno/run/workflow.py +3 -0
  33. agno/session/workflow.py +1 -1
  34. agno/skills/utils.py +100 -2
  35. agno/team/team.py +6 -3
  36. agno/tools/mcp/mcp.py +26 -1
  37. agno/vectordb/lancedb/lance_db.py +22 -7
  38. agno/workflow/__init__.py +4 -0
  39. agno/workflow/cel.py +299 -0
  40. agno/workflow/condition.py +280 -58
  41. agno/workflow/loop.py +177 -46
  42. agno/workflow/parallel.py +75 -4
  43. agno/workflow/router.py +260 -44
  44. agno/workflow/step.py +14 -7
  45. agno/workflow/steps.py +43 -0
  46. agno/workflow/workflow.py +104 -46
  47. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/METADATA +25 -37
  48. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/RECORD +51 -39
  49. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/WHEEL +0 -0
  50. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/licenses/LICENSE +0 -0
  51. {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/top_level.txt +0 -0
agno/knowledge/loaders/__init__.py
@@ -0,0 +1,29 @@
+ """Remote content loaders for Knowledge.
+
+ This module provides loaders for various cloud storage providers:
+ - S3Loader: AWS S3
+ - GCSLoader: Google Cloud Storage
+ - SharePointLoader: Microsoft SharePoint
+ - GitHubLoader: GitHub repositories
+ - AzureBlobLoader: Azure Blob Storage
+
+ All loaders inherit from BaseLoader which provides common utilities for
+ computing content names, creating content entries, and merging metadata.
+ """
+
+ from agno.knowledge.loaders.azure_blob import AzureBlobLoader
+ from agno.knowledge.loaders.base import BaseLoader, FileToProcess
+ from agno.knowledge.loaders.gcs import GCSLoader
+ from agno.knowledge.loaders.github import GitHubLoader
+ from agno.knowledge.loaders.s3 import S3Loader
+ from agno.knowledge.loaders.sharepoint import SharePointLoader
+
+ __all__ = [
+     "BaseLoader",
+     "FileToProcess",
+     "S3Loader",
+     "GCSLoader",
+     "SharePointLoader",
+     "GitHubLoader",
+     "AzureBlobLoader",
+ ]
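For orientation, a minimal sketch of what the new package exposes, assuming agno 2.4.8 is installed (the file path and size below are made-up values). The loader classes are mixins consumed by Knowledge rather than standalone clients, but FileToProcess is a plain dataclass and can be constructed directly:

    from agno.knowledge.loaders import BaseLoader, FileToProcess

    # FileToProcess is a simple record describing a file discovered by a loader
    f = FileToProcess(path="docs/guide.pdf", name="guide.pdf", size=1024)
    print(f.path, f.content_type)  # content_type defaults to None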
agno/knowledge/loaders/azure_blob.py
@@ -0,0 +1,423 @@
+ """Azure Blob Storage content loader for Knowledge.
+
+ Provides methods for loading content from Azure Blob Storage.
+ """
+
+ # mypy: disable-error-code="attr-defined"
+
+ from io import BytesIO
+ from typing import Any, Dict, List, Optional, cast
+
+ from agno.knowledge.content import Content, ContentStatus
+ from agno.knowledge.loaders.base import BaseLoader
+ from agno.knowledge.reader import Reader
+ from agno.knowledge.remote_content.config import AzureBlobConfig, RemoteContentConfig
+ from agno.knowledge.remote_content.remote_content import AzureBlobContent
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
+ from agno.utils.string import generate_id
+
+
+ class AzureBlobLoader(BaseLoader):
+     """Loader for Azure Blob Storage content."""
+
+     # ==========================================
+     # AZURE BLOB HELPERS (shared between sync/async)
+     # ==========================================
+
+     def _validate_azure_config(
+         self,
+         content: Content,
+         config: Optional[RemoteContentConfig],
+     ) -> Optional[AzureBlobConfig]:
+         """Validate and extract Azure Blob config.
+
+         Returns:
+             AzureBlobConfig if valid, None otherwise
+         """
+         remote_content: AzureBlobContent = cast(AzureBlobContent, content.remote_content)
+         azure_config = cast(AzureBlobConfig, config) if isinstance(config, AzureBlobConfig) else None
+
+         if azure_config is None:
+             log_error(f"Azure Blob config not found for config_id: {remote_content.config_id}")
+             return None
+
+         return azure_config
+
+     def _get_azure_blob_client(self, azure_config: AzureBlobConfig):
+         """Get a sync Azure Blob Service Client using client credentials flow.
+
+         Requires the `azure-identity` and `azure-storage-blob` packages.
+         """
+         try:
+             from azure.identity import ClientSecretCredential  # type: ignore
+             from azure.storage.blob import BlobServiceClient  # type: ignore
+         except ImportError:
+             raise ImportError(
+                 "The `azure-identity` and `azure-storage-blob` packages are not installed. "
+                 "Please install them via `pip install azure-identity azure-storage-blob`."
+             )
+
+         credential = ClientSecretCredential(
+             tenant_id=azure_config.tenant_id,
+             client_id=azure_config.client_id,
+             client_secret=azure_config.client_secret,
+         )
+
+         blob_service = BlobServiceClient(
+             account_url=f"https://{azure_config.storage_account}.blob.core.windows.net",
+             credential=credential,
+         )
+
+         return blob_service
+
+     def _get_azure_blob_client_async(self, azure_config: AzureBlobConfig):
+         """Get an async Azure Blob Service Client using client credentials flow.
+
+         Requires the `azure-identity` and `azure-storage-blob` packages.
+         Uses the async versions from azure.storage.blob.aio and azure.identity.aio.
+         """
+         try:
+             from azure.identity.aio import ClientSecretCredential  # type: ignore
+             from azure.storage.blob.aio import BlobServiceClient  # type: ignore
+         except ImportError:
+             raise ImportError(
+                 "The `azure-identity` and `azure-storage-blob` packages are not installed. "
+                 "Please install them via `pip install azure-identity azure-storage-blob`."
+             )
+
+         credential = ClientSecretCredential(
+             tenant_id=azure_config.tenant_id,
+             client_id=azure_config.client_id,
+             client_secret=azure_config.client_secret,
+         )
+
+         blob_service = BlobServiceClient(
+             account_url=f"https://{azure_config.storage_account}.blob.core.windows.net",
+             credential=credential,
+         )
+
+         return blob_service
+
+     def _build_azure_metadata(
+         self,
+         azure_config: AzureBlobConfig,
+         blob_name: str,
+         file_name: str,
+     ) -> Dict[str, str]:
+         """Build Azure Blob-specific metadata dictionary."""
+         return {
+             "source_type": "azure_blob",
+             "source_config_id": azure_config.id,
+             "source_config_name": azure_config.name,
+             "azure_storage_account": azure_config.storage_account,
+             "azure_container": azure_config.container,
+             "azure_blob_name": blob_name,
+             "azure_filename": file_name,
+         }
+
+     def _build_azure_virtual_path(
+         self,
+         storage_account: str,
+         container: str,
+         blob_name: str,
+     ) -> str:
+         """Build virtual path for Azure Blob content."""
+         return f"azure://{storage_account}/{container}/{blob_name}"
+
+     def _get_azure_root_path(self, remote_content: AzureBlobContent) -> str:
+         """Get the root path for computing relative paths."""
+         return remote_content.prefix or ""
+
+     # ==========================================
+     # AZURE BLOB LOADERS
+     # ==========================================
+
+     async def _aload_from_azure_blob(
+         self,
+         content: Content,
+         upsert: bool,
+         skip_if_exists: bool,
+         config: Optional[RemoteContentConfig] = None,
+     ):
+         """Load content from Azure Blob Storage (async).
+
+         Requires the AzureBlobConfig to contain tenant_id, client_id, client_secret,
+         storage_account, and container.
+
+         Uses the async Azure SDK to avoid blocking the event loop.
+         """
+         remote_content: AzureBlobContent = cast(AzureBlobContent, content.remote_content)
+         azure_config = self._validate_azure_config(content, config)
+         if azure_config is None:
+             return
+
+         # Get async blob service client
+         try:
+             blob_service = self._get_azure_blob_client_async(azure_config)
+         except ImportError as e:
+             log_error(str(e))
+             return
+         except Exception as e:
+             log_error(f"Error creating Azure Blob client: {e}")
+             return
+
+         # Use async context manager for proper resource cleanup
+         async with blob_service:
+             container_client = blob_service.get_container_client(azure_config.container)
+
+             # Helper to list blobs with a given prefix (async)
+             async def list_blobs_with_prefix(prefix: str) -> List[Dict[str, Any]]:
+                 """List all blobs under a given prefix (folder)."""
+                 results: List[Dict[str, Any]] = []
+                 normalized_prefix = prefix.rstrip("/") + "/" if not prefix.endswith("/") else prefix
+                 async for blob in container_client.list_blobs(name_starts_with=normalized_prefix):
+                     if not blob.name.endswith("/"):
+                         results.append(
+                             {
+                                 "name": blob.name,
+                                 "size": blob.size,
+                                 "content_type": blob.content_settings.content_type if blob.content_settings else None,
+                             }
+                         )
+                 return results
+
+             # Identify blobs to process
+             blobs_to_process: List[Dict[str, Any]] = []
+
+             try:
+                 if remote_content.blob_name:
+                     blob_client = container_client.get_blob_client(remote_content.blob_name)
+                     try:
+                         props = await blob_client.get_blob_properties()
+                         blobs_to_process.append(
+                             {
+                                 "name": remote_content.blob_name,
+                                 "size": props.size,
+                                 "content_type": props.content_settings.content_type if props.content_settings else None,
+                             }
+                         )
+                     except Exception:
+                         log_debug(f"Blob {remote_content.blob_name} not found, checking if it's a folder...")
+                         blobs_to_process = await list_blobs_with_prefix(remote_content.blob_name)
+                         if not blobs_to_process:
+                             log_error(
+                                 f"No blob or folder found at path: {remote_content.blob_name}. "
+                                 "If this is a folder, ensure files exist inside it."
+                             )
+                             return
+                 elif remote_content.prefix:
+                     blobs_to_process = await list_blobs_with_prefix(remote_content.prefix)
+             except Exception as e:
+                 log_error(f"Error listing Azure blobs: {e}")
+                 return
+
+             if not blobs_to_process:
+                 log_warning(f"No blobs found in Azure container: {azure_config.container}")
+                 return
+
+             log_info(f"Processing {len(blobs_to_process)} file(s) from Azure Blob Storage")
+             is_folder_upload = len(blobs_to_process) > 1
+             root_path = self._get_azure_root_path(remote_content)
+
+             for blob_info in blobs_to_process:
+                 blob_name = blob_info["name"]
+                 file_name = blob_name.split("/")[-1]
+
+                 # Build metadata and virtual path using helpers
+                 virtual_path = self._build_azure_virtual_path(
+                     azure_config.storage_account, azure_config.container, blob_name
+                 )
+                 azure_metadata = self._build_azure_metadata(azure_config, blob_name, file_name)
+                 merged_metadata = self._merge_metadata(azure_metadata, content.metadata)
+
+                 # Compute content name using base helper
+                 content_name = self._compute_content_name(
+                     blob_name, file_name, content.name, root_path, is_folder_upload
+                 )
+
+                 # Create content entry using base helper
+                 content_entry = self._create_content_entry(
+                     content, content_name, virtual_path, merged_metadata, "azure_blob", is_folder_upload
+                 )
+
+                 await self._ainsert_contents_db(content_entry)
+
+                 if self._should_skip(content_entry.content_hash, skip_if_exists):
+                     content_entry.status = ContentStatus.COMPLETED
+                     await self._aupdate_content(content_entry)
+                     continue
+
+                 # Download blob (async)
+                 try:
+                     blob_client = container_client.get_blob_client(blob_name)
+                     download_stream = await blob_client.download_blob()
+                     blob_data = await download_stream.readall()
+                     file_content = BytesIO(blob_data)
+                 except Exception as e:
+                     log_error(f"Error downloading Azure blob {blob_name}: {e}")
+                     content_entry.status = ContentStatus.FAILED
+                     content_entry.status_message = str(e)
+                     await self._aupdate_content(content_entry)
+                     continue
+
+                 # Select reader and read content
+                 reader = self._select_reader_by_uri(file_name, content.reader)
+                 if reader is None:
+                     log_warning(f"No reader found for file: {file_name}")
+                     content_entry.status = ContentStatus.FAILED
+                     content_entry.status_message = "No suitable reader found"
+                     await self._aupdate_content(content_entry)
+                     continue
+
+                 reader = cast(Reader, reader)
+                 read_documents = await reader.async_read(file_content, name=file_name)
+
+                 # Prepare and insert into vector database
+                 if not content_entry.id:
+                     content_entry.id = generate_id(content_entry.content_hash or "")
+                 self._prepare_documents_for_insert(read_documents, content_entry.id)
+                 await self._ahandle_vector_db_insert(content_entry, read_documents, upsert)
+
+     def _load_from_azure_blob(
+         self,
+         content: Content,
+         upsert: bool,
+         skip_if_exists: bool,
+         config: Optional[RemoteContentConfig] = None,
+     ):
+         """Load content from Azure Blob Storage (sync).
+
+         Requires the AzureBlobConfig to contain tenant_id, client_id, client_secret,
+         storage_account, and container.
+         """
+         remote_content: AzureBlobContent = cast(AzureBlobContent, content.remote_content)
+         azure_config = self._validate_azure_config(content, config)
+         if azure_config is None:
+             return
+
+         # Get blob service client
+         try:
+             blob_service = self._get_azure_blob_client(azure_config)
+         except ImportError as e:
+             log_error(str(e))
+             return
+         except Exception as e:
+             log_error(f"Error creating Azure Blob client: {e}")
+             return
+
+         # Use context manager for proper resource cleanup
+         with blob_service:
+             container_client = blob_service.get_container_client(azure_config.container)
+
+             # Helper to list blobs with a given prefix
+             def list_blobs_with_prefix(prefix: str) -> List[Dict[str, Any]]:
+                 """List all blobs under a given prefix (folder)."""
+                 results: List[Dict[str, Any]] = []
+                 normalized_prefix = prefix.rstrip("/") + "/" if not prefix.endswith("/") else prefix
+                 blobs = container_client.list_blobs(name_starts_with=normalized_prefix)
+                 for blob in blobs:
+                     if not blob.name.endswith("/"):
+                         results.append(
+                             {
+                                 "name": blob.name,
+                                 "size": blob.size,
+                                 "content_type": blob.content_settings.content_type if blob.content_settings else None,
+                             }
+                         )
+                 return results
+
+             # Identify blobs to process
+             blobs_to_process: List[Dict[str, Any]] = []
+
+             try:
+                 if remote_content.blob_name:
+                     blob_client = container_client.get_blob_client(remote_content.blob_name)
+                     try:
+                         props = blob_client.get_blob_properties()
+                         blobs_to_process.append(
+                             {
+                                 "name": remote_content.blob_name,
+                                 "size": props.size,
+                                 "content_type": props.content_settings.content_type if props.content_settings else None,
+                             }
+                         )
+                     except Exception:
+                         log_debug(f"Blob {remote_content.blob_name} not found, checking if it's a folder...")
+                         blobs_to_process = list_blobs_with_prefix(remote_content.blob_name)
+                         if not blobs_to_process:
+                             log_error(
+                                 f"No blob or folder found at path: {remote_content.blob_name}. "
+                                 "If this is a folder, ensure files exist inside it."
+                             )
+                             return
+                 elif remote_content.prefix:
+                     blobs_to_process = list_blobs_with_prefix(remote_content.prefix)
+             except Exception as e:
+                 log_error(f"Error listing Azure blobs: {e}")
+                 return
+
+             if not blobs_to_process:
+                 log_warning(f"No blobs found in Azure container: {azure_config.container}")
+                 return
+
+             log_info(f"Processing {len(blobs_to_process)} file(s) from Azure Blob Storage")
+             is_folder_upload = len(blobs_to_process) > 1
+             root_path = self._get_azure_root_path(remote_content)
+
+             for blob_info in blobs_to_process:
+                 blob_name = blob_info["name"]
+                 file_name = blob_name.split("/")[-1]
+
+                 # Build metadata and virtual path using helpers
+                 virtual_path = self._build_azure_virtual_path(
+                     azure_config.storage_account, azure_config.container, blob_name
+                 )
+                 azure_metadata = self._build_azure_metadata(azure_config, blob_name, file_name)
+                 merged_metadata = self._merge_metadata(azure_metadata, content.metadata)
+
+                 # Compute content name using base helper
+                 content_name = self._compute_content_name(
+                     blob_name, file_name, content.name, root_path, is_folder_upload
+                 )
+
+                 # Create content entry using base helper
+                 content_entry = self._create_content_entry(
+                     content, content_name, virtual_path, merged_metadata, "azure_blob", is_folder_upload
+                 )
+
+                 self._insert_contents_db(content_entry)
+
+                 if self._should_skip(content_entry.content_hash, skip_if_exists):
+                     content_entry.status = ContentStatus.COMPLETED
+                     self._update_content(content_entry)
+                     continue
+
+                 # Download blob
+                 try:
+                     blob_client = container_client.get_blob_client(blob_name)
+                     download_stream = blob_client.download_blob()
+                     file_content = BytesIO(download_stream.readall())
+                 except Exception as e:
+                     log_error(f"Error downloading Azure blob {blob_name}: {e}")
+                     content_entry.status = ContentStatus.FAILED
+                     content_entry.status_message = str(e)
+                     self._update_content(content_entry)
+                     continue
+
+                 # Select reader and read content
+                 reader = self._select_reader_by_uri(file_name, content.reader)
+                 if reader is None:
+                     log_warning(f"No reader found for file: {file_name}")
+                     content_entry.status = ContentStatus.FAILED
+                     content_entry.status_message = "No suitable reader found"
+                     self._update_content(content_entry)
+                     continue
+
+                 reader = cast(Reader, reader)
+                 read_documents = reader.read(file_content, name=file_name)
+
+                 # Prepare and insert into vector database
+                 if not content_entry.id:
+                     content_entry.id = generate_id(content_entry.content_hash or "")
+                 self._prepare_documents_for_insert(read_documents, content_entry.id)
+                 self._handle_vector_db_insert(content_entry, read_documents, upsert)
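Two details of the loader above are easy to check in isolation. A standalone sketch (the account, container, and blob values are hypothetical) of the prefix normalization used by list_blobs_with_prefix and of the azure:// virtual-path scheme that content entries are hashed on:

    # Mirrors the normalization inside list_blobs_with_prefix: listings are
    # always scoped to a folder by ensuring a trailing slash on the prefix.
    prefix = "reports/2024"
    normalized_prefix = prefix.rstrip("/") + "/" if not prefix.endswith("/") else prefix
    assert normalized_prefix == "reports/2024/"

    # Mirrors _build_azure_virtual_path: the virtual path used for hashing.
    storage_account, container, blob_name = "myaccount", "docs", "reports/2024/q1.pdf"
    virtual_path = f"azure://{storage_account}/{container}/{blob_name}"
    assert virtual_path == "azure://myaccount/docs/reports/2024/q1.pdf"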
agno/knowledge/loaders/base.py
@@ -0,0 +1,187 @@
+ """Base loader class with shared utilities for all content loaders.
+
+ Provides common helpers for:
+ - Computing content names for files
+ - Creating Content entries
+ - Building metadata dictionaries
+ """
+
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional
+
+ from agno.knowledge.content import Content, ContentStatus
+ from agno.utils.string import generate_id
+
+
+ @dataclass
+ class FileToProcess:
+     """Represents a file identified for processing."""
+
+     path: str
+     name: str
+     size: Optional[int] = None
+     content_type: Optional[str] = None
+
+
+ class BaseLoader:
+     """Base class with shared loader utilities.
+
+     This class provides common methods used by all content loaders to reduce
+     code duplication between sync and async implementations.
+
+     Methods that call self._build_content_hash() assume they are mixed into
+     a class that provides this method (e.g., Knowledge via RemoteKnowledge).
+     """
+
+     def _compute_content_name(
+         self,
+         file_path: str,
+         file_name: str,
+         base_name: Optional[str],
+         root_path: str,
+         is_folder_upload: bool,
+     ) -> str:
+         """Compute the content name for a file.
+
+         Args:
+             file_path: Full path to the file
+             file_name: Name of the file
+             base_name: User-provided base name for the content
+             root_path: Root path of the upload (for computing relative paths)
+             is_folder_upload: Whether this is part of a folder upload
+
+         Returns:
+             The computed content name
+         """
+         if is_folder_upload:
+             relative_path = file_path
+             if root_path and file_path.startswith(root_path + "/"):
+                 relative_path = file_path[len(root_path) + 1 :]
+             return f"{base_name}/{relative_path}" if base_name else file_path
+         return base_name or file_name
+
+     def _create_content_entry_for_folder(
+         self,
+         content: Content,
+         content_name: str,
+         virtual_path: str,
+         metadata: Dict[str, Any],
+         file_type: str,
+     ) -> Content:
+         """Create a new Content entry for a file in a folder upload.
+
+         Args:
+             content: Original content object (used for description)
+             content_name: Name for the new content entry
+             virtual_path: Virtual path for hashing
+             metadata: Metadata dictionary
+             file_type: Type of file (e.g., 'github', 'azure_blob')
+
+         Returns:
+             New Content entry with hash and ID set
+         """
+         entry = Content(
+             name=content_name,
+             description=content.description,
+             path=virtual_path,
+             status=ContentStatus.PROCESSING,
+             metadata=metadata,
+             file_type=file_type,
+         )
+         entry.content_hash = self._build_content_hash(entry)  # type: ignore[attr-defined]
+         entry.id = generate_id(entry.content_hash)
+         return entry
+
+     def _update_content_entry_for_single_file(
+         self,
+         content: Content,
+         virtual_path: str,
+         metadata: Dict[str, Any],
+         file_type: str,
+     ) -> Content:
+         """Update an existing Content entry for a single file upload.
+
+         Args:
+             content: Original content object to update
+             virtual_path: Virtual path for hashing
+             metadata: Metadata dictionary
+             file_type: Type of file (e.g., 'github', 'azure_blob')
+
+         Returns:
+             Updated Content entry with hash and ID set if not already present
+         """
+         content.path = virtual_path
+         content.status = ContentStatus.PROCESSING
+         content.metadata = metadata
+         content.file_type = file_type
+         if not content.content_hash:
+             content.content_hash = self._build_content_hash(content)  # type: ignore[attr-defined]
+         if not content.id:
+             content.id = generate_id(content.content_hash)
+         return content
+
+     def _create_content_entry(
+         self,
+         content: Content,
+         content_name: str,
+         virtual_path: str,
+         metadata: Dict[str, Any],
+         file_type: str,
+         is_folder_upload: bool,
+     ) -> Content:
+         """Create or update a Content entry for a file.
+
+         For folder uploads, creates a new Content entry.
+         For single file uploads, updates the original Content object.
+
+         Args:
+             content: Original content object
+             content_name: Name for the content entry
+             virtual_path: Virtual path for hashing
+             metadata: Metadata dictionary
+             file_type: Type of file (e.g., 'github', 'azure_blob')
+             is_folder_upload: Whether this is part of a folder upload
+
+         Returns:
+             Content entry with hash and ID set
+         """
+         if is_folder_upload:
+             return self._create_content_entry_for_folder(content, content_name, virtual_path, metadata, file_type)
+         return self._update_content_entry_for_single_file(content, virtual_path, metadata, file_type)
+
+     def _merge_metadata(
+         self,
+         provider_metadata: Dict[str, str],
+         user_metadata: Optional[Dict[str, Any]],
+     ) -> Dict[str, Any]:
+         """Merge provider metadata with user-provided metadata.
+
+         User metadata takes precedence over provider metadata.
+
+         Args:
+             provider_metadata: Metadata from the provider (e.g., GitHub, Azure)
+             user_metadata: User-provided metadata
+
+         Returns:
+             Merged metadata dictionary
+         """
+         return {**provider_metadata, **(user_metadata or {})}
+
+     def _files_to_dict_list(self, files: List[FileToProcess]) -> List[Dict[str, Any]]:
+         """Convert FileToProcess objects to dict list for compatibility.
+
+         Args:
+             files: List of FileToProcess objects
+
+         Returns:
+             List of dictionaries with file info
+         """
+         return [
+             {
+                 "path": f.path,
+                 "name": f.name,
+                 "size": f.size,
+                 "content_type": f.content_type,
+             }
+             for f in files
+         ]
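A short sketch of the two pure helpers above, with hypothetical paths and metadata. Neither method touches _build_content_hash, so BaseLoader can be instantiated directly for this illustration:

    from agno.knowledge.loaders import BaseLoader

    loader = BaseLoader()

    # Folder upload: the user's base name is joined with the path relative to the root.
    name = loader._compute_content_name(
        file_path="reports/2024/q1.pdf",
        file_name="q1.pdf",
        base_name="quarterlies",
        root_path="reports",
        is_folder_upload=True,
    )
    assert name == "quarterlies/2024/q1.pdf"

    # Single file: the base name wins when given, else the file name is used.
    assert loader._compute_content_name("a/b.txt", "b.txt", None, "", False) == "b.txt"

    # User-provided metadata overrides provider metadata on key collisions.
    merged = loader._merge_metadata({"source_type": "azure_blob"}, {"source_type": "custom"})
    assert merged == {"source_type": "custom"}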