agent-runtime-core 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,15 @@ import json
24
24
  import os
25
25
  from datetime import datetime
26
26
  from pathlib import Path
27
- from typing import Any, Callable, Optional
27
+ from typing import Any, Callable, Optional, TYPE_CHECKING
28
28
  from uuid import UUID, uuid4
29
29
 
30
30
  from agent_runtime_core.interfaces import EventType, Message, ToolRegistry
31
31
 
32
+ if TYPE_CHECKING:
33
+ from agent_runtime_core.multi_agent import SystemContext
34
+ from agent_runtime_core.privacy import PrivacyConfig, UserContext
35
+
32
36
 
33
37
  class InMemoryRunContext:
34
38
  """
@@ -65,10 +69,13 @@ class InMemoryRunContext:
65
69
  metadata: Optional[dict] = None,
66
70
  tool_registry: Optional[ToolRegistry] = None,
67
71
  on_event: Optional[Callable[[str, dict], None]] = None,
72
+ system_context: Optional["SystemContext"] = None,
73
+ user_context: Optional["UserContext"] = None,
74
+ privacy_config: Optional["PrivacyConfig"] = None,
68
75
  ):
69
76
  """
70
77
  Initialize an in-memory run context.
71
-
78
+
72
79
  Args:
73
80
  run_id: Unique identifier for this run (auto-generated if not provided)
74
81
  conversation_id: Associated conversation ID (optional)
@@ -77,6 +84,9 @@ class InMemoryRunContext:
77
84
  metadata: Run metadata
78
85
  tool_registry: Registry of available tools
79
86
  on_event: Optional callback for events (for testing/debugging)
87
+ system_context: Optional SystemContext for multi-agent systems with shared knowledge
88
+ user_context: Optional UserContext for user isolation and privacy
89
+ privacy_config: Optional PrivacyConfig for privacy settings (defaults to max privacy)
80
90
  """
81
91
  self._run_id = run_id or uuid4()
82
92
  self._conversation_id = conversation_id
@@ -88,7 +98,18 @@ class InMemoryRunContext:
88
98
  self._state: Optional[dict] = None
89
99
  self._events: list[dict] = []
90
100
  self._on_event = on_event
91
-
101
+ self._system_context = system_context
102
+
103
+ # Import here to avoid circular imports
104
+ from agent_runtime_core.privacy import (
105
+ DEFAULT_PRIVACY_CONFIG,
106
+ ANONYMOUS_USER,
107
+ )
108
+
109
+ # Default to secure settings: anonymous user + strict privacy
110
+ self._user_context = user_context if user_context is not None else ANONYMOUS_USER
111
+ self._privacy_config = privacy_config if privacy_config is not None else DEFAULT_PRIVACY_CONFIG
112
+
92
113
  @property
93
114
  def run_id(self) -> UUID:
94
115
  """Unique identifier for this run."""
@@ -118,7 +139,22 @@ class InMemoryRunContext:
118
139
  def tool_registry(self) -> ToolRegistry:
119
140
  """Registry of available tools for this agent."""
120
141
  return self._tool_registry
121
-
142
+
143
+ @property
144
+ def system_context(self) -> Optional["SystemContext"]:
145
+ """System context for multi-agent systems with shared knowledge."""
146
+ return self._system_context
147
+
148
+ @property
149
+ def user_context(self) -> "UserContext":
150
+ """User context for privacy and data isolation. Defaults to ANONYMOUS_USER."""
151
+ return self._user_context
152
+
153
+ @property
154
+ def privacy_config(self) -> "PrivacyConfig":
155
+ """Privacy configuration for this run. Defaults to DEFAULT_PRIVACY_CONFIG (strict)."""
156
+ return self._privacy_config
157
+
122
158
  async def emit(self, event_type: EventType | str, payload: dict) -> None:
123
159
  """Emit an event (stored in memory)."""
124
160
  event_type_str = event_type.value if hasattr(event_type, 'value') else str(event_type)
@@ -195,6 +231,9 @@ class FileRunContext:
195
231
  metadata: Optional[dict] = None,
196
232
  tool_registry: Optional[ToolRegistry] = None,
197
233
  on_event: Optional[Callable[[str, dict], None]] = None,
234
+ system_context: Optional["SystemContext"] = None,
235
+ user_context: Optional["UserContext"] = None,
236
+ privacy_config: Optional["PrivacyConfig"] = None,
198
237
  ):
199
238
  """
200
239
  Initialize a file-based run context.
@@ -208,6 +247,9 @@ class FileRunContext:
208
247
  metadata: Run metadata
209
248
  tool_registry: Registry of available tools
210
249
  on_event: Optional callback for events
250
+ system_context: Optional SystemContext for multi-agent systems with shared knowledge
251
+ user_context: Optional UserContext for user isolation and privacy
252
+ privacy_config: Optional PrivacyConfig for privacy settings (defaults to max privacy)
211
253
  """
212
254
  self._run_id = run_id or uuid4()
213
255
  self._checkpoint_dir = Path(checkpoint_dir)
@@ -219,6 +261,17 @@ class FileRunContext:
219
261
  self._cancelled = False
220
262
  self._on_event = on_event
221
263
  self._state_cache: Optional[dict] = None
264
+ self._system_context = system_context
265
+
266
+ # Import here to avoid circular imports
267
+ from agent_runtime_core.privacy import (
268
+ DEFAULT_PRIVACY_CONFIG,
269
+ ANONYMOUS_USER,
270
+ )
271
+
272
+ # Default to secure settings: anonymous user + strict privacy
273
+ self._user_context = user_context if user_context is not None else ANONYMOUS_USER
274
+ self._privacy_config = privacy_config if privacy_config is not None else DEFAULT_PRIVACY_CONFIG
222
275
 
223
276
  # Ensure checkpoint directory exists
224
277
  self._checkpoint_dir.mkdir(parents=True, exist_ok=True)
@@ -253,6 +306,21 @@ class FileRunContext:
253
306
  """Registry of available tools for this agent."""
254
307
  return self._tool_registry
255
308
 
309
+ @property
310
+ def system_context(self) -> Optional["SystemContext"]:
311
+ """System context for multi-agent systems with shared knowledge."""
312
+ return self._system_context
313
+
314
+ @property
315
+ def user_context(self) -> "UserContext":
316
+ """User context for privacy and data isolation. Defaults to ANONYMOUS_USER."""
317
+ return self._user_context
318
+
319
+ @property
320
+ def privacy_config(self) -> "PrivacyConfig":
321
+ """Privacy configuration for this run. Defaults to DEFAULT_PRIVACY_CONFIG (strict)."""
322
+ return self._privacy_config
323
+
256
324
  def _checkpoint_path(self) -> Path:
257
325
  """Get the path to the checkpoint file for this run."""
258
326
  return self._checkpoint_dir / f"{self._run_id}.json"
@@ -0,0 +1,88 @@
1
+ """
2
+ File processing module for agent_runtime_core.
3
+
4
+ Provides pluggable file processors for reading various file types,
5
+ OCR integration, and AI vision capabilities.
6
+
7
+ Example:
8
+ from agent_runtime_core.files import FileProcessorRegistry
9
+
10
+ # Register processors
11
+ registry = FileProcessorRegistry()
12
+ registry.auto_register() # Register all available processors
13
+
14
+ # Process a file
15
+ result = await registry.process("document.pdf", file_bytes)
16
+ print(result.text) # Extracted text
17
+ print(result.metadata) # File metadata
18
+ """
19
+
20
+ from .base import (
21
+ FileProcessor,
22
+ FileProcessorRegistry,
23
+ ProcessedFile,
24
+ FileType,
25
+ ProcessingOptions,
26
+ )
27
+ from .processors import (
28
+ TextFileProcessor,
29
+ PDFProcessor,
30
+ ImageProcessor,
31
+ DocxProcessor,
32
+ XlsxProcessor,
33
+ CsvProcessor,
34
+ )
35
+ from .ocr import (
36
+ OCRProvider,
37
+ TesseractOCR,
38
+ GoogleVisionOCR,
39
+ AWSTextractOCR,
40
+ AzureDocumentOCR,
41
+ )
42
+ from .vision import (
43
+ VisionProvider,
44
+ OpenAIVision,
45
+ AnthropicVision,
46
+ GeminiVision,
47
+ )
48
+ from .tools import (
49
+ FileTools,
50
+ FileToolsConfig,
51
+ get_file_read_schema,
52
+ get_file_write_schema,
53
+ get_file_list_schema,
54
+ )
55
+
56
+ __all__ = [
57
+ # Base classes
58
+ "FileProcessor",
59
+ "FileProcessorRegistry",
60
+ "ProcessedFile",
61
+ "FileType",
62
+ "ProcessingOptions",
63
+ # Processors
64
+ "TextFileProcessor",
65
+ "PDFProcessor",
66
+ "ImageProcessor",
67
+ "DocxProcessor",
68
+ "XlsxProcessor",
69
+ "CsvProcessor",
70
+ # OCR
71
+ "OCRProvider",
72
+ "TesseractOCR",
73
+ "GoogleVisionOCR",
74
+ "AWSTextractOCR",
75
+ "AzureDocumentOCR",
76
+ # Vision
77
+ "VisionProvider",
78
+ "OpenAIVision",
79
+ "AnthropicVision",
80
+ "GeminiVision",
81
+ # Tools
82
+ "FileTools",
83
+ "FileToolsConfig",
84
+ "get_file_read_schema",
85
+ "get_file_write_schema",
86
+ "get_file_list_schema",
87
+ ]
88
+
@@ -0,0 +1,343 @@
1
+ """
2
+ Base classes for file processing.
3
+
4
+ Provides the FileProcessor abstract base class and registry pattern
5
+ for pluggable file type handling.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass, field
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Any, Optional, Type, Union
13
+ import mimetypes
14
+
15
+
16
class FileType(str, Enum):
    """Categories of files recognised by the file-processing layer.

    Every member is also a ``str``, so a member compares equal to its
    lowercase value (``FileType.PDF == "pdf"``) and can be looked up from
    that value (``FileType("pdf")``).
    """

    TEXT = "text"          # plain text
    PDF = "pdf"            # PDF documents
    IMAGE = "image"        # raster images
    DOCX = "docx"          # word-processing documents
    XLSX = "xlsx"          # spreadsheets
    CSV = "csv"            # comma-separated values
    JSON = "json"          # JSON documents
    MARKDOWN = "markdown"  # Markdown text
    HTML = "html"          # HTML documents
    UNKNOWN = "unknown"    # anything that could not be classified
28
+
29
+
30
@dataclass
class ProcessingOptions:
    """Tunable settings passed to a processor when a file is handled.

    All fields have defaults, so ``ProcessingOptions()`` is a valid
    configuration. Field order is part of the positional constructor
    signature and must not be rearranged.
    """

    # --- General ---------------------------------------------------------
    # Upper bound on accepted content size, in bytes (100 MiB by default).
    max_size_bytes: int = 100 * 1024 * 1024
    extract_text: bool = True
    extract_metadata: bool = True

    # --- OCR -------------------------------------------------------------
    use_ocr: bool = False
    # Provider key such as "tesseract", "google", "aws" or "azure".
    ocr_provider: Optional[str] = None
    ocr_language: str = "eng"

    # --- Vision AI -------------------------------------------------------
    use_vision: bool = False
    # Provider key such as "openai", "anthropic" or "gemini".
    vision_provider: Optional[str] = None
    # Custom prompt to use for vision analysis, if any.
    vision_prompt: Optional[str] = None

    # --- Images ----------------------------------------------------------
    generate_thumbnail: bool = True
    thumbnail_size: tuple[int, int] = (200, 200)

    # --- PDF -------------------------------------------------------------
    pdf_extract_images: bool = False
    # Maximum number of pages to process; ``None`` means no limit is set.
    pdf_page_limit: Optional[int] = None

    # --- Provider-specific extras ---------------------------------------
    # A fresh dict per instance, so instances never share mutable state.
    extra: dict = field(default_factory=dict)
58
+
59
+
60
@dataclass
class ProcessedFile:
    """Outcome of running a file through a processor.

    The first four fields are required; everything else defaults to an
    "empty" value so a processor only fills in what it actually produced.
    """

    # --- Identity (required) --------------------------------------------
    filename: str
    file_type: FileType
    mime_type: str
    size_bytes: int

    # --- Extracted content ----------------------------------------------
    text: str = ""
    # Text split into pieces, for chunked processing.
    text_chunks: list[str] = field(default_factory=list)

    # --- Metadata --------------------------------------------------------
    metadata: dict = field(default_factory=dict)

    # --- Visual data -----------------------------------------------------
    thumbnail_base64: Optional[str] = None
    preview_url: Optional[str] = None

    # --- OCR / vision output --------------------------------------------
    ocr_text: Optional[str] = None
    vision_description: Optional[str] = None
    vision_analysis: Optional[dict] = None

    # --- Processing bookkeeping -----------------------------------------
    processor_used: str = ""
    processing_time_ms: float = 0
    warnings: list[str] = field(default_factory=list)

    # --- Raw bytes (optional, kept for further processing) --------------
    raw_content: Optional[bytes] = None
92
+
93
+
94
class FileProcessor(ABC):
    """
    Interface every file processor implements.

    A concrete processor supplies a name, the file types and extensions it
    handles, optionally a list of MIME types, and an asynchronous
    ``process`` coroutine that turns raw bytes into a ``ProcessedFile``.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier of this processor; expected to be unique."""
        ...

    @property
    @abstractmethod
    def supported_types(self) -> list[FileType]:
        """The ``FileType`` members this processor handles."""
        ...

    @property
    @abstractmethod
    def supported_extensions(self) -> list[str]:
        """File extensions handled, including the dot (e.g. ``['.pdf']``).

        Case does not matter: ``can_process`` lowercases both sides
        before comparing.
        """
        ...

    @property
    def supported_mime_types(self) -> list[str]:
        """MIME types handled; empty unless a subclass overrides it."""
        return []

    @abstractmethod
    async def process(
        self,
        content: bytes,
        filename: str,
        options: ProcessingOptions,
    ) -> ProcessedFile:
        """
        Extract text and metadata from a file's bytes.

        Args:
            content: Raw file bytes
            filename: Original filename
            options: Processing options

        Returns:
            ProcessedFile with extracted content
        """
        ...

    def can_process(self, filename: str, mime_type: Optional[str] = None) -> bool:
        """Report whether this processor accepts the given file.

        The filename's extension is compared case-insensitively against
        ``supported_extensions``. If that does not match and a non-empty
        ``mime_type`` is supplied, it is checked for exact membership in
        ``supported_mime_types``.
        """
        suffix = Path(filename).suffix.lower()
        if any(suffix == candidate.lower() for candidate in self.supported_extensions):
            return True
        # An absent or empty MIME hint can never match.
        return bool(mime_type) and mime_type in self.supported_mime_types
153
+
154
+
155
class FileProcessorRegistry:
    """
    Registry of file processors.

    Keeps processors indexed by name and by lowercase file extension, and
    picks one for a file by extension first, then by MIME type.
    Registering a processor under an already-used name replaces the
    earlier one, including every mapping that pointed at it.
    """

    def __init__(self) -> None:
        # name -> processor instance (insertion order drives MIME lookup order)
        self._processors: dict[str, FileProcessor] = {}
        # file type -> names of processors declaring it (maintained on
        # registration; no method of this class reads it back)
        self._type_map: dict[FileType, list[str]] = {}
        # lowercase extension -> processor name (last registration wins)
        self._extension_map: dict[str, str] = {}

    def _forget_mappings(self, name: str) -> None:
        """Drop every type and extension mapping that points at ``name``."""
        for file_type in list(self._type_map):
            remaining = [n for n in self._type_map[file_type] if n != name]
            if remaining:
                self._type_map[file_type] = remaining
            else:
                del self._type_map[file_type]

        stale = [ext for ext, owner in self._extension_map.items() if owner == name]
        for ext in stale:
            del self._extension_map[ext]

    def _find_by_mime(self, mime_type: str) -> Optional[FileProcessor]:
        """Return the first registered processor declaring ``mime_type``."""
        for processor in self._processors.values():
            if mime_type in processor.supported_mime_types:
                return processor
        return None

    def register(self, processor: FileProcessor) -> None:
        """Register a file processor.

        Re-registering a name is safe: mappings left by the previous
        holder of that name are cleared first, so type lists never gain
        duplicate entries and extensions the new processor does not
        declare stop routing to it.
        """
        name = processor.name
        if name in self._processors:
            self._forget_mappings(name)
        # Assigning to an existing key keeps its position in the dict, so
        # MIME lookup order is unaffected by re-registration.
        self._processors[name] = processor

        # Map file types to processor
        for file_type in processor.supported_types:
            names = self._type_map.setdefault(file_type, [])
            if name not in names:
                names.append(name)

        # Map extensions to processor
        for ext in processor.supported_extensions:
            self._extension_map[ext.lower()] = name

    def get(self, name: str) -> Optional[FileProcessor]:
        """Get a processor by name, or ``None`` if it is not registered."""
        return self._processors.get(name)

    def get_for_file(
        self,
        filename: str,
        mime_type: Optional[str] = None,
    ) -> Optional[FileProcessor]:
        """Get the best processor for a file.

        Lookup order: the filename's lowercase extension, then the
        supplied ``mime_type`` (if any), then a MIME type guessed from
        the filename. Returns ``None`` when nothing matches.
        """
        ext = Path(filename).suffix.lower()

        # Try extension first
        if ext in self._extension_map:
            return self._processors[self._extension_map[ext]]

        # Try the caller-supplied MIME type
        if mime_type:
            match = self._find_by_mime(mime_type)
            if match is not None:
                return match

        # Guess MIME type from filename
        guessed_mime, _ = mimetypes.guess_type(filename)
        if guessed_mime:
            return self._find_by_mime(guessed_mime)

        return None

    async def process(
        self,
        filename: str,
        content: bytes,
        options: Optional[ProcessingOptions] = None,
        mime_type: Optional[str] = None,
    ) -> ProcessedFile:
        """
        Process a file using the appropriate processor.

        Args:
            filename: Original filename
            content: Raw file bytes
            options: Processing options (uses defaults if not provided)
            mime_type: Optional MIME type hint

        Returns:
            ProcessedFile with extracted content

        Raises:
            ValueError: If no processor found for file type
            ValueError: If file exceeds size limit
        """
        if options is None:
            options = ProcessingOptions()

        # Check size limit before selecting a processor
        if len(content) > options.max_size_bytes:
            raise ValueError(
                f"File size ({len(content)} bytes) exceeds limit "
                f"({options.max_size_bytes} bytes)"
            )

        # Find processor
        processor = self.get_for_file(filename, mime_type)
        if processor is None:
            raise ValueError(f"No processor found for file: {filename}")

        # Process
        return await processor.process(content, filename, options)

    def list_processors(self) -> list[FileProcessor]:
        """List all registered processors, in registration order."""
        return list(self._processors.values())

    def supported_extensions(self) -> list[str]:
        """List all supported file extensions (lowercase)."""
        return list(self._extension_map.keys())

    def auto_register(self) -> None:
        """
        Auto-register all available processors.

        The text and CSV processors are always registered. The PDF,
        image, DOCX and XLSX processors are registered only when the
        library probed for each can be imported; an ImportError is
        treated as "not installed" and that processor is skipped.
        """
        from .processors import (
            TextFileProcessor,
            PDFProcessor,
            ImageProcessor,
            DocxProcessor,
            XlsxProcessor,
            CsvProcessor,
        )

        # Always available
        self.register(TextFileProcessor())
        self.register(CsvProcessor())

        # The imports below are availability probes only; the imported
        # names are intentionally unused.
        try:
            import pypdf  # noqa: F401
            self.register(PDFProcessor())
        except ImportError:
            pass

        try:
            from PIL import Image  # noqa: F401
            self.register(ImageProcessor())
        except ImportError:
            pass

        try:
            import docx  # noqa: F401
            self.register(DocxProcessor())
        except ImportError:
            pass

        try:
            import openpyxl  # noqa: F401
            self.register(XlsxProcessor())
        except ImportError:
            pass
304
+
305
+
306
def detect_file_type(filename: str, content: Optional[bytes] = None) -> FileType:
    """
    Detect file type from filename and optionally content.

    The filename's extension (case-insensitive) is authoritative. Only
    when the extension is missing or unrecognised, and ``content`` is
    provided, are the leading bytes checked against a few unambiguous
    binary signatures (PDF, PNG, JPEG, GIF, WebP).

    Args:
        filename: Filename with extension
        content: Optional file content for magic number detection

    Returns:
        Detected FileType, or ``FileType.UNKNOWN`` if nothing matches
    """
    extension_map = {
        ".txt": FileType.TEXT,
        ".text": FileType.TEXT,
        ".log": FileType.TEXT,
        ".pdf": FileType.PDF,
        ".png": FileType.IMAGE,
        ".jpg": FileType.IMAGE,
        ".jpeg": FileType.IMAGE,
        ".gif": FileType.IMAGE,
        ".webp": FileType.IMAGE,
        ".bmp": FileType.IMAGE,
        ".docx": FileType.DOCX,
        # Legacy .doc/.xls files are reported under the same type as
        # their modern counterparts.
        ".doc": FileType.DOCX,
        ".xlsx": FileType.XLSX,
        ".xls": FileType.XLSX,
        ".csv": FileType.CSV,
        ".json": FileType.JSON,
        ".md": FileType.MARKDOWN,
        ".markdown": FileType.MARKDOWN,
        ".html": FileType.HTML,
        ".htm": FileType.HTML,
    }

    ext = Path(filename).suffix.lower()
    by_extension = extension_map.get(ext)
    if by_extension is not None:
        return by_extension

    # Extension gave no answer: fall back to magic numbers when content
    # is available. Only long, unambiguous signatures are checked, to
    # avoid misclassifying arbitrary data.
    if content:
        header = bytes(content[:16])
        if header.startswith(b"%PDF-"):
            return FileType.PDF
        image_signatures = (
            b"\x89PNG\r\n\x1a\n",  # PNG
            b"\xff\xd8\xff",       # JPEG
            b"GIF87a",             # GIF
            b"GIF89a",             # GIF
        )
        if header.startswith(image_signatures):
            return FileType.IMAGE
        # WebP: RIFF container whose form type (bytes 8-11) is "WEBP".
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return FileType.IMAGE

    return FileType.UNKNOWN
343
+