slide-narrator 5.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,536 @@
1
+ """File storage implementation"""
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional, Dict, Any, Set, List, Tuple
4
+ from pathlib import Path
5
+ import os
6
+ import uuid
7
+ import shutil
8
+ import hashlib
9
+ import asyncio
10
+ import mimetypes
11
+ from datetime import datetime, UTC
12
+ from sqlalchemy import select
13
+ from ..utils.logging import get_logger
14
+ import filetype
15
+ import base64
16
+
17
+ # Get configured logger
18
+ logger = get_logger(__name__)
19
+
20
class FileStoreError(Exception):
    """Root of the file store exception hierarchy."""


class FileNotFoundError(FileStoreError):
    """Signals that a requested file is missing from storage.

    NOTE: inside this module the name shadows the builtin
    ``FileNotFoundError``; this class derives from ``FileStoreError``,
    not from ``OSError``.
    """


class StorageFullError(FileStoreError):
    """Signals that the configured storage capacity would be exceeded."""


class UnsupportedFileTypeError(FileStoreError):
    """Signals that a file's MIME type is not in the allow-list."""


class FileTooLargeError(FileStoreError):
    """Signals that a file is bigger than the per-file size limit."""
39
+
40
class FileStore:
    """
    File storage implementation backed by a local directory.

    Instances are best obtained through the async factory method, which
    validates the storage configuration right away so that storage access
    problems are detected early:

    ```python
    # Create with default settings
    store = await FileStore.create()

    # Specify custom path
    store = await FileStore.create("/path/to/files")

    # Customize all settings
    store = await FileStore.create(
        base_path="/path/to/files",
        max_file_size=100*1024*1024,  # 100MB
        allowed_mime_types={"image/jpeg", "image/png"},
        max_storage_size=10*1024*1024*1024  # 10GB
    )
    ```
    """

    # Fallback limits
    DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
    DEFAULT_MAX_STORAGE_SIZE = 5 * 1024 * 1024 * 1024  # 5GB

    # Fallback allow-list of MIME types
    DEFAULT_ALLOWED_MIME_TYPES = {
        # Images
        'image/jpeg',
        'image/png',
        'image/gif',
        'image/webp',
        'image/svg+xml',
        # Documents
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/json',
        'text/plain',
        'text/csv',
        # Archives
        'application/zip',
        'application/x-tar',
        'application/gzip',
        # Audio formats
        'audio/mpeg',
        'audio/mp3',
        'audio/mp4',
        'audio/aac',
        'audio/flac',
        'audio/ogg',
        'audio/opus',
        'audio/wav',
        'audio/webm',
        'audio/x-m4a',
    }
99
+
100
@classmethod
async def create(cls, base_path: Optional[str] = None, max_file_size: Optional[int] = None,
                 allowed_mime_types: Optional[Set[str]] = None,
                 max_storage_size: Optional[int] = None) -> 'FileStore':
    """
    Factory method to create and validate a FileStore instance.

    After constructing the store, a small probe file is written, read back
    and removed, and the current storage size is computed, so that storage
    access problems surface immediately.

    Args:
        base_path: Base directory for file storage
        max_file_size: Maximum allowed file size in bytes
        allowed_mime_types: Set of allowed MIME types
        max_storage_size: Maximum total storage size in bytes

    Returns:
        Initialized FileStore instance

    Raises:
        FileStoreError: If storage directory cannot be created or accessed
    """
    store = cls(base_path, max_file_size, allowed_mime_types, max_storage_size)

    try:
        # Validate storage by writing and reading a probe file
        test_id = str(uuid.uuid4())
        test_path = store._get_file_path(test_id)
        test_path.parent.mkdir(parents=True, exist_ok=True)
        test_content = b"storage validation"
        try:
            test_path.write_bytes(test_content)
            read_content = test_path.read_bytes()
        finally:
            # Remove the probe even when the write/read failed, and drop its
            # shard directory again unless real files already live in it.
            test_path.unlink(missing_ok=True)
            try:
                test_path.parent.rmdir()
            except OSError:
                pass

        # The common prefix is added once by the handler below
        if read_content != test_content:
            raise FileStoreError("content mismatch")

        # Check that storage stats can be computed
        storage_size = await store.get_storage_size()
        logger.debug(f"Storage validation successful. Current storage size: {storage_size} bytes")

        return store
    except Exception as e:
        logger.error(f"Storage validation failed: {e}")
        # Chain the cause so the original traceback is preserved
        raise FileStoreError(f"Storage validation failed: {e}") from e
146
+
147
def __init__(self, base_path: Optional[str] = None, max_file_size: Optional[int] = None,
             allowed_mime_types: Optional[Set[str]] = None, max_storage_size: Optional[int] = None):
    """Initialize file store

    Args:
        base_path: Base directory for file storage. If not provided,
            uses NARRATOR_FILE_STORAGE_PATH env var or defaults to ~/.narrator/files
        max_file_size: Maximum allowed file size in bytes. If not provided,
            uses NARRATOR_MAX_FILE_SIZE env var or defaults to 50MB
        allowed_mime_types: Set of allowed MIME types. If not provided, uses the
            comma-separated NARRATOR_ALLOWED_MIME_TYPES env var or the class defaults
        max_storage_size: Maximum total storage size in bytes. If not provided,
            uses NARRATOR_MAX_STORAGE_SIZE env var or defaults to 5GB

    Raises:
        FileStoreError: If the storage directory cannot be created
    """

    def _int_setting(explicit: Optional[int], env_var: str, default: int) -> int:
        """Resolve an integer limit: explicit argument, then env var, then default."""
        if explicit is not None:
            return explicit
        raw = os.getenv(env_var)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError:
            logger.warning(f"Invalid {env_var} value: {raw}. Using default.")
            return default

    # Set storage backend type
    self.storage_backend = "local"

    self.max_file_size = _int_setting(
        max_file_size, 'NARRATOR_MAX_FILE_SIZE', self.DEFAULT_MAX_FILE_SIZE
    )
    self.max_storage_size = _int_setting(
        max_storage_size, 'NARRATOR_MAX_STORAGE_SIZE', self.DEFAULT_MAX_STORAGE_SIZE
    )

    # Get allowed MIME types from argument, env var or default.
    # Defaults are copied so that mutating one instance's allow-list never
    # changes the class-level default shared by every other instance.
    env_mime_types = os.getenv('NARRATOR_ALLOWED_MIME_TYPES')
    if allowed_mime_types is not None:
        self.allowed_mime_types = allowed_mime_types
    elif env_mime_types is not None:
        # Split comma-separated list and strip whitespace
        mime_types = {mime.strip() for mime in env_mime_types.split(',')}
        # A MIME type must at least look like "type/subtype"
        invalid_types = [mime for mime in mime_types if '/' not in mime]
        if invalid_types:
            logger.warning(f"Invalid MIME types in NARRATOR_ALLOWED_MIME_TYPES: {invalid_types}. Using default.")
            self.allowed_mime_types = set(self.DEFAULT_ALLOWED_MIME_TYPES)
        else:
            self.allowed_mime_types = mime_types
    else:
        self.allowed_mime_types = set(self.DEFAULT_ALLOWED_MIME_TYPES)

    if base_path:
        self.base_path = Path(base_path)
    else:
        env_path = os.getenv('NARRATOR_FILE_STORAGE_PATH')
        if env_path:
            self.base_path = Path(env_path).expanduser()
        else:
            # Default to ~/.narrator/files
            self.base_path = Path.home() / '.narrator' / 'files'

    # Ensure base directory exists with proper permissions
    self._ensure_directory()
    logger.debug(
        f"Initialized FileStore at {self.base_path} ("
        f"max_file_size={self.max_file_size}, "
        f"max_storage_size={self.max_storage_size}, "
        f"allowed_mime_types={sorted(self.allowed_mime_types)})"
    )
228
+
229
+ def _ensure_directory(self) -> None:
230
+ """Ensure the storage directory exists with proper permissions"""
231
+ try:
232
+ self.base_path.mkdir(parents=True, exist_ok=True)
233
+ # Set directory permissions to 755 (rwxr-xr-x)
234
+ self.base_path.chmod(0o755)
235
+ except Exception as e:
236
+ logger.error(f"Failed to create or set permissions on storage directory {self.base_path}: {e}")
237
+ raise FileStoreError(f"Storage directory initialization failed: {e}")
238
+
239
@classmethod
def get_default_path(cls) -> Path:
    """Return the default storage directory.

    Uses NARRATOR_FILE_STORAGE_PATH (with ``~`` expanded) when it is set to
    a non-empty value, otherwise ``~/.narrator/files``.
    """
    configured = os.getenv('NARRATOR_FILE_STORAGE_PATH')
    if not configured:
        return Path.home() / '.narrator' / 'files'
    return Path(configured).expanduser()
246
+
247
@classmethod
def initialize_storage(cls) -> Path:
    """Create the default storage directory and return its path.

    Intended for application setup, so the directory exists before any
    FileStore is instantiated. The directory is created with parents and
    its mode is set to 755.

    Returns:
        Path to the initialized storage directory
    """
    root = cls.get_default_path()
    root.mkdir(parents=True, exist_ok=True)
    root.chmod(0o755)
    return root
261
+
262
async def validate_file(self, content: bytes, filename: str, mime_type: Optional[str] = None) -> str:
    """Check a file against the size limit and the MIME allow-list.

    When no MIME type is supplied it is detected: first from the content,
    then from the filename extension, and finally it falls back to
    ``application/octet-stream``.

    Args:
        content: File content as bytes
        filename: Original filename
        mime_type: Optional MIME type (will be detected if not provided)

    Returns:
        Validated MIME type

    Raises:
        UnsupportedFileTypeError: If file type is not allowed
        FileTooLargeError: If file exceeds size limit
    """
    size = len(content)
    if size > self.max_file_size:
        raise FileTooLargeError(
            f"File too large: {size} bytes. Maximum allowed: {self.max_file_size} bytes"
        )

    if not mime_type:
        # Content sniffing -> extension lookup -> generic binary
        mime_type = (
            filetype.guess_mime(content)
            or mimetypes.guess_type(filename)[0]
            or 'application/octet-stream'
        )
        logger.debug(f"Detected MIME type for {filename}: {mime_type}")

    if mime_type in self.allowed_mime_types:
        return mime_type
    raise UnsupportedFileTypeError(f"Unsupported file type: {mime_type}")
302
+
303
+ def _get_file_path(self, file_id: str, extension: Optional[str] = None) -> Path:
304
+ """Get full path for file ID using sharded directory structure"""
305
+ # Use first 2 chars of ID as subdirectory to avoid too many files in one dir
306
+ filename = file_id[2:]
307
+ if extension:
308
+ filename = f"{filename}.{extension.lstrip('.')}"
309
+ return self.base_path / file_id[:2] / filename
310
+
311
async def save(self, content: bytes, filename: str, mime_type: Optional[str] = None) -> Dict[str, Any]:
    """Save file to storage

    Args:
        content: File content as bytes
        filename: Original filename; its last suffix is kept as the
            extension of the stored file
        mime_type: Optional MIME type (detected by validate_file if omitted)

    Returns:
        Metadata dict with the keys 'id', 'filename', 'mime_type',
        'storage_path' (relative to the base path), 'storage_backend',
        'created_at' and 'metadata' (containing 'size')

    Raises:
        FileTooLargeError / UnsupportedFileTypeError: From validate_file
        StorageFullError: If the file would exceed max_storage_size
        OSError: If the file cannot be written
    """
    # Validate file
    mime_type = await self.validate_file(content, filename, mime_type)
    size = len(content)

    # Check storage capacity if limit set
    if self.max_storage_size:
        current_size = await self.get_storage_size()
        if size + current_size > self.max_storage_size:
            raise StorageFullError(
                f"Storage full: {current_size} bytes used, {size} bytes needed, "
                f"{self.max_storage_size} bytes maximum"
            )

    # Generate unique ID
    file_id = str(uuid.uuid4())

    # Get file extension from original filename
    extension = Path(filename).suffix.lstrip('.')

    # Get sharded path with extension
    file_path = self._get_file_path(file_id, extension)
    file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        file_path.write_bytes(content)
    except OSError:
        # Do not leave a truncated file behind: it would count toward the
        # storage quota and no metadata would ever reference it.
        try:
            file_path.unlink(missing_ok=True)
            file_path.parent.rmdir()
        except OSError:
            pass  # best effort; shard directory may hold other files
        raise

    metadata = {
        'id': file_id,
        'filename': filename,
        'mime_type': mime_type,
        'storage_path': str(file_path.relative_to(self.base_path)),
        'storage_backend': 'local',
        'created_at': datetime.now(UTC),
        'metadata': {
            'size': size
        }
    }

    logger.debug(f"Saved file {filename} ({size} bytes) to {file_path}")
    logger.debug(f"Successfully stored attachment {filename} with MIME type {mime_type}")
    return metadata
353
+
354
async def get(self, file_id: str, storage_path: Optional[str] = None) -> bytes:
    """Get file content from storage

    Args:
        file_id: The unique file identifier
        storage_path: Optional storage path from metadata (preferred if available)

    Returns:
        File content as bytes

    Raises:
        FileNotFoundError: If file cannot be found, or if the resolved
            location lies outside the storage directory
    """
    if storage_path:
        # Use the exact path from metadata if available
        file_path = self.base_path / storage_path
    else:
        # Fallback to constructing path from ID (legacy support)
        file_path = self._get_file_path(file_id)

    # Refuse absolute paths and ".." segments that would leave the store
    if not file_path.resolve().is_relative_to(self.base_path.resolve()):
        raise FileNotFoundError(f"File {file_id} not found at {file_path}")

    if not storage_path and not file_path.is_file() and file_path.parent.is_dir():
        # Files are stored with their original extension appended, so an
        # ID-only lookup has to match on the name without its suffix.
        candidates = sorted(
            entry for entry in file_path.parent.iterdir()
            if entry.is_file() and entry.stem == file_path.name
        )
        if candidates:
            file_path = candidates[0]

    # is_file() rather than exists(): a directory is not a stored file
    if not file_path.is_file():
        raise FileNotFoundError(f"File {file_id} not found at {file_path}")

    return file_path.read_bytes()
378
+
379
async def delete(self, file_id: str, storage_path: Optional[str] = None) -> None:
    """Delete file from storage

    The file's shard directory is removed as well when it ends up empty;
    the storage root itself is never removed.

    Args:
        file_id: The unique file identifier
        storage_path: Optional storage path from metadata (preferred if available)

    Raises:
        FileNotFoundError: If file cannot be found, or if the resolved
            location lies outside the storage directory
    """
    if storage_path:
        # Use the exact path from metadata if available
        file_path = self.base_path / storage_path
    else:
        # Fallback to constructing path from ID (legacy support)
        file_path = self._get_file_path(file_id)

    # Refuse absolute paths and ".." segments that would leave the store
    storage_root = self.base_path.resolve()
    if not file_path.resolve().is_relative_to(storage_root):
        raise FileNotFoundError(f"File {file_id} not found at {file_path}")

    if not storage_path and not file_path.is_file() and file_path.parent.is_dir():
        # Files are stored with their original extension appended, so an
        # ID-only lookup has to match on the name without its suffix.
        candidates = sorted(
            entry for entry in file_path.parent.iterdir()
            if entry.is_file() and entry.stem == file_path.name
        )
        if candidates:
            file_path = candidates[0]

    if not file_path.is_file():
        raise FileNotFoundError(f"File {file_id} not found at {file_path}")

    file_path.unlink()

    # Try to remove the shard directory if empty, but never the root
    shard_dir = file_path.parent
    if shard_dir.resolve() != storage_root:
        try:
            shard_dir.rmdir()
        except OSError:
            # Directory not empty, ignore
            pass
404
+
405
async def get_storage_size(self) -> int:
    """Return the combined size in bytes of every file under the base path."""
    return sum(
        entry.stat().st_size
        for entry in self.base_path.rglob('*')
        if entry.is_file()
    )
412
+
413
async def get_file_count(self) -> int:
    """Return how many regular files exist under the base path (directories are not counted)."""
    return sum(1 for entry in self.base_path.rglob('*') if entry.is_file())
420
+
421
async def check_health(self) -> Dict[str, Any]:
    """Report storage health together with basic metrics.

    Returns:
        On success a dict with 'healthy' (True), 'total_size',
        'file_count' and an empty 'errors' list; if gathering the metrics
        raises, a dict with 'healthy' (False) and the error text in 'errors'.
    """
    try:
        size_in_bytes = await self.get_storage_size()
        number_of_files = await self.get_file_count()
    except Exception as exc:
        return {'healthy': False, 'errors': [str(exc)]}

    return {
        'healthy': True,
        'total_size': size_in_bytes,
        'file_count': number_of_files,
        'errors': [],
    }
437
+
438
async def batch_save(self, files: List[Tuple[bytes, str, str]]) -> List[Dict[str, Any]]:
    """Save several files one after another.

    Args:
        files: List of tuples (content, filename, mime_type)

    Returns:
        List of file metadata dictionaries, in input order
    """
    saved: List[Dict[str, Any]] = []
    for content, filename, mime_type in files:
        saved.append(await self.save(content, filename, mime_type))
    return saved
449
+
450
async def batch_delete(self, file_ids: List[str]) -> None:
    """Delete several files by ID, running the deletions concurrently."""
    pending = map(self.delete, file_ids)
    await asyncio.gather(*pending)
453
+
454
async def cleanup_orphaned_files(self, session) -> Tuple[int, List[str]]:
    """Clean up files that aren't referenced in the database

    Only files stored in the sharded layout (``<shard>/<name>``) are
    considered; anything else under the base path is left untouched.

    Args:
        session: SQLAlchemy async session

    Returns:
        Tuple of (number of files deleted, list of errors)
    """
    # Import here to avoid circular dependency
    from ..database.models import MessageRecord

    # Get all file IDs from messages
    query = select(MessageRecord.attachments)
    result = await session.execute(query)
    db_files = set()
    for row in result.scalars():
        if not row:
            continue
        for attachment in row:
            if attachment.get('file_id'):
                db_files.add(attachment['file_id'])

    # Map every stored file ID to its path relative to the base path.
    # Deleting by ID alone cannot locate files that were saved with an
    # extension, so the real path is remembered and passed to delete().
    stored_files: Dict[str, str] = {}
    for path in self.base_path.rglob('*'):
        if not path.is_file():
            continue
        relative = path.relative_to(self.base_path)
        if len(relative.parts) != 2:
            # Not in the <shard>/<name> layout, so no ID can be derived
            continue
        stored_files[path.parent.name + path.stem] = str(relative)

    # Delete orphaned files
    errors = []
    deleted = 0
    for file_id, storage_path in stored_files.items():
        if file_id in db_files:
            continue
        try:
            await self.delete(file_id, storage_path)
            deleted += 1
        except Exception as e:
            errors.append(f"Failed to delete {file_id}: {str(e)}")

    return deleted, errors
493
+
494
async def list_files(self) -> List[str]:
    """Return the IDs of all files found under the base path.

    Each ID is rebuilt from the file's location: the name of its parent
    directory followed by the filename without its final extension.
    """
    return [
        entry.parent.name + entry.stem
        for entry in self.base_path.rglob('*')
        if entry.is_file()
    ]
503
+
504
+ # Note: data URL handling is performed at the Attachment layer where the content type is known.
505
+
506
@classmethod
def get_base_path(cls) -> str:
    """Get the base storage path from environment variable

    Returns:
        NARRATOR_FILE_STORAGE_PATH with a leading ``~`` expanded when the
        variable is set to a non-empty value, otherwise the default
        ``~/.narrator/files`` location as a string
    """
    env_path = os.getenv('NARRATOR_FILE_STORAGE_PATH')
    if not env_path:
        # For the extracted package, we'll use a default instead of raising an error
        return str(Path.home() / '.narrator' / 'files')
    # Expand "~" like the other path lookups do, so all of them point at the
    # same directory; os.path.expanduser leaves every other path unchanged.
    return os.path.expanduser(env_path)
514
+
515
@classmethod
def get_file_url(cls, relative_path: str) -> str:
    """
    Join the base storage path and a relative path into one location string.

    At most one trailing slash of the base path and one leading slash of
    the relative path are dropped before the two are joined with "/".

    Args:
        relative_path: The path relative to the base storage path

    Returns:
        The base storage path and the relative path joined by a single slash
    """
    root = cls.get_base_path().removesuffix('/')
    tail = relative_path.removeprefix('/')
    return f"{root}/{tail}"
@@ -0,0 +1,9 @@
1
+ """
2
+ Utilities package for Tyler Stores
3
+ """
4
+
5
+ from .logging import get_logger
6
+
7
+ __all__ = [
8
+ "get_logger",
9
+ ]
@@ -0,0 +1,52 @@
1
+ """Logging configuration for narrator package."""
2
+ import os
3
+ import logging
4
+ from typing import Optional
5
+
6
+ class _NarratorNullHandler(logging.Handler):
7
+ def emit(self, record):
8
+ pass
9
+
10
_is_configured = False


def _ensure_logging_configured():
    """Set up the 'narrator' logger once, leaving application config alone.

    Attaches a ``_NarratorNullHandler`` unless one is already present and,
    when NARRATOR_LOG_LEVEL (or LOG_LEVEL) names an integer attribute of
    the ``logging`` module, applies it as the logger's level. Later calls
    return immediately.
    """
    global _is_configured
    if _is_configured:
        return

    narrator_logger = logging.getLogger('narrator')

    # Avoid duplicate handlers
    already_attached = False
    for handler in narrator_logger.handlers:
        if isinstance(handler, _NarratorNullHandler):
            already_attached = True
            break
    if not already_attached:
        narrator_logger.addHandler(_NarratorNullHandler())

    # Respect env level but do not call basicConfig or force reconfigure
    level_name = os.getenv('NARRATOR_LOG_LEVEL', os.getenv('LOG_LEVEL', '')).upper()
    resolved = getattr(logging, level_name, None) if level_name else None
    if isinstance(resolved, int):
        narrator_logger.setLevel(resolved)

    _is_configured = True
31
+
32
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """Return a logger after making sure package logging is configured.

    The one-time configuration runs on the first call and takes the level
    from the NARRATOR_LOG_LEVEL (or LOG_LEVEL) environment variable.

    Args:
        name: The name for the logger. When None or empty, the logger
            named 'narrator.unknown' is returned.

    Returns:
        The ``logging.Logger`` registered under the resolved name.

    Usage:
        # In any file:
        from narrator.utils.logging import get_logger
        logger = get_logger(__name__)
        logger.debug("Debug message")
    """
    _ensure_logging_configured()
    logger_name = name if name else 'narrator.unknown'
    return logging.getLogger(logger_name)